From 7e8fb4b6b21bb3f0cf0c8433d12ce812c3ac1e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 17:57:38 +0200 Subject: [PATCH 1/4] Fix size inference with broadcast --- src/sizes.jl | 54 ++++++++++------------------ test/ArrayDiff.jl | 4 +-- test/JuMP.jl | 91 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 110 insertions(+), 39 deletions(-) diff --git a/src/sizes.jl b/src/sizes.jl index baf8f7e..de1d39d 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -346,47 +346,29 @@ function _infer_sizes( continue end op = DEFAULT_MULTIVARIATE_OPERATORS[node.index] - if op == :+ || op == :- - # Broadcasted +/- preserves shape - _copy_size!(sizes, k, children_arr[first(children_indices)]) - elseif op == :^ - # Broadcasted ^ with scalar exponent preserves base shape - _copy_size!(sizes, k, children_arr[first(children_indices)]) - elseif op == :* - # TODO assert compatible sizes and all ndims should be 0 or 2 + if op == :+ || op == :- || op == :* + # Broadcasted +/-/* takes the largest child's shape (for + # scalar+matrix that's the matrix; for matrix+matrix the + # shapes must match). first_matrix = findfirst(children_indices) do i return !iszero(sizes.ndims[children_arr[i]]) end - if !isnothing(first_matrix) - if sizes.ndims[children_arr[first(children_indices)]] == 0 - _add_size!(sizes, k, (1, 1)) - continue - else - if sizes.ndims[children_arr[first(children_indices)]] == - 1 - nb_cols = 1 - else - nb_cols = _size( - sizes, - children_arr[first(children_indices)], - 1, - ) - end - _add_size!( - sizes, - k, - ( - _size( - sizes, - children_arr[first(children_indices)], - 1, - ), - nb_cols, - ), - ) - continue + if isnothing(first_matrix) + _copy_size!(sizes, k, children_arr[first(children_indices)]) + else + ref = children_arr[children_indices[first_matrix]] + for c_idx in children_indices + ix = children_arr[c_idx] + iszero(sizes.ndims[ix]) && continue + @assert _size(sizes, ix) == _size(sizes, ref) "Incompatible array shapes $(Tuple(_size(sizes, ix))) and $(Tuple(_size(sizes, ref))) for broadcasted operator `$op`" end + _copy_size!(sizes, k, ref) end + elseif op == :^ + # Broadcasted ^ with scalar exponent preserves base shape + @assert length(children_indices) == 2 "Expected two arguments for broadcasted operator `$op`, got $(length(children_indices))" + @assert iszero(sizes.ndims[children_arr[children_indices[2]]]) "Expected scalar exponent for broadcasted operator `$op`" + _copy_size!(sizes, k, children_arr[first(children_indices)]) end elseif node.type == NODE_CALL_UNIVARIATE if !( diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl index 99ef909..067eae8 100644 --- a/test/ArrayDiff.jl +++ b/test/ArrayDiff.jl @@ -607,9 +607,9 @@ function test_objective_broadcasted_product() evaluator = ArrayDiff.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4]) MOI.initialize(evaluator, [:Grad]) sizes = evaluator.backend.objective.expr.sizes - @test sizes.ndims == [0, 2, 1, 0, 0, 1, 0, 0] + @test sizes.ndims == [0, 1, 1, 0, 0, 1, 0, 0] @test sizes.size_offset == [0, 2, 1, 0, 0, 0, 0, 0] - @test sizes.size == [2, 2, 2, 1] + @test sizes.size == [2, 2, 2] @test sizes.storage_offset == [0, 1, 3, 5, 6, 7, 9, 10, 11] x1 = 1.0 x2 = 2.0 diff --git a/test/JuMP.jl b/test/JuMP.jl index 9901621..0867a51 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -8,7 +8,7 @@ import LinearAlgebra import MathOptInterface as MOI function runtests() - for name in names(@__MODULE__; all = true) + for name in names(@__MODULE__; all=true) if startswith("$(name)", "test_") @testset "$(name)" begin getfield(@__MODULE__, name)() @@ -428,6 +428,95 @@ function test_size_vec_vect() return end +# Build an evaluator for `expr` with `vars` as the variable order. Returns the +# inferred sizes plus the evaluated objective and gradient at `x`. +function _build_and_eval(expr, vars, x) + mode = ArrayDiff.Mode() + ad = ArrayDiff.model(mode) + MOI.Nonlinear.set_objective(ad, JuMP.moi_function(expr)) + evaluator = MOI.Nonlinear.Evaluator(ad, mode, JuMP.index.(vars)) + MOI.initialize(evaluator, [:Grad]) + sizes = evaluator.backend.objective.expr.sizes + val = MOI.eval_objective(evaluator, x) + g = zero(x) + MOI.eval_objective_gradient(evaluator, g, x) + return sizes, val, g +end + +# The previous size-inference code special-cased broadcasted `+/-/*` with a +# `nb_cols` formula that happened to match for square matrices. A 2x3 input +# shape exercises the bug: with the old code the result would be reported as +# (2, 2) instead of (2, 3), and `eval_objective` would read past the tape. +function test_broadcast_nonsquare_matrix() + model = Model() + @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables) + Y = [10.0 20.0 30.0; 40.0 50.0 60.0] + x = Float64.(collect(1:6)) + W_val = reshape(x, 2, 3) + @testset "$(op)" for (op, expr, ref_mat) in [ + (:+, LinearAlgebra.norm(W .+ Y), W_val .+ Y), + (:-, LinearAlgebra.norm(W .- Y), W_val .- Y), + (:*, LinearAlgebra.norm(W .* W), W_val .* W_val), + ] + sizes, val, g = _build_and_eval(expr, JuMP.all_variables(model), x) + # Outer norm scalar, then the broadcasted op produces a 2x3 matrix, + # then the two 2x3 leaves: 4 nodes, three of them ndims=2 with size + # (2, 3). The old bug would report (2, 2) for the broadcast node. + @test sizes.ndims == [0, 2, 2, 2] + @test sizes.size == [2, 3, 2, 3, 2, 3] + @test sizes.size_offset == [0, 4, 2, 0] + @test sizes.storage_offset == [0, 1, 7, 13, 19] + @test val ≈ LinearAlgebra.norm(ref_mat) + ref_g = if op == :+ + vec(W_val .+ Y) ./ LinearAlgebra.norm(ref_mat) + elseif op == :- + vec(W_val .- Y) ./ LinearAlgebra.norm(ref_mat) + else # :* + # d(norm(W .* W))/dW = 2 .* W .^ 3 / norm(W .* W) + vec(2 .* W_val .^ 3) ./ LinearAlgebra.norm(ref_mat) + end + @test g ≈ ref_g + end + return +end + +# The fix replaced the old `(1, 1)` stub for `scalar .op matrix` with a copy +# of the matrix child's full shape. Eval/reverse for these mixed-rank +# broadcasts isn't implemented yet (and is out of scope for the fix), so we +# only assert the inferred shape — that's where the previous code was wrong. +function test_broadcast_scalar_matrix_size_inference() + model = Model() + @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables) + mode = ArrayDiff.Mode() + @testset "$(name)" for (name, expr) in [ + ("scalar .* M", LinearAlgebra.norm(2.5 .* W)), + ("M .* scalar", LinearAlgebra.norm(W .* 2.5)), + ("scalar .+ M", LinearAlgebra.norm(2.5 .+ W)), + ("M .+ scalar", LinearAlgebra.norm(W .+ 2.5)), + ("scalar .- M", LinearAlgebra.norm(2.5 .- W)), + ("M .- scalar", LinearAlgebra.norm(W .- 2.5)), + ] + ad = ArrayDiff.model(mode) + MOI.Nonlinear.set_objective(ad, JuMP.moi_function(expr)) + evaluator = MOI.Nonlinear.Evaluator( + ad, + mode, + JuMP.index.(JuMP.all_variables(model)), + ) + MOI.initialize(evaluator, [:Grad]) + sizes = evaluator.backend.objective.expr.sizes + # Broadcast node is at index 2; it should inherit the matrix child's + # (2, 3) shape, not the old `(1, 1)` stub. + @test sizes.ndims[2] == 2 + broadcast_size_off = sizes.size_offset[2] + @test sizes.size[broadcast_size_off+1] == 2 + @test sizes.size[broadcast_size_off+2] == 3 + # And the scalar leaf among the children stays ndims=0. + @test 0 in sizes.ndims[3:4] + end + return +end + end # module TestJuMP.runtests() From 927fa69c28e6ed7c016f1ac9116e9832727d28e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 18:03:09 +0200 Subject: [PATCH 2/4] Simplify tests --- test/JuMP.jl | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/test/JuMP.jl b/test/JuMP.jl index 0867a51..78277fd 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -168,8 +168,6 @@ function _eval( model::JuMP.GenericModel{T}, func, x, - obj_val, - grad_val, ) where {T} mode = ArrayDiff.Mode{Vector{T}}() ad = ArrayDiff.model(mode) @@ -180,20 +178,20 @@ function _eval( JuMP.index.(JuMP.all_variables(model)), ) MOI.initialize(evaluator, [:Grad]) - x_grad = T.(collect(1:8)) - @test MOI.eval_objective(evaluator, x) ≈ obj_val + sizes = evaluator.backend.objective.expr.sizes + val = MOI.eval_objective(evaluator, x) if VERSION >= v"1.12" @test 0 == @allocated MOI.eval_objective(evaluator, x) end + x_grad = T.(collect(1:8)) g = zero(x) MOI.eval_objective_gradient(evaluator, g, x_grad) - @test g ≈ grad_val if VERSION >= v"1.12" @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x_grad) end MOI.Nonlinear.set_objective(ad, nothing) @test isnothing(ad.objective) - return + return sizes, val, g end function _test_neural( @@ -280,7 +278,9 @@ function _test_neural( else grad_val = grad_sumsq end - _eval(model, loss, [vec(W1_val); vec(W2_val)], obj_val, grad_val) + _, val, g = _eval(model, loss, [vec(W1_val); vec(W2_val)]) + @test obj_val ≈ val + @test grad_val ≈ g return end @@ -428,21 +428,6 @@ function test_size_vec_vect() return end -# Build an evaluator for `expr` with `vars` as the variable order. Returns the -# inferred sizes plus the evaluated objective and gradient at `x`. -function _build_and_eval(expr, vars, x) - mode = ArrayDiff.Mode() - ad = ArrayDiff.model(mode) - MOI.Nonlinear.set_objective(ad, JuMP.moi_function(expr)) - evaluator = MOI.Nonlinear.Evaluator(ad, mode, JuMP.index.(vars)) - MOI.initialize(evaluator, [:Grad]) - sizes = evaluator.backend.objective.expr.sizes - val = MOI.eval_objective(evaluator, x) - g = zero(x) - MOI.eval_objective_gradient(evaluator, g, x) - return sizes, val, g -end - # The previous size-inference code special-cased broadcasted `+/-/*` with a # `nb_cols` formula that happened to match for square matrices. A 2x3 input # shape exercises the bug: with the old code the result would be reported as @@ -458,7 +443,7 @@ function test_broadcast_nonsquare_matrix() (:-, LinearAlgebra.norm(W .- Y), W_val .- Y), (:*, LinearAlgebra.norm(W .* W), W_val .* W_val), ] - sizes, val, g = _build_and_eval(expr, JuMP.all_variables(model), x) + sizes, val, g = _eval(model, expr, x) # Outer norm scalar, then the broadcasted op produces a 2x3 matrix, # then the two 2x3 leaves: 4 nodes, three of them ndims=2 with size # (2, 3). The old bug would report (2, 2) for the broadcast node. From 55704e76644c276c9893627fc8672fa4ba67155e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 18:25:29 +0200 Subject: [PATCH 3/4] Fix format --- test/JuMP.jl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/JuMP.jl b/test/JuMP.jl index 78277fd..209bfa3 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -8,7 +8,7 @@ import LinearAlgebra import MathOptInterface as MOI function runtests() - for name in names(@__MODULE__; all=true) + for name in names(@__MODULE__; all = true) if startswith("$(name)", "test_") @testset "$(name)" begin getfield(@__MODULE__, name)() @@ -164,11 +164,7 @@ function test_parse_moi() return end -function _eval( - model::JuMP.GenericModel{T}, - func, - x, -) where {T} +function _eval(model::JuMP.GenericModel{T}, func, x) where {T} mode = ArrayDiff.Mode{Vector{T}}() ad = ArrayDiff.model(mode) MOI.Nonlinear.set_objective(ad, JuMP.moi_function(func)) From a8a4c1762bdfa22594feda437e014d3ba77f1544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 19:30:17 +0200 Subject: [PATCH 4/4] Simplify --- src/sizes.jl | 32 ++++++++++++++++++-------------- test/JuMP.jl | 8 -------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/sizes.jl b/src/sizes.jl index de1d39d..462f3b0 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -347,22 +347,26 @@ function _infer_sizes( end op = DEFAULT_MULTIVARIATE_OPERATORS[node.index] if op == :+ || op == :- || op == :* - # Broadcasted +/-/* takes the largest child's shape (for - # scalar+matrix that's the matrix; for matrix+matrix the - # shapes must match). - first_matrix = findfirst(children_indices) do i - return !iszero(sizes.ndims[children_arr[i]]) + sizes.ndims[k] = maximum(children_indices, init = 0) do i + return sizes.ndims[children_arr[i]] end - if isnothing(first_matrix) - _copy_size!(sizes, k, children_arr[first(children_indices)]) - else - ref = children_arr[children_indices[first_matrix]] - for c_idx in children_indices - ix = children_arr[c_idx] - iszero(sizes.ndims[ix]) && continue - @assert _size(sizes, ix) == _size(sizes, ref) "Incompatible array shapes $(Tuple(_size(sizes, ix))) and $(Tuple(_size(sizes, ref))) for broadcasted operator `$op`" + sizes.size_offset[k] = length(sizes.size) + for _ in 1:sizes.ndims[k] + push!(sizes.size, 1) + end + sz_parent = _size(sizes, k) + for i in children_indices + id = children_arr[i] + sz = _size(sizes, id) + for j in eachindex(sz) + if sz[j] > 1 + if sz_parent[j] == 1 + sz_parent[j] = sz[j] + else + @assert sz_parent[j] == sz[j] + end + end end - _copy_size!(sizes, k, ref) end elseif op == :^ # Broadcasted ^ with scalar exponent preserves base shape diff --git a/test/JuMP.jl b/test/JuMP.jl index 209bfa3..533e631 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -424,10 +424,6 @@ function test_size_vec_vect() return end -# The previous size-inference code special-cased broadcasted `+/-/*` with a -# `nb_cols` formula that happened to match for square matrices. A 2x3 input -# shape exercises the bug: with the old code the result would be reported as -# (2, 2) instead of (2, 3), and `eval_objective` would read past the tape. function test_broadcast_nonsquare_matrix() model = Model() @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables) @@ -461,10 +457,6 @@ function test_broadcast_nonsquare_matrix() return end -# The fix replaced the old `(1, 1)` stub for `scalar .op matrix` with a copy -# of the matrix child's full shape. Eval/reverse for these mixed-rank -# broadcasts isn't implemented yet (and is out of scope for the fix), so we -# only assert the inferred shape — that's where the previous code was wrong. function test_broadcast_scalar_matrix_size_inference() model = Model() @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables)