From 7e8fb4b6b21bb3f0cf0c8433d12ce812c3ac1e32 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Legat?= <benoit.legat@gmail.com>
Date: Thu, 14 May 2026 17:57:38 +0200
Subject: [PATCH 1/4] Fix size inference with broadcast

---
 src/sizes.jl      | 54 ++++++++++------------------
 test/ArrayDiff.jl |  4 +--
 test/JuMP.jl      | 91 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 110 insertions(+), 39 deletions(-)

diff --git a/src/sizes.jl b/src/sizes.jl
index baf8f7e..de1d39d 100644
--- a/src/sizes.jl
+++ b/src/sizes.jl
@@ -346,47 +346,29 @@ function _infer_sizes(
                 continue
             end
             op = DEFAULT_MULTIVARIATE_OPERATORS[node.index]
-            if op == :+ || op == :-
-                # Broadcasted +/- preserves shape
-                _copy_size!(sizes, k, children_arr[first(children_indices)])
-            elseif op == :^
-                # Broadcasted ^ with scalar exponent preserves base shape
-                _copy_size!(sizes, k, children_arr[first(children_indices)])
-            elseif op == :*
-                # TODO assert compatible sizes and all ndims should be 0 or 2
+            if op == :+ || op == :- || op == :*
+                # Broadcasted +/-/* takes the largest child's shape (for
+                # scalar+matrix that's the matrix; for matrix+matrix the
+                # shapes must match).
                 first_matrix = findfirst(children_indices) do i
                     return !iszero(sizes.ndims[children_arr[i]])
                 end
-                if !isnothing(first_matrix)
-                    if sizes.ndims[children_arr[first(children_indices)]] == 0
-                        _add_size!(sizes, k, (1, 1))
-                        continue
-                    else
-                        if sizes.ndims[children_arr[first(children_indices)]] ==
-                           1
-                            nb_cols = 1
-                        else
-                            nb_cols = _size(
-                                sizes,
-                                children_arr[first(children_indices)],
-                                1,
-                            )
-                        end
-                        _add_size!(
-                            sizes,
-                            k,
-                            (
-                                _size(
-                                    sizes,
-                                    children_arr[first(children_indices)],
-                                    1,
-                                ),
-                                nb_cols,
-                            ),
-                        )
-                        continue
+                if isnothing(first_matrix)
+                    _copy_size!(sizes, k, children_arr[first(children_indices)])
+                else
+                    ref = children_arr[children_indices[first_matrix]]
+                    for c_idx in children_indices
+                        ix = children_arr[c_idx]
+                        iszero(sizes.ndims[ix]) && continue
+                        @assert _size(sizes, ix) == _size(sizes, ref) "Incompatible array shapes $(Tuple(_size(sizes, ix))) and $(Tuple(_size(sizes, ref))) for broadcasted operator `$op`"
                     end
+                    _copy_size!(sizes, k, ref)
                 end
+            elseif op == :^
+                # Broadcasted ^ with scalar exponent preserves base shape
+                @assert length(children_indices) == 2 "Expected two arguments for broadcasted operator `$op`, got $(length(children_indices))"
+                @assert iszero(sizes.ndims[children_arr[children_indices[2]]]) "Expected scalar exponent for broadcasted operator `$op`"
+                _copy_size!(sizes, k, children_arr[first(children_indices)])
             end
         elseif node.type == NODE_CALL_UNIVARIATE
             if !(
diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl
index 99ef909..067eae8 100644
--- a/test/ArrayDiff.jl
+++ b/test/ArrayDiff.jl
@@ -607,9 +607,9 @@ function test_objective_broadcasted_product()
     evaluator = ArrayDiff.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4])
     MOI.initialize(evaluator, [:Grad])
     sizes = evaluator.backend.objective.expr.sizes
-    @test sizes.ndims == [0, 2, 1, 0, 0, 1, 0, 0]
+    @test sizes.ndims == [0, 1, 1, 0, 0, 1, 0, 0]
     @test sizes.size_offset == [0, 2, 1, 0, 0, 0, 0, 0]
-    @test sizes.size == [2, 2, 2, 1]
+    @test sizes.size == [2, 2, 2]
     @test sizes.storage_offset == [0, 1, 3, 5, 6, 7, 9, 10, 11]
     x1 = 1.0
     x2 = 2.0
diff --git a/test/JuMP.jl b/test/JuMP.jl
index 9901621..0867a51 100644
--- a/test/JuMP.jl
+++ b/test/JuMP.jl
@@ -8,7 +8,7 @@ import LinearAlgebra
 import MathOptInterface as MOI
 
 function runtests()
-    for name in names(@__MODULE__; all = true)
+    for name in names(@__MODULE__; all=true)
         if startswith("$(name)", "test_")
             @testset "$(name)" begin
                 getfield(@__MODULE__, name)()
@@ -428,6 +428,95 @@ function test_size_vec_vect()
     return
 end
 
+# Build an evaluator for `expr` with `vars` as the variable order. Returns the
+# inferred sizes plus the evaluated objective and gradient at `x`.
+function _build_and_eval(expr, vars, x)
+    mode = ArrayDiff.Mode()
+    ad = ArrayDiff.model(mode)
+    MOI.Nonlinear.set_objective(ad, JuMP.moi_function(expr))
+    evaluator = MOI.Nonlinear.Evaluator(ad, mode, JuMP.index.(vars))
+    MOI.initialize(evaluator, [:Grad])
+    sizes = evaluator.backend.objective.expr.sizes
+    val = MOI.eval_objective(evaluator, x)
+    g = zero(x)
+    MOI.eval_objective_gradient(evaluator, g, x)
+    return sizes, val, g
+end
+
+# The previous size-inference code special-cased broadcasted `+/-/*` with a
+# `nb_cols` formula that happened to match for square matrices. A 2x3 input
+# shape exercises the bug: with the old code the result would be reported as
+# (2, 2) instead of (2, 3), and `eval_objective` would read past the tape.
+function test_broadcast_nonsquare_matrix()
+    model = Model()
+    @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables)
+    Y = [10.0 20.0 30.0; 40.0 50.0 60.0]
+    x = Float64.(collect(1:6))
+    W_val = reshape(x, 2, 3)
+    @testset "$(op)" for (op, expr, ref_mat) in [
+        (:+, LinearAlgebra.norm(W .+ Y), W_val .+ Y),
+        (:-, LinearAlgebra.norm(W .- Y), W_val .- Y),
+        (:*, LinearAlgebra.norm(W .* W), W_val .* W_val),
+    ]
+        sizes, val, g = _build_and_eval(expr, JuMP.all_variables(model), x)
+        # Outer norm scalar, then the broadcasted op produces a 2x3 matrix,
+        # then the two 2x3 leaves: 4 nodes, three of them ndims=2 with size
+        # (2, 3). The old bug would report (2, 2) for the broadcast node.
+        @test sizes.ndims == [0, 2, 2, 2]
+        @test sizes.size == [2, 3, 2, 3, 2, 3]
+        @test sizes.size_offset == [0, 4, 2, 0]
+        @test sizes.storage_offset == [0, 1, 7, 13, 19]
+        @test val ≈ LinearAlgebra.norm(ref_mat)
+        ref_g = if op == :+
+            vec(W_val .+ Y) ./ LinearAlgebra.norm(ref_mat)
+        elseif op == :-
+            vec(W_val .- Y) ./ LinearAlgebra.norm(ref_mat)
+        else  # :*
+            # d(norm(W .* W))/dW = 2 .* W .^ 3 / norm(W .* W)
+            vec(2 .* W_val .^ 3) ./ LinearAlgebra.norm(ref_mat)
+        end
+        @test g ≈ ref_g
+    end
+    return
+end
+
+# The fix replaced the old `(1, 1)` stub for `scalar .op matrix` with a copy
+# of the matrix child's full shape. Eval/reverse for these mixed-rank
+# broadcasts isn't implemented yet (and is out of scope for the fix), so we
+# only assert the inferred shape — that's where the previous code was wrong.
+function test_broadcast_scalar_matrix_size_inference()
+    model = Model()
+    @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables)
+    mode = ArrayDiff.Mode()
+    @testset "$(name)" for (name, expr) in [
+        ("scalar .* M", LinearAlgebra.norm(2.5 .* W)),
+        ("M .* scalar", LinearAlgebra.norm(W .* 2.5)),
+        ("scalar .+ M", LinearAlgebra.norm(2.5 .+ W)),
+        ("M .+ scalar", LinearAlgebra.norm(W .+ 2.5)),
+        ("scalar .- M", LinearAlgebra.norm(2.5 .- W)),
+        ("M .- scalar", LinearAlgebra.norm(W .- 2.5)),
+    ]
+        ad = ArrayDiff.model(mode)
+        MOI.Nonlinear.set_objective(ad, JuMP.moi_function(expr))
+        evaluator = MOI.Nonlinear.Evaluator(
+            ad,
+            mode,
+            JuMP.index.(JuMP.all_variables(model)),
+        )
+        MOI.initialize(evaluator, [:Grad])
+        sizes = evaluator.backend.objective.expr.sizes
+        # Broadcast node is at index 2; it should inherit the matrix child's
+        # (2, 3) shape, not the old `(1, 1)` stub.
+        @test sizes.ndims[2] == 2
+        broadcast_size_off = sizes.size_offset[2]
+        @test sizes.size[broadcast_size_off+1] == 2
+        @test sizes.size[broadcast_size_off+2] == 3
+        # And the scalar leaf among the children stays ndims=0.
+        @test 0 in sizes.ndims[3:4]
+    end
+    return
+end
+
 end  # module
 
 TestJuMP.runtests()

From 927fa69c28e6ed7c016f1ac9116e9832727d28e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Legat?= <benoit.legat@gmail.com>
Date: Thu, 14 May 2026 18:03:09 +0200
Subject: [PATCH 2/4] Simplify tests

---
 test/JuMP.jl | 31 ++++++++-----------------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/test/JuMP.jl b/test/JuMP.jl
index 0867a51..78277fd 100644
--- a/test/JuMP.jl
+++ b/test/JuMP.jl
@@ -168,8 +168,6 @@ function _eval(
     model::JuMP.GenericModel{T},
     func,
     x,
-    obj_val,
-    grad_val,
 ) where {T}
     mode = ArrayDiff.Mode{Vector{T}}()
     ad = ArrayDiff.model(mode)
@@ -180,20 +178,20 @@ function _eval(
         JuMP.index.(JuMP.all_variables(model)),
     )
     MOI.initialize(evaluator, [:Grad])
-    x_grad = T.(collect(1:8))
-    @test MOI.eval_objective(evaluator, x) ≈ obj_val
+    sizes = evaluator.backend.objective.expr.sizes
+    val = MOI.eval_objective(evaluator, x)
     if VERSION >= v"1.12"
         @test 0 == @allocated MOI.eval_objective(evaluator, x)
     end
+    x_grad = T.(collect(1:8))
     g = zero(x)
     MOI.eval_objective_gradient(evaluator, g, x_grad)
-    @test g ≈ grad_val
     if VERSION >= v"1.12"
         @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x_grad)
     end
     MOI.Nonlinear.set_objective(ad, nothing)
     @test isnothing(ad.objective)
-    return
+    return sizes, val, g
 end
 
 function _test_neural(
@@ -280,7 +278,9 @@ function _test_neural(
     else
         grad_val = grad_sumsq
     end
-    _eval(model, loss, [vec(W1_val); vec(W2_val)], obj_val, grad_val)
+    _, val, g = _eval(model, loss, [vec(W1_val); vec(W2_val)])
+    @test obj_val ≈ val
+    @test grad_val ≈ g
     return
 end
 
@@ -428,21 +428,6 @@ function test_size_vec_vect()
     return
 end
 
-# Build an evaluator for `expr` with `vars` as the variable order. Returns the
-# inferred sizes plus the evaluated objective and gradient at `x`.
-function _build_and_eval(expr, vars, x)
-    mode = ArrayDiff.Mode()
-    ad = ArrayDiff.model(mode)
-    MOI.Nonlinear.set_objective(ad, JuMP.moi_function(expr))
-    evaluator = MOI.Nonlinear.Evaluator(ad, mode, JuMP.index.(vars))
-    MOI.initialize(evaluator, [:Grad])
-    sizes = evaluator.backend.objective.expr.sizes
-    val = MOI.eval_objective(evaluator, x)
-    g = zero(x)
-    MOI.eval_objective_gradient(evaluator, g, x)
-    return sizes, val, g
-end
-
 # The previous size-inference code special-cased broadcasted `+/-/*` with a
 # `nb_cols` formula that happened to match for square matrices. A 2x3 input
 # shape exercises the bug: with the old code the result would be reported as
@@ -458,7 +443,7 @@ function test_broadcast_nonsquare_matrix()
         (:-, LinearAlgebra.norm(W .- Y), W_val .- Y),
         (:*, LinearAlgebra.norm(W .* W), W_val .* W_val),
     ]
-        sizes, val, g = _build_and_eval(expr, JuMP.all_variables(model), x)
+        sizes, val, g = _eval(model, expr, x)
         # Outer norm scalar, then the broadcasted op produces a 2x3 matrix,
         # then the two 2x3 leaves: 4 nodes, three of them ndims=2 with size
         # (2, 3). The old bug would report (2, 2) for the broadcast node.

From 55704e76644c276c9893627fc8672fa4ba67155e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Legat?= <benoit.legat@gmail.com>
Date: Thu, 14 May 2026 18:25:29 +0200
Subject: [PATCH 3/4] Fix format

---
 test/JuMP.jl | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/test/JuMP.jl b/test/JuMP.jl
index 78277fd..209bfa3 100644
--- a/test/JuMP.jl
+++ b/test/JuMP.jl
@@ -8,7 +8,7 @@ import LinearAlgebra
 import MathOptInterface as MOI
 
 function runtests()
-    for name in names(@__MODULE__; all=true)
+    for name in names(@__MODULE__; all = true)
         if startswith("$(name)", "test_")
             @testset "$(name)" begin
                 getfield(@__MODULE__, name)()
@@ -164,11 +164,7 @@ function test_parse_moi()
     return
 end
 
-function _eval(
-    model::JuMP.GenericModel{T},
-    func,
-    x,
-) where {T}
+function _eval(model::JuMP.GenericModel{T}, func, x) where {T}
     mode = ArrayDiff.Mode{Vector{T}}()
     ad = ArrayDiff.model(mode)
     MOI.Nonlinear.set_objective(ad, JuMP.moi_function(func))

From a8a4c1762bdfa22594feda437e014d3ba77f1544 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Legat?= <benoit.legat@gmail.com>
Date: Thu, 14 May 2026 19:30:17 +0200
Subject: [PATCH 4/4] Simplify

---
 src/sizes.jl | 32 ++++++++++++++++++--------------
 test/JuMP.jl |  8 --------
 2 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/src/sizes.jl b/src/sizes.jl
index de1d39d..462f3b0 100644
--- a/src/sizes.jl
+++ b/src/sizes.jl
@@ -347,22 +347,26 @@ function _infer_sizes(
             end
             op = DEFAULT_MULTIVARIATE_OPERATORS[node.index]
             if op == :+ || op == :- || op == :*
-                # Broadcasted +/-/* takes the largest child's shape (for
-                # scalar+matrix that's the matrix; for matrix+matrix the
-                # shapes must match).
-                first_matrix = findfirst(children_indices) do i
-                    return !iszero(sizes.ndims[children_arr[i]])
+                sizes.ndims[k] = maximum(children_indices, init = 0) do i
+                    return sizes.ndims[children_arr[i]]
                 end
-                if isnothing(first_matrix)
-                    _copy_size!(sizes, k, children_arr[first(children_indices)])
-                else
-                    ref = children_arr[children_indices[first_matrix]]
-                    for c_idx in children_indices
-                        ix = children_arr[c_idx]
-                        iszero(sizes.ndims[ix]) && continue
-                        @assert _size(sizes, ix) == _size(sizes, ref) "Incompatible array shapes $(Tuple(_size(sizes, ix))) and $(Tuple(_size(sizes, ref))) for broadcasted operator `$op`"
+                sizes.size_offset[k] = length(sizes.size)
+                for _ in 1:sizes.ndims[k]
+                    push!(sizes.size, 1)
+                end
+                sz_parent = _size(sizes, k)
+                for i in children_indices
+                    id = children_arr[i]
+                    sz = _size(sizes, id)
+                    for j in eachindex(sz)
+                        if sz[j] > 1
+                            if sz_parent[j] == 1
+                                sz_parent[j] = sz[j]
+                            else
+                                @assert sz_parent[j] == sz[j]
+                            end
+                        end
                     end
-                    _copy_size!(sizes, k, ref)
                 end
             elseif op == :^
                 # Broadcasted ^ with scalar exponent preserves base shape
diff --git a/test/JuMP.jl b/test/JuMP.jl
index 209bfa3..533e631 100644
--- a/test/JuMP.jl
+++ b/test/JuMP.jl
@@ -424,10 +424,6 @@ function test_size_vec_vect()
     return
 end
 
-# The previous size-inference code special-cased broadcasted `+/-/*` with a
-# `nb_cols` formula that happened to match for square matrices. A 2x3 input
-# shape exercises the bug: with the old code the result would be reported as
-# (2, 2) instead of (2, 3), and `eval_objective` would read past the tape.
 function test_broadcast_nonsquare_matrix()
     model = Model()
     @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables)
@@ -461,10 +457,6 @@ function test_broadcast_nonsquare_matrix()
     return
 end
 
-# The fix replaced the old `(1, 1)` stub for `scalar .op matrix` with a copy
-# of the matrix child's full shape. Eval/reverse for these mixed-rank
-# broadcasts isn't implemented yet (and is out of scope for the fix), so we
-# only assert the inferred shape — that's where the previous code was wrong.
 function test_broadcast_scalar_matrix_size_inference()
     model = Model()
     @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables)