ahrefs
diff --git a/lib/operation.ml b/lib/operation.ml
Lines changed: 1 addition & 3 deletions
diff --git a/lib/ppx_cd.ml b/lib/ppx_cd.ml
Lines changed: 79 additions & 67 deletions
diff --git a/test/einsum/moons_demo_variant.expected b/test/einsum/moons_demo_variant.expected
Lines changed: 30 additions & 35 deletions
@@ -347,11 +347,9 @@ let fma ?(label = []) ~grad_spec t1 t2 t3 =
 let where ?(label = []) ~grad_spec t1 t2 t3 =
   let module NTDSL = NTDSL_before_div in
   let%cd op_asn ~v ~t1 ~t2 ~t3 ~projections = v =: where v1 v2 v3 in
-  (* Just to illustrate that both [0] and [!..0] are handled. *)
-  let zero_cst = 0 in
   let%cd grad_asn ~t:_ ~g ~t1 ~t2 ~t3 ~projections =
     g2 =+ where v1 g 0;
-    g3 =+ where v1 !..zero_cst g
+    g3 =+ where v1 0 g
   in
   Tensor.ternop ~label:("where" :: label) ~ternary_op:Pointwise_tern ~op_asn ~grad_asn ~grad_spec t1
     t2 t3
 
@@ -65,8 +65,16 @@ let make_vb ~loc ~name ~name_expr ~hint_label =
   vb
 
 (** The expression argument is of type: [Assignments.t]. *)
-let assignment ~punned ~lhs ~rhses body =
+let assignment ~punned ~lhs ~rhses ?body_for_lhs ?raw_body () =
   let setups = lhs :: rhses in
+  let body, is_for_lhs =
+    match (body_for_lhs, raw_body) with
+    | Some body_for_lhs, None ->
+        let loc = body_for_lhs.pexp_loc in
+        ([%expr Option.value ~default:Ir.Assignments.Noop [%e body_for_lhs]], true)
+    | None, Some raw_body -> (raw_body, false)
+    | _ -> assert false
+  in
   let loc = body.pexp_loc in
   let forward_args = List.filter_map setups ~f:(fun { fwd_code_or_noop; _ } -> fwd_code_or_noop) in
   let vbs, body =
@@ -107,9 +115,18 @@ let assignment ~punned ~lhs ~rhses body =
     List.fold (body :: List.rev forward_args) ~init:[%expr []] ~f:(fun xs x ->
         [%expr [%e x] :: [%e xs]])
   in
-  let expr = [%expr Ir.Assignments.sequence [%e comps]] in
+  let body = [%expr Ir.Assignments.sequence [%e comps]] in
+  let body =
+    if List.is_empty tensor_vbs then body else A.Exp.let_ ~loc Nonrecursive tensor_vbs body
+  in
   let expr =
-    if List.is_empty tensor_vbs then expr else A.Exp.let_ ~loc Nonrecursive tensor_vbs expr
+    if is_for_lhs then
+      [%expr
+        Option.value
+          ~default:
+            Ir.Assignments.{ asgns = Noop; embedded_nodes = Base.Set.empty (module Ir.Tnode) }
+        @@ Option.map [%e lhs.array_opt] ~f:(fun lhs -> [%e body])]
+    else body
   in
   {
     vbs;
@@ -519,23 +536,22 @@ let translate ?ident_label (expr : expression) : result =
             (proj_lazy, [%expr projections.Tensor.projections_debug])
       in
       (* FIXME: might be better to treat missing [rhs1, rhs2, rhs3] as zeros or errors rather than
-         eliding the code. *)
-      let body =
+         eliding the code; only [lhs] should decide whether to elide the code. *)
+      let body_for_lhs =
         [%expr
-          Option.value ~default:Ir.Assignments.Noop
-          @@ Option.map3 [%e setup_r1.array_opt] [%e setup_r2.array_opt] [%e setup_r3.array_opt]
-               ~f:(fun rhs1 rhs2 rhs3 ->
-                 Ir.Assignments.Accum_op
-                   {
-                     initialize_neutral = [%e initialize_neutral];
-                     accum = [%e accu_op];
-                     lhs = Option.value_exn [%e setup_l.array_opt];
-                     rhs = Ternop { op = [%e tern_op]; rhs1; rhs2; rhs3 };
-                     projections = [%e projections_lazy];
-                     projections_debug = [%e projections_debug];
-                   })]
+          Option.map3 [%e setup_r1.array_opt] [%e setup_r2.array_opt] [%e setup_r3.array_opt]
+            ~f:(fun rhs1 rhs2 rhs3 ->
+              Ir.Assignments.Accum_op
+                {
+                  initialize_neutral = [%e initialize_neutral];
+                  accum = [%e accu_op];
+                  lhs;
+                  rhs = Ternop { op = [%e tern_op]; rhs1; rhs2; rhs3 };
+                  projections = [%e projections_lazy];
+                  projections_debug = [%e projections_debug];
+                })]
       in
-      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r1; setup_r2; setup_r3 ] body
+      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r1; setup_r2; setup_r3 ] ~body_for_lhs ()
     in
     let process_assign_binop ~accu_op ~lhs ~bin_op ~rhs1 ~rhs2 ?projections ~proj_in_scope () =
       let initialize_neutral, accu_op = assignment_op accu_op in
@@ -582,24 +598,22 @@ let translate ?ident_label (expr : expression) : result =
             in
             (proj_lazy, [%expr projections.Tensor.projections_debug])
       in
-      (* TODO: might be better to treat missing [rhs1, rhs2] as zeros or errors rather than eliding
-         the code. *)
-      let body =
+      (* FIXME: might be better to treat missing [rhs1, rhs2] as zeros or errors rather than eliding
+         the code; only [lhs] should decide whether to elide the code. *)
+      let body_for_lhs =
         [%expr
-          Option.value ~default:Ir.Assignments.Noop
-          @@ Option.map3 [%e setup_l.array_opt] [%e setup_r1.array_opt] [%e setup_r2.array_opt]
-               ~f:(fun lhs rhs1 rhs2 ->
-                 Ir.Assignments.Accum_op
-                   {
-                     initialize_neutral = [%e initialize_neutral];
-                     accum = [%e accu_op];
-                     lhs;
-                     rhs = Binop { op = [%e bin_op]; rhs1; rhs2 };
-                     projections = [%e projections_lazy];
-                     projections_debug = [%e projections_debug];
-                   })]
+          Option.map2 [%e setup_r1.array_opt] [%e setup_r2.array_opt] ~f:(fun rhs1 rhs2 ->
+              Ir.Assignments.Accum_op
+                {
+                  initialize_neutral = [%e initialize_neutral];
+                  accum = [%e accu_op];
+                  lhs;
+                  rhs = Binop { op = [%e bin_op]; rhs1; rhs2 };
+                  projections = [%e projections_lazy];
+                  projections_debug = [%e projections_debug];
+                })]
       in
-      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r1; setup_r2 ] body
+      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r1; setup_r2 ] ~body_for_lhs ()
     in
     let process_assign_unop ~accu_op ~lhs ~un_op ~rhs ?projections ~proj_in_scope () =
       let initialize_neutral, accum = assignment_op accu_op in
@@ -644,23 +658,22 @@ let translate ?ident_label (expr : expression) : result =
             in
             (proj_lazy, [%expr projections.Tensor.projections_debug])
       in
-      (* TODO: might be better to treat missing [rhs] as zeros or errors rather than eliding the
-         code. *)
-      let body =
+      (* FIXME: might be better to treat missing [rhs] as zeros or errors rather than eliding the
+         code; only [lhs] should decide whether to elide the code. *)
+      let body_for_lhs =
         [%expr
-          Option.value ~default:Ir.Assignments.Noop
-          @@ Option.map2 [%e setup_l.array_opt] [%e setup_r.array_opt] ~f:(fun lhs rhs ->
-                 Ir.Assignments.Accum_op
-                   {
-                     initialize_neutral = [%e initialize_neutral];
-                     accum = [%e accum];
-                     lhs;
-                     rhs = Unop { op = [%e op]; rhs };
-                     projections = [%e projections_lazy];
-                     projections_debug = [%e projections_debug];
-                   })]
+          Option.map [%e setup_r.array_opt] ~f:(fun rhs ->
+              Ir.Assignments.Accum_op
+                {
+                  initialize_neutral = [%e initialize_neutral];
+                  accum = [%e accum];
+                  lhs;
+                  rhs = Unop { op = [%e op]; rhs };
+                  projections = [%e projections_lazy];
+                  projections_debug = [%e projections_debug];
+                })]
       in
-      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r ] body
+      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r ] ~body_for_lhs ()
     in
     let process_vec_unop ~lhs ~vec_un_op ~rhs ?projections ~proj_in_scope () =
       (* Vector unary operations do not have accumulation, they directly set values *)
@@ -700,20 +713,19 @@ let translate ?ident_label (expr : expression) : result =
             in
             (proj_lazy, [%expr projections.Tensor.projections_debug])
       in
-      let body =
+      let body_for_lhs =
         [%expr
-          Option.value ~default:Ir.Assignments.Noop
-          @@ Option.map2 [%e setup_l.array_opt] [%e setup_r.array_opt] ~f:(fun lhs rhs ->
-                 Ir.Assignments.Set_vec_unop
-                   {
-                     lhs;
-                     op = [%e op];
-                     rhs;
-                     projections = [%e projections_lazy];
-                     projections_debug = [%e projections_debug];
-                   })]
+          Option.map [%e setup_r.array_opt] ~f:(fun rhs ->
+              Ir.Assignments.Set_vec_unop
+                {
+                  lhs;
+                  op = [%e op];
+                  rhs;
+                  projections = [%e projections_lazy];
+                  projections_debug = [%e projections_debug];
+                })]
       in
-      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r ] body
+      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r ] ~body_for_lhs ()
     in
     let process_raw_ternop ~accu_op ~lhs ~tern_op ~rhs1 ~rhs2 ~rhs3 ~logic =
       let initialize_neutral, accu_op = assignment_op accu_op in
@@ -726,15 +738,15 @@ let translate ?ident_label (expr : expression) : result =
       let t1_expr, rhs1_is_grad, rhs1_is_merge = args_for ~loc setup_r1 in
       let t2_expr, rhs2_is_grad, rhs2_is_merge = args_for ~loc setup_r2 in
       let t3_expr, rhs3_is_grad, rhs3_is_merge = args_for ~loc setup_r3 in
-      let body =
+      let raw_body =
         [%expr
           Tensor.raw_ternop ~initialize_neutral:[%e initialize_neutral] ~accum:[%e accu_op]
             ~t:[%e t_expr] ~lhs_is_grad:[%e lhs_is_grad] ~op:[%e tern_op] ~t1:[%e t1_expr]
             ~rhs1_is_grad:[%e rhs1_is_grad] ~rhs1_is_merge:[%e rhs1_is_merge] ~t2:[%e t2_expr]
             ~rhs2_is_grad:[%e rhs2_is_grad] ~rhs2_is_merge:[%e rhs2_is_merge] ~t3:[%e t3_expr]
             ~rhs3_is_grad:[%e rhs3_is_grad] ~rhs3_is_merge:[%e rhs3_is_merge] ~logic:[%e logic]]
       in
-      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r1; setup_r2; setup_r3 ] body
+      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r1; setup_r2; setup_r3 ] ~raw_body ()
     in
     let process_raw_binop ~accu_op ~lhs ~bin_op ~rhs1 ~rhs2 ~logic =
       let initialize_neutral, accu_op = assignment_op accu_op in
@@ -745,14 +757,14 @@ let translate ?ident_label (expr : expression) : result =
       let t_expr, lhs_is_grad, _ = args_for ~loc setup_l in
       let t1_expr, rhs1_is_grad, rhs1_is_merge = args_for ~loc setup_r1 in
       let t2_expr, rhs2_is_grad, rhs2_is_merge = args_for ~loc setup_r2 in
-      let body =
+      let raw_body =
         [%expr
           Tensor.raw_binop ~initialize_neutral:[%e initialize_neutral] ~accum:[%e accu_op]
             ~t:[%e t_expr] ~lhs_is_grad:[%e lhs_is_grad] ~op:[%e bin_op] ~t1:[%e t1_expr]
             ~rhs1_is_grad:[%e rhs1_is_grad] ~rhs1_is_merge:[%e rhs1_is_merge] ~t2:[%e t2_expr]
             ~rhs2_is_grad:[%e rhs2_is_grad] ~rhs2_is_merge:[%e rhs2_is_merge] ~logic:[%e logic]]
       in
-      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r1; setup_r2 ] body
+      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r1; setup_r2 ] ~raw_body ()
     in
     let process_raw_unop ~accu_op ~lhs ~un_op ~rhs ~logic =
       let initialize_neutral, accu_op = assignment_op accu_op in
@@ -761,13 +773,13 @@ let translate ?ident_label (expr : expression) : result =
       let initialize_neutral = if initialize_neutral then [%expr true] else [%expr false] in
       let t_expr, lhs_is_grad, _ = args_for ~loc setup_l in
       let t1_expr, rhs_is_grad, rhs_is_merge = args_for ~loc setup_r in
-      let body =
+      let raw_body =
         [%expr
           Tensor.raw_unop ~initialize_neutral:[%e initialize_neutral] ~accum:[%e accu_op]
             ~t:[%e t_expr] ~lhs_is_grad:[%e lhs_is_grad] ~op:[%e un_op] ~t1:[%e t1_expr]
             ~rhs_is_grad:[%e rhs_is_grad] ~rhs_is_merge:[%e rhs_is_merge] ~logic:[%e logic]]
       in
-      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r ] body
+      assignment ~punned ~lhs:setup_l ~rhses:[ setup_r ] ~raw_body ()
     in
     match expr with
     | { pexp_desc = Pexp_constant (Pconst_float _); _ } ->
 
@@ -40,43 +40,38 @@ n39 grad_- as n38.grad: Local/1046; single prec 10x1; mem in bytes: <not-in-yet>
 n40 relu_margin_loss as relu_margin_loss: Virt/15; single prec 10x1; mem in bytes: <not-in-yet>
 n41 grad_relu_margin_loss as relu_margin_loss.grad: Local/1046; single prec 10x1; mem in bytes: <not-in-yet>
 n42 10 as _10: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n43 => as n43: Local/1046; single prec 1; mem in bytes: <not-in-yet>
 n44 grad_=> as n43.grad: Virt/40; single prec 1; mem in bytes: <not-in-yet>
 n46 grad_/._scalar_loss as scalar_loss.grad: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n47 2 as _2: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n48 **. as n48: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n49 -1 as n49: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n50 *. as n50: Virt/152; single prec 1; mem in bytes: <not-in-yet>
-n52 1 as _1: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n53 200 as _200: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n54 !@ as n54: Virt/152; single prec 1; mem in bytes: <not-in-yet>
-n55 200 as _200: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n56 2 as _2: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n57 *. as n57: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n58 - as n58: Virt/152; single prec 1; mem in bytes: <not-in-yet>
-n59 0.1 as n59: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n60 *. as n60: Virt/152; single prec 1; mem in bytes: <not-in-yet>
-n62 sgd_delta_b1 as sgd_delta_b1: Virt/15; single prec 16; mem in bytes: <not-in-yet>
-n63 sgd_momentum_b1 as sgd_momentum_b1: unknown; single prec <not-in-yet>; mem in bytes: <not-in-yet>
-n64 0.0001 as n64: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n65 *. as n65: Virt/15; single prec 16; mem in bytes: <not-in-yet>
-n66 sgd_delta_w1 as sgd_delta_w1: Virt/15; single prec 16x2; mem in bytes: <not-in-yet>
-n67 sgd_momentum_w1 as sgd_momentum_w1: unknown; single prec <not-in-yet>; mem in bytes: <not-in-yet>
-n68 0.0001 as n68: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n69 *. as n69: Virt/15; single prec 16x2; mem in bytes: <not-in-yet>
-n70 sgd_delta_w2 as sgd_delta_w2: Virt/15; single prec 1x16; mem in bytes: <not-in-yet>
-n71 sgd_momentum_w2 as sgd_momentum_w2: unknown; single prec <not-in-yet>; mem in bytes: <not-in-yet>
-n72 0.0001 as n72: Virt/40; single prec 1; mem in bytes: <not-in-yet>
-n73 *. as n73: Virt/15; single prec 1x16; mem in bytes: <not-in-yet>
-n74 point_mlp_result as point_mlp_result: Host&shared/37039; single prec 2; mem in bytes: <not-in-yet>
-n75 * as n75: Local/1046; single prec 16; mem in bytes: <not-in-yet>
-n76 grad_* as n75.grad: unknown; single prec 16; mem in bytes: <not-in-yet>
-n77 + as n77: Virt/15; single prec 16; mem in bytes: <not-in-yet>
-n78 grad_+ as n77.grad: unknown; single prec 16; mem in bytes: <not-in-yet>
-n79 relu as relu: Virt/15; single prec 16; mem in bytes: <not-in-yet>
-n80 grad_relu as relu.grad: unknown; single prec 16; mem in bytes: <not-in-yet>
-n81 *_mlp_point_mlp_result as mlp_point_mlp_result: Host&stream/412410; single prec 1; mem in bytes: <not-in-yet>
-n82 grad_*_mlp_point_mlp_result as mlp_point_mlp_result.grad: unknown; single prec 1; mem in bytes: <not-in-yet>
+n47 1 as _1: Virt/40; single prec 1; mem in bytes: <not-in-yet>
+n48 200 as _200: Virt/40; single prec 1; mem in bytes: <not-in-yet>
+n49 !@ as n49: Virt/152; single prec 1; mem in bytes: <not-in-yet>
+n50 200 as _200: Virt/40; single prec 1; mem in bytes: <not-in-yet>
+n51 2 as _2: Virt/40; single prec 1; mem in bytes: <not-in-yet>
+n52 *. as n52: Virt/40; single prec 1; mem in bytes: <not-in-yet>
+n53 - as n53: Virt/152; single prec 1; mem in bytes: <not-in-yet>
+n54 0.1 as n54: Virt/40; single prec 1; mem in bytes: <not-in-yet>
+n55 *. as n55: Virt/152; single prec 1; mem in bytes: <not-in-yet>
+n57 sgd_delta_b1 as sgd_delta_b1: Virt/15; single prec 16; mem in bytes: <not-in-yet>
+n58 sgd_momentum_b1 as sgd_momentum_b1: unknown; single prec <not-in-yet>; mem in bytes: <not-in-yet>
+n59 0.0001 as n59: Virt/40; single prec 1; mem in bytes: <not-in-yet>
+n60 *. as n60: Virt/15; single prec 16; mem in bytes: <not-in-yet>
+n61 sgd_delta_w1 as sgd_delta_w1: Virt/15; single prec 16x2; mem in bytes: <not-in-yet>
+n62 sgd_momentum_w1 as sgd_momentum_w1: unknown; single prec <not-in-yet>; mem in bytes: <not-in-yet>
+n63 0.0001 as n63: Virt/40; single prec 1; mem in bytes: <not-in-yet>
+n64 *. as n64: Virt/15; single prec 16x2; mem in bytes: <not-in-yet>
+n65 sgd_delta_w2 as sgd_delta_w2: Virt/15; single prec 1x16; mem in bytes: <not-in-yet>
+n66 sgd_momentum_w2 as sgd_momentum_w2: unknown; single prec <not-in-yet>; mem in bytes: <not-in-yet>
+n67 0.0001 as n67: Virt/40; single prec 1; mem in bytes: <not-in-yet>
+n68 *. as n68: Virt/15; single prec 1x16; mem in bytes: <not-in-yet>
+n69 point_mlp_result as point_mlp_result: Host&shared/37039; single prec 2; mem in bytes: <not-in-yet>
+n70 * as n70: Local/1046; single prec 16; mem in bytes: <not-in-yet>
+n71 grad_* as n70.grad: unknown; single prec 16; mem in bytes: <not-in-yet>
+n72 + as n72: Virt/15; single prec 16; mem in bytes: <not-in-yet>
+n73 grad_+ as n72.grad: unknown; single prec 16; mem in bytes: <not-in-yet>
+n74 relu as relu: Virt/15; single prec 16; mem in bytes: <not-in-yet>
+n75 grad_relu as relu.grad: unknown; single prec 16; mem in bytes: <not-in-yet>
+n76 *_mlp_point_mlp_result as mlp_point_mlp_result: Host&stream/412410; single prec 1; mem in bytes: <not-in-yet>
+n77 grad_*_mlp_point_mlp_result as mlp_point_mlp_result.grad: unknown; single prec 1; mem in bytes: <not-in-yet>
 Tnode: Finished printing headers.
 mlp_result's name: mlp_point_mlp_result
 (mlp moons_input) name: mlp_moons_input