%cd syntax: Provide projections for scalar constants directly

lukstafi · lukstafi · commit 5bd3107fc82b · 2025-03-22T16:18:04.000+01:00
TODO: also special-case the operators `!.` and `!..` so that they use the Scalar slot.
diff --git a/lib/operation.ml b/lib/operation.ml
@@ -308,8 +308,8 @@ let where ?(label = []) ~grad_spec t1 t2 t3 =
   let%cd op_asn ~v ~t1 ~t2 ~t3 ~projections = v =: where v1 v2 v3 in
   (* TODO: introduce a special-case projection for constants *)
   let%cd grad_asn ~t:_ ~g ~t1 ~t2 ~t3 ~projections =
-    g2 =+ where v1 g (t3 - t3);
-    g3 =+ where v1 (t2 - t2) g
+    g2 =+ where v1 g 0;
+    g3 =+ where v1 0 g
   in
   Tensor.ternop ~label:("where" :: label) ~ternary_op:Pointwise_tern ~op_asn ~grad_asn ~grad_spec t1
     t2 t3
diff --git a/lib/ppx_cd.ml b/lib/ppx_cd.ml
@@ -33,7 +33,8 @@ type expr_type =
 
 let is_unknown = function Unknown -> true | _ -> false
 
-type projections_slot = LHS | RHS1 | RHS2 | RHS3 | Nonslot | Undet [@@deriving equal, sexp]
+type projections_slot = LHS | RHS1 | RHS2 | RHS3 | Scalar | Nonslot | Undet
+[@@deriving equal, sexp]
 
 type result = {
   vbs : value_binding Map.M(String).t;
@@ -136,6 +137,7 @@ let project_p_slot debug loc slot =
   | RHS1 -> [%expr p.project_rhs.(0)]
   | RHS2 -> [%expr p.project_rhs.(1)]
   | RHS3 -> [%expr p.project_rhs.(2)]
+  | Scalar -> [%expr [| Arrayjit.Indexing.Fixed_idx 0 |]]
   | Nonslot ->
       Ast_builder.Default.pexp_extension ~loc
       @@ Location.error_extensionf ~loc
@@ -152,6 +154,7 @@ let project_p_dims debug loc slot =
   | RHS1 -> [%expr p.rhs_dims.(0)]
   | RHS2 -> [%expr p.rhs_dims.(1)]
   | RHS3 -> [%expr p.rhs_dims.(2)]
+  | Scalar -> [%expr [| 1 |]]
   | Nonslot ->
       Ast_builder.Default.pexp_extension ~loc
       @@ Location.error_extensionf ~loc
@@ -276,7 +279,7 @@ let setup_array ~punned ~bad_pun_hints ~is_lhs
         | RHS1 -> [%pat? nondiff__rhs1]
         | RHS2 -> [%pat? nondiff__rhs2]
         | RHS3 -> [%pat? nondiff__rhs3]
-        | Nonslot | Undet -> [%pat? nondiff__tensor]
+        | Scalar | Nonslot | Undet -> [%pat? nondiff__tensor]
       in
       let t = pat2expr v in
       let vb = Some (A.Vb.mk ~loc v filler) in
@@ -659,16 +662,20 @@ let translate (expr : expression) : result =
     in
     match expr with
     | { pexp_desc = Pexp_constant (Pconst_float _); _ } ->
-        { default_result with expr = [%expr NTDSL.number [%e expr]] }
+        { default_result with expr = [%expr NTDSL.number [%e expr]]; slot = Scalar }
     | { pexp_desc = Pexp_constant (Pconst_integer _); _ } ->
-        { default_result with expr = [%expr NTDSL.number (Float.of_int [%e expr])] }
+        { default_result with expr = [%expr NTDSL.number (Float.of_int [%e expr])]; slot = Scalar }
     | [%expr
         [%e? { pexp_desc = Pexp_constant (Pconst_char ch); pexp_loc; _ }]
           [%e? { pexp_desc = Pexp_constant (Pconst_float _); _ } as f]] ->
         let axis =
           Ast_helper.Exp.constant ~loc:pexp_loc (Pconst_string (String.of_char ch, pexp_loc, None))
         in
-        { default_result with expr = [%expr NTDSL.number ~axis_label:[%e axis] [%e f]] }
+        {
+          default_result with
+          expr = [%expr NTDSL.number ~axis_label:[%e axis] [%e f]];
+          slot = Scalar;
+        }
     | [%expr
         [%e? { pexp_desc = Pexp_constant (Pconst_char ch); pexp_loc; _ }]
           [%e? { pexp_desc = Pexp_constant (Pconst_integer _); _ } as i]] ->
@@ -678,6 +685,7 @@ let translate (expr : expression) : result =
         {
           default_result with
           expr = [%expr NTDSL.number ~axis_label:[%e axis] (Float.of_int [%e i])];
+          slot = Scalar;
         }
     | { pexp_desc = Pexp_constant (Pconst_string (name, str_loc, _)); _ } ->
         {
diff --git a/lib/syntax_extensions.md b/lib/syntax_extensions.md
@@ -205,7 +205,7 @@ type Assignments.t =
 
 For example the binary case in pseudocode: `if initialize_neutral then lhs = 0; lhs = lhs accum (rhs1 op rhs2)` (assuming the neutral element of `accum` is 0). The representation also has a field `projections` which determines which loops should be run and how the tensor nodes should be indexed to perform the computation.
 
-The basic `%cd` syntax for assignments has the form: `<lhs> <asgn-op> <primitive-op-application[rhs1, rhs2?, rhs3?]>`. See [Primitive operations](#primitive-operations) for the syntax of primitive operation application, where `<rhs1>`, `<rhs2>` (for binary and ternary ops), `<rhs3>` (for ternary ops) are subexpressions. `<asgn-op>` starts with `=`, followed by `:` only if `initialize_neutral` is true, then followed by the operator syntax variant of a binary primitive operation. The fields `<lhs>`, `<rhs1>`, `<rhs2>`, `<rhs3>` will often be either special-purpose identifiers (e.g. `t`, `t1`, `t2`, `t3`, `g`, `g1`, `g2`, `g3`) or identifiers bound to tensors. `<rhs1>`, `<rsh2>`, `<rsh3>` will also often be (non-differentiable) tensor expressions. The notation `<tensor>.grad` stands for the gradient node of the given tensor. For more about "slot fillers", and to learn about the operators `*+` and `++`, see the section [further features of the syntax extension %cd](#further-features-of-the-syntax-extension-cd).
+The basic `%cd` syntax for assignments has the form: `<lhs> <asgn-op> <primitive-op-application[rhs1, rhs2?, rhs3?]>`. See [Primitive operations](#primitive-operations) for the syntax of primitive operation application, where `<rhs1>`, `<rhs2>` (for binary and ternary ops), `<rhs3>` (for ternary ops) are subexpressions. `<asgn-op>` starts with `=`, followed by `:` only if `initialize_neutral` is true, then followed by the operator syntax variant of a binary primitive operation. The fields `<lhs>`, `<rhs1>`, `<rhs2>`, `<rhs3>` will often be either special-purpose identifiers (specifically `v`, `t`, `t1`, `t2`, `t3`, `g`, `g1`, `g2`, `g3`) or identifiers bound to tensors. `<rhs1>`, `<rhs2>`, `<rhs3>` will also often be (non-differentiable) tensor expressions. The notation `<tensor>.grad` stands for the gradient node of the given tensor. For more about "slot fillers", and to learn about the operators `*+` and `++`, see the section [further features of the syntax extension %cd](#further-features-of-the-syntax-extension-cd).
 
 How is the `projections` field determined? `projections` can be given explicitly as a labeled argument `~projections`. If they aren't but `%cd` realizes there is a `~projections` parameter in scope, it uses it -- see `lib/operation.ml` where this option is used to define tensor operations. If instead of `~projections` a `~logic` labeled argument is given, the string passed is used to determine projections. `~logic:"."` means a pointwise operation. `~logic:"@"` means an "output axes of rhs2 match input axes of rhs1" operation (matrix multiplication is a special case). `~logic:"T"` means transpose of input and output axes. The string passed to `~logic` can also use OCANNL's generalization of the einsum notation, allowing arbitrary permutations and reductions of axes. If no information is given, the default depends on the primitive operation, but it is almost always a pointwise operation.
 
@@ -229,7 +229,7 @@ p =+ learning_rate *. p.grad
 
 In the first case, we have a binary assignment calculated pointwise. The resulting representation is `Accum_binop` where `accum` is `Add` and `op` is `Mul` (multiplication). In the second case, `*.` is not recognized as one of the built-in operators. This leaves the expression `learning_rate *. p.grad` un-transformed. Since `(*.)` is bound in `NTDSL.O` to pointwise tensor multiplication, this creates an intermediate tensor, that is then added onto p. The resulting representation is `Accum_unop` where `accum` is `Add` and `op` is `Identity`. Both variants end up with the same result, and even with the same computation, because the second variant's computation will get optimized (unless configured not to).
 
-Advanced note: when a `~projections` parameter is in scope but no assignment-specific `~projections` argument is given -- the typical case in `lib/operation.ml` -- the actual projections field for an assignment is computed by transforming the projections parameter according to hints regarding how tensor nodes relate to the given projections. Specifically, the identifiers `rhs1`, `t1`, `v1`, `g1` are "slot RHS1" of the projections, `rhs2`, `t2`, `v2`, `g2` are "slot RHS2", `lhs,`, `t`, `v`, `g` are "slot LHS".
+Advanced note: when a `~projections` parameter is in scope but no assignment-specific `~projections` argument is given -- the typical case in `lib/operation.ml` -- the actual projections field for an assignment is computed by transforming the projections parameter according to hints regarding how tensor nodes relate to the given projections. Specifically, the identifiers `rhs1`, `t1`, `v1`, `g1` are "slot RHS1" of the projections, `rhs2`, `t2`, `v2`, `g2` are "slot RHS2", `lhs`, `t`, `v`, `g` are "slot LHS". Scalar constants (numeric literals) do not correspond to any slot of the projections parameter; instead they are given their projection directly -- a fixed index 0 into a single axis of dimension 1 -- which makes the automated derivation more expressive.
 
 ## Numeric and N-dimensional array literals