More %cd flexibility: derive projections for !. and !..

lukstafi · lukstafi · commit 2032408f0ade · 2025-03-22T16:30:02.000+01:00
diff --git a/lib/operation.ml b/lib/operation.ml
@@ -306,10 +306,11 @@ let fma ?(label = []) ~grad_spec t1 t2 t3 =
 let where ?(label = []) ~grad_spec t1 t2 t3 =
   let module NTDSL = NTDSL_before_div in
   let%cd op_asn ~v ~t1 ~t2 ~t3 ~projections = v =: where v1 v2 v3 in
-  (* TODO: introduce a special-case projection for constants *)
+  (* Just to illustrate that both a literal [0] and an embedded [!..zero_cst] are handled. *)
+  let zero_cst = 0 in
   let%cd grad_asn ~t:_ ~g ~t1 ~t2 ~t3 ~projections =
     g2 =+ where v1 g 0;
-    g3 =+ where v1 0 g
+    g3 =+ where v1 !..zero_cst g
   in
   Tensor.ternop ~label:("where" :: label) ~ternary_op:Pointwise_tern ~op_asn ~grad_asn ~grad_spec t1
     t2 t3
@@ -410,9 +411,9 @@ module DO = struct
   let recip_sqrt = recip_sqrt ~grad_spec:If_needed
   let tanh = tanh ~grad_spec:If_needed
   let where = where ~grad_spec:If_needed
-  let (<) = lt ~grad_spec:Prohibit_grad
-  let (=) = eq ~grad_spec:Prohibit_grad
-  let (<>) = ne ~grad_spec:Prohibit_grad
+  let ( < ) = lt ~grad_spec:Prohibit_grad
+  let ( = ) = eq ~grad_spec:Prohibit_grad
+  let ( <> ) = ne ~grad_spec:Prohibit_grad
 end
 
 module NDO = struct
@@ -435,9 +436,9 @@ module NDO = struct
   let recip_sqrt = recip_sqrt ~grad_spec:Prohibit_grad
   let tanh = tanh ~grad_spec:Prohibit_grad
   let where = where ~grad_spec:Prohibit_grad
-  let (<) = lt ~grad_spec:Prohibit_grad
-  let (=) = eq ~grad_spec:Prohibit_grad
-  let (<>) = ne ~grad_spec:Prohibit_grad
+  let ( < ) = lt ~grad_spec:Prohibit_grad
+  let ( = ) = eq ~grad_spec:Prohibit_grad
+  let ( <> ) = ne ~grad_spec:Prohibit_grad
 end
 
 module TDSL = struct
diff --git a/lib/ppx_cd.ml b/lib/ppx_cd.ml
@@ -741,6 +741,13 @@ let translate (expr : expression) : result =
         }
     | { pexp_desc = Pexp_ident { txt = Lident op_ident; _ }; _ } when is_primitive_op op_ident ->
         default_result
+    | [%expr !.[%e? expr1]] ->
+        (* Hardcoding these two patterns to improve projection derivation expressivity. *)
+        let res1 = loop ~proj_in_scope expr1 in
+        { res1 with typ = Tensor; slot = Scalar; expr = [%expr NTDSL.O.( !. ) [%e res1.expr]] }
+    | [%expr !..[%e? expr1]] ->
+        let res1 = loop ~proj_in_scope expr1 in
+        { res1 with typ = Tensor; slot = Scalar; expr = [%expr NTDSL.O.( !.. ) [%e res1.expr]] }
     | [%expr [%e? expr1] **. [%e? { pexp_desc = Pexp_constant (Pconst_integer _); _ } as i]] ->
         (* FIXME: `**.` should take a tensor and require that it's a literal. *)
         (* We need to hardcode these two patterns to prevent the numbers from being converted to tensors. *)
diff --git a/lib/syntax_extensions.md b/lib/syntax_extensions.md
@@ -229,7 +229,7 @@ p =+ learning_rate *. p.grad
 
 In the first case, we have a binary assignment calculated pointwise. The resulting representation is `Accum_binop` where `accum` is `Add` and `op` is `Mul` (multiplication). In the second case, `*.` is not recognized as one of the built-in operators. This leaves the expression `learning_rate *. p.grad` un-transformed. Since `(*.)` is bound in `NTDSL.O` to pointwise tensor multiplication, this creates an intermediate tensor, that is then added onto p. The resulting representation is `Accum_unop` where `accum` is `Add` and `op` is `Identity`. Both variants end up with the same result, and even with the same computation, because the second variant's computation will get optimized (unless configured not to).
 
-Advanced note: when a `~projections` parameter is in scope but no assignment-specific `~projections` argument is given -- the typical case in `lib/operation.ml` -- the actual projections field for an assignment is computed by transforming the projections parameter according to hints regarding how tensor nodes relate to the given projections. Specifically, the identifiers `rhs1`, `t1`, `v1`, `g1` are "slot RHS1" of the projections, `rhs2`, `t2`, `v2`, `g2` are "slot RHS2", `lhs,`, `t`, `v`, `g` are "slot LHS". Scalar constants are provided the projection directly, to make the automated derivation more expressive.
+Advanced note: when a `~projections` parameter is in scope but no assignment-specific `~projections` argument is given -- the typical case in `lib/operation.ml` -- the actual projections field for an assignment is computed by transforming the projections parameter according to hints regarding how tensor nodes relate to the given projections. Specifically, the identifiers `rhs1`, `t1`, `v1`, `g1` are "slot RHS1" of the projections, `rhs2`, `t2`, `v2`, `g2` are "slot RHS2", and `lhs`, `t`, `v`, `g` are "slot LHS". Scalar constants are given the projection directly, to make the automated derivation more expressive; this is supported both for literals and (heuristically) for the `!.` and `!..` embedding operators.
 
 ## Numeric and N-dimensional array literals