
Commit 2a15301

In progress: embedding of dimensions in tensor expressions
1 parent 06744a8 commit 2a15301

10 files changed: +92 -26 lines changed

arrayjit/lib/assignments.ml

Lines changed: 6 additions & 0 deletions
@@ -37,6 +37,7 @@ type fetch_op =
   | Slice of { batch_idx : Indexing.static_symbol; sliced : Tn.t }
   | Embed_symbol of Indexing.static_symbol
   | Embed_self_id  (** Embeds the id of the [array] field of the [Fetch] constructor. *)
+  | Embed_dim of Indexing.variable_ref
 [@@deriving sexp_of, equal]
 
 type accum_rhs =
@@ -345,6 +346,10 @@ let%track4_sexp to_low_level code =
     | Fetch { array; fetch_op = Embed_self_id; dims } ->
         Low_level.loop_over_dims (Lazy.force dims) ~body:(fun idcs ->
            set array idcs @@ Constant_bits (Int64.of_int array.id))
+    | Fetch { array; fetch_op = Embed_dim variable_ref; dims } ->
+        (* Note: we are guaranteed all shape inference is forced before we access variable_ref. *)
+        Low_level.loop_over_dims (Lazy.force dims) ~body:(fun idcs ->
+            set array idcs @@ Constant (Float.of_int @@ Option.value_exn variable_ref.solved_dim))
     | Fetch { array; fetch_op = Range_over_offsets; dims = (lazy dims) } ->
         Low_level.loop_over_dims dims ~body:(fun idcs ->
            let offset = Indexing.reflect_projection ~dims ~projection:idcs in
@@ -443,6 +448,7 @@ let to_doc ?name ?static_indices () c =
     | Embed_symbol { static_symbol; static_range = _ } ->
         string ("!@" ^ Indexing.symbol_ident static_symbol)
     | Embed_self_id -> string "!@self_id"
+    | Embed_dim { ref_label; _ } -> string ("(dim " ^ ref_label ^ ")")
   in
 
   let rec doc_of_code = function
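
For orientation, here is a minimal standalone sketch (plain OCaml, not OCANNL code) of what the new `Embed_dim` lowering amounts to: read the dimension that shape inference solved into a `variable_ref` and fill the fetched array with it as a constant. The record mirrors `Indexing.variable_ref` from the next file; the hypothetical `embed_dim_into` helper and the plain float array stand in for `Low_level.loop_over_dims` over the tensor node.

```ocaml
(* Standalone model; the record mirrors Indexing.variable_ref below. *)
type variable_ref = { ref_label : string; mutable solved_dim : int option }

(* Analogue of the Embed_dim case above: by the time the fetch runs, shape
   inference has set solved_dim, and every cell receives that constant. *)
let embed_dim_into (r : variable_ref) (dst : float array) : unit =
  match r.solved_dim with
  | None -> invalid_arg ("unsolved dimension variable: " ^ r.ref_label)
  | Some d -> Array.fill dst 0 (Array.length dst) (Float.of_int d)

let () =
  let r = { ref_label = "c"; solved_dim = Some 16 } in
  let dst = Array.make 4 0. in
  embed_dim_into r dst;
  assert (Array.for_all (fun x -> x = 16.) dst)
```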

arrayjit/lib/indexing.ml

Lines changed: 3 additions & 0 deletions
@@ -302,6 +302,9 @@ let reflect_projection ~(dims : int array) ~(projection : axis_index array) =
         | Sub_axis -> (stride * dim, symbols, offset))
   |> fun (_, symbols, offset) -> Affine { symbols; offset }
 
+type variable_ref = { ref_label : string; mutable solved_dim : int option }
+[@@deriving sexp_of, equal]
+
 module Doc_helpers = struct
   let ( ^^ ) = PPrint.( ^^ )
   let ( !^ ) = PPrint.( !^ )

lib/operation.ml

Lines changed: 17 additions & 9 deletions
@@ -85,35 +85,37 @@ let matmul ?(label = []) =
 
     Note that ["a,b->c"] from [numpy] is ["a;b=>c"] in OCANNL, since ["->"] is used to separate the
     input and the output axes. *)
-let einsum ?(label = []) spec =
+let einsum ?(label = []) ?(capture_dims = []) spec =
   let module NTDSL = Initial_NTDSL in
   let%cd op_asn ~v ~t1 ~t2 ~projections = v =:+ v1 * v2 in
   let%cd grad_asn ~t:_ ~g ~t1 ~t2 ~projections =
     g1 =+ g * v2;
     g2 =+ v1 * g
   in
-  Tensor.binop ~label:(";=>" :: label) ~compose_op:(Einsum spec) ~op_asn ~grad_asn
+  Tensor.binop ~label:(";=>" :: label) ~compose_op:(Einsum (spec, capture_dims)) ~op_asn ~grad_asn
 
 (** Like [einsum], but adds instead than multiplying the resulting values. *)
-let outer_sum ?(label = []) spec =
+let outer_sum ?(label = []) ?(capture_dims = []) spec =
   let module NTDSL = Initial_NTDSL in
   let%cd op_asn ~v ~t1 ~t2 ~projections = v =:+ v1 + v2 in
   let%cd grad_asn ~t:_ ~g ~t1 ~t2 ~projections =
     g1 =+ g;
     g2 =+ g
   in
-  Tensor.binop ~label:(";=>+" :: label) ~compose_op:(Einsum spec) ~op_asn ~grad_asn
+  Tensor.binop ~label:(";=>+" :: label) ~compose_op:(Einsum (spec, capture_dims)) ~op_asn ~grad_asn
 
 (** Similar to the explicit mode of [numpy.einsum], the unary variant. Can permute axes, extract
     diagonals, compute traces etc.
 
     Note that ["a->c"] from [numpy] is ["a=>c"] in OCANNL, since ["->"] is used to separate the
     input and the output axes. *)
-let einsum1 ?(label = []) spec =
+let einsum1 ?(label = []) ?(capture_dims = []) spec =
   let module NTDSL = Initial_NTDSL in
   let%cd op_asn ~v ~t1 ~projections = v =:+ v1 in
   let%cd grad_asn ~t:_ ~g ~t1 ~projections = g1 =+ g in
-  Tensor.unop ~transpose_op:(Shape.Permute spec) ~op_asn ~grad_asn ~label:("=>" :: label)
+  Tensor.unop
+    ~transpose_op:(Shape.Permute (spec, capture_dims))
+    ~op_asn ~grad_asn ~label:("=>" :: label)
 
 module NDO_before_pow = struct
   let ( * ) t1 t2 = matmul ~grad_spec:Prohibit_grad t1 t2 ()
@@ -455,11 +457,15 @@ let slice (batch_idx : Idx.static_symbol) =
   Tensor.unop ~transpose_op:(Batch_slice batch_idx) ~op_asn ~grad_asn ~label:("@|" :: label)
 
 let embed_symbol ?grad_spec ?(label = []) static_sym =
-  Tensor.term ~fetch_op:(Embed_symbol static_sym) ?grad_spec ~label:("!@" :: label)
-    ~batch_dims:[] ~input_dims:[] ~output_dims:[ 1 ] ()
+  Tensor.term ~fetch_op:(Embed_symbol static_sym) ?grad_spec ~label:("!@" :: label) ~batch_dims:[]
+    ~input_dims:[] ~output_dims:[ 1 ] ()
 
 let embed_self_id ?grad_spec ?(label = []) () =
-  Tensor.term ~fetch_op:Embed_self_id ?grad_spec ~label:("!@self_id" :: label)
+  Tensor.term ~fetch_op:Embed_self_id ?grad_spec ~label:("!@self_id" :: label) ~batch_dims:[]
+    ~input_dims:[] ~output_dims:[ 1 ] ()
+
+let embed_dim ?grad_spec ?(label = []) variable_ref =
+  Tensor.term ~fetch_op:(Embed_dim variable_ref) ?grad_spec ~label:("!@self_id" :: label)
     ~batch_dims:[] ~input_dims:[] ~output_dims:[ 1 ] ()
 
 let uniform ?grad_spec () =
@@ -590,6 +596,7 @@ struct
     Tensor.number ?label ?axis_label ~grad_spec:Grad_spec.grad_spec (Float.of_int i)
 
   let embed_symbol = embed_symbol ~grad_spec:Grad_spec.grad_spec
+  let embed_dim = embed_dim ~grad_spec:Grad_spec.grad_spec
   let sub = sub ~grad_spec:Grad_spec.grad_spec
   let pointdiv = pointdiv ~grad_spec:Grad_spec.grad_spec
   let slice = slice ~grad_spec:Grad_spec.grad_spec
@@ -627,6 +634,7 @@ struct
   let ( !.. ) ?label i = number ?label @@ Float.of_int i
   let ( !% ) ?label i = bits ?label i
   let ( !@ ) = embed_symbol
+  let dim = embed_dim
   let ( - ) ?label t1 t2 = sub ?label t1 t2 ()
   let ( ~- ) ?label t = pointmul ?label (number (-1.)) t ()
   let ( /. ) ?label t1 t2 = pointdiv ?label t1 t2 ()
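
A rough usage fragment based on the signatures added above (not a complete program: it assumes an OCANNL scope with tensors `a` and `b`, the spec and names are illustrative, the trailing `()` follows the `sub ?label t1 t2 ()` pattern from this file, and in this in-progress commit shape inference does not yet populate the refs, per the FIXMEs in lib/shape.ml):

```ocaml
(* Capture the dimension solved for the einsum label "b" (illustrative). *)
let b_ref = { Ir.Indexing.ref_label = "b"; solved_dim = None }

(* ~capture_dims threads the ref into the (Einsum (spec, capture_dims))
   compose_op so shape inference can record the solution. *)
let c = einsum ~capture_dims:[ b_ref ] "ab;bc=>ac" a b ()

(* After inference, the solved dimension can itself be embedded as a scalar
   tensor; [dim] is the alias for [embed_dim] bound in the O module above. *)
let b_size = embed_dim b_ref
```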

lib/ppx_cd.ml

Lines changed: 2 additions & 2 deletions
@@ -1292,7 +1292,7 @@ let translate ?ident_label (expr : expression) : result =
             let loc = s_loc in
             if String.equal spec "." then [%expr Shape.Pointwise_bin]
             else if String.equal spec "@" then [%expr Shape.Compose]
-            else [%expr Shape.Einsum [%e logic]]
+            else [%expr Shape.Einsum ([%e logic], [])]
           in
           let _, bin_op = binary_op bin_op in
           process_raw_binop ~accu_op ~lhs ~bin_op ~rhs1 ~rhs2 ~logic
@@ -1348,7 +1348,7 @@ let translate ?ident_label (expr : expression) : result =
             let loc = s_loc in
             if String.equal spec "." then [%expr Shape.Pointwise_un]
             else if String.equal spec "T" then [%expr Shape.Transpose]
-            else [%expr Shape.Permute [%e logic]]
+            else [%expr Shape.Permute ([%e logic], [])]
           in
           let _, un_op = Hashtbl.find_exn unary_ops unop_ident loc in
           process_raw_unop ~accu_op ~lhs ~un_op ~rhs ~logic

lib/ppx_op.ml

Lines changed: 24 additions & 0 deletions
@@ -22,6 +22,7 @@ let operators =
     ("!..", "number_int");
     ("!%", "bits");
     ("!@", "embed_symbol");
+    ("dim", "embed_dim");
     ("-", "sub");
     ("~-", "num_neg");
     ("/.", "pointdiv");
@@ -212,6 +213,29 @@ let rec translate ~num_configs ~is_toplevel ~opt_label ?label expr =
       let vbs1, e1 = loop expr1 in
       let spec = substitute_identifiers_in_einsum_spec ~loc spec_str in
       (vbs1, [%expr einsum1 ?label:[%e opt_expr ~loc label] [%e spec] [%e e1]])
+  | [%expr
+      [%e? expr1]
+        *+ [%e? { pexp_desc = Pexp_constant (Pconst_string (spec_str, _, _)); _ }]
+             [%e? { pexp_desc = Pexp_construct ({ txt = Lident "::"; _ }, _); _ }]
+             [%e? expr2]]
+    when String.contains spec_str '>' ->
+      (* FIXME: introduce inline definitions for new Indexing.variable_ref objects corresponding to
+         the strings in the list, and pass them as ~capture_dims *)
+      let vbs1, e1 = loop expr1 in
+      let vbs2, e2 = loop expr2 in
+      let spec = substitute_identifiers_in_einsum_spec ~loc spec_str in
+      ( reduce_vbss [ vbs1; vbs2 ],
+        [%expr einsum ?label:[%e opt_expr ~loc label] [%e spec] [%e e1] [%e e2]] )
+  | [%expr
+      [%e? expr1]
+        ++ [%e? { pexp_desc = Pexp_constant (Pconst_string (spec_str, _, _)); _ }]
+             [%e? { pexp_desc = Pexp_construct ({ txt = Lident "::"; _ }, _); _ }]]
+    when String.contains spec_str '>' ->
+      (* FIXME: introduce inline definitions for new Indexing.variable_ref objects corresponding to
+         the strings in the list, and pass them as ~capture_dims *)
+      let vbs1, e1 = loop expr1 in
+      let spec = substitute_identifiers_in_einsum_spec ~loc spec_str in
+      (vbs1, [%expr einsum1 ?label:[%e opt_expr ~loc label] [%e spec] [%e e1]])
   | { pexp_desc = Pexp_record ([], _); _ } ->
       (* Empty record - not a tensor definition *)
       (no_vbs, expr)
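
To make the FIXME concrete, here is a purely hypothetical sketch (names and module path illustrative, and not what this commit generates yet) of the intended rewrite for `x *+ "ab;bc=>ac" [ "b" ] y`: bind one `Indexing.variable_ref` per listed label, collect the bindings like inline parameter definitions, and pass the refs through `~capture_dims`.

```ocaml
(* Hypothetical expansion sketch; the real ppx would hoist the binding into
   the collected value bindings rather than a local let. *)
let b = { Ir.Indexing.ref_label = "b"; solved_dim = None } in
einsum ?label:None ~capture_dims:[ b ] "ab;bc=>ac" x y
```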

lib/shape.ml

Lines changed: 13 additions & 7 deletions
@@ -27,7 +27,9 @@ module AxisKey = struct
 
   type t = {
     in_axes : kind;
-    pos : int;  (** Indices start at [1], counted from the end if [from_end] is true. *)
+    pos : int;
+        (** Indices start at [1] (note this is axis index, dimension indices are always 0-based),
+            counted from the end if [from_end] is true. *)
     from_end : bool;
         (** Axes are indexed from the front (rarely) or from the end (typically), to avoid
             reindexing when broadcasting. *)
@@ -90,15 +92,16 @@ let row_of_kind = function `Batch -> batch | `Input -> input | `Output -> output
 type deduce_within_shape = Not_constrained | Input_equals_output
 [@@deriving compare, sexp, variants]
 
-type compose_type = Pointwise_bin | Compose | Einsum of string [@@deriving sexp, equal]
+type compose_type = Pointwise_bin | Compose | Einsum of string * Idx.variable_ref list
+[@@deriving sexp_of, equal]
 
 type transpose_type =
   | Transpose
   | Pointwise_un
-  | Permute of string
+  | Permute of string * Idx.variable_ref list
   | Batch_slice of Idx.static_symbol
   | Uint4x32_to_prec of Ir.Ops.prec Lazy.t
-[@@deriving equal, sexp]
+[@@deriving equal, sexp_of]
 
 type terminal_type = Data of Ir.Assignments.init_data | Fetch of Ir.Assignments.fetch_op
 [@@deriving equal, sexp_of]
@@ -260,7 +263,7 @@ let logic_to_spec = function
   | Broadcast_tern (Pointwise_tern, _, _, _) ->
       "."
   | Broadcast (Compose, _, _) | Broadcast_tern (Compose_accumulate, _, _, _) -> "@"
-  | Broadcast (Einsum spec, _, _) | Transpose (Permute spec, _) -> spec
+  | Broadcast (Einsum (spec, _), _, _) | Transpose (Permute (spec, _), _) -> spec
   | Transpose (Transpose, _) -> "T"
   | Transpose (Batch_slice _, _) -> "@|"
   | Transpose (Uint4x32_to_prec _, _) -> "U4x32"
@@ -470,6 +473,7 @@ let%debug4_sexp get_inequalities ({ shape = cur_sh; logic; id = _ } as _upd : up
           :: mark_terminal () )
       else (Row.dim_map_empty, mark_terminal ())
   | Terminal (Fetch (Embed_symbol _)) -> (Row.dim_map_empty, mark_terminal ())
+  | Terminal (Fetch (Embed_dim _)) -> (Row.dim_map_empty, mark_terminal ())
   | Terminal (Fetch Embed_self_id) -> (Row.dim_map_empty, mark_terminal ())
   | Transpose (Transpose, sh) ->
       ( Row.dim_map_empty,
@@ -560,7 +564,8 @@ let%debug4_sexp get_inequalities ({ shape = cur_sh; logic; id = _ } as _upd : up
           Row_eq { r1 = cur_sh.input; r2 = sh.input };
           Row_eq { r1 = cur_sh.output; r2 = sh.output };
         ] )
-  | Transpose (Permute spec, sh) ->
+  | Transpose (Permute (spec, _dim_refs), sh) ->
+      (* FIXME: support dim_refs *)
      let ls_rhs, ls_lhs =
        match einsum_of_spec spec with
        | ls_rhs, None, ls_lhs -> (ls_rhs, ls_lhs)
@@ -610,7 +615,8 @@ let%debug4_sexp get_inequalities ({ shape = cur_sh; logic; id = _ } as _upd : up
            { numerator = Row.Strided_var { coeff; var; denom = 1 }; divided_by = [] };
          };
        ] )
-  | Broadcast (Einsum spec, sh1, sh2) ->
+  | Broadcast (Einsum (spec, _dim_refs), sh1, sh2) ->
+      (* FIXME: support dim_refs *)
      let ls_rhs1, ls_rhs2, ls_lhs =
        match einsum_of_spec spec with
        | ls_rhs1, Some ls_rhs2, ls_lhs -> (ls_rhs1, ls_rhs2, ls_lhs)
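
A standalone sketch (plain OCaml, illustrative helper name) of the capture step that the two "FIXME: support dim_refs" markers point at, following the rule documented in lib/syntax_extensions.md further below: a dimension variable captures its solved dimension, and a row variable captures the product of the dimensions it resolved into.

```ocaml
(* Mirrors Idx.variable_ref; [capture] is a stand-in for the inference hook. *)
type variable_ref = { ref_label : string; mutable solved_dim : int option }

let capture (r : variable_ref) ~(solved : int list) : unit =
  r.solved_dim <- Some (List.fold_left ( * ) 1 solved)

let () =
  (* A dim variable resolves to a single dimension... *)
  let b = { ref_label = "b"; solved_dim = None } in
  capture b ~solved:[ 64 ];
  assert (b.solved_dim = Some 64);
  (* ...while a row variable captures the product of its dimensions. *)
  let v = { ref_label = "v"; solved_dim = None } in
  capture v ~solved:[ 2; 3; 4 ];
  assert (v.solved_dim = Some 24)
```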

lib/shape.mli

Lines changed: 12 additions & 5 deletions
@@ -87,34 +87,41 @@ type compose_type =
       (** Compose the outputs of the second shape with the inputs of the first shape, i.e. the shape
           of [fun x -> s1(s2(x))], or [s1 * s2] where [*] is the inner product (e.g. matrix
           multiply). *)
-  | Einsum of string
+  | Einsum of string * Ir.Indexing.variable_ref list
       (** The binary "einsum" syntax: RHS1;RHS2=>LHS, where RHSi, LHS are labels specifications.
           Since OCANNL's extended einsum notation supports both axis variables and row variables, it
           makes other compose types redundant. The [axis_labels] use pseudo-labels local to the
           notation, to line up the axes and row variables. The symmetric difference / disjunctive
           union of RHS1 and RHS2's pseudo-labels should be equal to LHS pseudo-labels.
 
+          The optional {!Ir.Indexing.variable_ref}s will capture the solutions of the dimensions
+          corresponding to the specification labels equal to [ref_label] of a reference.
+
           Note: The "right-hand-side" is on the left! I.e. the syntax is "rhs=>lhs",
           "rhs1;rhs2=>lhs". *)
-[@@deriving sexp, equal]
+[@@deriving sexp_of, equal]
 
 type transpose_type =
   | Transpose  (** Swaps inputs and outputs of a shape, preserves batch axes. *)
   | Pointwise_un  (** Preserves the shape. *)
-  | Permute of string  (** The unary "einsum" syntax: RHS1=>LHS. *)
+  | Permute of string * Ir.Indexing.variable_ref list
+      (** The unary "einsum" syntax: RHS1=>LHS.
+
+          The optional {!Ir.Indexing.variable_ref}s will capture the solutions of the dimensions
+          corresponding to the specification labels equal to [ref_label] of a reference. *)
   | Batch_slice of Ir.Indexing.static_symbol  (** Removes the leftmost batch axis. *)
   | Uint4x32_to_prec of Ir.Ops.prec Lazy.t
       (** Converts precision in a bit-effient way, with a corresponding conversion in total number
          of elements. Currently, assumes the incoming tensor (RHS) has just a single axis to not
          force unnecessary minimum sizes on output axes. *)
-[@@deriving equal, sexp]
+[@@deriving equal, sexp_of]
 
 (** If you miss expressivity here, leave a note on
     {{:https://github.com/ahrefs/ocannl/issues/305}issue 305}. *)
 type ternary_type =
   | Pointwise_tern  (** As in the operation [Where]. *)
   | Compose_accumulate  (** As in the operation [FMA]. *)
-[@@deriving equal, sexp]
+[@@deriving equal, sexp_of]
 
 (** Extracts any available shape information from the initialization or fetch. *)
 type terminal_type = Data of Ir.Assignments.init_data | Fetch of Ir.Assignments.fetch_op

lib/shape_inference.md

Lines changed: 10 additions & 2 deletions
@@ -104,20 +104,28 @@
       (** Compose the outputs of the second shape with the inputs of the first shape, i.e. the shape
           of [fun x -> s1(s2(x))], or [s1 * s2] where [*] is the inner product (e.g. matrix
           multiply). *)
-  | Einsum of string
+  | Einsum of string * Ir.Indexing.variable_ref list
       (** The binary "einsum" syntax: RHS1;RHS2=>LHS, where RHSi, LHS are labels specifications.
           Since OCANNL's extended einsum notation supports both axis variables and row variables, it
           makes other compose types redundant. The [axis_labels] use pseudo-labels local to the
           notation, to line up the axes and row variables. The symmetric difference / disjunctive
           union of RHS1 and RHS2's pseudo-labels should be equal to LHS pseudo-labels.
 
+          The optional {!Ir.Indexing.variable_ref}s will capture the solutions of the dimensions
+          corresponding to the specification labels equal to [ref_label] of a reference.
+
           Note: The "right-hand-side" is on the left! I.e. the syntax is "rhs=>lhs",
           "rhs1;rhs2=>lhs". *)
+[@@deriving sexp, equal]
 
 type transpose_type =
   | Transpose  (** Swaps inputs and outputs of a shape, preserves batch axes. *)
   | Pointwise_un  (** Preserves the shape. *)
-  | Permute of string  (** The unary "einsum" syntax: RHS1=>LHS. *)
+  | Permute of string * Ir.Indexing.variable_ref list
+      (** The unary "einsum" syntax: RHS1=>LHS.
+
+          The optional {!Ir.Indexing.variable_ref}s will capture the solutions of the dimensions
+          corresponding to the specification labels equal to [ref_label] of a reference. *)
   | Batch_slice of Ir.Indexing.static_symbol  (** Removes the leftmost batch axis. *)
   | Uint4x32_to_prec of Ir.Ops.prec Lazy.t
       (** Converts precision in a bit-effient way, with a corresponding conversion in total number

lib/syntax_extensions.md

Lines changed: 4 additions & 0 deletions
@@ -383,6 +383,10 @@ Examples:
 - `..v..|...ijk => ..v..kji`: reverse the three rightmost output axes, reduce any other output axes, pointwise for batch axes, pairing the batch axes with the leftmost output axes of the result. Fails if the argument has input axes.
 - `2..v..|... => ..v..`: slice the tensor at dimension 2 of the leftmost batch axis, reduce all its output axes, preserve its other batch axes as output axes. Fails if the argument has input axes.
 
+### Capturing the dimensions of selected axes for further computation
+
+The syntaxes `*+` and `++` accept an optional list of strings argument after the specification string. When passed, the strings should be some of the identifiers used in the specification. Both dimension variable and row variable labels are supported. This will introduce bindings for `Indexing.variable_ref` objects at the same point as the inline parameter definition bindings, and will pass these objects with the `~capture_dims` argument to `einsum` resp. `einsum1`. The bound objects can later be used with `Operation.embed_dim` or its alias `Operation.TDSL.O.dim` to embed the solved dimension of the corresponding variable (as a number) into a tensor expression. For a row variable, the number will be the product of the dimensions it resolved into.
+
 ## Further features of the syntax extension %cd
 
 ### Referencing arrays: tensor value, tensor gradient, merge buffer of a tensor node
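
A hedged illustration of the surface syntax documented above (the %op side is still marked FIXME in lib/ppx_op.ml in this commit, so this shows the documented intent rather than working code; the spec, tensor names, and the assumption that the capture is bound under the label's name are illustrative):

```ocaml
(* Pass the capture list right after the specification string. *)
let%op c = x *+ "ab;bc=>ac" [ "b" ] y

(* The captured reference can then be embedded as a scalar tensor via the
   [dim] alias (Operation.TDSL.O.dim), e.g. to reuse the contracted size. *)
let%op b_size = dim b
```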

lib/tensor.ml

Lines changed: 1 addition & 1 deletion
@@ -482,7 +482,7 @@ let%track7_sexp term ?init_data ?fetch_op ?grad_spec ?(label = []) ?(top_down_pr
     match fetch_op with
     | None -> Asgns.empty_comp
     | Some
-        (( Constant _ | Constant_bits _ | Slice _ | Embed_symbol _ | Embed_self_id
+        (( Constant _ | Constant_bits _ | Slice _ | Embed_symbol _ | Embed_dim _ | Embed_self_id
          | Range_over_offsets | Constant_fill _ ) as fetch_op) ->
         Asgns.to_comp @@ Fetch { array = v; fetch_op; dims }
   in
