
Commit 1fdbc4b

Be lenient about pre-filling params with values but special-case filling with a single value
The multiple-values option was intended to add a shape constraint, but apparently it is leaky.
1 parent 02fabcd commit 1fdbc4b

File tree (5 files changed, +17 −6 lines):

- arrayjit/lib/assignments.ml
- bin/hello_world_op.ml
- lib/tensor.ml
- lib/tensor.mli
- test/hello_world_op.ml


arrayjit/lib/assignments.ml

Lines changed: 3 additions & 1 deletion
@@ -266,8 +266,10 @@ let%diagn2_sexp to_low_level code =
         let offset = Indexing.reflect_projection ~dims ~projection:idcs in
         set array idcs @@ Embed_index offset)
     | Fetch { array; fetch_op = Constant_fill values; dims = (lazy dims) } ->
+        (* TODO: consider failing here and strengthening shape inference. *)
+        let size = Array.length values in
         Low_level.unroll_dims dims ~body:(fun idcs ~offset ->
-            set array idcs @@ Constant values.(offset))
+            set array idcs @@ Constant values.(offset % size))
   in
   loop code
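Aside (not part of the commit): the new [offset % size] indexing wraps around the [values] array, so a short array fills a larger tensor by cycling. A minimal standalone sketch in plain OCaml, with stdlib [mod] standing in for the [%] operator used above and a hypothetical 7-cell buffer:

(* Standalone sketch, independent of arrayjit: fill a 7-cell buffer from a
   3-element values array the way the new indexing does; values repeat cyclically. *)
let () =
  let values = [| 1.; 2.; 3. |] in
  let size = Array.length values in
  let filled = Array.init 7 (fun offset -> values.(offset mod size)) in
  Array.iter (Printf.printf "%g ") filled;
  print_newline ()
  (* prints: 1 2 3 1 2 3 1 *)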

bin/hello_world_op.ml

Lines changed: 1 addition & 1 deletion
@@ -189,7 +189,7 @@ let%track2_sexp _Big_matrix (() : unit) : unit =
   let ctx = Backend.make_context stream in
   Rand.init 0;
   (* Hey is inferred to be a matrix. *)
-  let hey = Tensor.param ~values:[| 0.5 |] "hey" in
+  let hey = Tensor.param ~value:0.5 "hey" in
   let zero_to_twenty = TDSL.range 20 in
   let%op yd = (hey * zero_to_twenty) + zero_to_twenty in
   Train.forward_and_forget backend ctx yd;

lib/tensor.ml

Lines changed: 6 additions & 2 deletions
@@ -409,10 +409,14 @@ let ndarray ?(label = []) ?(grad_spec = Prohibit_grad) ?batch_dims ?input_dims ?
       Tn.update_prec ~only_if:is_up_to_fp16 t.value single);
   t

-let param ?(more_label = []) ?input_dims ?output_dims ?input_axes ?output_axes ?deduced ?values
+let param ?(more_label = []) ?input_dims ?output_dims ?input_axes ?output_axes ?deduced ?value ?values
     label =
   let fetch_op_fn ~v:_ =
-    match values with Some values -> Asgns.Constant_fill values | None -> Asgns.Range_over_offsets
+    match values, value with
+    | Some values, None -> Asgns.Constant_fill values
+    | None, Some value -> Asgns.Constant value
+    | None, None -> Asgns.Range_over_offsets
+    | Some _, Some _ -> invalid_arg "Tensor.param: both values and value are set"
   in
   let t =
     term ~label:(label :: more_label) ~grad_spec:Require_grad ~batch_dims:[] ?input_dims
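Aside (not part of the commit): the four-way dispatch above can be exercised in isolation. A minimal sketch with a placeholder [fetch_op] type standing in for [Asgns.fetch_op]; the helper name [choose_fetch_op] is illustrative, not part of the library:

(* Placeholder variant mirroring the fetch ops used above. *)
type fetch_op = Constant_fill of float array | Constant of float | Range_over_offsets

(* Same dispatch as the new [fetch_op_fn] body. *)
let choose_fetch_op ?value ?values () =
  match values, value with
  | Some values, None -> Constant_fill values
  | None, Some value -> Constant value
  | None, None -> Range_over_offsets
  | Some _, Some _ -> invalid_arg "Tensor.param: both values and value are set"

let () =
  assert (choose_fetch_op ~value:0.5 () = Constant 0.5);
  assert (choose_fetch_op () = Range_over_offsets);
  (* Passing both is rejected with Invalid_argument. *)
  assert (
    try ignore (choose_fetch_op ~value:0.5 ~values:[| 1. |] ()); false
    with Invalid_argument _ -> true)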

lib/tensor.mli

Lines changed: 5 additions & 1 deletion
@@ -214,12 +214,16 @@ val param :
   ?input_axes:(string * int) list ->
   ?output_axes:(string * int) list ->
   ?deduced:Shape.deduce_within_shape ->
+  ?value:float ->
   ?values:float array ->
   string ->
   t
 (* A tensor with no batch axes; input and output axes are by default inferred. [grad_spec] is set to
    [Require_grad]. The resulting tensor's label is the passed string, appended by [more_label] if
-   any. *)
+   any. If [value] is provided, the tensor is initialized to the given value. If [values] is
+   provided, the tensor is initialized to the given values. At most one of [value] or [values] can
+   be provided. Note: [values] will be looped over if necessary, but shape inference will try
+   incorporating the number of values as tensor size. *)

 val consume_forward_code : t -> comp
 (** A forward root is a tensor that is not (currently) used to compute another tensor.
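Aside (not part of the commit): combining the two sketches above, a standalone plain-OCaml approximation of the fill behavior a caller would observe for a hypothetical 5-cell parameter. Treating [Range_over_offsets] as filling each cell with its own offset is an assumption based on the constructor name; [fill_cells] is an illustrative helper, not library API:

(* Approximate the three documented initialization modes for a flat buffer. *)
let fill_cells ~size ?value ?values () =
  match values, value with
  | Some vs, None -> Array.init size (fun i -> vs.(i mod Array.length vs))  (* cycled *)
  | None, Some v -> Array.make size v                                        (* single constant *)
  | None, None -> Array.init size float_of_int       (* assumed Range_over_offsets semantics *)
  | Some _, Some _ -> invalid_arg "fill_cells: both values and value are set"

let () =
  assert (fill_cells ~size:5 ~value:0.5 () = [| 0.5; 0.5; 0.5; 0.5; 0.5 |]);
  assert (fill_cells ~size:5 ~values:[| 1.; 2. |] () = [| 1.; 2.; 1.; 2.; 1. |]);
  assert (fill_cells ~size:5 () = [| 0.; 1.; 2.; 3.; 4. |])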

test/hello_world_op.ml

Lines changed: 2 additions & 1 deletion
@@ -104,6 +104,7 @@ let%expect_test "Print constant tensor" =

   let%op hey = [ (1, 2, 3); (4, 5, 6) ] in
   Train.forward_and_forget backend ctx hey;
+  (* ignore (failwith @@ Tn.debug_memory_mode hey.value.memory_mode); *)
   Tensor.print ~with_code:false ~with_grad:false `Inline @@ hey;
   [%expect
     {|
@@ -509,7 +510,7 @@ let%expect_test "Big matrix" =
   let ctx = Backend.make_context stream in
   Rand.init 0;
   (* Hey is inferred to be a matrix. *)
-  let hey = Tensor.param ~values:[| 0.5 |] "hey" in
+  let hey = Tensor.param ~value:0.5 "hey" in
   let zero_to_twenty = TDSL.range 20 in
   let y = TDSL.O.((hey * zero_to_twenty) + zero_to_twenty) in
   Train.forward_and_forget backend ctx y;
