
Commit f24a1e6

Cleanup to the param interface

1 parent a74ec40

File tree: 6 files changed, +23 -42 lines

bin/hello_world.ml

Lines changed: 1 addition & 1 deletion

@@ -50,7 +50,7 @@ let hello3 () =
   let stream = Backend.(new_stream @@ get_device ~ordinal:0) in
   let ctx = Backend.make_context stream in
   (* Hey is inferred to be a matrix. *)
-  let hey = Tensor.param "hey" in
+  let hey = TDSL.param "hey" in
   let zero_to_twenty = TDSL.range 20 in
   let y = TDSL.O.(( + ) ~label:[ "y" ] (hey * zero_to_twenty) zero_to_twenty) in
   Train.set_hosted hey.value;

lib/operation.ml

Lines changed: 7 additions & 4 deletions

@@ -478,13 +478,16 @@ module TDSL = struct
   let number = Tensor.number ~grad_spec:If_needed
   let ndarray = Tensor.ndarray ~grad_spec:If_needed
 
+  (** The default initialization operation for {!param} calls. *)
+  let default_param_init = ref @@ Tensor.fetch_param_init (Asgns.Constant 0.0)
+
   let param ?value ?values =
     let t =
       match (value, values) with
       | Some _, Some _ -> invalid_arg "TDSL.param: both value and values are set"
       | Some value, None -> Tensor.fetch_param_init (Asgns.Constant value)
       | None, Some values -> Tensor.fetch_param_init (Asgns.Constant_fill values)
-      | None, None -> !Tensor.default_param_init
+      | None, None -> !default_param_init
     in
     Tensor.param ~t
 
@@ -502,15 +505,15 @@ module TDSL = struct
   (** The input and output dimensions will be inferred if omitted. See {!reshape}. *)
   let reshape_param ~l ?i ?o ndarray =
     let t =
-      Tensor.term ~grad_spec:Require_grad ~batch_dims:[] ~batch_axes:[] ~init_data:(Reshape ndarray)
-        ?fetch_op:None
+      Tensor.term ~grad_spec:Require_grad ~batch_dims:[] ?batch_axes:None
+        ~init_data:(Reshape ndarray) ?fetch_op:None
     in
     Tensor.param ?input_dims:i ?output_dims:o ~t l
 
   (** See {!wrap}. *)
   let wrap_param ~l ?i ?o ndarray =
     let t =
-      Tensor.term ~grad_spec:Require_grad ~batch_dims:[] ~batch_axes:[]
+      Tensor.term ~grad_spec:Require_grad ~batch_dims:[] ?batch_axes:None
        ~init_data:(Keep_shape_no_padding ndarray) ?fetch_op:None
     in
     Tensor.param ?input_dims:i ?output_dims:o ~t l
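Taken together, these two hunks move the default initialization for parameters from `Tensor` into `TDSL`, alongside the `?value`/`?values` shorthands. A minimal sketch of the resulting call sites, assuming the value bound by `?values` is a float array (its exact type is not shown in this diff):

```ocaml
(* Falls back to !TDSL.default_param_init, i.e. Asgns.Constant 0.0 unless overridden. *)
let w = TDSL.param "w" in
(* Initializes to the constant 0.1. *)
let b = TDSL.param ~value:0.1 ~output_dims:[ 8 ] "b" in
(* Initializes from the given values via Asgns.Constant_fill. *)
let c = TDSL.param ~values:[| 1.; 2.; 3. |] "c" in
...
```

Because `default_param_init` is a ref, the project-wide default can be swapped in one place, e.g. `TDSL.default_param_init := Tensor.fetch_param_init my_fetch_op` for a hypothetical `my_fetch_op`.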

lib/shape.ml

Lines changed: 2 additions & 4 deletions

@@ -462,10 +462,8 @@ let%debug4_sexp get_inequalities ({ shape = cur_sh; logic; id = _ } as _upd : up
             r = [ cur_sh.batch; cur_sh.output; cur_sh.input ];
             constr =
               Exact
-                (Lazy.force tn.dims
-                |> Array.to_list |> List.tl_exn
-                |> List.map ~f:(fun d -> get_dim ~d ())
-                );
+                (Lazy.force tn.dims |> Array.to_list |> List.tl_exn
+                |> List.map ~f:(fun d -> get_dim ~d ()));
           }
           :: mark_terminal () )
       else (Row.dim_map_empty, mark_terminal ())

lib/syntax_extensions.md

Lines changed: 5 additions & 5 deletions

@@ -149,8 +149,8 @@ let interpret_ternop op v1 v2 v3 =
 
 ```ocaml
 let hid_dim = 8 in
-let w = Tensor.param "w" in
-let b = Tensor.param ~output_dims:[ hid_dim ] "b" in
+let w = TDSL.param "w" in
+let b = TDSL.param ~output_dims:[ hid_dim ] "b" in
 let layer x = TDSL.O.( relu(w * x + b) ) in
 ...
 ```
@@ -159,8 +159,8 @@ Since `TDSL.O` is opened for the scope of an extension point `%op`:
 
 ```ocaml
 let hid_dim = 8 in
-let w = Tensor.param "w" in
-let b = Tensor.param ~output_dims:[ hid_dim ] "b" in
+let w = TDSL.param "w" in
+let b = TDSL.param ~output_dims:[ hid_dim ] "b" in
 let%op layer x = relu(w * x + b) in
 ...
 ```
@@ -413,7 +413,7 @@ If you recall, inline declared param tensors get lifted out of functions except
 
 ```ocaml
 let mlp_layer ~config =
-  let w = Tensor.param "w" and b = Tensor.param ~output_dims:[ config.hid_dim ] in
+  let w = TDSL.param "w" and b = TDSL.param ~output_dims:[ config.hid_dim ] in
   fun x -> TDSL.O.(w * x + b)
 ```
lib/tensor.ml

Lines changed: 3 additions & 10 deletions

@@ -440,18 +440,11 @@ let ndarray ?(label = []) ?(grad_spec = Prohibit_grad) ?batch_dims ?input_dims ?
   t
 
 let fetch_param_init fetch_op =
-  term ~grad_spec:Require_grad ~batch_dims:[] ~batch_axes:[] ?init_data:None ~fetch_op
+  term ~grad_spec:Require_grad ~batch_dims:[] ?batch_axes:None ?init_data:None ~fetch_op
 
-let default_param_init = ref @@ fetch_param_init (Asgns.Constant 0.0)
-
-let param ?(more_label = []) ?input_dims ?output_dims ?input_axes ?output_axes ?deduced ?t label =
+let param ?(more_label = []) ?input_dims ?output_dims ?input_axes ?output_axes ?deduced ~t label =
   let t =
-    match t with
-    | Some t ->
-        t ~label:(label :: more_label) ?input_dims ?output_dims ?input_axes ?output_axes ?deduced ()
-    | None ->
-        !default_param_init ~label:(label :: more_label) ?input_dims ?output_dims ?input_axes
-          ?output_axes ?deduced ()
+    t ~label:(label :: more_label) ?input_dims ?output_dims ?input_axes ?output_axes ?deduced ()
  in
  let v = t.value in
  (* It is convenient to use the param syntax for volatiles (mutable embedded_nodes). *)
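With `?t` promoted to the required `~t`, `Tensor.param` no longer consults a global default; every caller supplies the initializer explicitly. A hedged sketch of a direct call, mirroring what `TDSL.param` now does internally per the hunk above:

```ocaml
(* Build the initialization closure explicitly, then pass it as the
   now-required ~t argument. *)
let t = Tensor.fetch_param_init (Asgns.Constant 0.0) in
let w = Tensor.param ~t "w" in
...
```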

lib/tensor.mli

Lines changed: 5 additions & 18 deletions

@@ -217,18 +217,6 @@ val ndarray :
     given values must fill the tensor's [value] node precisely; otherwise, the values will be looped
     over to populate the [value] node. *)
 
-val default_param_init :
-  (label:string list ->
-  ?input_dims:int list ->
-  ?output_dims:int list ->
-  ?input_axes:(string * int) list ->
-  ?output_axes:(string * int) list ->
-  ?deduced:Shape.deduce_within_shape ->
-  unit ->
-  t)
-  ref
-(** The default initialization operation for {!param} calls that do not pass a [t]. *)
-
 val fetch_param_init :
   fetch_op ->
   label:string list ->
@@ -239,7 +227,7 @@ val fetch_param_init :
   ?deduced:Shape.deduce_within_shape ->
   unit ->
   t
-(** Helper for {!param} wrappers or to set {!default_param_init}. *)
+(** Helper for {!param} wrappers. *)
 
 val param :
   ?more_label:string list ->
@@ -248,7 +236,7 @@ val param :
   ?input_axes:(string * int) list ->
   ?output_axes:(string * int) list ->
   ?deduced:Shape.deduce_within_shape ->
-  ?t:
+  t:
     (label:string list ->
     ?input_dims:int list ->
     ?output_dims:int list ->
@@ -261,10 +249,9 @@ val param :
   t
 (** For proper parameters, [t] should produce a tensor with no batch axes; input and output axes
     should by default be inferred; [grad_spec] should be [Require_grad]. [t]'s label is the passed
-    string, appended by [more_label] if any, other parameters are forwarded to [t]. If [t] is not
-    provided, {!default_param_init} is used. This function returns [t]'s result with the field
-    {!field:params} replaced by a singleton set containing that result, and it also updates the
-    memory modes. *)
+    string, appended by [more_label] if any, other parameters are forwarded to [t]. This function
+    returns [t]'s result with the field {!field:params} replaced by a singleton set containing that
+    result, and it also updates the memory modes. *)
 
 val consume_forward_code : t -> comp
 (** A forward root is a tensor that is not (currently) used to compute another tensor.
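The updated `val param` contract pins down the shape of a custom `t`. A sketch of a handwritten initializer matching that signature, assuming a hypothetical `my_fetch_op : fetch_op`; per the doc comment it must produce a tensor with no batch axes and use `Require_grad`:

```ocaml
(* Delegates to fetch_param_init, which already satisfies both requirements
   and forwards the shape-related arguments unchanged. *)
let my_init ~label ?input_dims ?output_dims ?input_axes ?output_axes ?deduced () =
  Tensor.fetch_param_init my_fetch_op ~label ?input_dims ?output_dims ?input_axes
    ?output_axes ?deduced ()
in
let w = Tensor.param ~t:my_init "w" in
...
```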
