
Commit 29a8d50

Update the moons_demo test and signal shortcoming in consume_forward_code
1 parent 6f9d38d commit 29a8d50

2 files changed: +22 −12 lines changed

lib/tensor.ml

Lines changed: 11 additions & 8 deletions
@@ -205,8 +205,8 @@ let raw_unop ~initialize_neutral ~accum ~(t : t) ~(lhs_is_grad : bool) ~op ~(t1
 type grad_spec = Require_grad | Prohibit_grad | If_needed [@@deriving sexp, equal, variants]
 
 let op ~(label : string list) ?(ternary_op = Shape.Pointwise_tern)
-    ?(compose_op = Shape.Pointwise_bin) ?(transpose_op = Shape.Pointwise_un) ?terminal_op
-    ~op_asn ~grad_asn ?(grad_spec = If_needed) make_shape (orig_ts : t list) : t =
+    ?(compose_op = Shape.Pointwise_bin) ?(transpose_op = Shape.Pointwise_un) ?terminal_op ~op_asn
+    ~grad_asn ?(grad_spec = If_needed) make_shape (orig_ts : t list) : t =
   (* The code needs to be included in the order it was computed due to potential non-tree DAGs. *)
   let ordered_ts = List.dedup_and_sort orig_ts ~compare:(fun t1 t2 -> Int.ascending t1.id t2.id) in
   let id = session_state.next_id in
@@ -250,8 +250,7 @@ let op ~(label : string list) ?(ternary_op = Shape.Pointwise_tern)
     | Some (Shape.Data (Asgns.Padded { data; padding = padding_spec; padded_value })) ->
         let padding = Some (padding_spec, padded_value) in
         Tn.create_from_padded ~id ~label ~ndarray:data ~padding ()
-    | Some (Shape.Fetch _) | None ->
-        Tn.create ~default_prec ~id ~label ~dims ~padding ()
+    | Some (Shape.Fetch _) | None -> Tn.create ~default_prec ~id ~label ~dims ~padding ()
   in
   let embedded_nodes = ref @@ Set.singleton (module Tn) v in
   let children =
@@ -358,7 +357,7 @@ let unop ~label ?transpose_op ~op_asn ~grad_asn ?grad_spec t1 =
 let term ~label ~grad_spec ?batch_dims ?input_dims ?output_dims ?batch_axes ?input_axes ?output_axes
     ?deduced ?init_data ?fetch_op () =
   let terminal_op =
-    match init_data, fetch_op with
+    match (init_data, fetch_op) with
     | Some _, Some _ -> invalid_arg "Tensor.term: both init_data and fetch_op are provided"
     | Some init_data, None -> Some (Shape.Data init_data)
     | None, Some fetch_op -> Some (Shape.Fetch fetch_op)
@@ -369,16 +368,18 @@ let term ~label ~grad_spec ?batch_dims ?input_dims ?output_dims ?batch_axes ?inp
     let dims = lazy (Lazy.force projections).Idx.lhs_dims in
     match fetch_op with
     | None -> Asgns.empty_comp
-    | Some (( Constant _ | Slice _ | Embed_symbol _ | Range_over_offsets | Constant_fill _ ) as fetch_op) ->
+    | Some
+        ((Constant _ | Slice _ | Embed_symbol _ | Range_over_offsets | Constant_fill _) as fetch_op)
+      ->
         Asgns.to_comp @@ Fetch { array = v; fetch_op; dims }
   in
   let grad_asn ~t:_ ~g:_ ~projections:_ = Asgns.empty_comp in
   let make_shape =
     Shape.make ?batch_dims ?input_dims ?output_dims ?batch_axes ?input_axes ?output_axes ?deduced ()
   in
   (* Note: terminal_op is used for both tensor creation and shape inference. *)
-  op ~label ?compose_op:None ?transpose_op:None ?terminal_op ~op_asn ~grad_asn ~grad_spec
-    make_shape []
+  op ~label ?compose_op:None ?transpose_op:None ?terminal_op ~op_asn ~grad_asn ~grad_spec make_shape
+    []
 
 let float_to_label v = Float.to_string v
@@ -467,6 +468,8 @@ let consume_forward_code t =
     @@ Session_error
          ( "Tensor.consume_forward_code: tensor is not a root for tnode: " ^ Tn.debug_name t.value,
            Some t );
+  (* FIXME(#321): this is too aggressive; instead, we should check whether the code contains any
+     non-embedded nodes that are embedded nodes of the other roots. *)
   let unsafe_roots =
     Map.data session_state.forward_roots
     |> List.filter ~f:(fun r -> not (List.is_empty r.children || r.id = t.id))
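
The FIXME above flags that invalidating every other forward root is coarser than necessary. A minimal sketch of the narrower check it proposes, not the committed code: non_embedded and embedded below are hypothetical accessors standing in for whatever exposes a computation's node sets (the diff shows tensors carrying an embedded_nodes set over the Tn module).

(* Sketch only: a root conflicts with the consumed code exactly when a node
   the code reads without embedding is one of the root's embedded nodes.
   [non_embedded] and [embedded] are hypothetical helpers, not library API. *)
let conflicting_roots ~code other_roots =
  let consumed : Set.M(Tn).t = non_embedded code in
  List.filter other_roots ~f:(fun r ->
      not (Set.is_empty (Set.inter consumed (embedded r))))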

test/training/moons_demo.ml

Lines changed: 11 additions & 4 deletions
@@ -42,15 +42,21 @@ let main () =
      computation. *)
   let weight_decay = 0.0001 in
   let%op scalar_loss = (margin_loss ++ "...|... => 0") /. !..batch_size in
+  let init_params = Tensor.init_params scalar_loss in
   let update = Train.grad_update scalar_loss in
+  (* TODO(#321): Define learning_rate above the call to grad_update to test the consume_forward_code
+     fix *)
   let%op learning_rate = 0.1 *. ((2 *. !..steps) - !@step_n) /. !..steps in
+  (* TODO: is set_hosted needed? *)
   Train.set_hosted learning_rate.value;
   let sgd = Train.sgd_update ~learning_rate ~weight_decay scalar_loss in
+  let init_routine = Train.to_routine (module Backend) ctx bindings init_params in
   let sgd_routine =
-    Train.to_routine (module Backend) ctx bindings (Asgns.sequence [ update; sgd ])
+    Train.to_routine (module Backend) init_routine.context bindings (Asgns.sequence [ update; sgd ])
   in
   let step_ref = IDX.find_exn sgd_routine.bindings step_n in
   step_ref := 0;
+  Train.run init_routine;
   for _epoch = 1 to epochs do
     Train.sequential_loop sgd_routine.bindings ~f:(fun () ->
         Train.run sgd_routine;
@@ -65,7 +71,8 @@ let main () =
   let points = Tn.points_2d ~xdim:0 ~ydim:1 moons_flat.value in
   let classes = Tn.points_1d ~xdim:0 moons_classes.value in
   let points1, points2 = Array.partitioni_tf points ~f:Float.(fun i _ -> classes.(i) > 0.) in
-  let%op mlp_result = mlp "point" in
+  (* %cd instead of %op, to avoid complaints about the uninitialized point tensor node. *)
+  let%cd mlp_result = mlp "point" in
   Train.set_on_host mlp_result.value;
   let result_routine =
     Train.to_routine
@@ -114,7 +121,7 @@ let main () =
   Stdio.printf "mlp_result's name: %s\n%!" @@ Tensor.debug_name mlp_result;
   (* Note: mlp_result is not included in the resulting tensor's label, because the identifier label
      does not propagate across function calls. *)
-  (Stdio.printf "(mlp moons_input) name: %s\n%!"
+  Stdio.printf "(mlp moons_input) name: %s\n%!"
   @@ Tensor.debug_name
   @@
   match margin_loss.children with
@@ -126,6 +133,6 @@ let main () =
         };
       ] ->
       subtensor
-  | _ -> assert false)
+  | _ -> assert false
 
 let () = main ()
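
Taken together, the moons_demo additions amount to one pattern: parameter initialization is compiled as its own routine, its resulting context seeds the SGD routine, and it runs once before the training loop. A condensed sketch of that flow, using only calls that already appear in the diff above (the names are the demo's, not new API):

(* Compile initialization separately, thread its context into training,
   and run it exactly once before the epoch loop. *)
let init_params = Tensor.init_params scalar_loss in
let init_routine = Train.to_routine (module Backend) ctx bindings init_params in
let sgd_routine =
  Train.to_routine (module Backend) init_routine.context bindings
    (Asgns.sequence [ update; sgd ])
in
Train.run init_routine
(* ... then Train.run sgd_routine inside the epoch loop, as above ... *)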
