a TODO file

lukstafi · lukstafi · commit 9dd686b6c310 · 2024-11-22T21:26:49.000+01:00
diff --git a/bin/moons_benchmark.ml b/bin/moons_benchmark.ml
@@ -47,9 +47,9 @@ let classify_moons ~seed ~on_device ~inlining_cutoff ~num_streams ~batch_size ~b
   let flat_len = data_len / 2 in
   (* Note: [minibatch_size = batch_size / num_streams] is the actual per-device batch used. *)
   (* let epochs = 200 in *)
-  (* let epochs = 100 in *)
+  let epochs = 100 in
   (* TINY for debugging: *)
-  let epochs = 2 in
+  (* let epochs = 2 in *)
   (* let epochs = 1 in *)
   (* let init_lr = 0.1 in *)
   let init_lr = 0.01 in
@@ -214,8 +214,13 @@ let _mem_benchmarks =
         ~f:(fun batch_size ->
           List.concat_map [ 0; (* 1; 2; *) 3 ] ~f:(fun inlining_cutoff ->
               List.concat_map [ (* 1; 3; *) 7 (* *) ] ~f:(fun seed ->
-                  List.concat_map [ (* "gccjit" ; *) "cc"; "cuda" ] ~f:(fun backend_name ->
-                      List.concat_map [ (* CDSL.double; *) CDSL.single; CDSL.half ]
+                  List.concat_map
+                    [
+                      (* "gccjit" ; *)
+                      (* "cc"; *)
+                      "cuda";
+                    ] ~f:(fun backend_name ->
+                      List.concat_map [ (* CDSL.double; *) CDSL.single (* ; CDSL.half *) ]
                         ~f:(fun value_prec ->
                           [
                             classify_moons ~seed ~on_device:true ~inlining_cutoff ~num_streams
diff --git a/todo.md b/todo.md
@@ -0,0 +1,5 @@
+# This file tracks tasks of finer granularity than issues, typically immediate tasks.
+(B) bin/moons_benchmark with the cc backend crashes with a half-precision overflow
+(B) remove syncing from the data-parallel algorithm: stream-to-stream syncing is now automatic
+(A) cuda backend crashes in bin/moons_benchmark {cm:2024-11-22}
+(B) figure out why cuda backend parallelism slows down in later epochs