Fix buffer allocation for zero-dimensional (scalar) arrays in the Metal and CUDA backends

lukstafi · lukstafi · commit 2eb8cc109d9f · 2025-08-03T13:48:50.000+02:00
diff --git a/arrayjit/lib/cuda_backend.ml b/arrayjit/lib/cuda_backend.ml
@@ -69,9 +69,7 @@ module Alloc_buffer = struct
         { ptr = Cu.Deviceptr.mem_alloc ~size_in_bytes; size_in_bytes }
 
   let alloc_zero_init_array prec ~dims stream =
-    let size_in_bytes =
-      (if Array.length dims = 0 then 0 else Array.reduce_exn dims ~f:( * )) * Ops.prec_in_bytes prec
-    in
+    let size_in_bytes = Array.fold dims ~init:1 ~f:( * ) * Ops.prec_in_bytes prec in
     set_ctx stream.device.dev.primary_context;
     let ptr = Cu.Deviceptr.mem_alloc ~size_in_bytes in
     (* TODO: consider using memset_d8 to zero-initialize the memory. *)
@@ -588,13 +586,17 @@ end) : Ir.Backend_impl.Lowered_backend = struct
       | Cmpeq, _ -> f "=="
       | Or, _ -> f "||"
       | And, _ -> f "&&"
-      | Threefry4x32, _ ->
+      | Threefry4x32, _ -> (
           (* Threefry4x32 must output to uint4x32 precision *)
-          (match prec with
+          match prec with
           | Ops.Uint4x32_prec _ -> func "arrayjit_threefry4x32"
-          | _ -> raise @@ Utils.User_error 
-              (Printf.sprintf "CUDA backend: Threefry4x32 requires target precision to be uint4x32, but got %s"
-                 (Ops.prec_string prec)))
+          | _ ->
+              raise
+              @@ Utils.User_error
+                   (Printf.sprintf
+                      "CUDA backend: Threefry4x32 requires target precision to be uint4x32, but \
+                       got %s"
+                      (Ops.prec_string prec)))
 
     let unop_syntax prec v =
       let open PPrint in
diff --git a/arrayjit/lib/metal_backend.ml b/arrayjit/lib/metal_backend.ml
@@ -80,10 +80,8 @@ module Alloc_buffer = struct
         track_allocation new_buffer_obj;
         { ptr = new_buffer_obj; size_in_bytes }
 
-  let alloc_zero_init_array prec ~dims (stream : stream) =
-    let size_in_bytes =
-      (if Array.length dims = 0 then 0 else Array.reduce_exn dims ~f:( * )) * Ops.prec_in_bytes prec
-    in
+  let%track7_sexp alloc_zero_init_array (prec : Ops.prec) ~(dims : int array) (stream : stream) =
+    let size_in_bytes = Array.fold dims ~init:1 ~f:( * ) * Ops.prec_in_bytes prec in
     let device = stream.device.dev in
     let buffer = Me.Buffer.on_device device ~length:size_in_bytes resource_options in
     track_allocation buffer;
@@ -448,13 +446,15 @@ end) : Ir.Backend_impl.Lowered_backend = struct
       | Ops.Bfloat16_prec _ -> "bfloat" (* Metal supports bfloat16 natively *)
       | Ops.Fp8_prec _ -> invalid_arg "Metal backend does not support FP8 precision"
       | Ops.Single_prec _ -> "float"
-      | Ops.Double_prec _ -> raise @@ Utils.User_error "Metal backend does not support double precision"
+      | Ops.Double_prec _ ->
+          raise @@ Utils.User_error "Metal backend does not support double precision"
       | Ops.Void_prec -> "void"
 
     let vec_typ_of_prec ~length prec =
       match (prec, length) with
       | Ops.Single_prec _, 4 -> "float4_t"
-      | Ops.Double_prec _, 2 -> raise @@ Utils.User_error "Metal backend does not support double precision"
+      | Ops.Double_prec _, 2 ->
+          raise @@ Utils.User_error "Metal backend does not support double precision"
       | Ops.Int32_prec _, 4 -> "int32x4_t"
       | (Ops.Byte_prec _ | Ops.Fp8_prec _), 16 -> "int8x16_t"
       | (Ops.Uint16_prec _ | Ops.Bfloat16_prec _), 8 -> "uint16x8_t"
@@ -472,7 +472,8 @@ end) : Ir.Backend_impl.Lowered_backend = struct
       | Ops.Bfloat16_prec _ -> "bf" (* TODO: Verify actual Metal suffix for bfloat16 *)
       | Ops.Fp8_prec _ -> invalid_arg "Metal backend does not support FP8 precision"
       | Ops.Single_prec _ -> "f"
-      | Ops.Double_prec _ -> raise @@ Utils.User_error "Metal backend does not support double precision"
+      | Ops.Double_prec _ ->
+          raise @@ Utils.User_error "Metal backend does not support double precision"
       | Ops.Void_prec -> ""
 
     let ternop_syntax _prec op =
@@ -532,13 +533,17 @@ end) : Ir.Backend_impl.Lowered_backend = struct
                  ^^ space ^^ string "?" ^^ space ^^ v2 ^^ space ^^ string ":" ^^ space
                  ^^ string ("0.0" ^ s)))
       | ToPowOf, _ -> func "pow"
-      | Threefry4x32, _ ->
+      | Threefry4x32, _ -> (
           (* Threefry4x32 must output to uint4x32 precision *)
-          (match prec with
+          match prec with
           | Ops.Uint4x32_prec _ -> func "arrayjit_threefry4x32"
-          | _ -> raise @@ Utils.User_error 
-              (Printf.sprintf "Metal backend: Threefry4x32 requires target precision to be uint4x32, but got %s"
-                 (Ops.prec_string prec)))
+          | _ ->
+              raise
+              @@ Utils.User_error
+                   (Printf.sprintf
+                      "Metal backend: Threefry4x32 requires target precision to be uint4x32, but \
+                       got %s"
+                      (Ops.prec_string prec)))
       | Arg1, _ | Arg2, _ -> invalid_arg "Metal C_syntax_config: Arg1/Arg2 not operators"
 
     let unop_syntax prec op =
@@ -555,7 +560,8 @@ end) : Ir.Backend_impl.Lowered_backend = struct
       | Sqrt, _ -> func_doc "sqrt"
       | Relu, Ops.Half_prec _ -> fun v -> func_doc "max" (separate comma_sep [ string "0.0h"; v ])
       | Relu, Ops.Single_prec _ -> fun v -> func_doc "max" (separate comma_sep [ string "0.0f"; v ])
-      | Relu, Ops.Double_prec _ -> raise @@ Utils.User_error "Metal backend does not support double precision"
+      | Relu, Ops.Double_prec _ ->
+          raise @@ Utils.User_error "Metal backend does not support double precision"
       | Relu, _ (* Byte_prec, Void_prec *) ->
           fun v -> func_doc "max" (separate comma_sep [ string "0"; v ])
       | Satur01, p ->
diff --git a/test/einsum/moons_demo_variant.expected b/test/einsum/moons_demo_variant.expected
@@ -94,7 +94,7 @@ n93 sgd_delta_w3 as sgd_delta_w3: Virt/15; single prec 1x16; mem in bytes: <not-
 n94 sgd_momentum_w3 as sgd_momentum_w3: unknown; single prec <not-in-yet>; mem in bytes: <not-in-yet>
 n95 0.0001 as n95: Virt/40; single prec 1; mem in bytes: <not-in-yet>
 n96 *. as n96: Virt/15; single prec 1x16; mem in bytes: <not-in-yet>
-n97 point as point: Host-const/37; single prec 2; mem in bytes: 8
+n97 point as point: Host-const/37; single prec 2; mem in bytes: <not-in-yet>
 n98 * as n98: Local/1046; single prec 16; mem in bytes: <not-in-yet>
 n99 grad_* as n98.grad: unknown; single prec 16; mem in bytes: <not-in-yet>
 n100 + as n100: Virt/15; single prec 16; mem in bytes: <not-in-yet>
diff --git a/test/operations/micrograd_demo_logging-metal-0-0.log.expected b/test/operations/micrograd_demo_logging-metal-0-0.log.expected
@@ -1,4 +1,12 @@
 float *a &[1] = 0xNNNN
+float *b &[1] = 0xNNNN
+COMMENT: init params for g
+# b[0] := 2;
+b[0]{=MAYBE UNINITIALIZED} = 2000e-3 = (float)(2)
+# a[0] := -4;
+a[0]{=MAYBE UNINITIALIZED} = -4000e-3 = (float)(-4)
+COMMENT: end
+float *a &[1] = 0xNNNN
 float *a_grad &[1] = 0xNNNN
 float *b &[1] = 0xNNNN
 float *b_grad &[1] = 0xNNNN