Workaround for Metal logging crashes, with prefer_backend_uniformity to make it compatible for testing

lukstafi · lukstafi · commit 1d270160f4b4 · 2025-05-19T18:31:18.000+02:00
diff --git a/CHANGES.md b/CHANGES.md
@@ -4,7 +4,7 @@
 
 - The Metal framework backend (Apple Silicon).
 - Setting `debug_log_to_stream_files` to neatly keep logs from routine execution in their separate files.
-- Setting `clean_up_artifacts_on_startup`.
+- Settings `clean_up_artifacts_on_startup`, `prefer_backend_uniformity`.
 - Tools directory and the `minised` tool: regexp replacement file rewrite.
 
 ### Changed
diff --git a/arrayjit/lib/c_syntax.ml b/arrayjit/lib/c_syntax.ml
@@ -30,6 +30,12 @@ module type C_syntax_config = sig
   val typ_of_prec : Ops.prec -> string
   val ident_blacklist : string list
 
+  val float_log_style : string
+  (** Format specifier for printing floating point numbers in debug logs. *)
+
+  val styled_log_arg : PPrint.document -> PPrint.document
+  (** Function to convert potentially floating-point numeric values for logging. *)
+
   val ternop_syntax :
     Ops.prec ->
     Ops.ternop ->
@@ -70,6 +76,7 @@ module Pure_C_config (Input : sig
 
   val use_host_memory : (size_in_bytes:int -> unit Ctypes.ptr -> buffer_ptr) option
   val procs : Low_level.optimized array
+  val full_printf_support : bool
 end) =
 struct
   let procs = Input.procs
@@ -86,6 +93,13 @@ struct
   let includes = [ "<stdio.h>"; "<stdlib.h>"; "<string.h>"; "<math.h>" ]
   let extra_declarations = []
   let typ_of_prec = Ops.c_typ_of_prec
+  let float_log_style = if Input.full_printf_support then "%g" else "%de-3"
+
+  let styled_log_arg doc =
+    if Input.full_printf_support then doc
+    else
+      let open PPrint in
+      string "(int)(" ^^ doc ^^ string " * 1000.0)"
 
   let ident_blacklist =
     let remove_paren s = String.substr_replace_all s ~pattern:"(" ~with_:"" in
@@ -303,16 +317,19 @@ module C_syntax (B : C_syntax_config) = struct
           let pp_args_docs =
             List.map debug_args_docs ~f:(function
               | `Accessor idx -> pp_array_offset idx
-              | `Value v_doc -> v_doc)
+              | `Value v_doc -> B.styled_log_arg v_doc)
           in
           let log_args_for_printf =
-            offset_doc :: (ident_doc ^^ brackets offset_doc) :: new_var :: pp_args_docs
+            offset_doc
+            :: B.styled_log_arg (ident_doc ^^ brackets offset_doc)
+            :: B.styled_log_arg new_var :: pp_args_docs
           in
           let log_doc =
             let log_param_doc = Option.map B.kernel_log_param ~f:(fun (_, name) -> string name) in
             let comment_base_msg = Printf.sprintf "# %s\n" debug in
             let value_base_msg =
-              Printf.sprintf "%s[%%u]{=%%g} = %%g = %s\n" (get_ident tn) debug_val_str
+              Printf.sprintf "%s[%%u]{=%s} = %s = %s\n" (get_ident tn) B.float_log_style
+                B.float_log_style debug_val_str
             in
             let comment_log =
               B.pp_log_statement ~log_param_c_expr_doc:log_param_doc
@@ -443,7 +460,7 @@ module C_syntax (B : C_syntax_config) = struct
         let scope_prec = Lazy.force id.tn.prec in
         let prefix, postfix = B.convert_precision ~from:scope_prec ~to_:prec in
         let v_doc = string prefix ^^ string ("v" ^ Int.to_string id.scope_id) ^^ string postfix in
-        (v_doc ^^ braces (string "=%g"), [ `Value v_doc ])
+        (v_doc ^^ braces (string ("=" ^ B.float_log_style)), [ `Value v_doc ])
     | Get_global (Ops.Merge_buffer { source_node_id }, Some idcs) ->
         let tn = Option.value_exn ~here:[%here] @@ Tn.find ~id:source_node_id in
         let from_prec = Lazy.force tn.prec in
@@ -458,7 +475,7 @@ module C_syntax (B : C_syntax_config) = struct
             (string prefix ^^ string "merge_buffer"
             ^^ brackets (string "%u")
             ^^ string postfix
-            ^^ braces (string "=%g"))
+            ^^ braces (string ("=" ^ B.float_log_style)))
         in
         (expr_doc, [ `Accessor (idcs, dims); `Value access_doc ])
     | Get_global _ -> failwith "Exec_as_cuda: Get_global / FFI NOT IMPLEMENTED YET"
@@ -476,7 +493,7 @@ module C_syntax (B : C_syntax_config) = struct
             (string prefix ^^ ident_doc
             ^^ brackets (string "%u")
             ^^ string postfix
-            ^^ braces (string "=%g"))
+            ^^ braces (string ("=" ^ B.float_log_style)))
         in
         (expr_doc, [ `Accessor (idcs, dims); `Value access_doc ])
     | Constant c ->
@@ -566,15 +583,12 @@ module C_syntax (B : C_syntax_config) = struct
       in
       body :=
         !body ^^ string "FILE* log_file = NULL;" ^^ hardline
-        ^^ group (
-             string ("if (" ^ log_file_var_name ^ ")") ^^ space
-             ^^ braces (
-                  nest 2 (
-                    string ("log_file = fopen(" ^ log_file_var_name ^ ", \"w\");")
-                  )
-                )
-           ) ^^ hardline
-      else body := !body ^^ hardline;
+        ^^ group
+             (string ("if (" ^ log_file_var_name ^ ")")
+             ^^ space
+             ^^ braces (nest 2 (string ("log_file = fopen(" ^ log_file_var_name ^ ", \"w\");"))))
+        ^^ hardline
+    else body := !body ^^ hardline;
 
     (if Utils.debug_log_from_routines () then
        let debug_init_doc =
diff --git a/arrayjit/lib/cc_backend.ml b/arrayjit/lib/cc_backend.ml
@@ -89,6 +89,9 @@ let%diagn_sexp compile ~(name : string) bindings (lowered : Low_level.optimized)
 
     let use_host_memory = use_host_memory
     let procs = [| lowered |]
+    let full_printf_support =
+      not @@ Bool.of_string
+      @@ Utils.get_global_arg ~default:"false" ~arg_name:"prefer_backend_uniformity"
   end)) in
   (* FIXME: do we really want all of them, or only the used ones? *)
   let idx_params = Indexing.bound_symbols bindings in
@@ -111,6 +114,10 @@ let%diagn_sexp compile_batch ~names bindings (lowereds : Low_level.optimized opt
 
     let use_host_memory = use_host_memory
     let procs = Array.filter_opt lowereds
+
+    let full_printf_support =
+      not @@ Bool.of_string
+      @@ Utils.get_global_arg ~default:"false" ~arg_name:"prefer_backend_uniformity"
   end)) in
   (* FIXME: do we really want all of them, or only the used ones? *)
   let idx_params = Indexing.bound_symbols bindings in
diff --git a/arrayjit/lib/cuda_backend.ml b/arrayjit/lib/cuda_backend.ml
@@ -264,6 +264,9 @@ end) : Ir.Backend_impl.Lowered_backend = struct
 
       let use_host_memory = None
       let procs = Input.procs
+      let full_printf_support =
+        not @@ Bool.of_string
+        @@ Utils.get_global_arg ~default:"false" ~arg_name:"prefer_backend_uniformity"
     end)
 
     let logs_to_stdout = true
diff --git a/arrayjit/lib/metal_backend.ml b/arrayjit/lib/metal_backend.ml
@@ -417,6 +417,7 @@ end) : Ir.Backend_impl.Lowered_backend = struct
 
       let use_host_memory = use_host_memory
       let procs = Input.procs
+      let full_printf_support = false
     end)
 
     open PPrint (* Open PPrint locally *)
@@ -553,15 +554,7 @@ end) : Ir.Backend_impl.Lowered_backend = struct
         String.substr_replace_all base ~pattern:"\n" ~with_
       in
       let base_doc = dquotes (string base) in
-      if List.length args_docs > 6 then
-        (* Failsafe for "newComputePipelineStateWithFunction:options:reflection:error: failed:
-           Compiler encountered an internal error". We could break up big log statements in
-           C_syntax, but that's too much complexity. *)
-        group
-          (string metal_log_object_name
-          ^^ string ".log_debug(\"Exceeded max of 6 logging args\")"
-          ^^ semi)
-      else if List.is_empty args_docs then
+      if List.is_empty args_docs then
         group (string metal_log_object_name ^^ string ".log_debug(" ^^ base_doc ^^ rparen ^^ semi)
       else
         group
diff --git a/ocannl_config.example b/ocannl_config.example
@@ -159,4 +159,9 @@ debug_entry_id_pairs=
 
 # For ppx_minidebug: for generating file names. If empty, all logging will be done to stdout,
 # regardless of the value of `log_main_domain_to_stdout`.
-log_file_stem=debug
+log_file_stem=debug
+
+# It is useful for testing to have outputs more uniform across backends even if that cripples
+# some backends. Currently, this setting only affects logging from routines to accommodate Metal's
+# shortcoming.
+prefer_backend_uniformity=false
diff --git a/test/micrograd_demo_logging-stream-0-0.expected.log b/test/micrograd_demo_logging-stream-0-0.expected.log
@@ -6,84 +6,80 @@ float *g &[1] = 0xNNNN
 COMMENT: g gradient update
 COMMENT: g fwd
 # n4_c[0] := (a[0] + b[0]);
-n4_c[0]{=MAYBE UNINITIALIZED} = -2 = (a[0]{=-4} + b[0]{=2})
+n4_c[0]{=MAYBE UNINITIALIZED} = -2000e-3 = (a[0]{=-4000e-3} + b[0]{=2000e-3})
 # n19_c[0] := ((n4_c[0] + n4_c[0]) + 1);
-n19_c[0]{=MAYBE UNINITIALIZED} = -3 = ((n4_c[0]{=-2} + n4_c[0]{=-2}) + (float)(1))
+n19_c[0]{=MAYBE UNINITIALIZED} = -3000e-3 = ((n4_c[0]{=-2000e-3} + n4_c[0]{=-2000e-3}) + (float)(1))
 # n42[0] := (b[0] - a[0]);
-n42[0]{=MAYBE UNINITIALIZED} = 6 = (b[0]{=2} - a[0]{=-4})
+n42[0]{=MAYBE UNINITIALIZED} = 6000e-3 = (b[0]{=2000e-3} - a[0]{=-4000e-3})
 # n31[0] := (b[0] + a[0]);
-n31[0]{=MAYBE UNINITIALIZED} = -2 = (b[0]{=2} + a[0]{=-4})
+n31[0]{=MAYBE UNINITIALIZED} = -2000e-3 = (b[0]{=2000e-3} + a[0]{=-4000e-3})
 # n14_d[0] := fma(a[0], b[0], (b[0] * (b[0] * b[0])));
-n14_d[0]{=MAYBE UNINITIALIZED} = 0 = fmaf(a[0]{=-4}, b[0]{=2}, (b[0]{=2} * (b[0]{=2} * b[0]{=2})))
+n14_d[0]{=MAYBE UNINITIALIZED} = 0e-3 = fma(a[0]{=-4000e-3}, b[0]{=2000e-3}, (b[0]{=2000e-3} * (b[0]{=2000e-3} * b[0]{=2000e-3})))
 # n40_d[0] := (fma(n14_d[0], 2, n14_d[0]) + relu(n31[0]));
-n40_d[0]{=MAYBE UNINITIALIZED} = 0 = (fmaf(n14_d[0]{=0}, (float)(2), n14_d[0]{=0}) + fmaxf(0.0, n31[0]{=-2}))
-# e[0] :=
-  (fma(-1, a[0], ((n19_c[0] + 1) + n19_c[0])) -
-   (fma(3, n40_d[0], n40_d[0]) + relu(n42[0])));
-e[0]{=MAYBE UNINITIALIZED} = -7 = (fmaf((float)(-1), a[0]{=-4}, ((n19_c[0]{=-3} + (float)(1)) + n19_c[0]{=-3})) - (fmaf((float)(3), n40_d[0]{=0}, n40_d[0]{=0}) + fmaxf(0.0, n42[0]{=6})))
+n40_d[0]{=MAYBE UNINITIALIZED} = 0e-3 = (fma(n14_d[0]{=0e-3}, (float)(2), n14_d[0]{=0e-3}) + max(0.0f, n31[0]{=-2000e-3}))
+# e[0] :=$  (fma(-1, a[0], ((n19_c[0] + 1) + n19_c[0])) -$   (fma(3, n40_d[0], n40_d[0]) + relu(n42[0])));
+e[0]{=MAYBE UNINITIALIZED} = -7000e-3 = (fma((float)(-1), a[0]{=-4000e-3}, ((n19_c[0]{=-3000e-3} + (float)(1)) + n19_c[0]{=-3000e-3})) - (fma((float)(3), n40_d[0]{=0e-3}, n40_d[0]{=0e-3}) + max(0.0f, n42[0]{=6000e-3})))
 # f[0] := (e[0] * e[0]);
-f[0]{=MAYBE UNINITIALIZED} = 49 = (e[0]{=-7} * e[0]{=-7})
+f[0]{=MAYBE UNINITIALIZED} = 49000e-3 = (e[0]{=-7000e-3} * e[0]{=-7000e-3})
 # g[0] := ((f[0] / 2) + (10 / f[0]));
-g[0]{=MAYBE UNINITIALIZED} = 24.7041 = ((f[0]{=49} / (float)(2)) + ((float)(10) / f[0]{=49}))
+g[0]{=MAYBE UNINITIALIZED} = 24704e-3 = ((f[0]{=49000e-3} / (float)(2)) + ((float)(10) / f[0]{=49000e-3}))
 COMMENT: end
 COMMENT: g zero grads
 # a_grad := 0
-a_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
+a_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
 # b_grad := 0
-b_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
+b_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
 # n4_c_grad := 0
-n4_c_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
+n4_c_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
 # n19_c_grad := 0
-n19_c_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
+n19_c_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
 # n14_d_grad := 0
-n14_d_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
+n14_d_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
 # n40_d_grad := 0
-n40_d_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
+n40_d_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
 # f_grad := 0
-f_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
+f_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
 COMMENT: end
 COMMENT: g bprop
 # f.grad[0] := fma(1, (-10 / (f[0] * f[0])), f.grad[0]);
-f_grad[0]{=MAYBE UNINITIALIZED} = -0.00416493 = fmaf((float)(1), ((float)(-10) / (f[0]{=49} * f[0]{=49})), f_grad[0]{=0})
+f_grad[0]{=MAYBE UNINITIALIZED} = -4e-3 = fma((float)(1), ((float)(-10) / (f[0]{=49000e-3} * f[0]{=49000e-3})), f_grad[0]{=0e-3})
 # f.grad[0] := (f.grad[0] + 0.5);
-f_grad[0]{=MAYBE UNINITIALIZED} = 0.495835 = (f_grad[0]{=-0.00416493} + (float)(0.5))
+f_grad[0]{=MAYBE UNINITIALIZED} = 495e-3 = (f_grad[0]{=-4e-3} + (float)(0.5))
 # n40_d.grad[0] := fma(-1, ((2 * e[0]) * f.grad[0]), n40_d.grad[0]);
-n40_d_grad[0]{=MAYBE UNINITIALIZED} = 6.94169 = fmaf((float)(-1), (((float)(2) * e[0]{=-7}) * f_grad[0]{=0.495835}), n40_d_grad[0]{=0})
+n40_d_grad[0]{=MAYBE UNINITIALIZED} = 6941e-3 = fma((float)(-1), (((float)(2) * e[0]{=-7000e-3}) * f_grad[0]{=495e-3}), n40_d_grad[0]{=0e-3})
 # n40_d.grad[0] := fma(3, (-1 * ((2 * e[0]) * f.grad[0])), n40_d.grad[0]);
-n40_d_grad[0]{=MAYBE UNINITIALIZED} = 27.7668 = fmaf((float)(3), ((float)(-1) * (((float)(2) * e[0]{=-7}) * f_grad[0]{=0.495835})), n40_d_grad[0]{=6.94169})
+n40_d_grad[0]{=MAYBE UNINITIALIZED} = 27766e-3 = fma((float)(3), ((float)(-1) * (((float)(2) * e[0]{=-7000e-3}) * f_grad[0]{=495e-3})), n40_d_grad[0]{=6941e-3})
 # n14_d.grad[0] := (n14_d.grad[0] + n40_d.grad[0]);
-n14_d_grad[0]{=MAYBE UNINITIALIZED} = 27.7668 = (n14_d_grad[0]{=0} + n40_d_grad[0]{=27.7668})
+n14_d_grad[0]{=MAYBE UNINITIALIZED} = 27766e-3 = (n14_d_grad[0]{=0e-3} + n40_d_grad[0]{=27766e-3})
 # n14_d.grad[0] := fma(n40_d.grad[0], 2, n14_d.grad[0]);
-n14_d_grad[0]{=MAYBE UNINITIALIZED} = 83.3003 = fmaf(n40_d_grad[0]{=27.7668}, (float)(2), n14_d_grad[0]{=27.7668})
+n14_d_grad[0]{=MAYBE UNINITIALIZED} = 83300e-3 = fma(n40_d_grad[0]{=27766e-3}, (float)(2), n14_d_grad[0]{=27766e-3})
 # a.grad[0] := fma(n14_d.grad[0], b[0], a.grad[0]);
-a_grad[0]{=MAYBE UNINITIALIZED} = 166.601 = fmaf(n14_d_grad[0]{=83.3003}, b[0]{=2}, a_grad[0]{=0})
+a_grad[0]{=MAYBE UNINITIALIZED} = 166600e-3 = fma(n14_d_grad[0]{=83300e-3}, b[0]{=2000e-3}, a_grad[0]{=0e-3})
 # b.grad[0] := fma(a[0], n14_d.grad[0], b.grad[0]);
-b_grad[0]{=MAYBE UNINITIALIZED} = -333.201 = fmaf(a[0]{=-4}, n14_d_grad[0]{=83.3003}, b_grad[0]{=0})
+b_grad[0]{=MAYBE UNINITIALIZED} = -333201e-3 = fma(a[0]{=-4000e-3}, n14_d_grad[0]{=83300e-3}, b_grad[0]{=0e-3})
 # b.grad[0] := fma((3 * (b[0] * b[0])), n14_d.grad[0], b.grad[0]);
-b_grad[0]{=MAYBE UNINITIALIZED} = 666.402 = fmaf(((float)(3) * (b[0]{=2} * b[0]{=2})), n14_d_grad[0]{=83.3003}, b_grad[0]{=-333.201})
+b_grad[0]{=MAYBE UNINITIALIZED} = 666402e-3 = fma(((float)(3) * (b[0]{=2000e-3} * b[0]{=2000e-3})), n14_d_grad[0]{=83300e-3}, b_grad[0]{=-333201e-3})
 # b.grad[0] := (b.grad[0] + relu_gate(n31[0], n40_d.grad[0]));
-b_grad[0]{=MAYBE UNINITIALIZED} = 666.402 = (b_grad[0]{=666.402} + (n31[0]{=-2} > 0.0 ? n40_d_grad[0]{=27.7668} : 0.0))
+b_grad[0]{=MAYBE UNINITIALIZED} = 666402e-3 = (b_grad[0]{=666402e-3} + ((n31[0]{=-2000e-3} > 0.0f) ? n40_d_grad[0]{=27766e-3} : 0.0f))
 # a.grad[0] := (a.grad[0] + relu_gate(n31[0], n40_d.grad[0]));
-a_grad[0]{=MAYBE UNINITIALIZED} = 166.601 = (a_grad[0]{=166.601} + (n31[0]{=-2} > 0.0 ? n40_d_grad[0]{=27.7668} : 0.0))
-# b.grad[0] :=
-  (b.grad[0] + relu_gate(n42[0], (-1 * ((2 * e[0]) * f.grad[0]))));
-b_grad[0]{=MAYBE UNINITIALIZED} = 673.344 = (b_grad[0]{=666.402} + (n42[0]{=6} > 0.0 ? ((float)(-1) * (((float)(2) * e[0]{=-7}) * f_grad[0]{=0.495835})) : 0.0))
-# a.grad[0] :=
-  (a.grad[0] - relu_gate(n42[0], (-1 * ((2 * e[0]) * f.grad[0]))));
-a_grad[0]{=MAYBE UNINITIALIZED} = 159.659 = (a_grad[0]{=166.601} - (n42[0]{=6} > 0.0 ? ((float)(-1) * (((float)(2) * e[0]{=-7}) * f_grad[0]{=0.495835})) : 0.0))
+a_grad[0]{=MAYBE UNINITIALIZED} = 166600e-3 = (a_grad[0]{=166600e-3} + ((n31[0]{=-2000e-3} > 0.0f) ? n40_d_grad[0]{=27766e-3} : 0.0f))
+# b.grad[0] :=$  (b.grad[0] + relu_gate(n42[0], (-1 * ((2 * e[0]) * f.grad[0]))));
+b_grad[0]{=MAYBE UNINITIALIZED} = 673344e-3 = (b_grad[0]{=666402e-3} + ((n42[0]{=6000e-3} > 0.0f) ? ((float)(-1) * (((float)(2) * e[0]{=-7000e-3}) * f_grad[0]{=495e-3})) : 0.0f))
+# a.grad[0] :=$  (a.grad[0] - relu_gate(n42[0], (-1 * ((2 * e[0]) * f.grad[0]))));
+a_grad[0]{=MAYBE UNINITIALIZED} = 159658e-3 = (a_grad[0]{=166600e-3} - ((n42[0]{=6000e-3} > 0.0f) ? ((float)(-1) * (((float)(2) * e[0]{=-7000e-3}) * f_grad[0]{=495e-3})) : 0.0f))
 # n19_c.grad[0] := fma((2 * e[0]), f.grad[0], n19_c.grad[0]);
-n19_c_grad[0]{=MAYBE UNINITIALIZED} = -6.94169 = fmaf(((float)(2) * e[0]{=-7}), f_grad[0]{=0.495835}, n19_c_grad[0]{=0})
+n19_c_grad[0]{=MAYBE UNINITIALIZED} = -6941e-3 = fma(((float)(2) * e[0]{=-7000e-3}), f_grad[0]{=495e-3}, n19_c_grad[0]{=0e-3})
 # n19_c.grad[0] := fma((2 * e[0]), f.grad[0], n19_c.grad[0]);
-n19_c_grad[0]{=MAYBE UNINITIALIZED} = -13.8834 = fmaf(((float)(2) * e[0]{=-7}), f_grad[0]{=0.495835}, n19_c_grad[0]{=-6.94169})
+n19_c_grad[0]{=MAYBE UNINITIALIZED} = -13883e-3 = fma(((float)(2) * e[0]{=-7000e-3}), f_grad[0]{=495e-3}, n19_c_grad[0]{=-6941e-3})
 # n4_c.grad[0] := (n4_c.grad[0] + n19_c.grad[0]);
-n4_c_grad[0]{=MAYBE UNINITIALIZED} = -13.8834 = (n4_c_grad[0]{=0} + n19_c_grad[0]{=-13.8834})
+n4_c_grad[0]{=MAYBE UNINITIALIZED} = -13883e-3 = (n4_c_grad[0]{=0e-3} + n19_c_grad[0]{=-13883e-3})
 # n4_c.grad[0] := (n4_c.grad[0] + n19_c.grad[0]);
-n4_c_grad[0]{=MAYBE UNINITIALIZED} = -27.7668 = (n4_c_grad[0]{=-13.8834} + n19_c_grad[0]{=-13.8834})
+n4_c_grad[0]{=MAYBE UNINITIALIZED} = -27766e-3 = (n4_c_grad[0]{=-13883e-3} + n19_c_grad[0]{=-13883e-3})
 # a.grad[0] := (a.grad[0] + n4_c.grad[0]);
-a_grad[0]{=MAYBE UNINITIALIZED} = 131.892 = (a_grad[0]{=159.659} + n4_c_grad[0]{=-27.7668})
+a_grad[0]{=MAYBE UNINITIALIZED} = 131892e-3 = (a_grad[0]{=159658e-3} + n4_c_grad[0]{=-27766e-3})
 # b.grad[0] := (b.grad[0] + n4_c.grad[0]);
-b_grad[0]{=MAYBE UNINITIALIZED} = 645.577 = (b_grad[0]{=673.344} + n4_c_grad[0]{=-27.7668})
+b_grad[0]{=MAYBE UNINITIALIZED} = 645577e-3 = (b_grad[0]{=673344e-3} + n4_c_grad[0]{=-27766e-3})
 # a.grad[0] := fma(-1, ((2 * e[0]) * f.grad[0]), a.grad[0]);
-a_grad[0]{=MAYBE UNINITIALIZED} = 138.834 = fmaf((float)(-1), (((float)(2) * e[0]{=-7}) * f_grad[0]{=0.495835}), a_grad[0]{=131.892})
+a_grad[0]{=MAYBE UNINITIALIZED} = 138833e-3 = fma((float)(-1), (((float)(2) * e[0]{=-7000e-3}) * f_grad[0]{=495e-3}), a_grad[0]{=131892e-3})
 COMMENT: end
 COMMENT: end
diff --git a/test/ocannl_config b/test/ocannl_config
@@ -2,4 +2,5 @@ randomness_lib=for_tests
 log_main_domain_to_stdout=true
 backend=cc
 log_level=0
-print_decimals_precision=2
+print_decimals_precision=2
+prefer_backend_uniformity=true