ahrefs
diff --git a/‎arrayjit/lib/assignments.ml‎
Lines changed: 5 additions & 1 deletion b/‎arrayjit/lib/assignments.ml‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎arrayjit/lib/c_syntax.ml‎
Lines changed: 10 additions & 0 deletions b/‎arrayjit/lib/c_syntax.ml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎arrayjit/lib/low_level.ml‎
Lines changed: 15 additions & 7 deletions b/‎arrayjit/lib/low_level.ml‎
Lines changed: 15 additions & 7 deletions
diff --git a/‎arrayjit/lib/low_level.mli‎
Lines changed: 1 addition & 0 deletions b/‎arrayjit/lib/low_level.mli‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎lib/operation.ml‎
Lines changed: 4 additions & 0 deletions b/‎lib/operation.ml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎lib/ppx_cd.ml‎
Lines changed: 13 additions & 0 deletions b/‎lib/ppx_cd.ml‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎lib/ppx_op.ml‎
Lines changed: 12 additions & 0 deletions b/‎lib/ppx_op.ml‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎lib/shape.ml‎
Lines changed: 1 addition & 0 deletions b/‎lib/shape.ml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎lib/tensor.ml‎
Lines changed: 15 additions & 2 deletions b/‎lib/tensor.ml‎
Lines changed: 15 additions & 2 deletions
diff --git a/‎lib/tensor.mli‎
Lines changed: 5 additions & 0 deletions b/‎lib/tensor.mli‎
Lines changed: 5 additions & 0 deletions
@@ -21,6 +21,7 @@ type buffer = Node of Tn.t | Merge_buffer of Tn.t [@@deriving sexp_of, equal]
 (** Resets an array by performing the specified computation or data fetching. *)
 type fetch_op =
   | Constant of float
+  | Constant_bits of int64  (** Direct bit representation, primarily for uint4x32 *)
   | Constant_fill of float array
       (** Fills in the numbers where the rightmost axis is contiguous. Primes shape inference to
           require the assigned tensor to have the same number of elements as the array, but in case
@@ -326,6 +327,8 @@ let%track4_sexp to_low_level code =
     | Fetch { array; fetch_op = Constant 0.0; dims = _ } -> Low_level.Zero_out array
     | Fetch { array; fetch_op = Constant c; dims } ->
         Low_level.loop_over_dims (Lazy.force dims) ~body:(fun idcs -> set array idcs @@ Constant c)
+    | Fetch { array; fetch_op = Constant_bits i; dims } ->
+        Low_level.loop_over_dims (Lazy.force dims) ~body:(fun idcs -> set array idcs @@ Constant_bits i)
     | Fetch { array; fetch_op = Slice { batch_idx = { static_symbol = idx; _ }; sliced }; dims } ->
         (* TODO: doublecheck this always gets optimized away. *)
         Low_level.loop_over_dims (Lazy.force dims) ~body:(fun idcs ->
@@ -335,7 +338,7 @@ let%track4_sexp to_low_level code =
             set array idcs @@ Embed_index (Iterator s.static_symbol))
     | Fetch { array; fetch_op = Embed_self_id; dims } ->
         Low_level.loop_over_dims (Lazy.force dims) ~body:(fun idcs ->
-            set array idcs @@ Constant (Float.of_int array.id))
+            set array idcs @@ Constant_bits (Int64.of_int array.id))
     | Fetch { array; fetch_op = Range_over_offsets; dims = (lazy dims) } ->
         Low_level.loop_over_dims dims ~body:(fun idcs ->
             let offset = Indexing.reflect_projection ~dims ~projection:idcs in
@@ -422,6 +425,7 @@ let to_doc ?name ?static_indices () c =
   let doc_of_fetch_op (op : fetch_op) =
     match op with
     | Constant f -> string (Float.to_string f)
+    | Constant_bits i -> string (Printf.sprintf "bits(%LdLL)" i)
     | Constant_fill values ->
         let values_str =
           String.concat ~sep:", " (Array.to_list (Array.map values ~f:Float.to_string))
 
@@ -782,6 +782,11 @@ module C_syntax (B : C_syntax_config) = struct
           else string prefix ^^ string c_str ^^ string postfix
         in
         ([], expr)
+    | Constant_bits i ->
+        let from_prec = Ops.int64 in
+        let prefix, postfix = B.convert_precision ~from:from_prec ~to_:prec in
+        let expr = string prefix ^^ string (Printf.sprintf "%LdLL" i) ^^ string postfix in
+        ([], expr)
     | Embed_index idx ->
         let from_prec = Ops.int32 in
         let prefix, postfix = B.convert_precision ~from:from_prec ~to_:prec in
@@ -859,6 +864,11 @@ module C_syntax (B : C_syntax_config) = struct
         let prefix, postfix = B.convert_precision ~from:from_prec ~to_:prec in
         let c_str = Printf.sprintf "%.16g" c in
         (string prefix ^^ string c_str ^^ string postfix, [])
+    | Constant_bits i ->
+        let from_prec = Ops.int64 in
+        let prefix, postfix = B.convert_precision ~from:from_prec ~to_:prec in
+        let expr = string prefix ^^ string (Printf.sprintf "%LdLL" i) ^^ string postfix in
+        (expr, [])
     | Embed_index idx ->
         let idx_doc = pp_axis_index idx in
         ((if PPrint.is_empty idx_doc then string "0" else idx_doc), [])
 
@@ -56,6 +56,7 @@ and scalar_t =
   | Binop of Ops.binop * scalar_t * scalar_t
   | Unop of Ops.unop * scalar_t
   | Constant of float
+  | Constant_bits of int64  (** Direct bit representation, primarily for uint4x32 *)
   | Embed_index of Indexing.axis_index
 [@@deriving sexp_of, equal, compare]
 
@@ -177,7 +178,7 @@ let is_constexpr_comp traced_store llsc =
     | Ternop (_, v1, v2, v3) -> loop v1 && loop v2 && loop v3
     | Binop (_, v1, v2) -> loop v1 && loop v2
     | Unop (_, v) -> loop v
-    | Constant _ -> true
+    | Constant _ | Constant_bits _ -> true
     | Embed_index _ -> false
   in
   loop llsc
@@ -198,7 +199,7 @@ let is_accessing_comp traced_store llsc =
     | Ternop (_, v1, v2, v3) -> loop v1 || loop v2 || loop v3
     | Binop (_, v1, v2) -> loop v1 || loop v2
     | Unop (_, v) -> loop v
-    | Constant _ -> false
+    | Constant _ | Constant_bits _ -> false
     | Embed_index _ -> false
   in
   loop llsc
@@ -214,7 +215,7 @@ let is_complex_comp traced_store llsc =
   | Ternop (_, v1, v2, v3) -> accessing v1 || accessing v2 || accessing v3
   | Binop (_, v1, v2) -> accessing v1 || accessing v2
   | Unop (_, v) -> accessing v
-  | Constant _ -> false
+  | Constant _ | Constant_bits _ -> false
   | Embed_index _ -> false
 
 let is_scalar_dims tn = Array.for_all ~f:(( = ) 1) @@ Lazy.force tn.Tn.dims
@@ -332,7 +333,7 @@ let visit_llc traced_store ~merge_node_id reverse_node_map ~max_visits llc =
   and loop_scalar env (access_pos : int array option) llsc =
     let loop = loop_scalar env access_pos in
     match llsc with
-    | Constant _ -> ()
+    | Constant _ | Constant_bits _ -> ()
     | Get (ptr, indices) ->
         let traced : traced_array = get_node traced_store ptr in
         let at_pos = lookup env indices in
@@ -491,7 +492,7 @@ let%diagn2_sexp check_and_store_virtual computations_table traced static_indices
     | Staged_compilation _ -> raise @@ Non_virtual 8
   and loop_scalar ~env_dom llsc =
     match llsc with
-    | Constant _ -> ()
+    | Constant _ | Constant_bits _ -> ()
     | Get (tn, idcs) ->
         if Tn.equal tn top_tn then check_idcs idcs
         else
@@ -648,7 +649,7 @@ let%track7_sexp inline_computation ~id
       | Staged_compilation _ -> Some llc
     and loop_scalar env llsc : scalar_t =
       match llsc with
-      | Constant _ -> llsc
+      | Constant _ | Constant_bits _ -> llsc
       | Get (tn, indices) when Tn.equal tn traced.tn ->
           assert ([%equal: Indexing.axis_index array option] (Some indices) def_args);
           Get_local id
@@ -740,6 +741,7 @@ let virtual_llc computations_table traced_store reverse_node_map static_indices
   and loop_scalar ~process_for (llsc : scalar_t) : scalar_t =
     match llsc with
     | Constant _ -> llsc
+    | Constant_bits _ -> llsc
     | Get (tn, _) when Set.mem process_for tn ->
         (* [Get_local] will replace this [Get] during [inline_computation] if [tn] remains
            virtual. *)
@@ -829,6 +831,7 @@ let cleanup_virtual_llc reverse_node_map ~static_indices (llc : t) : t =
     let loop = loop_scalar ~balanced ~env_dom in
     match llsc with
     | Constant _ -> llsc
+    | Constant_bits _ -> llsc
     | Get (a, indices) ->
         (* TODO(#296): this should probably already be Never_virtual, we could assert it. *)
         Tn.update_memory_mode a Never_virtual 17;
@@ -874,6 +877,7 @@ let rec substitute_float ~var ~value llsc =
   else
     match llsc with
     | Constant _ -> llsc
+    | Constant_bits _ -> llsc
     | Get (_ptr, _indices) -> llsc
     | Local_scope opts -> Local_scope { opts with body = loop_proc opts.body }
     | Get_local _ -> llsc
@@ -937,6 +941,7 @@ let simplify_llc llc =
     in
     match llsc' with
     | Constant _ -> llsc
+    | Constant_bits _ -> llsc
     | Get (_ptr, _indices) -> llsc
     | Local_scope { id; body = Set_local (id2, v); _ } when equal_scope_id id id2 -> loop_scalar v
     | Local_scope { id; body = Seq (Set_local (id1, v1), Set_local (id2, v2)); _ }
@@ -1048,6 +1053,7 @@ let simplify_llc llc =
     let loop = check_float tn in
     match llsc with
     | Constant c -> check_constant tn c
+    | Constant_bits _ -> () (* No check needed for bit constants *)
     | Local_scope { body; _ } -> check_proc body
     | Ternop (_, v1, v2, v3) ->
         loop v1;
@@ -1170,7 +1176,7 @@ let get_ident_within_code ?no_dots ?(blacklist = []) llcs =
         loop_scalar f2
     | Unop (_, f) -> loop_scalar f
     | Get_local { tn; _ } -> visit tn
-    | Constant _ | Embed_index _ -> ()
+    | Constant _ | Constant_bits _ | Embed_index _ -> ()
   in
   Array.iter ~f:loop llcs;
   let repeating_nograd_idents =
@@ -1260,6 +1266,7 @@ let to_doc_cstyle ?name ?static_indices () llc =
         group (doc_ident source ^^ string ".merge" ^^ brackets (pp_indices idcs))
     | Get (tn, idcs) -> group (doc_ident tn ^^ brackets (pp_indices idcs))
     | Constant c -> string (Printf.sprintf "%.16g" c)
+    | Constant_bits i -> string (Printf.sprintf "0x%LX" i)
     | Embed_index idx ->
         let idx_doc = pp_axis_index idx in
         if PPrint.is_empty idx_doc then string "0" else idx_doc
@@ -1351,6 +1358,7 @@ let to_doc ?name ?static_indices () llc =
         group (doc_ident source ^^ string ".merge" ^^ brackets (pp_indices idcs))
     | Get (tn, idcs) -> group (doc_ident tn ^^ brackets (pp_indices idcs))
     | Constant c -> string (Printf.sprintf "%.16g" c)
+    | Constant_bits i -> string (Printf.sprintf "0x%LX" i)
     | Embed_index idx ->
         let idx_doc = pp_axis_index idx in
         if PPrint.is_empty idx_doc then string "0" else idx_doc
 
@@ -50,6 +50,7 @@ and scalar_t =
   | Binop of Ops.binop * scalar_t * scalar_t
   | Unop of Ops.unop * scalar_t
   | Constant of float
+  | Constant_bits of int64  (** Direct bit representation, primarily for uint4x32 *)
   | Embed_index of Indexing.axis_index
 [@@deriving sexp_of, equal, compare]
 
 
@@ -121,6 +121,7 @@ module NDO_before_pow = struct
   let ( + ) t1 t2 = add ~grad_spec:Prohibit_grad t1 t2 ()
   let ( !. ) f = Tensor.number ~grad_spec:Prohibit_grad f
   let ( !.. ) ?label i = Tensor.number ?label ~grad_spec:Prohibit_grad @@ Float.of_int i
+  let ( !% ) ?label i = Tensor.bits ?label ~grad_spec:Prohibit_grad i
   let ( - ) t1 t2 = sub ~grad_spec:Prohibit_grad t1 t2 ()
 
   let ( ~- ) ?label t =
@@ -447,6 +448,7 @@ module DO = struct
   let fma ?label t1 t2 t3 = fma ~grad_spec:If_needed ?label t1 t2 t3 ()
   let ( !. ) f = Tensor.number ~grad_spec:If_needed f
   let ( !.. ) ?label i = Tensor.number ?label ~grad_spec:If_needed @@ Float.of_int i
+  let ( !% ) ?label i = Tensor.bits ?label ~grad_spec:If_needed i
   let ( !@ ) = embed_symbol
   let ( - ) ?label t1 t2 = sub ~grad_spec:If_needed ?label t1 t2 ()
 
@@ -549,6 +551,7 @@ module TDSL = struct
 
   let term = Tensor.term ~grad_spec:If_needed
   let number = Tensor.number ~grad_spec:If_needed
+  let bits = Tensor.bits ~grad_spec:If_needed
   let ndarray = Tensor.ndarray ~grad_spec:If_needed
   let threefry4x32 = threefry4x32 ~grad_spec:If_needed
   let uint4x32_to_prec_uniform = uint4x32_to_prec_uniform ~grad_spec:If_needed
@@ -606,6 +609,7 @@ module NTDSL = struct
   let wrap = wrap ~grad_spec:Prohibit_grad
   let wrap_padded = wrap_padded ~grad_spec:Prohibit_grad
   let rebatch = rebatch ~grad_spec:Prohibit_grad
+  let bits = Tensor.bits ~grad_spec:Prohibit_grad
   let threefry4x32 = threefry4x32 ~grad_spec:Prohibit_grad
   let uint4x32_to_prec_uniform = uint4x32_to_prec_uniform ~grad_spec:Prohibit_grad
   let embed_self_id = embed_self_id
 
@@ -784,6 +784,8 @@ let translate ?ident_label (expr : expression) : result =
     match expr with
     | { pexp_desc = Pexp_constant (Pconst_float _); _ } ->
         { default_result with expr = [%expr NTDSL.number [%e expr]]; slot = Scalar }
+    | { pexp_desc = Pexp_constant (Pconst_integer (_, Some ('L' | 'l'))); _ } ->
+        { default_result with expr = [%expr NTDSL.bits [%e expr]]; slot = Scalar }
     | { pexp_desc = Pexp_constant (Pconst_integer _); _ } ->
         { default_result with expr = [%expr NTDSL.number (Float.of_int [%e expr])]; slot = Scalar }
     | [%expr
@@ -797,6 +799,17 @@ let translate ?ident_label (expr : expression) : result =
           expr = [%expr NTDSL.number ~axis_label:[%e axis] [%e f]];
           slot = Scalar;
         }
+    | [%expr
+        [%e? { pexp_desc = Pexp_constant (Pconst_char ch); pexp_loc; _ }]
+          [%e? { pexp_desc = Pexp_constant (Pconst_integer (_, Some ('L' | 'l'))); _ } as i]] ->
+        let axis =
+          Ast_helper.Exp.constant ~loc:pexp_loc (Pconst_string (String.of_char ch, pexp_loc, None))
+        in
+        {
+          default_result with
+          expr = [%expr NTDSL.bits ~axis_label:[%e axis] [%e i]];
+          slot = Scalar;
+        }
     | [%expr
         [%e? { pexp_desc = Pexp_constant (Pconst_char ch); pexp_loc; _ }]
           [%e? { pexp_desc = Pexp_constant (Pconst_integer _); _ } as i]] ->
 
@@ -72,6 +72,8 @@ let rec translate ~num_configs ~is_toplevel ~has_config ?label expr =
   match expr with
   | { pexp_desc = Pexp_constant (Pconst_float _); _ } ->
       (no_vbs, [%expr TDSL.number ?label:[%e opt_expr ~loc label] [%e expr]])
+  | { pexp_desc = Pexp_constant (Pconst_integer (_, Some ('L' | 'l'))); _ } ->
+      (no_vbs, [%expr TDSL.bits [%e expr]])
   | { pexp_desc = Pexp_constant (Pconst_integer _); _ } ->
       (no_vbs, [%expr TDSL.number (Float.of_int [%e expr])])
   | [%expr
@@ -81,6 +83,16 @@ let rec translate ~num_configs ~is_toplevel ~has_config ?label expr =
         Ast_helper.Exp.constant ~loc:pexp_loc (Pconst_string (String.of_char ch, pexp_loc, None))
       in
       (no_vbs, [%expr TDSL.number ?label:[%e opt_expr ~loc label] ~axis_label:[%e axis] [%e f]])
+  | [%expr
+      [%e? { pexp_desc = Pexp_constant (Pconst_char ch); pexp_loc; _ }]
+        [%e? { pexp_desc = Pexp_constant (Pconst_integer (_, Some ('L' | 'l'))); _ } as i]] ->
+      let axis =
+        Ast_helper.Exp.constant ~loc:pexp_loc (Pconst_string (String.of_char ch, pexp_loc, None))
+      in
+      ( no_vbs,
+        [%expr
+          TDSL.bits ?label:[%e opt_expr ~loc label] ~axis_label:[%e axis] [%e i]]
+      )
   | [%expr
       [%e? { pexp_desc = Pexp_constant (Pconst_char ch); pexp_loc; _ }]
         [%e? { pexp_desc = Pexp_constant (Pconst_integer _); _ } as i]] ->
 
@@ -410,6 +410,7 @@ let%debug4_sexp get_inequalities ({ shape = cur_sh; logic; id = _ } as _upd : up
   match logic with
   | Terminal (Fetch Range_over_offsets) -> (Row.dim_map_empty, mark_terminal ())
   | Terminal (Fetch (Constant _)) -> (Row.dim_map_empty, mark_terminal ())
+  | Terminal (Fetch (Constant_bits _)) -> (Row.dim_map_empty, mark_terminal ())
   | Terminal (Data (Reshape nd)) ->
       ( dim_map_empty,
         Rows_constr
 
@@ -477,7 +477,7 @@ let%track7_sexp term ?init_data ?fetch_op ?grad_spec ?(label = []) ?(top_down_pr
     match fetch_op with
     | None -> Asgns.empty_comp
     | Some
-        (( Constant _ | Slice _ | Embed_symbol _ | Embed_self_id | Range_over_offsets
+        (( Constant _ | Constant_bits _ | Slice _ | Embed_symbol _ | Embed_self_id | Range_over_offsets
          | Constant_fill _ ) as fetch_op) ->
         Asgns.to_comp @@ Fetch { array = v; fetch_op; dims }
   in
@@ -508,6 +508,19 @@ let%track7_sexp number ?(label = []) ?axis_label ?(grad_spec = Prohibit_grad) c
     if exceeds_fp16_cutoff c then Tn.update_infer_prec ~only_if:is_up_to_fp16 t.value (lazy single));
   t
 
+let%track7_sexp bits ?(label = []) ?axis_label ?(grad_spec = Prohibit_grad) i : t =
+  (* [bits i] builds a one-element tensor whose value is fetched as [Constant_bits i], so the
+     int64 is carried as-is rather than through a float [Constant]; primarily meant for uint4x32
+     tensors. The decimal rendering of [i] is prepended to [label]. Batch and input dimensions
+     are empty; the single output axis has size 1 and is labeled when [axis_label] is given.
+     The value node is marked [Effectively_constant] before the tensor is returned.
+     NOTE(review): 24 looks like a provenance/debug code for the memory-mode update -- confirm. *)
+  let label = Int64.to_string i :: label in
+  let fetch_op = Ir.Assignments.Constant_bits i in
+  let t = term ~label ~grad_spec ~batch_dims:[] ~input_dims:[] ~fetch_op in
+  let t =
+    match axis_label with
+    | None -> t ~output_dims:[ 1 ] ()
+    | Some axis_label -> t ~output_axes:[ (axis_label, 1) ] ()
+  in
+  Tn.update_memory_mode t.value Effectively_constant 24;
+  t
+
 let constant_fill ~debug values =
   match Array.length values with
   | 0 -> (None, None)
@@ -632,7 +645,7 @@ let set_random_seed ?seed () =
   let seed =
     Option.value ~default:42 @@ Option.first_some seed Utils.settings.fixed_state_for_init
   in
-  let res = number ~label:[ "random_seed" ] ~grad_spec:Prohibit_grad (Int.to_float seed) in
+  let res = bits ~label:[ "random_seed" ] ~grad_spec:Prohibit_grad (Int64.of_int seed) in
   Tn.update_prec res.value Ir.Ops.uint4x32;
   random_seed := Some res
 
 
@@ -190,6 +190,11 @@ val number : ?label:string list -> ?axis_label:string -> ?grad_spec:grad_spec ->
 (** A number: a tensor with a single axis of one dimension, initialized to the given value.
     [grad_spec] is by default [Prohibit_grad]. *)
 
+val bits : ?label:string list -> ?axis_label:string -> ?grad_spec:grad_spec -> int64 -> t
+(** [bits i] is a number with exact bit representation: a tensor with a single output axis of
+    dimension one, initialized to the int64 value [i] without passing through a float. Useful for
+    initializing uint4x32 tensors where exact bit patterns matter. The decimal rendering of [i]
+    is prepended to [label]; [axis_label], when given, labels the single axis. [grad_spec] is by
+    default [Prohibit_grad]. *)
+
 val ndarray : ?grad_spec:grad_spec -> float array -> op_fun
 (** A tensor with an explicit shape, initialized to the given values. Omitted shape rows default to
     no axes. [grad_spec] is by default [Prohibit_grad]. If [strict] is [true] (the default), the