ahrefs
diff --git a/arrayjit/lib/assignments.ml b/arrayjit/lib/assignments.ml
Lines changed: 68 additions & 1 deletion
diff --git a/arrayjit/lib/c_syntax.ml b/arrayjit/lib/c_syntax.ml
Lines changed: 39 additions & 14 deletions
@@ -66,6 +66,12 @@ and t =
       rhs : buffer;
       projections : Indexing.projections Lazy.t;
     }
+  | Set_vec_unop of {
+      op : Ops.vec_unop;
+      lhs : Tn.t;
+      rhs : buffer;
+      projections : Indexing.projections Lazy.t;
+    }
   | Fetch of { array : Tn.t; fetch_op : fetch_op; dims : int array Lazy.t }
 [@@deriving sexp_of]
 
@@ -116,6 +122,7 @@ let%debug3_sexp context_nodes ~(use_host_memory : 'a option) (asgns : t) : Tn.t_
     | Seq (t1, t2) -> loop t1 + loop t2
     | Block_comment (_, t) -> loop t
     | Accum_unop { lhs; rhs; _ } -> Set.union (one lhs) (of_node rhs)
+    | Set_vec_unop { lhs; rhs; _ } -> Set.union (one lhs) (of_node rhs)
     | Accum_binop { lhs; rhs1; rhs2; _ } ->
         Set.union_list (module Tn) [ one lhs; of_node rhs1; of_node rhs2 ]
     | Accum_ternop { lhs; rhs1; rhs2; rhs3; _ } ->
@@ -138,6 +145,7 @@ let%debug3_sexp guess_output_nodes (asgns : t) : Tn.t_set =
         (i1 + i2, o1 + o2 - (i1 + i2))
     | Block_comment (_, t) -> loop t
     | Accum_unop { lhs; rhs; _ } -> (of_node rhs, one lhs)
+    | Set_vec_unop { lhs; rhs; _ } -> (of_node rhs, one lhs)
     | Accum_binop { lhs; rhs1; rhs2; _ } -> (of_node rhs1 + of_node rhs2, one lhs)
     | Accum_ternop { lhs; rhs1; rhs2; rhs3; _ } ->
         (of_node rhs1 + of_node rhs2 + of_node rhs3, one lhs)
@@ -254,6 +262,49 @@ let%diagn2_sexp to_low_level code =
           projections
     | Accum_unop { initialize_neutral; accum; op; lhs; rhs; projections } ->
         loop_accum ~initialize_neutral ~accum ~op:(Ops.Unop op) ~lhs ~rhses:[| rhs |] projections
+    | Set_vec_unop { op; lhs; rhs; projections } ->
+        (* Lowers a vector unary operation to a single [Set_from_vec] statement wrapped in one
+           [For_loop] per entry of [projections.product_space] (outermost loop first). *)
+        let projections = Lazy.force projections in
+        (* Builds the innermost statement. [rev_iters] lists the loop symbols innermost-first. *)
+        let basecase rev_iters =
+          let iters = Array.of_list_rev rev_iters in
+          (* Maps each product iterator to the loop symbol at the same position. *)
+          let subst_map =
+            projections.product_iterators |> Array.to_list
+            |> List.mapi ~f:(fun pos prod_iter -> (prod_iter, Indexing.Iterator iters.(pos)))
+            |> Map.of_alist_exn (module Indexing.Symbol)
+          in
+          (* Only plain iterators are rewritten; fixed and affine indices pass through as-is. *)
+          let subst_index = function
+            | (Indexing.Fixed_idx _ | Indexing.Affine _) as idx -> idx
+            | Indexing.Iterator s as idx -> (
+                match Map.find subst_map s with Some replaced -> replaced | None -> idx)
+          in
+          let lhs_idcs = Array.map projections.project_lhs ~f:subst_index in
+          let rhs_idcs = Array.map projections.project_rhs.(0) ~f:subst_index in
+          let open Low_level in
+          let arg = get rhs rhs_idcs in
+          (* Uint4x32_to_prec_uniform is the only vec_unop for now.
+             TODO: derive the length from the target precision instead of using this default. *)
+          let length = match op with Ops.Uint4x32_to_prec_uniform -> 16 in
+          Set_from_vec { tn = lhs; idcs = lhs_idcs; length; vec_unop = op; arg; debug = "" }
+        in
+        (* The symbol for an outer loop is created before descending into the inner loops. *)
+        let rec nest rev_iters dims =
+          match dims with
+          | [] -> basecase rev_iters
+          | d :: inner_dims ->
+              let index = Indexing.get_symbol () in
+              let body = nest (index :: rev_iters) inner_dims in
+              For_loop { index; from_ = 0; to_ = d - 1; body; trace_it = true }
+        in
+        nest [] (Array.to_list projections.product_space)
     | Noop -> Low_level.Noop
     | Block_comment (s, c) -> Low_level.unflat_lines [ Comment s; loop c; Comment "end" ]
     | Seq (c1, c2) ->
@@ -297,7 +348,7 @@ let flatten c =
     | Noop -> []
     | Seq (c1, c2) -> loop c1 @ loop c2
     | Block_comment (s, c) -> Block_comment (s, Noop) :: loop c
-    | (Accum_ternop _ | Accum_binop _ | Accum_unop _ | Fetch _) as c -> [ c ]
+    | (Accum_ternop _ | Accum_binop _ | Accum_unop _ | Set_vec_unop _ | Fetch _) as c -> [ c ]
   in
   loop c
 
@@ -331,6 +382,8 @@ let get_ident_within_code ?no_dots c =
         List.iter ~f:visit [ lhs; tn rhs1; tn rhs2 ]
     | Accum_unop { initialize_neutral = _; accum = _; op = _; lhs; rhs; projections = _ } ->
         List.iter ~f:visit [ lhs; tn rhs ]
+    | Set_vec_unop { op = _; lhs; rhs; projections = _ } ->
+        List.iter ~f:visit [ lhs; tn rhs ]
     | Fetch { array; fetch_op = _; dims = _ } -> visit array
   in
   loop c;
@@ -426,6 +479,20 @@ let to_doc ?name ?static_indices () c =
         ^^ (if not (String.equal proj_spec ".") then string (" ~logic:\"" ^ proj_spec ^ "\"")
             else empty)
         ^^ string ";" ^^ break 1
+    | Set_vec_unop { op; lhs; rhs; projections } ->
+        (* Prints [lhs := op(rhs);], followed by a [~logic:"..."] annotation before the
+           semicolon whenever the projections spec differs from ".". *)
+        let spec =
+          (* Never force the projections here: only read them if they were already computed. *)
+          if Lazy.is_val projections then (Lazy.force projections).debug_info.spec
+          else "<not-in-yet>"
+        in
+        let logic_doc =
+          if String.equal spec "." then empty else string (" ~logic:\"" ^ spec ^ "\"")
+        in
+        let call_doc =
+          string (Ops.vec_unop_cd_syntax op) ^^ string "(" ^^ string (buffer_ident rhs) ^^ string ")"
+        in
+        string (ident lhs) ^^ string " := " ^^ call_doc ^^ logic_doc ^^ string ";" ^^ break 1
     | Fetch { array; fetch_op; dims = _ } ->
         string (ident array) ^^ string " := " ^^ doc_of_fetch_op fetch_op ^^ string ";" ^^ break 1
   in
 
@@ -46,6 +46,7 @@ module type C_syntax_config = sig
 
   val binop_syntax : Ops.prec -> Ops.binop -> PPrint.document -> PPrint.document -> PPrint.document
   val unop_syntax : Ops.prec -> Ops.unop -> PPrint.document -> PPrint.document
+  val vec_unop_syntax : Ops.prec -> Ops.vec_unop -> string
   val convert_precision : from:Ops.prec -> to_:Ops.prec -> string * string
 
   val kernel_log_param : (string * string) option
@@ -214,10 +215,17 @@ struct
               Neg;
               Tanh_approx;
               Not;
-              Uint4x32_to_prec_uniform;
             ]
           ~f:(fun op ->
             let p, _ = try Ops.unop_c_syntax prec op with Invalid_argument _ -> ("", "") in
+            if String.is_suffix p ~suffix:"(" then functions := Set.add !functions (remove_paren p));
+        List.iter
+          Ops.
+            [
+              Uint4x32_to_prec_uniform;
+            ]
+          ~f:(fun op ->
+            let p, _ = try Ops.vec_unop_c_syntax prec op with Invalid_argument _ -> ("", "") in
             if String.is_suffix p ~suffix:"(" then functions := Set.add !functions (remove_paren p)));
     Set.to_list !functions
 
@@ -410,6 +418,10 @@ struct
         let open PPrint in
         group (string op_prefix ^^ v ^^ string op_suffix)
 
+  (* Returns only the first component of [Ops.vec_unop_c_syntax prec op]; the second component
+     is discarded. *)
+  let vec_unop_syntax prec op = fst (Ops.vec_unop_c_syntax prec op)
+
   let convert_precision = Ops.c_convert_precision
   let kernel_log_param = Some ("const char*", "log_file_name")
   let log_involves_file_management = true
@@ -569,6 +581,32 @@ module C_syntax (B : C_syntax_config) = struct
             ~args_docs:[]
         else string "/* " ^^ string message ^^ string " */"
     | Staged_compilation callback -> callback ()
+    | Set_from_vec { tn; idcs; length; vec_unop; arg; debug = _ } ->
+        (* Emits code that evaluates [vec_unop] on [arg] once into a local named [vec_result],
+           then stores component [i] (for [i] in [0 .. length - 1]) into [tn] at the offset
+           obtained from [idcs] with [i] added to its last index.
+           Raises [Failure] when [length > 0] and the last entry of [idcs] is not a [Fixed_idx].
+           [idcs] is expected to be non-empty. *)
+        let ident_doc = string (get_ident tn) in
+        let dims = Lazy.force tn.dims in
+        let prec = Lazy.force tn.prec in
+        (* The argument is always printed at the uint4x32 precision. *)
+        let arg_prec = Ops.uint4x32 in
+        let local_defs, arg_doc = pp_float arg_prec arg in
+        (* Use both the opening and the closing part of the call syntax: emitting the opening
+           part alone would leave the generated call unterminated. *)
+        let op_prefix, op_suffix = Ops.vec_unop_c_syntax prec vec_unop in
+        let assignments =
+          let open PPrint in
+          let vec_var = string "vec_result" in
+          (* The vector type name is the scalar type name followed by the component count. *)
+          let vec_typ = string (B.typ_of_prec prec ^ Int.to_string length) in
+          let call_doc = string op_prefix ^^ arg_doc ^^ string op_suffix in
+          let vec_decl = vec_typ ^^ space ^^ vec_var ^^ string " = " ^^ call_doc ^^ semi in
+          let elem_assigns =
+            List.init length ~f:(fun i ->
+                (* Work on a copy so that shifting the last index does not modify [idcs]. *)
+                let elem_idcs = Array.copy idcs in
+                let last = Array.length elem_idcs - 1 in
+                (match elem_idcs.(last) with
+                | Fixed_idx idx -> elem_idcs.(last) <- Fixed_idx (idx + i)
+                | _ -> failwith "Set_from_vec: last index must be Fixed_idx");
+                let offset_doc = pp_array_offset (elem_idcs, dims) in
+                ident_doc ^^ brackets offset_doc ^^ string " = " ^^ vec_var
+                ^^ string ("." ^ Printf.sprintf "s%d" i)
+                ^^ semi)
+          in
+          vec_decl ^^ hardline ^^ separate hardline elem_assigns
+        in
+        if PPrint.is_empty local_defs then assignments else local_defs ^^ hardline ^^ assignments
     | Set_local ({ scope_id; tn = { prec; _ } }, value) ->
         let local_defs, value_doc = pp_float (Lazy.force prec) value in
         let assignment =
@@ -647,12 +685,6 @@ module C_syntax (B : C_syntax_config) = struct
         in
         let expr = group (B.binop_syntax prec op e1 e2) in
         (defs, expr)
-    | Unop (Ops.Uint4x32_to_prec_uniform, v) ->
-        let defs, expr_v = pp_float Ops.uint4x32 v in
-        let expr =
-          string ("uint4x32_to_" ^ Ops.prec_string prec ^ "_uniform(") ^^ expr_v ^^ string ")"
-        in
-        (defs, expr)
     | Unop (op, v) ->
         let defs, expr_v = pp_float prec v in
         let expr = group (B.unop_syntax prec op expr_v) in
@@ -720,13 +752,6 @@ module C_syntax (B : C_syntax_config) = struct
         let v1_doc, idcs1 = debug_float prec v1 in
         let v2_doc, idcs2 = debug_float prec v2 in
         (B.binop_syntax prec op v1_doc v2_doc, idcs1 @ idcs2)
-    | Unop (Ops.Uint4x32_to_prec_uniform, v) ->
-        let v_doc, idcs = debug_float Ops.uint4x32 v in
-        let expr_doc =
-          string ("uint4x32_to_" ^ Ops.prec_string prec ^ "_uniform(")
-          ^^ v_doc ^^ string "){=" ^^ string B.float_log_style ^^ string "}"
-        in
-        (expr_doc, idcs)
     | Unop (op, v) ->
         let v_doc, idcs = debug_float prec v in
         (B.unop_syntax prec op v_doc, idcs)