Commit 842daaa

Migrate the syntax away from operators for unary primitive ops and relu

1 parent e126bd2

19 files changed: +124 -79 lines
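
In short, call sites migrate from the ?/ prefix operator to plain relu function application. A before/after sketch distilled from the diffs below (taken from bin/micrograd_basic.ml):

    (* before: relu spelled as the prefix operator ?/ *)
    let%op d = d + (d *. 2) + ?/(b + a) in
    (* after: relu spelled as an ordinary function *)
    let%op d = d + (d *. 2) + relu (b + a) in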

arrayjit/lib/ops.ml

Lines changed: 3 additions & 3 deletions

@@ -273,9 +273,9 @@ let binop_cd_fallback_syntax = function
   | Relu_gate -> "relu_gate"
   | Cmplt -> "lt"
   | Cmpne -> "le"
-  | Or -> "orf"
-  | And -> "andf"
-  | Mod -> "modf"
+  | Or -> "or_"
+  | And -> "and_"
+  | Mod -> "mod_"
   | Max -> "max"
   | Min -> "min"
   (* | Shl -> "shlf" *)
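
A plausible reason for the trailing underscores (an inference; the commit message does not say): or, and, and mod are reserved words in OCaml, so the bare names would not parse as identifiers. A standalone sketch with hypothetical toy definitions:

    (* `let or a b = ...` is a syntax error: `or` is a keyword alias of ||;
       `and` and `mod` are likewise reserved, hence the underscore suffix. *)
    let or_ a b = a || b
    let and_ a b = a && b
    let mod_ a b = a mod b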

bin/hello_world.ml

Lines changed: 1 addition & 1 deletion

@@ -51,7 +51,7 @@ let hello3 () =
   let stream = Backend.(new_stream @@ get_device ~ordinal:0) in
   let ctx = Backend.make_context stream in
   (* Hey is inferred to be a matrix. *)
-  let hey = TDSL.O.(!~"hey") in
+  let hey = Tensor.param "hey" in
   let zero_to_twenty = TDSL.range 20 in
   let y = TDSL.O.(( + ) ~label:[ "y" ] (hey * zero_to_twenty) zero_to_twenty) in
   Train.set_hosted hey.value;
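
The ( !~ ) prefix operator was a thin alias for Tensor.param (its definition is deleted in lib/operation.ml below), so call sites migrate one-for-one:

    (* old: let hey = TDSL.O.(!~"hey") in *)
    let hey = Tensor.param "hey" in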

bin/micrograd_basic.ml

Lines changed: 2 additions & 2 deletions

@@ -43,8 +43,8 @@ let%diagn_sexp _suspended () : unit =
   let%op d = (a *. b) + (b **. 3) in
   let%op c = c + c + 1 in
   let%op c = c + 1 + c + ~-a in
-  let%op d = d + (d *. 2) + ?/(b + a) in
-  let%op d = d + (3 *. d) + ?/(b - a) in
+  let%op d = d + (d *. 2) + relu (b + a) in
+  let%op d = d + (3 *. d) + relu (b - a) in
   let%op e = c - d in
   let%op f = e *. e in
   let%op g = f /. 2 in

bin/micrograd_demo.ml

Lines changed: 2 additions & 2 deletions

@@ -39,7 +39,7 @@ let experiment seed ~no_batch_shape_inference ~use_builtin_weight_decay () =
   let moons_classes = TDSL.init_const ~l:"moons_classes" ?b ~o:[ 1 ] moons_classes in
   let batch_n, bindings = IDX.get_static_symbol ~static_range:n_batches IDX.empty in
   let step_n, bindings = IDX.get_static_symbol bindings in
-  let%op mlp x = "b3" + ("w3" * ?/("b2" hid_dim + ("w2" * ?/("b1" hid_dim + ("w1" * x))))) in
+  let%op mlp x = "b3" + ("w3" * relu ("b2" hid_dim + ("w2" * relu ("b1" hid_dim + ("w1" * x))))) in
   let%op moons_input = moons_flat @| batch_n in
   (* Tell shape inference to make a minibatch axis. *)
   let () =
@@ -56,7 +56,7 @@ let experiment seed ~no_batch_shape_inference ~use_builtin_weight_decay () =
   let losses = ref [] in
   let log_losses = ref [] in
   let learning_rates = ref [] in
-  let%op margin_loss = ?/(1 - (moons_class *. mlp moons_input)) in
+  let%op margin_loss = relu (1 - (moons_class *. mlp moons_input)) in
   (* We don't need a regression loss formula thanks to weight_decay built into the sgd_update
      computation. *)
   let scalar_loss, weight_decay =
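
Background note (standard machine learning, not stated in the diff): with class labels y in {-1, +1} and prediction f(x) = mlp moons_input, the margin_loss above is the pointwise hinge loss, since relu t = max(0, t):

    L(x, y) = max(0, 1 - y * f(x))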

bin/moons_benchmark.ml

Lines changed: 5 additions & 3 deletions

@@ -76,11 +76,13 @@ let classify_moons ~seed ~on_device ~inlining_cutoff ~num_streams ~batch_size ~b
   let init_time = Time_now.nanoseconds_since_unix_epoch () in
   let%op mlp x =
     "w4"
-    * ?/("b3" hid_dim_3 + ("w3" * ?/("b2" hid_dim_2 + ("w2" * ?/("b1" hid_dim_1 + ("w1" * x))))))
+    * relu
+        ("b3" hid_dim_3
+        + ("w3" * relu ("b2" hid_dim_2 + ("w2" * relu ("b1" hid_dim_1 + ("w1" * x))))))
   in
   (* TINY for debugging: *)
-  (* let%op mlp x = "w2" * ?/("b1" hid_dim + ("w1" * x)) in *)
-  let%op loss_fn ~output ~expectation = ?/(!..1 - (expectation *. output)) in
+  (* let%op mlp x = "w2" * relu("b1" hid_dim + ("w1" * x)) in *)
+  let%op loss_fn ~output ~expectation = relu (!..1 - (expectation *. output)) in
   let start_time = ref None in
   let weight_decay = 0.0002 in
   Arrayjit.Schedulers.sync_suggested_num_streams := num_streams;

bin/moons_demo.ml

Lines changed: 2 additions & 2 deletions

@@ -27,7 +27,7 @@ let demo () =
   let steps = epochs * n_batches in
   let weight_decay = 0.0002 in

-  let%op mlp x = "b3" + ("w3" * ?/("b2" hid_dim + ("w2" * ?/("b1" hid_dim + ("w1" * x))))) in
+  let%op mlp x = "b3" + ("w3" * relu ("b2" hid_dim + ("w2" * relu ("b1" hid_dim + ("w1" * x))))) in

   let noise () = Rand.float_range (-0.1) 0.1 in
   let moons_flat =
@@ -49,7 +49,7 @@ let demo () =
   let%op moons_input = moons_flat @| batch_n in
   let%op moons_class = moons_classes @| batch_n in

-  let%op margin_loss = ?/(1 - (moons_class *. mlp moons_input)) in
+  let%op margin_loss = relu (1 - (moons_class *. mlp moons_input)) in
   let%op scalar_loss = (margin_loss ++ "...|... => 0") /. !..batch_size in

   let update = Train.grad_update scalar_loss in

bin/moons_demo_parallel.ml

Lines changed: 2 additions & 2 deletions

@@ -35,9 +35,9 @@ let experiment ~seed ~backend_name ~config () =
   let moons_flat ~b = TDSL.init_const ~l:"moons_flat" ~b ~o:[ 2 ] moons_flat in
   let moons_classes = Array.init (len * 2) ~f:(fun i -> if i % 2 = 0 then 1. else -1.) in
   let moons_classes ~b = TDSL.init_const ~l:"moons_classes" ~b ~o:[ 1 ] moons_classes in
-  let%op mlp x = "b3" + ("w3" * ?/("b2" hid_dim + ("w2" * ?/("b1" hid_dim + ("w1" * x))))) in
+  let%op mlp x = "b3" + ("w3" * relu ("b2" hid_dim + ("w2" * relu ("b1" hid_dim + ("w1" * x))))) in
   (* let%op mlp x = "b" + ("w" * x) in *)
-  let%op loss_fn ~output ~expectation = ?/(!..1 - (expectation *. output)) in
+  let%op loss_fn ~output ~expectation = relu (!..1 - (expectation *. output)) in
   (* We don't need a regression loss formula thanks to weight_decay built into the sgd_update
      computation. *)
   let weight_decay = 0.0002 in

docs/OCANNL-ocaml_workshop_2024.tm

Lines changed: 4 additions & 4 deletions

@@ -5,10 +5,10 @@
 <\body>
 <doc-data|<doc-title|OCANNL optimization framework>|<doc-subtitle|Tensor
 shape inference, concise notation, multidevice
-runtime>|<doc-author|<author-data|<author-name|Šukasz
+runtime>|<doc-author|<author-data|<author-name|Łukasz
 Stafiniak>|<\author-note>
 Since April 2024, <hlink|<with|font-family|tt|<with|color|orange|a><with|color|blue|hrefs>>|https://ahrefs.com/>
-sponsors Šukasz's work on OCANNL.
+sponsors Łukasz's work on OCANNL.
 </author-note>>>>

 <abstract-data|<abstract|OCANNL is a Deep Learning framework with
@@ -98,14 +98,14 @@
 fetaures <with|font-shape|italic|parameter punning> (strings become
 let-bindings of tensors) and inline output dimensions specification. Full
 example of a Multi Layer Perceptron with 2 hidden layers and Rectified
-Linear Unit non-linearity <verbatim|(?/)>, defining tensors <verbatim|b1>,
+Linear Unit non-linearity <verbatim|(relu)>, defining tensors <verbatim|b1>,
 <verbatim|w1>, <verbatim|b2>, <verbatim|w2>, <verbatim|b3>, <verbatim|w3>,
 and a tensor-returning function <verbatim|mlp>:

 <\verbatim-code>
 let%op mlp x =

-\ \ "b3" + ("w3" * ?/("b2" hid_dim + ("w2" * ?/("b1" hid_dim + ("w1" *
+\ \ "b3" + ("w3" * relu("b2" hid_dim + ("w2" * relu("b1" hid_dim + ("w1" *
 x)))))
 </verbatim-code>
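For reference, the paper's example in its new spelling, exactly as it now reads in bin/moons_demo.ml above (parameter punning in action: string literals such as "w1" introduce parameter tensors, and a following expression such as hid_dim pins the output dimension):

    let%op mlp x = "b3" + ("w3" * relu ("b2" hid_dim + ("w2" * relu ("b1" hid_dim + ("w1" * x))))) in
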
lib/nn_blocks.ml

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@ module NTDSL = Operation.NTDSL

 type mlp_layer_config = { label : string list; hid_dim : int }

-let%op mlp_layer ~config x = ?/(("w" * x) + "b" config.hid_dim)
+let%op mlp_layer ~config x = relu (("w" * x) + "b" config.hid_dim)

 type mlp_config = { label : string list; hid_dims : int list }

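A hypothetical usage sketch for the updated layer (the record fields come from mlp_layer_config above; the exact signature generated by %op is assumed, not shown in this diff):

    let layer = mlp_layer ~config:{ label = [ "layer1" ]; hid_dim = 16 }
    (* apply later to an input tensor: layer x *)
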
lib/operation.ml

Lines changed: 3 additions & 4 deletions

@@ -127,13 +127,13 @@ let relu ?(label = []) =
   let module NTDSL = Initial_NTDSL in
   let%cd op_asn ~v ~t1 ~projections = v =: relu v1 ~projections in
   let%cd grad_asn ~v ~g ~t1 ~projections = g1 =+ v -?/ g in
-  Tensor.unop ~label:("?/" :: label) ~transpose_op:Pointwise_un ~op_asn ~grad_asn
+  Tensor.unop ~label:("relu" :: label) ~transpose_op:Pointwise_un ~op_asn ~grad_asn

 module NDO_without_pow = struct
   let ( * ) = matmul ~grad_spec:Prohibit_grad
   let ( *. ) = pointmul ~grad_spec:Prohibit_grad
   let ( + ) = add ~grad_spec:Prohibit_grad
-  let ( ?/ ) = relu ~grad_spec:Prohibit_grad
+  let relu = relu ~grad_spec:Prohibit_grad
   let ( !. ) = Tensor.number ~grad_spec:Prohibit_grad
   let ( !.. ) ?label i = Tensor.number ?label ~grad_spec:Prohibit_grad @@ Float.of_int i
   let ( - ) = sub ~grad_spec:Prohibit_grad
@@ -260,8 +260,7 @@ module DO = struct
   let ( *. ) = pointmul ~grad_spec:If_needed
   let ( + ) = add ~grad_spec:If_needed
   let ( **. ) ?label base exp = pointpow ?label exp base ~grad_spec:If_needed
-  let ( ?/ ) = relu ~grad_spec:If_needed
-  let ( !~ ) label = Tensor.param label
+  let relu = relu ~grad_spec:If_needed
   let ( !. ) = Tensor.number ~grad_spec:If_needed
   let ( !.. ) ?label i = Tensor.number ?label ~grad_spec:If_needed @@ Float.of_int i
   let ( !@ ) = embed_symbol
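
The surrounding modules share one pattern, which relu now follows as an ordinary binding instead of the removed ( ?/ ) and ( !~ ) operators: each DSL module specializes the generic operations by fixing ~grad_spec, shadowing the generic names. Annotated excerpt from the diff above:

    (* inside module DO: the right-hand relu is the generic op defined
       earlier in the file; fixing ~grad_spec shadows the name locally. *)
    let relu = relu ~grad_spec:If_needed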
