 
 This file contains basic building blocks for neural networks, with limited functionality. Feel
 free to copy-paste and modify as needed.
-
+
 Design principles, OCANNL fundamentals, and common patterns:
- - "Principle of least commitment": use row variables where axis count doesn't matter
- - Einsum specs here often use single-char mode (no commas) but with spaces for readability
- - Pooling uses constant kernels (0.5 + 0.5) to propagate window dimensions
- - conv2d uses convolution syntax: "stride*out+kernel," (often in multi-char mode)
- - Input axes (before →) for kernels show intent (and end up rightmost for memory locality)
- - Inline params { } are always learnable and are lifted to unit parameter ()
- - Introduce inputs to a block after sub-block construction
-   (sub-blocks have no automatic lifting like there is for inline definitions of params)
- - Always use literal strings with einsum operators when capturing variables
- - Avoid unnecessary variable captures in einsum operators, be mindful they can shadow
-   other identifiers
- *)
+ - "Principle of least commitment": use row variables where axis count doesn't matter
+ - Einsum specs here often use single-char mode (no commas) but with spaces for readability
+ - Pooling uses constant kernels (0.5 + 0.5) to propagate window dimensions
+ - conv2d uses convolution syntax: "stride*out+kernel," (often in multi-char mode)
+ - Input axes (before →) for kernels show intent (and end up rightmost for memory locality)
+ - Inline params \{ \} are always learnable and are lifted to unit parameter ()
+ - Introduce inputs to a block after sub-block construction (sub-blocks have no automatic lifting
+   like there is for inline definitions of params)
+ - Always use literal strings with einsum operators when capturing variables
+ - Avoid unnecessary variable captures in einsum operators, be mindful they can shadow other
+   identifiers *)
 
 open! Base
 open Ocannl_tensor.Operation.DSL_modules
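
The conventions above are easiest to see in code. Below is a minimal plain-OCaml sketch of the "introduce inputs to a block after sub-block construction" convention; it deliberately avoids the %op / inline-param syntax, and some_block together with its arguments is a hypothetical stand-in:

let my_stack ~label ~d_model () =
  (* Sub-blocks are constructed first, once per instance of [my_stack]. *)
  let block1 = some_block ~label:("sub1" :: label) ~d_model () in
  let block2 = some_block ~label:("sub2" :: label) ~d_model () in
  (* The input [x] is introduced only after the sub-blocks exist, so applying the
     returned closure repeatedly reuses the same sub-block parameters. *)
  fun x -> block2 (block1 x)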
@@ -130,7 +129,7 @@ let transformer_encoder ~label ~num_layers ~num_heads ~d_k ~d_v ~d_ff ?(epsilon
   let layers =
     List.init num_layers ~f:(fun i ->
         transformer_encoder_block
-          ~label:(( "layer" ^ Int.to_string i) :: label)
+          ~label:(("layer" ^ Int.to_string i) :: label)
           ~num_heads ~d_k ~d_v ~d_ff ~epsilon ())
   in
   fun ~train_step x -> List.fold layers ~init:x ~f:(fun x layer -> layer ~train_step x)
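
As a usage illustration of the stacking pattern above, here is a hypothetical sketch; the hyperparameter values are made up for the example, and ~train_step is threaded through unchanged since its type is not shown in this diff:

let _use_encoder ~train_step x =
  let encoder =
    transformer_encoder ~label:[ "encoder" ] ~num_layers:6 ~num_heads:8 ~d_k:64 ~d_v:64
      ~d_ff:2048 ()
  in
  (* The List.fold above applies the six encoder blocks in order, passing
     ~train_step to each of them. *)
  encoder ~train_step x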
@@ -139,7 +138,7 @@ let transformer_decoder ~label ~num_layers ~num_heads ~d_k ~d_v ~d_ff ?(epsilon
   let layers =
     List.init num_layers ~f:(fun i ->
         transformer_decoder_block
-          ~label:(( "layer" ^ Int.to_string i) :: label)
+          ~label:(("layer" ^ Int.to_string i) :: label)
           ~num_heads ~d_k ~d_v ~d_ff ~epsilon ())
   in
   fun ~train_step target ~enc_output ~mask ->
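
A matching hypothetical sketch for the decoder stack: each layer receives the running target together with the fixed ~enc_output and ~mask, mirroring the encoder fold; again the hyperparameter values are illustrative only:

let _use_decoder ~train_step target ~enc_output ~mask =
  let decoder =
    transformer_decoder ~label:[ "decoder" ] ~num_layers:6 ~num_heads:8 ~d_k:64 ~d_v:64
      ~d_ff:2048 ()
  in
  decoder ~train_step target ~enc_output ~mask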