Fix propagating information to projections inference in pooling operations

lukstafi · lukstafi · commit 49c176869357 · 2025-09-05T15:07:45.000+02:00
diff --git a/lib/nn_blocks.ml b/lib/nn_blocks.ml
@@ -195,16 +195,26 @@ let%op depthwise_separable_conv2d ~label ?(kernel_size = 3) ?(stride = 1) () x =
 
 (** Max pooling for 2D spatial data - reduces spatial dimensions by taking maximum values. *)
 let%op max_pool2d ?(stride = 2) ?(window_size = 2) () x =
-  (* Although there is no kernel participating, we need to set the iteration size. *)
   Shape.set_dim wh window_size;
   Shape.set_dim ww window_size;
-  x @^^ "... | stride*oh+wh, stride*ow+ww, ..c.. => ... | oh, ow, ..c.." [ "wh"; "ww" ]
+  (* NOTE: projections inference runs per-assignment in a distinct phase from shape inference, so
+     for it to know about the window size, we use a constant kernel = 1 to propagate the shape.
+     We use a trick to create a shape-inferred constant tensor, equivalently we could write
+     "NTDSL.term ~fetch_op:(Constant 1.) ()" but that's less concise. See:
+     https://github.com/ahrefs/ocannl/discussions/381 *)
+  x
+  @^+ "... | stride*oh+wh, stride*ow+ww, ..c..; wh, ww => ... | oh, ow, ..c.." [ "wh"; "ww" ]
+        (0.5 + 0.5)
 
 (** Average pooling for 2D spatial data - reduces spatial dimensions by averaging values. *)
 let%op avg_pool2d ?(stride = 2) ?(window_size = 2) () x =
   Shape.set_dim wh window_size;
   Shape.set_dim ww window_size;
-  let sum = x ++ "... | stride*oh+wh, stride*ow+ww, ..c.. => ... | oh, ow, ..c.." [ "wh"; "ww" ] in
+  let sum =
+    x
+    +++ "... | stride*oh+wh, stride*ow+ww, ..c..; wh, ww => ... | oh, ow, ..c.." [ "wh"; "ww" ]
+          (0.5 + 0.5)
+  in
   sum /. (dim wh *. dim ww)
 
 (** Global average pooling - reduces each feature map to a single value by averaging. Commonly used