lanczos3: Const-fold offset calculation to hardcoded 0.5 (#28)

The usefulness of this magic took a fair bit of time to understand, yet it can be trivially removed once we deduce that it always computes to the constant `0.5`. Removing it also gets rid of some strange bright spots in the center of our image compared to #26.

Before: ![square_test_result](https://github.com/Traverse-Research/ispc-downsampler/assets/2325264/273556b4-6f53-43d5-9424-31fef5ca7966)

After: ![square_test_result](https://github.com/Traverse-Research/ispc-downsampler/assets/2325264/e0507eee-9a58-4fd8-b9fc-7a0a3c485ee8)

First, we start by knowing that `uv` is divided by `target_size` before it is passed to `resample_internal()`. Hence, if we multiply it by `target_size` again, there should be no fractional part and `center_pixel` always becomes `0`. With the floating-point rounding errors now gone, this is what solves the bright spots in the center of the image mentioned above. Then we are left with:

    center = uv - (0.0 - 0.5) * inv_target_size

Which becomes:

    center = uv + 0.5 * inv_target_size

As a drive-by cleanup we can now see that `(inv_)target_size` is only used to offset `uv` by another half _target_ pixel, so that it points to the center of the pixel instead of the top-left. These values were already involved in converting the `uv` coordinate from target pixels to normalized coordinates, so it reads more logically (and involves less math) to factor this calculation into the call site and remove two extraneous function parameters from `resample_internal()` as a result.

Now, continuing our journey, plug this into `offset` and simplify:

    offset = (uv - center) * target_size
    offset = (uv - (uv + 0.5 * inv_target_size)) * target_size
    offset = (-0.5 * inv_target_size) * target_size
    offset = -0.5

And we have our target value. Then, because the offset components are subtracted when calling `lanczos3_filter()`, we turn this into positive `0.5`.
Note that I have _zero_ clue whether this is the right value. However, when sampling a 6x6 grid (not 7x7 as thought in #27) we only visit pixel positions `[-3, ..., 2]`, thus neatly retrieving weights at `[-2.5, ..., 2.5]` and never hitting the `3.5` value, which lies above `3` and for which `lanczos3_filter(3.5)` returns `0.`.
Traverse-Research · Aug 23, 2023 · 4164ee3 · 4164ee3
1 parent e545343
commit 4164ee3
Showing 1 changed file with 15 additions and 17 deletions.
diff --git a/src/ispc/kernels/lanczos3.ispc b/src/ispc/kernels/lanczos3.ispc
@@ -51,30 +51,27 @@ static inline float byte_to_float(uint b) {
     return (float)b;
 }
 
-static inline uint8<4> resample_internal(uniform Image src_image, uniform float<2> inv_src_size, float<2> uv, uniform float<2> target_size, uniform float<2> inv_target_size, uniform uint8 num_channels) {
-    float<2> center_pixel = uv * target_size;
-    center_pixel.x = frac(center_pixel.x);
-    center_pixel.y = frac(center_pixel.y);
-    float<2> center = uv - (center_pixel - 0.5) * inv_target_size;
-    float<2> offset = (uv - center) * target_size;
-    const float inv_255 = rcp(255.0);
-
+static inline uint8<4> resample_internal(uniform Image src_image, uniform float<2> inv_src_size, float<2> uv, uniform uint8 num_channels) {
     float<4> col = { 0, 0, 0, 0 };
     float weight = 0.0;
 
     for (uniform int x = -3; x <= 3; x++) {
         for (uniform int y = -3; y <= 3; y++) {
-            float wx = lanczos3_filter((uniform float)x - offset.x);
-            float wy = lanczos3_filter((uniform float)y - offset.y);
+            const float lanczos3_offset = 0.5;
+
+            float wx = lanczos3_filter((uniform float)x + lanczos3_offset);
+            float wy = lanczos3_filter((uniform float)y + lanczos3_offset);
             float w = wx * wy;
             float<2> texel_offset = {x, y};
-            float<2> texel_uv = center + texel_offset * inv_src_size;
+            float<2> texel_uv = uv + texel_offset * inv_src_size;
             int<2> pixel_coord = uv_to_pixel_id(src_image.size, texel_uv);
 
             int addr = (pixel_coord.x + pixel_coord.y * src_image.size.x) * num_channels;
 
             float<4> texel;
 
+            const float inv_255 = rcp(255.0);
+
             if (num_channels == 3) {
                 texel.x = byte_to_float(src_image.data[addr + 0]) * inv_255;
                 texel.y = byte_to_float(src_image.data[addr + 1]) * inv_255;
@@ -97,16 +94,17 @@ static inline uint8<4> resample_internal(uniform Image src_image, uniform float<
 export void resample(uniform uint32 width, uniform uint32 height, uniform uint32 stride, uniform uint8 num_channels, uniform uint32 target_width, uniform uint32 target_height, uniform const uint8 src_data[], uniform uint8 out_data[]) {
     uniform Image src = {src_data, {width, height}};
     uniform float<2> target_size = {(float)target_width, (float)target_height};
-    uniform float<2> inv_target_size = {1.0 / target_width, 1.0 / target_height};
+    uniform float<2> inv_target_size = 1.0f / target_size;
     uniform float<2> inv_src_size = 1.0f / src.size;
 
     foreach_tiled (y = 0 ... target_height, x = 0 ... target_width) {
-        float yf = y / target_size.y;
-        float xf = x / target_size.x;
-
-        float<2> uv = { xf, yf };
+        float<2> uv = {x, y};
+        // Use the center of each pixel, not the top-left:
+        uv += 0.5f;
+        // Convert to uniform space:
+        uv *= inv_target_size;
 
-        uint8<4> s = resample_internal(src, inv_src_size, uv, target_size, inv_target_size, num_channels);
+        uint8<4> s = resample_internal(src, inv_src_size, uv, num_channels);
 
         for (uniform int i = 0; i < num_channels; i++)
             out_data[(x + y * target_width) * num_channels + i] = s[i];