Skip to content

Commit

Permalink
lanczos3: Const-fold offset calculation to hardcoded 0.5 (#28)
Browse files Browse the repository at this point in the history
The usefulness of this magic took a fair bit of time to understand,
yet we can trivially remove it after deducing that it always evaluates
to the constant `0.5`.  Removing it also gets rid of some strange bright
spots in the center of our image compared to #26.

Before:

![square_test_result](https://github.com/Traverse-Research/ispc-downsampler/assets/2325264/273556b4-6f53-43d5-9424-31fef5ca7966)

After:

![square_test_result](https://github.com/Traverse-Research/ispc-downsampler/assets/2325264/e0507eee-9a58-4fd8-b9fc-7a0a3c485ee8)

First, we know that `uv` is divided by `target_size` before it is
passed to `resample_internal()`.  Hence, if we multiply it by
`target_size` again, there should be no fractional part and
`center_pixel` should always become `0`.  Hardcoding that result removes
the floating-point rounding errors of this round trip, which is what
solves the bright spots in the center of the image mentioned above.

Then we are left with:

    center = uv - (0.0 - 0.5) * inv_target_size

Which becomes:

    center = uv + 0.5 * inv_target_size

As a drive-by cleanup we can now see that `(inv_)target_size` is only
used to offset `uv` by another half _target_ pixel to point to the
center instead of the top-left.  These values were already involved in
converting the `uv` coordinate from target pixels to normalized
coordinates, so it is more logical (and involves less math) to factor
this calculation into the call site and remove two extraneous function
parameters from `resample_internal()` as a result.

Now, continuing our journey, plug this into `offset` and simplify:

    offset = (uv - center) * target_size
    offset = (uv - (uv + 0.5 * inv_target_size)) * target_size
    offset = (-0.5 * inv_target_size) * target_size
    offset = -0.5

And we have our target value.  Then, because `offset` is subtracted when
calling `lanczos3_filter()`, we flip the sign and add a positive `0.5`
instead.

Note that I have _zero_ clue whether this is the right value, but when
sampling a 6x6 grid (not 7x7 as thought in #27) we only visit pixel
positions `[-3, ..., 2]`, thus neatly retrieving weights at `[-2.5, ...,
2.5]`.  We never hit the `3.5` value, which is above `3` and for which
`lanczos3_filter(3.5)` returns `0.`.
  • Loading branch information
MarijnS95 committed Aug 23, 2023
1 parent e545343 commit 4164ee3
Showing 1 changed file with 15 additions and 17 deletions.
32 changes: 15 additions & 17 deletions src/ispc/kernels/lanczos3.ispc
Original file line number Diff line number Diff line change
Expand Up @@ -51,30 +51,27 @@ static inline float byte_to_float(uint b) {
return (float)b;
}

static inline uint8<4> resample_internal(uniform Image src_image, uniform float<2> inv_src_size, float<2> uv, uniform float<2> target_size, uniform float<2> inv_target_size, uniform uint8 num_channels) {
float<2> center_pixel = uv * target_size;
center_pixel.x = frac(center_pixel.x);
center_pixel.y = frac(center_pixel.y);
float<2> center = uv - (center_pixel - 0.5) * inv_target_size;
float<2> offset = (uv - center) * target_size;
const float inv_255 = rcp(255.0);

static inline uint8<4> resample_internal(uniform Image src_image, uniform float<2> inv_src_size, float<2> uv, uniform uint8 num_channels) {
float<4> col = { 0, 0, 0, 0 };
float weight = 0.0;

for (uniform int x = -3; x <= 3; x++) {
for (uniform int y = -3; y <= 3; y++) {
float wx = lanczos3_filter((uniform float)x - offset.x);
float wy = lanczos3_filter((uniform float)y - offset.y);
const float lanczos3_offset = 0.5;

float wx = lanczos3_filter((uniform float)x + lanczos3_offset);
float wy = lanczos3_filter((uniform float)y + lanczos3_offset);
float w = wx * wy;
float<2> texel_offset = {x, y};
float<2> texel_uv = center + texel_offset * inv_src_size;
float<2> texel_uv = uv + texel_offset * inv_src_size;
int<2> pixel_coord = uv_to_pixel_id(src_image.size, texel_uv);

int addr = (pixel_coord.x + pixel_coord.y * src_image.size.x) * num_channels;

float<4> texel;

const float inv_255 = rcp(255.0);

if (num_channels == 3) {
texel.x = byte_to_float(src_image.data[addr + 0]) * inv_255;
texel.y = byte_to_float(src_image.data[addr + 1]) * inv_255;
Expand All @@ -97,16 +94,17 @@ static inline uint8<4> resample_internal(uniform Image src_image, uniform float<
export void resample(uniform uint32 width, uniform uint32 height, uniform uint32 stride, uniform uint8 num_channels, uniform uint32 target_width, uniform uint32 target_height, uniform const uint8 src_data[], uniform uint8 out_data[]) {
uniform Image src = {src_data, {width, height}};
uniform float<2> target_size = {(float)target_width, (float)target_height};
uniform float<2> inv_target_size = {1.0 / target_width, 1.0 / target_height};
uniform float<2> inv_target_size = 1.0f / target_size;
uniform float<2> inv_src_size = 1.0f / src.size;

foreach_tiled (y = 0 ... target_height, x = 0 ... target_width) {
float yf = y / target_size.y;
float xf = x / target_size.x;

float<2> uv = { xf, yf };
float<2> uv = {x, y};
// Use the center of each pixel, not the top-left:
uv += 0.5f;
// Convert to uniform space:
uv *= inv_target_size;

uint8<4> s = resample_internal(src, inv_src_size, uv, target_size, inv_target_size, num_channels);
uint8<4> s = resample_internal(src, inv_src_size, uv, num_channels);

for (uniform int i = 0; i < num_channels; i++)
out_data[(x + y * target_width) * num_channels + i] = s[i];
Expand Down

0 comments on commit 4164ee3

Please sign in to comment.