Skip to content

Commit

Permalink
lanczos3: Mark filter weights as uniform
Browse files Browse the repository at this point in the history
Before:

    Downsample `square_test.png` using ispc_downsampler
                        time:   [43.438 ms 43.468 ms 43.500 ms]

After:

    Downsample `square_test.png` using ispc_downsampler
                        time:   [29.891 ms 29.922 ms 29.953 ms]
                        change: [-31.246% -31.162% -31.077%] (p = 0.00 < 0.05)
  • Loading branch information
MarijnS95 committed Nov 21, 2023
1 parent 79605f6 commit 632f46d
Showing 1 changed file with 19 additions and 24 deletions.
43 changes: 19 additions & 24 deletions src/ispc/kernels/lanczos3.ispc
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
#include "image.ispc"

#define M_PI 3.14159265358979
const uniform float M_PI = 3.14159265358979;

// Flushes near-zero values to exactly zero: returns 0.0f when |t| is below
// EPSILON (.0000125f), otherwise returns t unchanged.
// NOTE(review): this is a diff rendering, so each changed line appears twice;
// presumably the plain (varying) form is the old line and the `uniform` form
// directly below it is the replacement -- confirm against the commit.
static inline float clean(float t)
static inline uniform float clean(uniform float t)
{
const float EPSILON = .0000125f;
const uniform float EPSILON = .0000125f;
if (abs(t) < EPSILON)
return 0.0f;
return (float)t;
return t;
}

// Normalized sinc: scales x by M_PI and returns sin(x) / x.
// NOTE(review): this is a diff rendering, so each changed line appears twice;
// presumably the plain (varying) form is the old line and the `uniform` form
// directly below it is the replacement -- confirm against the commit.
static inline float sinc(float x)
static inline uniform float sinc(uniform float x)
{
x = (x * M_PI);
x = x * M_PI;

// Disabled small-|x| fallback: a polynomial approximation of sin(x) / x
// (1 - x^2/6 + x^4/120) that would avoid the division near zero.
// if ((x < 0.01f) && (x > -0.01f))
// return 1.0f + x * x * (-1.0f / 6.0f + x * x * 1.0f / 120.0f);

// NOTE(review): with the fallback disabled, x == 0 evaluates 0 / 0 here --
// confirm that callers never pass zero.
return sin(x) / x;
}

static inline float lanczos3_filter(float t)
static inline uniform float lanczos3_filter(uniform float t)
{
t = abs(t);

Expand All @@ -30,19 +30,14 @@ static inline float lanczos3_filter(float t)
return 0.0f;
}

// Returns the fractional part of |f|, computed as abs(f) - floor(abs(f)).
// The sign of f is discarded, so the result is the same for f and -f.
static inline float frac(float f) {
float absf = abs(f);
return absf - floor(absf);
}

// Converts an unsigned integer to float with a plain cast; the value is not
// rescaled here.
static inline float byte_to_float(uint b) {
// Disabled alternative: ORs b into the mantissa bits of 1.0f (0x3f800000)
// and subtracts 1.0. NOTE(review): that variant would yield a fraction
// rather than the raw value, so it is not a drop-in replacement -- confirm
// before re-enabling.
//return floatbits(0x3f800000 | (b << (23 - 8))) - 1.0;
return (float)b;
}

static inline uint8<4> resample_internal(uniform Image src_image, float<2> uv, uniform uint8 num_channels) {
static inline uint8<4> resample_internal(const uniform Image src_image, const float<2> uv, const uniform uint8 num_channels) {
float<4> col = 0.0;
float weight = 0.0;
uniform float weight = 0.0;
// Truncate floating point coordinate to integer:
const int<2> src_coord = uv * src_image.size;

Expand All @@ -52,22 +47,22 @@ static inline uint8<4> resample_internal(uniform Image src_image, float<2> uv, u
// right and bottom of the target pixel.
for (uniform int x = -3; x < 3; x++) {
for (uniform int y = -3; y < 3; y++) {
float wx = lanczos3_filter((uniform float)x + 0.5);
float wy = lanczos3_filter((uniform float)y + 0.5);
const uniform float wx = lanczos3_filter((uniform float)x + 0.5);
const uniform float wy = lanczos3_filter((uniform float)y + 0.5);
const uniform float w = wx * wy;
const uniform int<2> texel_offset = {x, y};

float w = wx * wy;
int<2> texel_offset = {x, y};
int<2> src_kernel_coord = src_coord + texel_offset;
// TODO: Let the user specify a boundary mode!
// https://github.com/Traverse-Research/ispc-downsampler/issues/25#issuecomment-1584915050
src_kernel_coord.x = clamp(src_kernel_coord.x, 0, src_image.size.x - 1);
src_kernel_coord.y = clamp(src_kernel_coord.y, 0, src_image.size.y - 1);

int addr = (src_kernel_coord.x + src_kernel_coord.y * src_image.size.x) * num_channels;
const int addr = (src_kernel_coord.x + src_kernel_coord.y * src_image.size.x) * num_channels;

float<4> texel;

const float inv_255 = rcp(255.0);
const uniform float inv_255 = rcp(255.0);

if (num_channels == 3) {
texel.x = byte_to_float(src_image.data[addr + 0]) * inv_255;
Expand All @@ -89,9 +84,9 @@ static inline uint8<4> resample_internal(uniform Image src_image, float<2> uv, u
}

export void resample(uniform uint32 width, uniform uint32 height, uniform uint32 stride, uniform uint8 num_channels, uniform uint32 target_width, uniform uint32 target_height, uniform const uint8 src_data[], uniform uint8 out_data[]) {
uniform Image src = {src_data, {width, height}};
uniform float<2> target_size = {(float)target_width, (float)target_height};
uniform float<2> inv_target_size = 1.0f / target_size;
const uniform Image src = {src_data, {width, height}};
const uniform float<2> target_size = {(float)target_width, (float)target_height};
const uniform float<2> inv_target_size = 1.0f / target_size;

foreach_tiled (y = 0 ... target_height, x = 0 ... target_width) {
float<2> uv = {x, y};
Expand All @@ -100,7 +95,7 @@ export void resample(uniform uint32 width, uniform uint32 height, uniform uint32
// Convert to uniform space:
uv *= inv_target_size;

uint8<4> s = resample_internal(src, uv, num_channels);
const uint8<4> s = resample_internal(src, uv, num_channels);

for (uniform int i = 0; i < num_channels; i++)
out_data[(x + y * target_width) * num_channels + i] = s[i];
Expand Down

0 comments on commit 632f46d

Please sign in to comment.