Skip to content

Commit

Permalink
Fix incorrect output address calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
KYovchevski committed Jan 23, 2024
1 parent d3ccd4b commit be36093
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 4 deletions.
Binary file modified src/ispc/downsample_ispcx86_64-pc-windows-msvc.lib
Binary file not shown.
7 changes: 3 additions & 4 deletions src/ispc/kernels/lanczos3.ispc
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ void resample_with_cache(uniform uint32 num_channels, uniform uint32 src_width,
// Accumulate only along the width for each pixel, sampling from the source image
// Results in the source image being downsampled to src_height X target_width
foreach_tiled(y = 0 ... src_height, x = 0 ... target_width) {

uint32 src_width_start = horizontal_weight_collection->starts[x];
uint32 num_horizontal_weights = horizontal_weight_collection->weight_counts[x];
float* horizontal_weights = horizontal_weight_collection->values[x];
Expand All @@ -160,15 +161,13 @@ void resample_with_cache(uniform uint32 num_channels, uniform uint32 src_width,
else
clean_and_write_4_channels(color4, scratch_write_address, scratch_space);
}

// Accumulate the scratch space data along the height
// Downsamples the src_height X target_width image to target_height X target_width
foreach_tiled(y = 0 ... target_height, x = 0 ... target_width) {

uint32 src_height_start = vertical_weight_collection->starts[y];
uint32 num_vertical_weights = vertical_weight_collection->weight_counts[y];
float* vertical_weights = vertical_weight_collection->values[y];

float<3> color3 = {0.0f, 0.0f, 0.0f};
float<4> color4 = {0.0f, 0.0f, 0.0f, 0.0f};
for (uint32 i = 0; i < num_vertical_weights; i++) {
Expand All @@ -185,8 +184,8 @@ void resample_with_cache(uniform uint32 num_channels, uniform uint32 src_width,
color4 += sample_4_channels(scratch_read_address, scratch_space) * weight;
}

uint64 out_write_address = (y * target_height + x) * num_channels;

uint64 out_write_address = (y * target_width + x) * num_channels;
assert(out_write_address < target_height * target_width * num_channels);
if (num_channels == 3)
clean_and_write_3_channels(color3, out_write_address, out_data);
else
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ pub fn downsample_with_custom_scale(
vertical_weights: height_weights.ispc_representation(),
horizontal_weights: width_weights.ispc_representation(),
};

unsafe {
if src.format.num_channels() == 3 {
ispc::downsample_ispc::resample_with_cache_3(
Expand Down

0 comments on commit be36093

Please sign in to comment.