Skip to content

Commit

Permalink
Remove a skippable write in the clean_and_write ISPC functions
Browse files Browse the repository at this point in the history
  • Loading branch information
KYovchevski committed Nov 28, 2022
1 parent c20f809 commit 22af04d
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 25 deletions.
22 changes: 14 additions & 8 deletions src/ispc/downsample_ispc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ pub struct WeightVariables {
}
#[test]
fn bindgen_test_layout_WeightVariables() {
const UNINIT: ::std::mem::MaybeUninit<WeightVariables> = ::std::mem::MaybeUninit::uninit();
let ptr = UNINIT.as_ptr();
assert_eq!(
::std::mem::size_of::<WeightVariables>(),
12usize,
Expand All @@ -22,7 +24,7 @@ fn bindgen_test_layout_WeightVariables() {
concat!("Alignment of ", stringify!(WeightVariables))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<WeightVariables>())).src_center as *const _ as usize },
unsafe { ::std::ptr::addr_of!((*ptr).src_center) as usize - ptr as usize },
0usize,
concat!(
"Offset of field: ",
Expand All @@ -32,7 +34,7 @@ fn bindgen_test_layout_WeightVariables() {
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<WeightVariables>())).src_start as *const _ as usize },
unsafe { ::std::ptr::addr_of!((*ptr).src_start) as usize - ptr as usize },
4usize,
concat!(
"Offset of field: ",
Expand All @@ -42,7 +44,7 @@ fn bindgen_test_layout_WeightVariables() {
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<WeightVariables>())).src_end as *const _ as usize },
unsafe { ::std::ptr::addr_of!((*ptr).src_end) as usize - ptr as usize },
8usize,
concat!(
"Offset of field: ",
Expand All @@ -61,6 +63,8 @@ pub struct WeightCollection {
}
#[test]
fn bindgen_test_layout_WeightCollection() {
const UNINIT: ::std::mem::MaybeUninit<WeightCollection> = ::std::mem::MaybeUninit::uninit();
let ptr = UNINIT.as_ptr();
assert_eq!(
::std::mem::size_of::<WeightCollection>(),
24usize,
Expand All @@ -72,7 +76,7 @@ fn bindgen_test_layout_WeightCollection() {
concat!("Alignment of ", stringify!(WeightCollection))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<WeightCollection>())).starts as *const _ as usize },
unsafe { ::std::ptr::addr_of!((*ptr).starts) as usize - ptr as usize },
0usize,
concat!(
"Offset of field: ",
Expand All @@ -82,7 +86,7 @@ fn bindgen_test_layout_WeightCollection() {
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<WeightCollection>())).weight_counts as *const _ as usize },
unsafe { ::std::ptr::addr_of!((*ptr).weight_counts) as usize - ptr as usize },
8usize,
concat!(
"Offset of field: ",
Expand All @@ -92,7 +96,7 @@ fn bindgen_test_layout_WeightCollection() {
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<WeightCollection>())).values as *const _ as usize },
unsafe { ::std::ptr::addr_of!((*ptr).values) as usize - ptr as usize },
16usize,
concat!(
"Offset of field: ",
Expand All @@ -110,6 +114,8 @@ pub struct Cache {
}
#[test]
fn bindgen_test_layout_Cache() {
const UNINIT: ::std::mem::MaybeUninit<Cache> = ::std::mem::MaybeUninit::uninit();
let ptr = UNINIT.as_ptr();
assert_eq!(
::std::mem::size_of::<Cache>(),
48usize,
Expand All @@ -121,7 +127,7 @@ fn bindgen_test_layout_Cache() {
concat!("Alignment of ", stringify!(Cache))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<Cache>())).vertical_weights as *const _ as usize },
unsafe { ::std::ptr::addr_of!((*ptr).vertical_weights) as usize - ptr as usize },
0usize,
concat!(
"Offset of field: ",
Expand All @@ -131,7 +137,7 @@ fn bindgen_test_layout_Cache() {
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<Cache>())).horizontal_weights as *const _ as usize },
unsafe { ::std::ptr::addr_of!((*ptr).horizontal_weights) as usize - ptr as usize },
24usize,
concat!(
"Offset of field: ",
Expand Down
Binary file modified src/ispc/downsample_ispcx86_64-pc-windows-msvc.lib
Binary file not shown.
26 changes: 9 additions & 17 deletions src/ispc/kernels/lanczos3.ispc
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ struct Cache {
};

uint8<3> sample_3_channels(varying uint64 read_address, const uniform uint8* varying src) {
// Pointer hacks to read all channels at once rather than one-by-one, despite technically reading uint8<3> to a uint8<4>.
// Memory reinterpretation to read all channels at once rather than one-by-one.
// While testing, this proved more performant than reading one-by-one.
const uniform uint8* pixel_ptr = src + read_address;
const uniform uint8<3>* pixel_ptr3 = (const uniform uint8<3>*)(pixel_ptr);
Expand All @@ -101,17 +101,12 @@ uint8<3> sample_3_channels(varying uint64 read_address, const uniform uint8* var

void clean_and_write_3_channels(varying float<3> color, varying uint64 write_address, uniform uint8* varying dst) {
// The final color is a sum of numbers that are multiplied by the weights of their respective pixels.
// Because of their numbers, floating point precision leads to the final color being potentially outside of the 0-255 range by a slighty margin.
// Floating-point precision errors in that sum can leave the final color slightly outside of the 0-255 range.
// This would cause an underflow/overflow, which we avoid with the clamps.
color.x = clamp(color.x, 0.0f, 255.0f);
color.y = clamp(color.y, 0.0f, 255.0f);
color.z = clamp(color.z, 0.0f, 255.0f);

// Pointer hacks to write all channels at once rather than one-by-one, despite technically writing uint8<4> to a uint8<3>.
// While testing, this proved more performant than writing one-by-one.
uniform uint8* pixel_ptr = dst + write_address;
uniform uint8<3>* pixel_ptr3 = (uniform uint8<3>*)(pixel_ptr);
*pixel_ptr3 = color;
pixel_ptr[0] = clamp(color[0], 0.0f, 255.0f);
pixel_ptr[1] = clamp(color[1], 0.0f, 255.0f);
pixel_ptr[2] = clamp(color[2], 0.0f, 255.0f);
}

uint8<4> sample_4_channels(varying uint64 read_address, const uniform uint8* varying src) {
Expand All @@ -123,14 +118,11 @@ uint8<4> sample_4_channels(varying uint64 read_address, const uniform uint8* var
}

void clean_and_write_4_channels(varying float<4> color, varying uint64 write_address, uniform uint8* varying dst) {
color.x = clamp(color.x, 0.0f, 255.0f);
color.y = clamp(color.y, 0.0f, 255.0f);
color.z = clamp(color.z, 0.0f, 255.0f);
color.w = clamp(color.w, 0.0f, 255.0f);

uniform uint8* pixel_ptr = dst + write_address;
uniform uint8<4>* pixel_ptr4 = (uniform uint8<4>*)(pixel_ptr);
*pixel_ptr4 = color;
pixel_ptr[0] = clamp(color.x, 0.0f, 255.0f);
pixel_ptr[1] = clamp(color.y, 0.0f, 255.0f);
pixel_ptr[2] = clamp(color.z, 0.0f, 255.0f);
pixel_ptr[3] = clamp(color.w, 0.0f, 255.0f);
}


Expand Down
Binary file modified src/ispc/libdownsample_ispcaarch64-linux-android.a
Binary file not shown.
Binary file modified src/ispc/libdownsample_ispcaarch64-unknown-linux-gnu.a
Binary file not shown.
Binary file modified src/ispc/libdownsample_ispcx86_64-unknown-linux-gnu.a
Binary file not shown.

0 comments on commit 22af04d

Please sign in to comment.