Skip to content

Commit

Permalink
Refactor unpack_bits for palette expanded images
Browse files Browse the repository at this point in the history
Remove copy_from_slice in palette match arms of next_interlaced_row_impl
Reuse the previous row in unpack_bits calculation instead of
expanding the palette within the buffer
Use chunks_exact and iterate from start to end of the buffer instead
of back to front in-place
  • Loading branch information
okaneco committed Aug 18, 2023
1 parent 7642f0f commit 4b2a01f
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 38 deletions.
17 changes: 8 additions & 9 deletions src/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -619,12 +619,10 @@ impl<R: Read> Reader<R> {
};
match (color_type, trns) {
(ColorType::Indexed, _) if expand => {
output_buffer[..row.len()].copy_from_slice(row);
expand_paletted(output_buffer, info, trns)?;
expand_paletted(row, output_buffer, info, trns)?;
}
(ColorType::Grayscale | ColorType::GrayscaleAlpha, _) if bit_depth < 8 && expand => {
output_buffer[..row.len()].copy_from_slice(row);
expand_gray_u8(output_buffer, info, trns)
expand_gray_u8(row, output_buffer, info, trns)
}
(ColorType::Grayscale | ColorType::Rgb, Some(trns)) if expand => {
let channels = color_type.samples();
Expand Down Expand Up @@ -811,6 +809,7 @@ impl SubframeInfo {
}

fn expand_paletted(
row: &[u8],
buffer: &mut [u8],
info: &Info,
trns: Option<Option<&[u8]>>,
Expand Down Expand Up @@ -842,7 +841,7 @@ fn expand_paletted(
&[]
};

utils::unpack_bits(buffer, 4, info.bit_depth as u8, |i, chunk| {
utils::unpack_bits(row, buffer, 4, info.bit_depth as u8, |i, chunk| {
let (rgb, a) = (
palette
.get(3 * i as usize..3 * i as usize + 3)
Expand All @@ -855,7 +854,7 @@ fn expand_paletted(
chunk[3] = a;
});
} else {
utils::unpack_bits(buffer, 3, info.bit_depth as u8, |i, chunk| {
utils::unpack_bits(row, buffer, 3, info.bit_depth as u8, |i, chunk| {
let rgb = palette
.get(3 * i as usize..3 * i as usize + 3)
.unwrap_or(&black);
Expand All @@ -873,15 +872,15 @@ fn expand_paletted(
}
}

fn expand_gray_u8(buffer: &mut [u8], info: &Info, trns: Option<Option<&[u8]>>) {
fn expand_gray_u8(row: &[u8], buffer: &mut [u8], info: &Info, trns: Option<Option<&[u8]>>) {
let rescale = true;
let scaling_factor = if rescale {
(255) / ((1u16 << info.bit_depth as u8) - 1) as u8
} else {
1
};
if let Some(trns) = trns {
utils::unpack_bits(buffer, 2, info.bit_depth as u8, |pixel, chunk| {
utils::unpack_bits(row, buffer, 2, info.bit_depth as u8, |pixel, chunk| {
chunk[1] = if let Some(trns) = trns {
if pixel == trns[0] {
0
Expand All @@ -894,7 +893,7 @@ fn expand_gray_u8(buffer: &mut [u8], info: &Info, trns: Option<Option<&[u8]>>) {
chunk[0] = pixel * scaling_factor
})
} else {
utils::unpack_bits(buffer, 1, info.bit_depth as u8, |val, chunk| {
utils::unpack_bits(row, buffer, 1, info.bit_depth as u8, |val, chunk| {
chunk[0] = val * scaling_factor
})
}
Expand Down
56 changes: 27 additions & 29 deletions src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,40 +1,38 @@
//! Utility functions
use std::iter::{repeat, StepBy};
use std::iter::StepBy;
use std::ops::Range;

#[inline(always)]
pub fn unpack_bits<F>(buf: &mut [u8], channels: usize, bit_depth: u8, func: F)
pub fn unpack_bits<F>(row: &[u8], buf: &mut [u8], channels: usize, bit_depth: u8, func: F)
where
F: Fn(u8, &mut [u8]),
{
// Return early if empty. This makes it possible to subtract `channels` later without overflow.
if buf.len() < channels {
return;
}

let bits = buf.len() / channels * bit_depth as usize;
let extra_bits = bits % 8;
let entries = bits / 8
+ match extra_bits {
0 => 0,
_ => 1,
};
let skip = match extra_bits {
0 => 0,
n => (8 - n) / bit_depth as usize,
};
let mask = ((1u16 << bit_depth) - 1) as u8;
let i = (0..entries)
.rev() // reverse iterator
.flat_map(|idx|
// this has to be reversed too
(0..8).step_by(bit_depth.into())
.zip(repeat(idx)))
.skip(skip);
let j = (0..=buf.len() - channels).rev().step_by(channels);
for ((shift, i), j) in i.zip(j) {
let pixel = (buf[i] & (mask << shift)) >> shift;
func(pixel, &mut buf[j..(j + channels)])

let mut buf_chunks = buf.chunks_exact_mut(channels);

// `shift` iterates through these ranges for each bit depth:
// 1 => &[7, 6, 5, 4, 3, 2, 1, 0],
// 2 => &[6, 4, 2, 0],
// 4 => &[4, 0],
// 8 => &[0],
//
// `(0..8).step_by(bit_depth.into()).rev()` doesn't always optimize well so
// shifts are calculated instead. (2023-08, Rust 1.71)

for &curr in row.iter() {
let mut shift = 8 - bit_depth as i32;

while shift >= 0 {
if let Some(chunk) = buf_chunks.next() {
let pixel = (curr >> shift) & mask;
func(pixel, chunk);
} else {
return;
}

shift -= bit_depth as i32;
}
}
}

Expand Down

1 comment on commit 4b2a01f

@hotspoons
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, I think this commit massively improved grayscale image decoding in wasm. Before, using image 0.24.7 as is (this profile comes from a micro-benchmark that takes two 2560x3300 monochrome images, stored as base64-encoded text representations of PNG files, and downscales them):

image

And after, simply by adding `png = { git = "https://github.com/image-rs/image-png.git", rev = "ae5dee9" }` under `[patch.crates-io]` and rebuilding my wasm:

image

expand_gray_u8 was where the vast majority of time was spent in calls from next_interlaced_row_impl.

Almost a 100% improvement!

Please sign in to comment.