Skip to content

Commit

Permalink
Merge pull request #50 from etemesi254/dev
Browse files Browse the repository at this point in the history
dev:new updates
  • Loading branch information
etemesi254 committed Jan 3, 2023
2 parents fc5c785 + aac73e7 commit 74c36b8
Show file tree
Hide file tree
Showing 23 changed files with 329 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/fuzz_png.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Fuzz PPM
name: Fuzz PNG

on:
push:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/fuzz_qoi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ jobs:
run: cargo install cargo-fuzz

- name: QOI Fuzz testing.
run: cargo +nightly fuzz run --fuzz-dir zune-qoi/fuzz decode_buffer -- -runs=10000
run: cargo +nightly fuzz run --fuzz-dir zune-qoi/fuzz decode_buffer zune-qoi/test_images -- -runs=10000
11 changes: 10 additions & 1 deletion zune-inflate/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,13 @@

## Version 0.2.2

- Fix bug in which some paths would cause the stream not to refill
- Fix bug in which some paths would cause the stream not to refill

## Version 0.2.3

- Small performance improvements, especially on files with a lot of RLE redundant data

## Version 0.2.4

- Fix bug with some gzip files that would cause errors during decoding
- Small performance improvement
2 changes: 1 addition & 1 deletion zune-inflate/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "zune-inflate"
version = "0.2.2"
version = "0.2.4"
edition = "2021"
description = "A heavily optimized deflate decompressor in Pure Rust"
exclude = ["tests/"]
Expand Down
78 changes: 76 additions & 2 deletions zune-inflate/benches/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,43 @@ fn decode_writer_libdeflate(bytes: &[u8]) -> Vec<u8>
out
}

/// Decode a gzip stream with flate2 (zlib-ng backend) and return the
/// decompressed bytes.
fn decode_writer_flate_gz(bytes: &[u8]) -> Vec<u8>
{
    let mut decoded = Vec::new();

    // Stream the whole gzip member through flate2's reader adapter.
    flate2::read::GzDecoder::new(Cursor::new(bytes))
        .read_to_end(&mut decoded)
        .unwrap();

    decoded
}

/// Decode a gzip stream with zune-inflate and return the decompressed bytes.
fn decode_writer_zune_gz(bytes: &[u8]) -> Vec<u8>
{
    // Pre-size the output to ~50 MiB so the decoder does not have to grow
    // its buffer mid-benchmark.
    let size_hint = (1 << 20) * 50;
    let options = zune_inflate::DeflateOptions::default().set_size_hint(size_hint);

    zune_inflate::DeflateDecoder::new_with_options(bytes, options)
        .decode_gzip()
        .unwrap()
}

/// Decode a gzip stream with libdeflate and return the decompressed bytes.
///
/// libdeflate is a one-shot decoder, so the caller must provide an output
/// buffer that is at least as large as the decompressed data.
fn decode_writer_libdeflate_gz(bytes: &[u8]) -> Vec<u8>
{
    let mut deflater = libdeflater::Decompressor::new();
    // decompressed size is 43 mb. so allocate 50 mb
    let mut out = vec![0; (1 << 20) * 50];

    // gzip_decompress returns the number of bytes actually written; truncate
    // so the returned Vec has the real decompressed length instead of the
    // zero-padded 50 MB buffer (keeps the result comparable with the other
    // decoders in this benchmark).
    let decompressed_size = deflater.gzip_decompress(bytes, &mut out).unwrap();
    out.truncate(decompressed_size);

    out
}

fn decode_test(c: &mut Criterion)
{
let path = env!("CARGO_MANIFEST_DIR").to_string() + "/tests/zlib/enwiki_part.zlib";

let data = read(path).unwrap();

let mut group = c.benchmark_group("ZLIB decoding");
let mut group = c.benchmark_group("enwiki zlib decoding");
group.throughput(Throughput::Bytes(data.len() as u64));

group.bench_function("FLATE-[zlib-ng]", |b| {
Expand Down Expand Up @@ -77,11 +107,55 @@ fn decode_test_crow(c: &mut Criterion)
b.iter(|| black_box(decode_writer_libdeflate(data.as_slice())))
});
}

/// Benchmark gzip decoding of the tokio-rs source tarball across three
/// decoders: flate2 (zlib-ng backend), zune-inflate and libdeflate.
fn decode_test_gzip(c: &mut Criterion)
{
    let file = format!("{}{}", env!("CARGO_MANIFEST_DIR"), "/tests/gzip/tokio.tar.gz");
    let data = read(file).unwrap();

    let mut group = c.benchmark_group("Gzip decoding, tokio-rs source code");
    // Report throughput relative to the compressed input size.
    group.throughput(Throughput::Bytes(data.len() as u64));

    group.bench_function("FLATE/zlib-ng", |b| {
        b.iter(|| black_box(decode_writer_flate_gz(&data)))
    });
    group.bench_function("ZUNE", |b| {
        b.iter(|| black_box(decode_writer_zune_gz(&data)))
    });
    group.bench_function("libdeflate", |b| {
        b.iter(|| black_box(decode_writer_libdeflate_gz(&data)))
    });
}

/// Benchmark gzip decoding of the image-rs rustdoc JSON dump across three
/// decoders: flate2 (zlib-ng backend), zune-inflate and libdeflate.
fn decode_test_gzip_json(c: &mut Criterion)
{
    let file = format!("{}{}", env!("CARGO_MANIFEST_DIR"), "/tests/gzip/image.json.gz");
    let data = read(file).unwrap();

    let mut group = c.benchmark_group("Gzip decoding, image-rs rustdoc json");
    // Report throughput relative to the compressed input size.
    group.throughput(Throughput::Bytes(data.len() as u64));

    group.bench_function("FLATE/zlib-ng", |b| {
        b.iter(|| black_box(decode_writer_flate_gz(&data)))
    });
    group.bench_function("ZUNE", |b| {
        b.iter(|| black_box(decode_writer_zune_gz(&data)))
    });
    group.bench_function("libdeflate", |b| {
        b.iter(|| black_box(decode_writer_libdeflate_gz(&data)))
    });
}
criterion_group!(name=benches;
config={
let c = Criterion::default();
c.measurement_time(Duration::from_secs(20))
};
targets=decode_test_crow,decode_test);
targets=decode_test_crow,decode_test,decode_test_gzip,decode_test_gzip_json);

criterion_main!(benches);
30 changes: 29 additions & 1 deletion zune-inflate/src/bitstream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub struct BitStreamReader<'src>
{
// buffer from which we are pulling in bits from
// used in decompression.
src: &'src [u8],
pub src: &'src [u8],
// position in our buffer,
pub position: usize,
pub bits_left: u8,
Expand Down Expand Up @@ -65,6 +65,34 @@ impl<'src> BitStreamReader<'src>
None => self.refill_slow()
}
}
#[inline(always)]
pub fn refill_inner_loop(&mut self)
{
    /*
     * The refill always guarantees bits_left ends up between 56-63.
     *
     * Bits stored will never go above 63, and if bits are already in the
     * range 56-63 the position does not advance (no whole byte is consumed).
     *
     * Silently does nothing when fewer than 8 bytes remain in `src`;
     * callers are expected to have checked that already.
     */
    if let Some(bytes) = self.src.get(self.position..self.position + 8)
    {
        // Build a u64 directly from the 8 little-endian bytes. The slice is
        // exactly 8 bytes long, so the conversion cannot fail — this drops
        // the redundant intermediate [0u8; 8] buffer and the doubly-nested
        // brace block of the previous version.
        let new_buffer = u64::from_le_bytes(bytes.try_into().unwrap());

        // Number of fresh BITS we can accept (63 ^ b == 63 - b for b <= 63);
        // num >> 3 is the number of whole bytes those bits cover.
        let num = 63 ^ self.bits_left;
        // advance past the bytes actually consumed
        self.position += (num >> 3) as usize;
        // splice the new bits above the ones already held in the buffer
        self.buffer |= new_buffer << self.bits_left;
        // bits left are now between 56-63
        self.bits_left |= 56;
    }
}
#[inline(never)]
fn refill_slow(&mut self)
{
Expand Down
48 changes: 29 additions & 19 deletions zune-inflate/src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ impl<'a> DeflateDecoder<'a>
self.position += 1;

let flg = self.data[self.position];
self.position += 1;

// skip mtime
self.position += 4;
Expand Down Expand Up @@ -370,6 +371,7 @@ impl<'a> DeflateDecoder<'a>
{
if *byte == 0
{
self.position += 1;
break;
}
}
Expand All @@ -381,8 +383,6 @@ impl<'a> DeflateDecoder<'a>
}
self.position += 1;
}

self.position += 1;
}
// File comment zero terminated
if (flg & GZIP_FCOMMENT) != 0
Expand All @@ -393,6 +393,7 @@ impl<'a> DeflateDecoder<'a>
{
if *byte == 0
{
self.position += 1;
break;
}
}
Expand All @@ -404,7 +405,6 @@ impl<'a> DeflateDecoder<'a>
}
self.position += 1;
}
self.position += 1;
}
// crc16 for gzip header
if (flg & GZIP_FHCRC) != 0
Expand Down Expand Up @@ -651,7 +651,7 @@ impl<'a> DeflateDecoder<'a>

if close_src
{
self.stream.refill();
self.stream.refill_inner_loop();

let lit_mask = self.stream.peek_bits::<LITLEN_DECODE_BITS>();

Expand All @@ -673,14 +673,14 @@ impl<'a> DeflateDecoder<'a>
// recheck after every sequence
// when we hit continue, we need to recheck this
// as we are trying to emulate a do while
let new_check = 2 * FASTCOPY_BYTES > self.stream.remaining_bytes();
let new_check = self.stream.src.len() < self.stream.position + 8;

if new_check
{
break 'sequence;
}

self.stream.refill();
self.stream.refill_inner_loop();
/*
* Consume the bits for the litlen decode table entry. Save the
* original bit-buf for later, in case the extra match length
Expand All @@ -707,15 +707,22 @@ impl<'a> DeflateDecoder<'a>
* that happens later while decoding the match offset).
*/

literal = entry >> 16;

let new_pos = self.stream.peek_bits::<LITLEN_DECODE_BITS>();

literal = entry >> 16;
entry = litlen_decode_table[new_pos];
saved_bitbuf = self.stream.buffer;

self.stream.drop_bits(entry as u8);

out_block[dest_offset] = literal as u8;
let out: &mut [u8; 2] = out_block
.get_mut(dest_offset..dest_offset + 2)
.unwrap()
.try_into()
.unwrap();

out[0] = literal as u8;
dest_offset += 1;

if (entry & HUFFDEC_LITERAL) != 0
Expand All @@ -724,13 +731,15 @@ impl<'a> DeflateDecoder<'a>
* Another fast literal, but this one is in lieu of the
* primary item, so it doesn't count as one of the extras.
*/
let new_pos = self.stream.peek_bits::<LITLEN_DECODE_BITS>();

// load in the next entry.
literal = entry >> 16;

let new_pos = self.stream.peek_bits::<LITLEN_DECODE_BITS>();

entry = litlen_decode_table[new_pos];

out_block[dest_offset] = literal as u8;
out[1] = literal as u8;
dest_offset += 1;

continue;
Expand Down Expand Up @@ -760,7 +769,7 @@ impl<'a> DeflateDecoder<'a>
saved_bitbuf = self.stream.buffer;

pos += self.stream.peek_var_bits(entry_position);
entry = litlen_decode_table[pos];
entry = litlen_decode_table[pos.min(LITLEN_ENOUGH - 1)];

self.stream.drop_bits(entry as u8);

Expand All @@ -772,7 +781,9 @@ impl<'a> DeflateDecoder<'a>
literal = entry >> 16;
entry = litlen_decode_table[new_pos];

out_block[dest_offset] = (literal & 0xFF) as u8;
*out_block.get_mut(dest_offset).unwrap_or(&mut 0) =
(literal & 0xFF) as u8;

dest_offset += 1;

continue;
Expand All @@ -797,21 +808,20 @@ impl<'a> DeflateDecoder<'a>
* fast loop where it's already been verified that the output
* buffer has enough space remaining to copy a max-length match.
*/
length = (entry >> 16) as usize;
let entry_dup = entry;

let mask = (1 << entry as u8) - 1;
entry = offset_decode_table[self.stream.peek_bits::<OFFSET_TABLEBITS>()];
length = (entry_dup >> 16) as usize;

length += (saved_bitbuf & mask) as usize >> ((entry >> 8) as u8);
let mask = (1 << entry_dup as u8) - 1;

entry = offset_decode_table[self.stream.peek_bits::<OFFSET_TABLEBITS>()];
length += (saved_bitbuf & mask) as usize >> ((entry_dup >> 8) as u8);

// offset requires a subtable
if (entry & HUFFDEC_EXCEPTIONAL) != 0
{
self.stream.drop_bits(OFFSET_TABLEBITS as u8);

let extra = self.stream.peek_var_bits(((entry >> 8) & 0x3F) as usize);

entry = offset_decode_table[((entry >> 16) as usize + extra) & 511];
}

Expand Down Expand Up @@ -958,7 +968,7 @@ impl<'a> DeflateDecoder<'a>
return Err(error);
}

if 2 * FASTCOPY_BYTES > self.stream.remaining_bytes()
if self.stream.src.len() < self.stream.position + 8
{
// close to input end, move to the slower one
break 'sequence;
Expand Down
14 changes: 13 additions & 1 deletion zune-inflate/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub struct InflateDecodeErrors
{
/// reason why decompression fails
pub error: DecodeErrorStatus,
/// Data up until that decompression stage
/// Decoded data up until that decompression error
pub data: Vec<u8>
}

Expand Down Expand Up @@ -34,12 +34,24 @@ impl Debug for InflateDecodeErrors

pub enum DecodeErrorStatus
{
    /// Input data is not enough to construct
    /// a full output
    InsufficientData,
    /// Anything that isn't significant
    Generic(&'static str),
    /// Like [`Generic`](Self::Generic), but for messages
    /// built at runtime
    GenericStr(String),
    /// Input data was malformed.
    CorruptData,
    /// Limit set by the user was exceeded by
    /// decompressed output
    OutputLimitExceeded(usize, usize),
    /// Output CRC does not match stored CRC.
    ///
    /// Only present for gzip (gzip trailers store a CRC-32, RFC 1952)
    MismatchedCRC(u32, u32),
    /// Output Adler does not match stored adler
    ///
    /// Only present for zlib (zlib trailers store an Adler-32, RFC 1950)
    MismatchedAdler(u32, u32)
}

Expand Down
Binary file added zune-inflate/tests/gzip/image.json.gz
Binary file not shown.
Binary file added zune-inflate/tests/gzip/image_compressed.tar.gz
Binary file not shown.
Binary file added zune-inflate/tests/gzip/tokio.tar.gz
Binary file not shown.

0 comments on commit 74c36b8

Please sign in to comment.