Skip to content

Commit

Permalink
Merge pull request #50 from etemesi254/dev
Browse files Browse the repository at this point in the history
dev:new updates
  • Loading branch information
etemesi254 committed Jan 3, 2023
2 parents fc5c785 + aac73e7 commit 74c36b8
Show file tree
Hide file tree
Showing 23 changed files with 329 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/fuzz_png.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Fuzz PPM
name: Fuzz PNG

on:
push:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/fuzz_qoi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ jobs:
run: cargo install cargo-fuzz

- name: QOI Fuzz testing.
run: cargo +nightly fuzz run --fuzz-dir zune-qoi/fuzz decode_buffer -- -runs=10000
run: cargo +nightly fuzz run --fuzz-dir zune-qoi/fuzz decode_buffer zune-qoi/test_images -- -runs=10000
11 changes: 10 additions & 1 deletion zune-inflate/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,13 @@

## Version 0.2.2

- Fix bug in which some paths would cause the stream not to refill
- Fix bug in which some paths would cause the stream not to refill

## Version 0.2.3

- Small performance improvements, especially on files with a lot of RLE redundant data

## Version 0.2.4

- Fix bug with some gzip files that would cause errors during decoding
- Small performance improvement
2 changes: 1 addition & 1 deletion zune-inflate/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "zune-inflate"
version = "0.2.2"
version = "0.2.4"
edition = "2021"
description = "A heavily optimized deflate decompressor in Pure Rust"
exclude = ["tests/"]
Expand Down
78 changes: 76 additions & 2 deletions zune-inflate/benches/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,43 @@ fn decode_writer_libdeflate(bytes: &[u8]) -> Vec<u8>
out
}

/// Decode a gzip stream with flate2 (zlib-ng backend) and return the
/// decompressed bytes.
fn decode_writer_flate_gz(bytes: &[u8]) -> Vec<u8>
{
    let mut decoded = Vec::new();

    // Stream the whole gzip member through flate2's reader adapter.
    flate2::read::GzDecoder::new(Cursor::new(bytes))
        .read_to_end(&mut decoded)
        .unwrap();

    decoded
}

/// Decode a gzip stream with zune-inflate and return the decompressed bytes.
fn decode_writer_zune_gz(bytes: &[u8]) -> Vec<u8>
{
    // Pre-size the output to ~50 MiB so the decoder does not have to grow
    // its buffer mid-benchmark.
    let size_hint = (1 << 20) * 50;
    let options = zune_inflate::DeflateOptions::default().set_size_hint(size_hint);

    zune_inflate::DeflateDecoder::new_with_options(bytes, options)
        .decode_gzip()
        .unwrap()
}

/// Decode a gzip stream with libdeflate and return the decompressed bytes.
///
/// libdeflate is a one-shot decoder, so the caller must provide an output
/// buffer that is at least as large as the decompressed data.
fn decode_writer_libdeflate_gz(bytes: &[u8]) -> Vec<u8>
{
    let mut deflater = libdeflater::Decompressor::new();
    // decompressed size is 43 mb. so allocate 50 mb
    let mut out = vec![0; (1 << 20) * 50];

    // gzip_decompress returns the number of bytes actually written; truncate
    // so the returned Vec has the real decompressed length instead of the
    // zero-padded 50 MB buffer (keeps the result comparable with the other
    // decoders in this benchmark).
    let decompressed_size = deflater.gzip_decompress(bytes, &mut out).unwrap();
    out.truncate(decompressed_size);

    out
}

fn decode_test(c: &mut Criterion)
{
let path = env!("CARGO_MANIFEST_DIR").to_string() + "/tests/zlib/enwiki_part.zlib";

let data = read(path).unwrap();

let mut group = c.benchmark_group("ZLIB decoding");
let mut group = c.benchmark_group("enwiki zlib decoding");
group.throughput(Throughput::Bytes(data.len() as u64));

group.bench_function("FLATE-[zlib-ng]", |b| {
Expand Down Expand Up @@ -77,11 +107,55 @@ fn decode_test_crow(c: &mut Criterion)
b.iter(|| black_box(decode_writer_libdeflate(data.as_slice())))
});
}

/// Benchmark gzip decoding of the tokio-rs source tarball across three
/// decoders: flate2 (zlib-ng backend), zune-inflate and libdeflate.
fn decode_test_gzip(c: &mut Criterion)
{
    let file = format!("{}{}", env!("CARGO_MANIFEST_DIR"), "/tests/gzip/tokio.tar.gz");
    let data = read(file).unwrap();

    let mut group = c.benchmark_group("Gzip decoding, tokio-rs source code");
    // Report throughput relative to the compressed input size.
    group.throughput(Throughput::Bytes(data.len() as u64));

    group.bench_function("FLATE/zlib-ng", |b| {
        b.iter(|| black_box(decode_writer_flate_gz(&data)))
    });
    group.bench_function("ZUNE", |b| {
        b.iter(|| black_box(decode_writer_zune_gz(&data)))
    });
    group.bench_function("libdeflate", |b| {
        b.iter(|| black_box(decode_writer_libdeflate_gz(&data)))
    });
}

/// Benchmark gzip decoding of the image-rs rustdoc JSON dump across three
/// decoders: flate2 (zlib-ng backend), zune-inflate and libdeflate.
fn decode_test_gzip_json(c: &mut Criterion)
{
    let file = format!("{}{}", env!("CARGO_MANIFEST_DIR"), "/tests/gzip/image.json.gz");
    let data = read(file).unwrap();

    let mut group = c.benchmark_group("Gzip decoding, image-rs rustdoc json");
    // Report throughput relative to the compressed input size.
    group.throughput(Throughput::Bytes(data.len() as u64));

    group.bench_function("FLATE/zlib-ng", |b| {
        b.iter(|| black_box(decode_writer_flate_gz(&data)))
    });
    group.bench_function("ZUNE", |b| {
        b.iter(|| black_box(decode_writer_zune_gz(&data)))
    });
    group.bench_function("libdeflate", |b| {
        b.iter(|| black_box(decode_writer_libdeflate_gz(&data)))
    });
}
criterion_group!(name=benches;
config={
let c = Criterion::default();
c.measurement_time(Duration::from_secs(20))
};
targets=decode_test_crow,decode_test);
targets=decode_test_crow,decode_test,decode_test_gzip,decode_test_gzip_json);

criterion_main!(benches);
30 changes: 29 additions & 1 deletion zune-inflate/src/bitstream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub struct BitStreamReader<'src>
{
// buffer from which we are pulling in bits from
// used in decompression.
src: &'src [u8],
pub src: &'src [u8],
// position in our buffer,
pub position: usize,
pub bits_left: u8,
Expand Down Expand Up @@ -65,6 +65,34 @@ impl<'src> BitStreamReader<'src>
None => self.refill_slow()
}
}
#[inline(always)]
pub fn refill_inner_loop(&mut self)
{
    /*
     * The refill always guarantees bits_left ends up between 56-63.
     *
     * Bits stored will never go above 63, and if bits are already in the
     * range 56-63 the position does not advance (no whole byte is consumed).
     *
     * Silently does nothing when fewer than 8 bytes remain in `src`;
     * callers are expected to have checked that already.
     */
    if let Some(bytes) = self.src.get(self.position..self.position + 8)
    {
        // Build a u64 directly from the 8 little-endian bytes. The slice is
        // exactly 8 bytes long, so the conversion cannot fail — this drops
        // the redundant intermediate [0u8; 8] buffer and the doubly-nested
        // brace block of the previous version.
        let new_buffer = u64::from_le_bytes(bytes.try_into().unwrap());

        // Number of fresh BITS we can accept (63 ^ b == 63 - b for b <= 63);
        // num >> 3 is the number of whole bytes those bits cover.
        let num = 63 ^ self.bits_left;
        // advance past the bytes actually consumed
        self.position += (num >> 3) as usize;
        // splice the new bits above the ones already held in the buffer
        self.buffer |= new_buffer << self.bits_left;
        // bits left are now between 56-63
        self.bits_left |= 56;
    }
}
#[inline(never)]
fn refill_slow(&mut self)
{
Expand Down
48 changes: 29 additions & 19 deletions zune-inflate/src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ impl<'a> DeflateDecoder<'a>
self.position += 1;

let flg = self.data[self.position];
self.position += 1;

// skip mtime
self.position += 4;
Expand Down Expand Up @@ -370,6 +371,7 @@ impl<'a> DeflateDecoder<'a>
{
if *byte == 0
{
self.position += 1;
break;
}
}
Expand All @@ -381,8 +383,6 @@ impl<'a> DeflateDecoder<'a>
}
self.position += 1;
}

self.position += 1;
}
// File comment zero terminated
if (flg & GZIP_FCOMMENT) != 0
Expand All @@ -393,6 +393,7 @@ impl<'a> DeflateDecoder<'a>
{
if *byte == 0
{
self.position += 1;
break;
}
}
Expand All @@ -404,7 +405,6 @@ impl<'a> DeflateDecoder<'a>
}
self.position += 1;
}
self.position += 1;
}
// crc16 for gzip header
if (flg & GZIP_FHCRC) != 0
Expand Down Expand Up @@ -651,7 +651,7 @@ impl<'a> DeflateDecoder<'a>

if close_src
{
self.stream.refill();
self.stream.refill_inner_loop();

let lit_mask = self.stream.peek_bits::<LITLEN_DECODE_BITS>();

Expand All @@ -673,14 +673,14 @@ impl<'a> DeflateDecoder<'a>
// recheck after every sequence
// when we hit continue, we need to recheck this
// as we are trying to emulate a do while
let new_check = 2 * FASTCOPY_BYTES > self.stream.remaining_bytes();
let new_check = self.stream.src.len() < self.stream.position + 8;

if new_check
{
break 'sequence;
}

self.stream.refill();
self.stream.refill_inner_loop();
/*
* Consume the bits for the litlen decode table entry. Save the
* original bit-buf for later, in case the extra match length
Expand All @@ -707,15 +707,22 @@ impl<'a> DeflateDecoder<'a>
* that happens later while decoding the match offset).
*/

literal = entry >> 16;

let new_pos = self.stream.peek_bits::<LITLEN_DECODE_BITS>();

literal = entry >> 16;
entry = litlen_decode_table[new_pos];
saved_bitbuf = self.stream.buffer;

self.stream.drop_bits(entry as u8);

out_block[dest_offset] = literal as u8;
let out: &mut [u8; 2] = out_block
.get_mut(dest_offset..dest_offset + 2)
.unwrap()
.try_into()
.unwrap();

out[0] = literal as u8;
dest_offset += 1;

if (entry & HUFFDEC_LITERAL) != 0
Expand All @@ -724,13 +731,15 @@ impl<'a> DeflateDecoder<'a>
* Another fast literal, but this one is in lieu of the
* primary item, so it doesn't count as one of the extras.
*/
let new_pos = self.stream.peek_bits::<LITLEN_DECODE_BITS>();

// load in the next entry.
literal = entry >> 16;

let new_pos = self.stream.peek_bits::<LITLEN_DECODE_BITS>();

entry = litlen_decode_table[new_pos];

out_block[dest_offset] = literal as u8;
out[1] = literal as u8;
dest_offset += 1;

continue;
Expand Down Expand Up @@ -760,7 +769,7 @@ impl<'a> DeflateDecoder<'a>
saved_bitbuf = self.stream.buffer;

pos += self.stream.peek_var_bits(entry_position);
entry = litlen_decode_table[pos];
entry = litlen_decode_table[pos.min(LITLEN_ENOUGH - 1)];

self.stream.drop_bits(entry as u8);

Expand All @@ -772,7 +781,9 @@ impl<'a> DeflateDecoder<'a>
literal = entry >> 16;
entry = litlen_decode_table[new_pos];

out_block[dest_offset] = (literal & 0xFF) as u8;
*out_block.get_mut(dest_offset).unwrap_or(&mut 0) =
(literal & 0xFF) as u8;

dest_offset += 1;

continue;
Expand All @@ -797,21 +808,20 @@ impl<'a> DeflateDecoder<'a>
* fast loop where it's already been verified that the output
* buffer has enough space remaining to copy a max-length match.
*/
length = (entry >> 16) as usize;
let entry_dup = entry;

let mask = (1 << entry as u8) - 1;
entry = offset_decode_table[self.stream.peek_bits::<OFFSET_TABLEBITS>()];
length = (entry_dup >> 16) as usize;

length += (saved_bitbuf & mask) as usize >> ((entry >> 8) as u8);
let mask = (1 << entry_dup as u8) - 1;

entry = offset_decode_table[self.stream.peek_bits::<OFFSET_TABLEBITS>()];
length += (saved_bitbuf & mask) as usize >> ((entry_dup >> 8) as u8);

// offset requires a subtable
if (entry & HUFFDEC_EXCEPTIONAL) != 0
{
self.stream.drop_bits(OFFSET_TABLEBITS as u8);

let extra = self.stream.peek_var_bits(((entry >> 8) & 0x3F) as usize);

entry = offset_decode_table[((entry >> 16) as usize + extra) & 511];
}

Expand Down Expand Up @@ -958,7 +968,7 @@ impl<'a> DeflateDecoder<'a>
return Err(error);
}

if 2 * FASTCOPY_BYTES > self.stream.remaining_bytes()
if self.stream.src.len() < self.stream.position + 8
{
// close to input end, move to the slower one
break 'sequence;
Expand Down
14 changes: 13 additions & 1 deletion zune-inflate/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub struct InflateDecodeErrors
{
/// reason why decompression fails
pub error: DecodeErrorStatus,
/// Data up until that decompression stage
/// Decoded data up until that decompression error
pub data: Vec<u8>
}

Expand Down Expand Up @@ -34,12 +34,24 @@ impl Debug for InflateDecodeErrors

pub enum DecodeErrorStatus
{
    /// Input data is not enough to construct
    /// a full output
    InsufficientData,
    /// Anything that isn't significant
    Generic(&'static str),
    /// Like [`Generic`](Self::Generic), but for messages
    /// built at runtime
    GenericStr(String),
    /// Input data was malformed.
    CorruptData,
    /// Limit set by the user was exceeded by
    /// decompressed output
    OutputLimitExceeded(usize, usize),
    /// Output CRC does not match stored CRC.
    ///
    /// Only present for gzip (gzip trailers store a CRC-32, RFC 1952)
    MismatchedCRC(u32, u32),
    /// Output Adler does not match stored adler
    ///
    /// Only present for zlib (zlib trailers store an Adler-32, RFC 1950)
    MismatchedAdler(u32, u32)
}

Expand Down
Binary file added zune-inflate/tests/gzip/image.json.gz
Binary file not shown.
Binary file added zune-inflate/tests/gzip/image_compressed.tar.gz
Binary file not shown.
Binary file added zune-inflate/tests/gzip/tokio.tar.gz
Binary file not shown.

0 comments on commit 74c36b8

Please sign in to comment.