Merge branch 'master' into dragly/fix-corrupted-range-coding
gendx committed Dec 16, 2019
2 parents b8c9f8f + 4429567 · commit 0ee7b55
Showing 11 changed files with 306 additions and 43 deletions.
6 changes: 6 additions & 0 deletions .travis.yml
@@ -9,3 +9,9 @@ rust:
matrix:
allow_failures:
- rust: nightly
before_script:
- rustup component add rustfmt
script:
- cargo fmt -- --check
- cargo build --verbose
- cargo test --verbose
9 changes: 9 additions & 0 deletions bors.toml
@@ -0,0 +1,9 @@
# List of commit statuses that must pass on the merge commit before it is
# pushed to master.
status = [
"continuous-integration/travis-ci/push",
]

# If set to true, and if the PR branch is on the same repository that bors-ng
# itself is on, the branch will be deleted.
delete_merged_branches = true
36 changes: 24 additions & 12 deletions src/decode/lzma.rs
@@ -4,6 +4,9 @@ use crate::error;
use byteorder::{LittleEndian, ReadBytesExt};
use std::io;

use crate::decompress::Options;
use crate::decompress::UnpackedSize;

pub struct LZMAParams {
// most lc significant bits of previous byte are part of the literal context
lc: u32, // 0..8
@@ -15,7 +18,7 @@ pub struct LZMAParams {
}

impl LZMAParams {
pub fn read_header<R>(input: &mut R) -> error::Result<LZMAParams>
pub fn read_header<R>(input: &mut R, options: &Options) -> error::Result<LZMAParams>
where
R: io::BufRead,
{
@@ -58,17 +61,26 @@ impl LZMAParams {
info!("Dict size: {}", dict_size);

// Unpacked size
let unpacked_size_provided = input.read_u64::<LittleEndian>().or_else(|e| {
Err(error::Error::LZMAError(format!(
"LZMA header too short: {}",
e
)))
})?;
let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
let unpacked_size = if marker_mandatory {
None
} else {
Some(unpacked_size_provided)
let unpacked_size: Option<u64> = match options.unpacked_size {
UnpackedSize::ReadFromHeader => {
let unpacked_size_provided = input.read_u64::<LittleEndian>().or_else(|e| {
Err(error::Error::LZMAError(format!(
"LZMA header too short: {}",
e
)))
})?;
let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
if marker_mandatory {
None
} else {
Some(unpacked_size_provided)
}
}
UnpackedSize::ReadHeaderButUseProvided(x) => {
input.read_u64::<LittleEndian>()?;
x
}
UnpackedSize::UseProvided(x) => x,
};

info!("Unpacked size: {:?}", unpacked_size);
1 change: 1 addition & 0 deletions src/decode/mod.rs
@@ -1,6 +1,7 @@
pub mod lzbuffer;
pub mod lzma;
pub mod lzma2;
pub mod options;
pub mod rangecoder;
pub mod util;
pub mod xz;
34 changes: 34 additions & 0 deletions src/decode/options.rs
@@ -0,0 +1,34 @@
#[derive(Clone, Copy, Debug, Default)]
pub struct Options {
/// Defines whether the unpacked size should be read from the header or provided.
/// The default is
/// [`UnpackedSize::ReadFromHeader`](enum.UnpackedSize.html#variant.ReadFromHeader)
pub unpacked_size: UnpackedSize,
}

/// Alternatives for defining the unpacked size of the decoded data
#[derive(Clone, Copy, Debug)]
pub enum UnpackedSize {
/// Assume that the 8 bytes used to specify the unpacked size are present in the header.
/// If the bytes are `0xFFFF_FFFF_FFFF_FFFF`, assume that there is an end-of-payload marker in
/// the file.
/// If not, read the 8 bytes as a little-endian encoded u64.
ReadFromHeader,
/// Assume that there are 8 bytes representing the unpacked size present in the header.
/// Read it, but ignore it and use the provided value instead.
/// If the provided value is `None`, assume that there is an end-of-payload marker in the file.
/// Note that this is a non-standard way of reading LZMA data,
/// but is used by certain libraries such as
/// [OpenCTM](http://openctm.sourceforge.net/).
ReadHeaderButUseProvided(Option<u64>),
/// Assume that the 8 bytes typically used to represent the unpacked size are *not* present in
/// the header. Use the provided value.
/// If the provided value is `None`, assume that there is an end-of-payload marker in the file.
UseProvided(Option<u64>),
}

impl Default for UnpackedSize {
fn default() -> UnpackedSize {
UnpackedSize::ReadFromHeader
}
}
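These variants only control how (and whether) the 8-byte size field is read; the properties byte and the dictionary size are still taken from the header in every case. A minimal decode-side sketch, not part of this diff, assuming the crate is imported as `lzma_rs` and that `compressed` and `expected_len` are supplied by the caller:

```rust
use lzma_rs::decompress::{Options, UnpackedSize};
use std::io::BufReader;

// Decode a stream whose header carries no 8-byte size field, supplying the
// unpacked size out of band (hypothetical helper, illustration only).
fn decode_with_known_size(compressed: &[u8], expected_len: u64) -> Vec<u8> {
    let mut input = BufReader::new(compressed);
    let mut output = Vec::new();
    let options = Options {
        unpacked_size: UnpackedSize::UseProvided(Some(expected_len)),
    };
    lzma_rs::lzma_decompress_with_options(&mut input, &mut output, &options)
        .expect("valid LZMA stream");
    output
}
```

`lzma_decompress_with_options` is the entry point added in `src/lib.rs` later in this commit.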
81 changes: 52 additions & 29 deletions src/encode/dumbencoder.rs
@@ -1,3 +1,4 @@
use crate::compress::{Options, UnpackedSize};
use crate::encode::rangecoder;
use byteorder::{LittleEndian, WriteBytesExt};
use std::io;
@@ -9,6 +10,7 @@ where
rangecoder: rangecoder::RangeEncoder<'a, W>,
literal_probs: [[u16; 0x300]; 8],
is_match: [u16; 4], // true = LZ, false = literal
unpacked_size: UnpackedSize,
}

const LC: u32 = 3;
@@ -19,7 +21,7 @@ impl<'a, W> Encoder<'a, W>
where
W: io::Write,
{
pub fn from_stream(stream: &'a mut W) -> io::Result<Self> {
pub fn from_stream(stream: &'a mut W, options: &Options) -> io::Result<Self> {
let dict_size = 0x800000;

// Properties
@@ -32,13 +34,28 @@ where
stream.write_u32::<LittleEndian>(dict_size)?;

// Unpacked size
info!("Unpacked size: unknown");
stream.write_u64::<LittleEndian>(0xFFFF_FFFF_FFFF_FFFF)?;
match &options.unpacked_size {
UnpackedSize::WriteToHeader(unpacked_size) => {
let value: u64 = match unpacked_size {
None => {
info!("Unpacked size: unknown");
0xFFFF_FFFF_FFFF_FFFF
}
Some(x) => {
info!("Unpacked size: {}", x);
*x
}
};
stream.write_u64::<LittleEndian>(value)?;
}
UnpackedSize::SkipWritingToHeader => {}
};

let encoder = Encoder {
rangecoder: rangecoder::RangeEncoder::new(stream),
literal_probs: [[0x400; 0x300]; 8],
is_match: [0x400; 4],
unpacked_size: options.unpacked_size,
};

Ok(encoder)
@@ -68,33 +85,39 @@ where
}

fn finish(&mut self, input_len: usize) -> io::Result<()> {
// Write end-of-stream marker
let pos_state = input_len & 3;

// Match
self.rangecoder
.encode_bit(&mut self.is_match[pos_state], true)?;
// New distance
self.rangecoder.encode_bit(&mut 0x400, false)?;

// Dummy len, as small as possible (len = 0)
for _ in 0..4 {
self.rangecoder.encode_bit(&mut 0x400, false)?;
}

// Distance marker = 0xFFFFFFFF
// pos_slot = 63
for _ in 0..6 {
self.rangecoder.encode_bit(&mut 0x400, true)?;
}
// num_direct_bits = 30
// result = 3 << 30 = C000_0000
// + 3FFF_FFF0 (26 bits)
// + F ( 4 bits)
for _ in 0..30 {
self.rangecoder.encode_bit(&mut 0x400, true)?;
match self.unpacked_size {
UnpackedSize::SkipWritingToHeader => {}
UnpackedSize::WriteToHeader(Some(_)) => {}
UnpackedSize::WriteToHeader(None) => {
// Write end-of-stream marker
let pos_state = input_len & 3;

// Match
self.rangecoder
.encode_bit(&mut self.is_match[pos_state], true)?;
// New distance
self.rangecoder.encode_bit(&mut 0x400, false)?;

// Dummy len, as small as possible (len = 0)
for _ in 0..4 {
self.rangecoder.encode_bit(&mut 0x400, false)?;
}

// Distance marker = 0xFFFFFFFF
// pos_slot = 63
for _ in 0..6 {
self.rangecoder.encode_bit(&mut 0x400, true)?;
}
// num_direct_bits = 30
// result = 3 << 30 = C000_0000
// + 3FFF_FFF0 (26 bits)
// + F ( 4 bits)
for _ in 0..30 {
self.rangecoder.encode_bit(&mut 0x400, true)?;
}
// = FFFF_FFFF
}
}
// = FFFF_FFFF

// Flush range coder
self.rangecoder.finish()
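The distance written by this end-of-stream marker can be checked independently of the crate: pos_slot = 63 gives a base of 3 << 30 (num_direct_bits = 30), and the comment breaks the remaining 30 one-bits down as 0x3FFF_FFF0 (26 bits) plus 0xF (4 bits). A standalone illustration, not part of the commit:

```rust
// Verify that the end-of-stream marker's distance adds up to 0xFFFF_FFFF.
fn main() {
    let base: u32 = 3 << 30;            // pos_slot = 63 => 3 << num_direct_bits = 0xC000_0000
    let middle_bits: u32 = 0x3FFF_FFF0; // 26 one-bits above the lowest 4 bits
    let low_bits: u32 = 0xF;            // lowest 4 one-bits
    let distance = base + middle_bits + low_bits;
    assert_eq!(distance, 0xFFFF_FFFF);
    println!("end-of-stream distance marker = {:#010X}", distance);
}
```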
1 change: 1 addition & 0 deletions src/encode/mod.rs
@@ -1,5 +1,6 @@
pub mod dumbencoder;
pub mod lzma2;
pub mod options;
mod rangecoder;
mod util;
pub mod xz;
30 changes: 30 additions & 0 deletions src/encode/options.rs
@@ -0,0 +1,30 @@
/// Options for the `lzma_compress` function
#[derive(Clone, Copy, Debug, Default)]
pub struct Options {
/// Defines whether the unpacked size should be written to the header.
/// The default is
/// [`UnpackedSize::WriteToHeader(None)`](enum.UnpackedSize.html#variant.WriteToHeader)
pub unpacked_size: UnpackedSize,
}

/// Alternatives for handling unpacked size
#[derive(Clone, Copy, Debug)]
pub enum UnpackedSize {
/// If the value is `Some(u64)`, write the provided u64 value to the header.
/// There is currently no check in place that verifies that this is the actual number of bytes
/// provided by the input stream.
/// If the value is `None`, write the special `0xFFFF_FFFF_FFFF_FFFF` code to the header,
/// indicating that the unpacked size is unknown.
WriteToHeader(Option<u64>),
/// Do not write anything to the header. The unpacked size needs to be stored elsewhere and
/// provided when reading the file. Note that this is a non-standard way of writing LZMA data,
/// but is used by certain libraries such as
/// [OpenCTM](http://openctm.sourceforge.net/).
SkipWritingToHeader,
}

impl Default for UnpackedSize {
fn default() -> UnpackedSize {
UnpackedSize::WriteToHeader(None)
}
}
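A compress-side sketch, again assuming the crate is imported as `lzma_rs`; the helper and its argument are hypothetical. With `WriteToHeader(Some(len))` the exact size goes into the header and, per the `finish()` change above, no end-of-stream marker is appended, so the header size is what tells the decoder where to stop:

```rust
use lzma_rs::compress::{Options, UnpackedSize};
use std::io::BufReader;

// Compress `data`, writing its exact unpacked size into the LZMA header
// (illustration only, not part of the commit).
fn compress_with_known_size(data: &[u8]) -> Vec<u8> {
    let mut input = BufReader::new(data);
    let mut output = Vec::new();
    let options = Options {
        unpacked_size: UnpackedSize::WriteToHeader(Some(data.len() as u64)),
    };
    lzma_rs::lzma_compress_with_options(&mut input, &mut output, &options)
        .expect("writing to a Vec should not fail");
    output
}
```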
36 changes: 36 additions & 0 deletions src/encode/rangecoder.rs
@@ -96,3 +96,39 @@
self.normalize()
}
}

#[cfg(test)]
mod test {
use super::*;
use crate::decode::rangecoder::RangeDecoder;
use std::io::BufReader;

fn encode_decode(prob_init: u16, bits: &[bool]) {
let mut buf: Vec<u8> = Vec::new();

let mut encoder = RangeEncoder::new(&mut buf);
let mut prob = prob_init;
for &b in bits {
encoder.encode_bit(&mut prob, b).unwrap();
}
encoder.finish().unwrap();

let mut bufread = BufReader::new(buf.as_slice());
let mut decoder = RangeDecoder::new(&mut bufread).unwrap();
let mut prob = prob_init;
for &b in bits {
assert_eq!(decoder.decode_bit(&mut prob).unwrap(), b);
}
assert!(decoder.is_finished_ok().unwrap());
}

#[test]
fn test_encode_decode_zeros() {
encode_decode(0x400, &[false; 10000]);
}

#[test]
fn test_encode_decode_ones() {
encode_decode(0x400, &[true; 10000]);
}
}
31 changes: 29 additions & 2 deletions src/lib.rs
@@ -10,11 +10,29 @@ pub mod error;
use crate::decode::lzbuffer::LZBuffer;
use std::io;

pub mod compress {
pub use crate::encode::options::*;
}

pub mod decompress {
pub use crate::decode::options::*;
}

/// Decompress LZMA data with default [`Options`](decompress/struct.Options.html).
pub fn lzma_decompress<R: io::BufRead, W: io::Write>(
input: &mut R,
output: &mut W,
) -> error::Result<()> {
let params = decode::lzma::LZMAParams::read_header(input)?;
lzma_decompress_with_options(input, output, &decompress::Options::default())
}

/// Decompress LZMA data with the provided options.
pub fn lzma_decompress_with_options<R: io::BufRead, W: io::Write>(
input: &mut R,
output: &mut W,
options: &decompress::Options,
) -> error::Result<()> {
let params = decode::lzma::LZMAParams::read_header(input, options)?;
let mut decoder = decode::lzma::new_circular(output, params)?;
let mut rangecoder = decode::rangecoder::RangeDecoder::new(input).or_else(|e| {
Err(error::Error::LZMAError(format!(
@@ -27,11 +45,20 @@ pub fn lzma_decompress<R: io::BufRead, W: io::Write>(
Ok(())
}

/// Compresses the data with default [`Options`](compress/struct.Options.html).
pub fn lzma_compress<R: io::BufRead, W: io::Write>(
input: &mut R,
output: &mut W,
) -> io::Result<()> {
let encoder = encode::dumbencoder::Encoder::from_stream(output)?;
lzma_compress_with_options(input, output, &compress::Options::default())
}

pub fn lzma_compress_with_options<R: io::BufRead, W: io::Write>(
input: &mut R,
output: &mut W,
options: &compress::Options,
) -> io::Result<()> {
let encoder = encode::dumbencoder::Encoder::from_stream(output, options)?;
encoder.process(input)
}
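Tying the two new entry points together, a round-trip sketch for a stream whose header omits the size field: the field is skipped when encoding (`SkipWritingToHeader`) and the size is supplied explicitly when decoding (`UseProvided`). The crate name `lzma_rs` and the sample data are assumptions, not taken from the diff:

```rust
use lzma_rs::{compress, decompress, lzma_compress_with_options, lzma_decompress_with_options};
use std::io::BufReader;

fn main() {
    let data = b"example payload for a header-less LZMA round trip";

    // Encode without the 8-byte unpacked-size field.
    let mut compressed = Vec::new();
    let encode_options = compress::Options {
        unpacked_size: compress::UnpackedSize::SkipWritingToHeader,
    };
    lzma_compress_with_options(&mut BufReader::new(&data[..]), &mut compressed, &encode_options)
        .expect("compression failed");

    // Decode by providing the unpacked size out of band.
    let mut decompressed = Vec::new();
    let decode_options = decompress::Options {
        unpacked_size: decompress::UnpackedSize::UseProvided(Some(data.len() as u64)),
    };
    lzma_decompress_with_options(
        &mut BufReader::new(compressed.as_slice()),
        &mut decompressed,
        &decode_options,
    )
    .expect("decompression failed");

    assert_eq!(&decompressed[..], &data[..]);
}
```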

