Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for specifying the unpacked size outside of header #17

Merged
merged 7 commits into from
Dec 16, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions src/decode/lzma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ use crate::decode::rangecoder;
use crate::error;
use std::io;

use crate::decompress::Options;
use crate::decompress::UnpackedSize;

pub struct LZMAParams {
// most lc significant bits of previous byte are part of the literal context
lc: u32, // 0..8
Expand All @@ -15,7 +18,7 @@ pub struct LZMAParams {
}

impl LZMAParams {
pub fn read_header<R>(input: &mut R) -> error::Result<LZMAParams>
pub fn read_header<R>(input: &mut R, options: &Options) -> error::Result<LZMAParams>
where
R: io::BufRead,
{
Expand Down Expand Up @@ -58,17 +61,26 @@ impl LZMAParams {
info!("Dict size: {}", dict_size);

// Unpacked size
let unpacked_size_provided = input.read_u64::<LittleEndian>().or_else(|e| {
Err(error::Error::LZMAError(format!(
"LZMA header too short: {}",
e
)))
})?;
let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
let unpacked_size = if marker_mandatory {
None
} else {
Some(unpacked_size_provided)
let unpacked_size: Option<u64> = match options.unpacked_size {
UnpackedSize::ReadHeaderAndUseHeader => {
let unpacked_size_provided = input.read_u64::<LittleEndian>().or_else(|e| {
Err(error::Error::LZMAError(format!(
"LZMA header too short: {}",
e
)))
})?;
let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
if marker_mandatory {
None
} else {
Some(unpacked_size_provided)
}
},
UnpackedSize::ReadHeaderButUseProvided(x) => {
input.read_u64::<LittleEndian>()?;
x
}
UnpackedSize::SkipHeaderAndUseProvided(x) => x
};

info!("Unpacked size: {:?}", unpacked_size);
Expand Down
1 change: 1 addition & 0 deletions src/decode/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod lzbuffer;
pub mod lzma;
pub mod lzma2;
pub mod options;
pub mod rangecoder;
pub mod util;
pub mod xz;
32 changes: 32 additions & 0 deletions src/decode/options.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/// Options for the `lzma_decompress_with_options` function.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Options {
    /// Defines whether the unpacked size should be read from the header or provided.
    /// The default is [`UnpackedSize::ReadHeaderAndUseHeader`](enum.UnpackedSize.html#variant.ReadHeaderAndUseHeader)
    pub unpacked_size: UnpackedSize,
}

/// Alternatives for defining the unpacked size of the decoded data
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UnpackedSize {
    /// Assume that the 8 bytes used to specify the unpacked size are present in the header.
    /// If the bytes are `0xFFFF_FFFF_FFFF_FFFF`, assume that there is an end-of-payload marker in
    /// the file.
    /// If not, read the 8 bytes as a little-endian encoded u64.
    ReadHeaderAndUseHeader,
    /// Assume that there are 8 bytes representing the unpacked size present in the header.
    /// Read it, but ignore it and use the provided value instead.
    /// If the provided value is `None`, assume that there is an end-of-payload marker in the file.
    /// Note that this is a non-standard way of reading LZMA data,
    /// but is used by certain libraries such as OpenCTM.
    ReadHeaderButUseProvided(Option<u64>),
    /// Assume that the 8 bytes typically used to represent the unpacked size are *not* present in
    /// the header. Use the provided value.
    /// If the provided value is `None`, assume that there is an end-of-payload marker in the file.
    SkipHeaderAndUseProvided(Option<u64>),
}

impl Default for UnpackedSize {
    /// Returns `UnpackedSize::ReadHeaderAndUseHeader`, i.e. the size is taken from the header.
    fn default() -> UnpackedSize {
        UnpackedSize::ReadHeaderAndUseHeader
    }
}

23 changes: 19 additions & 4 deletions src/encode/dumbencoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use byteorder::{LittleEndian, WriteBytesExt};
use crate::compress::{Options, UnpackedSize};
use crate::encode::rangecoder;
use byteorder::{LittleEndian, WriteBytesExt};
use std::io;

pub struct Encoder<'a, W>
Expand All @@ -19,7 +20,7 @@ impl<'a, W> Encoder<'a, W>
where
W: io::Write,
{
pub fn from_stream(stream: &'a mut W) -> io::Result<Self> {
pub fn from_stream(stream: &'a mut W, options: &Options) -> io::Result<Self> {
let dict_size = 0x800000;

// Properties
Expand All @@ -32,8 +33,22 @@ where
stream.write_u32::<LittleEndian>(dict_size)?;

// Unpacked size
info!("Unpacked size: unknown");
stream.write_u64::<LittleEndian>(0xFFFF_FFFF_FFFF_FFFF)?;
match &options.unpacked_size {
UnpackedSize::WriteToHeader(unpacked_size) => {
let value: u64 = match unpacked_size {
None => {
info!("Unpacked size: unknown");
0xFFFF_FFFF_FFFF_FFFF
}
Some(x) => {
info!("Unpacked size: {}", x);
*x
}
};
stream.write_u64::<LittleEndian>(value)?;
}
UnpackedSize::SkipWritingToHeader => {}
};

let encoder = Encoder {
rangecoder: rangecoder::RangeEncoder::new(stream),
Expand Down
1 change: 1 addition & 0 deletions src/encode/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod dumbencoder;
pub mod lzma2;
pub mod options;
mod rangecoder;
mod util;
pub mod xz;
28 changes: 28 additions & 0 deletions src/encode/options.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/// Options for the `lzma_compress_with_options` function
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Options {
    /// Defines whether the unpacked size should be written to the header and whether the value is
    /// known.
    /// The default is
    /// [`UnpackedSize::WriteToHeader(None)`](enum.UnpackedSize.html#variant.WriteToHeader)
    pub unpacked_size: UnpackedSize,
}

/// Alternatives for handling unpacked size
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UnpackedSize {
    /// If the value is `Some(u64)`, write the provided u64 value to the header.
    /// There is currently no check in place that verifies that this is the actual number of bytes
    /// provided by the input stream.
    /// If the value is `None`, write the special `0xFFFF_FFFF_FFFF_FFFF` code to the header,
    /// indicating that the unpacked size is unknown.
    WriteToHeader(Option<u64>),
    /// Do not write anything to the header. The unpacked size needs to be stored elsewhere and
    /// provided when reading the file. Note that this is a non-standard way of writing LZMA data,
    /// but is used by certain libraries such as OpenCTM.
    SkipWritingToHeader,
}

impl Default for UnpackedSize {
    /// Returns `UnpackedSize::WriteToHeader(None)`, i.e. the "unknown size" code is written.
    fn default() -> UnpackedSize {
        UnpackedSize::WriteToHeader(None)
    }
}
31 changes: 29 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,29 @@ pub mod error;
use crate::decode::lzbuffer::LZBuffer;
use std::io;

/// Compression options: re-exports everything from `crate::encode::options`.
pub mod compress {
pub use crate::encode::options::*;
}

/// Decompression options: re-exports everything from `crate::decode::options`.
pub mod decompress {
pub use crate::decode::options::*;
}

/// Decompress LZMA data with default [`Options`](decompress/struct.Options.html).
///
/// Delegates to `lzma_decompress_with_options`, passing `decompress::Options::default()`.
pub fn lzma_decompress<R: io::BufRead, W: io::Write>(
    input: &mut R,
    output: &mut W,
) -> error::Result<()> {
    // The header must not be read here: the delegate reads it itself, and
    // consuming it first would leave the delegate parsing payload bytes.
    lzma_decompress_with_options(input, output, &decompress::Options::default())
}

/// Decompress LZMA data with the provided options
dragly marked this conversation as resolved.
Show resolved Hide resolved
pub fn lzma_decompress_with_options<R: io::BufRead, W: io::Write>(
input: &mut R,
output: &mut W,
options: &decompress::Options,
) -> error::Result<()> {
let params = decode::lzma::LZMAParams::read_header(input, &options)?;
dragly marked this conversation as resolved.
Show resolved Hide resolved
let mut decoder = decode::lzma::new_circular(output, params)?;
let mut rangecoder = decode::rangecoder::RangeDecoder::new(input).or_else(|e| {
Err(error::Error::LZMAError(format!(
Expand All @@ -27,11 +45,20 @@ pub fn lzma_decompress<R: io::BufRead, W: io::Write>(
Ok(())
}

/// Compresses the data with default [`Options`](compress/struct.Options.html).
///
/// Delegates to `lzma_compress_with_options`, passing `compress::Options::default()`.
pub fn lzma_compress<R: io::BufRead, W: io::Write>(
    input: &mut R,
    output: &mut W,
) -> io::Result<()> {
    // No encoder is created here: the delegate creates its own on `output`,
    // and creating one first would emit a second header.
    lzma_compress_with_options(input, output, &compress::Options::default())
}

/// Compresses the data from `input` into `output` with the provided
/// [`Options`](compress/struct.Options.html).
///
/// The options are handed to the encoder when it is created on `output`;
/// the encoder then processes `input`.
pub fn lzma_compress_with_options<R: io::BufRead, W: io::Write>(
    input: &mut R,
    output: &mut W,
    options: &compress::Options,
) -> io::Result<()> {
    encode::dumbencoder::Encoder::from_stream(output, options)?.process(input)
}

Expand Down
78 changes: 78 additions & 0 deletions tests/lzma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,26 @@ fn round_trip(x: &[u8]) {
assert_eq!(decomp, x)
}

/// Compresses `x` with `encode_options`, decompresses the result with
/// `decode_options`, and asserts that the decompressed bytes equal `x`.
fn round_trip_with_options(
    x: &[u8],
    encode_options: &lzma_rs::compress::Options,
    decode_options: &lzma_rs::decompress::Options,
) {
    // Compression stage: encode the input slice into an in-memory buffer.
    let mut packed = Vec::<u8>::new();
    let mut source = std::io::BufReader::new(x);
    lzma_rs::lzma_compress_with_options(&mut source, &mut packed, encode_options).unwrap();
    info!("Compressed {} -> {} bytes", x.len(), packed.len());
    debug!("Compressed content: {:?}", packed);

    // Decompression stage: decode the buffer and compare with the input.
    let mut packed_reader = std::io::BufReader::new(packed.as_slice());
    let mut unpacked = Vec::<u8>::new();
    lzma_rs::lzma_decompress_with_options(&mut packed_reader, &mut unpacked, decode_options)
        .unwrap();
    assert_eq!(unpacked, x)
}

fn round_trip_file(filename: &str) {
use std::io::Read;

Expand Down Expand Up @@ -113,3 +133,61 @@ fn decompress_huge_dict() {
lzma_rs::lzma_decompress(&mut x, &mut decomp).unwrap();
assert_eq!(decomp, b"Hello world\x0a")
}

/// Round trip where the encoder writes the size to the header and the decoder
/// reads it back from the header.
#[test]
fn unpacked_size_write_to_header() {
    use lzma_rs::{compress, decompress};

    let data = b"Some data";
    let size = data.len() as u64;
    let encode_options = compress::Options {
        unpacked_size: compress::UnpackedSize::WriteToHeader(Some(size)),
    };
    let decode_options = decompress::Options {
        unpacked_size: decompress::UnpackedSize::ReadHeaderAndUseHeader,
    };
    round_trip_with_options(&data[..], &encode_options, &decode_options);
}

/// Round trip where the size is written nowhere in the stream and is instead
/// supplied to the decoder through its options.
#[test]
fn unpacked_size_provided_outside() {
    use lzma_rs::{compress, decompress};

    let data = b"Some data";
    let size = data.len() as u64;
    let encode_options = compress::Options {
        unpacked_size: compress::UnpackedSize::SkipWritingToHeader,
    };
    let decode_options = decompress::Options {
        unpacked_size: decompress::UnpackedSize::SkipHeaderAndUseProvided(Some(size)),
    };
    round_trip_with_options(&data[..], &encode_options, &decode_options);
}

/// Round trip where the encoder writes the size to the header, while the
/// decoder reads those header bytes but uses the size from its options.
#[test]
fn unpacked_size_write_some_to_header_but_use_provided_on_read() {
    use lzma_rs::{compress, decompress};

    let data = b"Some data";
    let size = data.len() as u64;
    let encode_options = compress::Options {
        unpacked_size: compress::UnpackedSize::WriteToHeader(Some(size)),
    };
    let decode_options = decompress::Options {
        unpacked_size: decompress::UnpackedSize::ReadHeaderButUseProvided(Some(size)),
    };
    round_trip_with_options(&data[..], &encode_options, &decode_options);
}

/// Round trip where the encoder writes the "unknown size" code to the header,
/// while the decoder reads those header bytes but uses the size from its options.
#[test]
fn unpacked_size_write_none_to_header_and_use_provided_on_read() {
    use lzma_rs::{compress, decompress};

    let data = b"Some data";
    let size = data.len() as u64;
    let encode_options = compress::Options {
        unpacked_size: compress::UnpackedSize::WriteToHeader(None),
    };
    let decode_options = decompress::Options {
        unpacked_size: decompress::UnpackedSize::ReadHeaderButUseProvided(Some(size)),
    };
    round_trip_with_options(&data[..], &encode_options, &decode_options);
}