gendx · bors · Dec 16, 2019 · Jul 11, 2019 · Dec 11, 2019 · Dec 13, 2019
diff --git a/src/decode/lzma.rs b/src/decode/lzma.rs
@@ -4,6 +4,9 @@ use crate::decode::rangecoder;
 use crate::error;
 use std::io;
 
+use crate::decompress::Options;
+use crate::decompress::UnpackedSize;
+
 pub struct LZMAParams {
     // most lc significant bits of previous byte are part of the literal context
     lc: u32, // 0..8
@@ -15,7 +18,7 @@ pub struct LZMAParams {
 }
 
 impl LZMAParams {
-    pub fn read_header<R>(input: &mut R) -> error::Result<LZMAParams>
+    pub fn read_header<R>(input: &mut R, options: &Options) -> error::Result<LZMAParams>
     where
         R: io::BufRead,
     {
@@ -58,17 +61,26 @@ impl LZMAParams {
         info!("Dict size: {}", dict_size);
 
         // Unpacked size
-        let unpacked_size_provided = input.read_u64::<LittleEndian>().or_else(|e| {
-            Err(error::Error::LZMAError(format!(
-                "LZMA header too short: {}",
-                e
-            )))
-        })?;
-        let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
-        let unpacked_size = if marker_mandatory {
-            None
-        } else {
-            Some(unpacked_size_provided)
+        let unpacked_size: Option<u64> = match options.unpacked_size {
+            UnpackedSize::ReadHeaderAndUseHeader => {
+                let unpacked_size_provided = input.read_u64::<LittleEndian>().or_else(|e| {
+                    Err(error::Error::LZMAError(format!(
+                        "LZMA header too short: {}",
+                        e
+                    )))
+                })?;
+                let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
+                if marker_mandatory {
+                    None
+                } else {
+                    Some(unpacked_size_provided)
+                }
+            },
+            UnpackedSize::ReadHeaderButUseProvided(x) => {
+                input.read_u64::<LittleEndian>()?;
+                x
+            }
+            UnpackedSize::SkipHeaderAndUseProvided(x) => x
         };
 
         info!("Unpacked size: {:?}", unpacked_size);

diff --git a/src/decode/mod.rs b/src/decode/mod.rs
@@ -1,6 +1,7 @@
 pub mod lzbuffer;
 pub mod lzma;
 pub mod lzma2;
+pub mod options;
 pub mod rangecoder;
 pub mod util;
 pub mod xz;
diff --git a/src/decode/options.rs b/src/decode/options.rs
@@ -0,0 +1,32 @@
+#[derive(Default)]
+pub struct Options {
+    /// Defines whether the unpacked size should be read from the header or provided.
+    /// The default is [`UnpackedSize::ReadHeaderAndUseHeader`](enum.UnpackedSize.html#variant.ReadHeaderAndUseHeader)
+    pub unpacked_size: UnpackedSize,
+}
+
+/// Alternatives for defining the unpacked size of the decoded data
+pub enum UnpackedSize {
+    /// Assume that the 8 bytes used to specify the unpacked size are present in the header.
+    /// If the bytes are `0xFFFF_FFFF_FFFF_FFFF`, assume that there is an end-of-payload marker in
+    /// the file.
+    /// If not, read the 8 bytes as a little-endian encoded u64.
+    ReadHeaderAndUseHeader,
+    /// Assume that there are 8 bytes representing the unpacked size present in the header.
+    /// Read it, but ignore it and use the provided value instead.
+    /// If the provided value is `None`, assume that there is an end-of-payload marker in the file.
+    /// Note that this is a non-standard way of reading LZMA data,
+    /// but is used by certain libraries such as OpenCTM.
+    ReadHeaderButUseProvided(Option<u64>),
+    /// Assume that the 8 bytes typically used to represent the unpacked size are *not* present in
+    /// the header. Use the provided value.
+    /// If the provided value is `None`, assume that there is an end-of-payload marker in the file.
+    SkipHeaderAndUseProvided(Option<u64>),
+}
+
+impl Default for UnpackedSize {
+    fn default() -> UnpackedSize {
+        UnpackedSize::ReadHeaderAndUseHeader
+    }
+}
+
diff --git a/src/encode/dumbencoder.rs b/src/encode/dumbencoder.rs
@@ -1,5 +1,6 @@
-use byteorder::{LittleEndian, WriteBytesExt};
+use crate::compress::{Options, UnpackedSize};
 use crate::encode::rangecoder;
+use byteorder::{LittleEndian, WriteBytesExt};
 use std::io;
 
 pub struct Encoder<'a, W>
@@ -19,7 +20,7 @@ impl<'a, W> Encoder<'a, W>
 where
     W: io::Write,
 {
-    pub fn from_stream(stream: &'a mut W) -> io::Result<Self> {
+    pub fn from_stream(stream: &'a mut W, options: &Options) -> io::Result<Self> {
         let dict_size = 0x800000;
 
         // Properties
@@ -32,8 +33,22 @@ where
         stream.write_u32::<LittleEndian>(dict_size)?;
 
         // Unpacked size
-        info!("Unpacked size: unknown");
-        stream.write_u64::<LittleEndian>(0xFFFF_FFFF_FFFF_FFFF)?;
+        match &options.unpacked_size {
+            UnpackedSize::WriteToHeader(unpacked_size) => {
+                let value: u64 = match unpacked_size {
+                    None => {
+                        info!("Unpacked size: unknown");
+                        0xFFFF_FFFF_FFFF_FFFF
+                    }
+                    Some(x) => {
+                        info!("Unpacked size: {}", x);
+                        *x
+                    }
+                };
+                stream.write_u64::<LittleEndian>(value)?;
+            }
+            UnpackedSize::SkipWritingToHeader => {}
+        };
 
         let encoder = Encoder {
             rangecoder: rangecoder::RangeEncoder::new(stream),

diff --git a/src/encode/mod.rs b/src/encode/mod.rs
@@ -1,5 +1,6 @@
 pub mod dumbencoder;
 pub mod lzma2;
+pub mod options;
 mod rangecoder;
 mod util;
 pub mod xz;
diff --git a/src/encode/options.rs b/src/encode/options.rs
@@ -0,0 +1,28 @@
+/// Options for the `lzma_compress_with_options` function.
+#[derive(Default)]
+pub struct Options {
+    /// Defines whether the unpacked size should be written to the header and whether the value is
+    /// known.
+    /// The default is
+    /// [`UnpackedSize::WriteToHeader(None)`](enum.UnpackedSize.html#variant.WriteToHeader).
+    pub unpacked_size: UnpackedSize,
+}
+
+/// Alternatives for handling unpacked size
+pub enum UnpackedSize {
+    /// If the value is `Some(u64)`, write the provided u64 value to the header.
+    /// There is currently no check in place that verifies that this is the actual number of bytes provided by the input stream.
+    /// If the value is `None`, write the special `0xFFFF_FFFF_FFFF_FFFF` code to the header,
+    /// indicating that the unpacked size is unknown.
+    WriteToHeader(Option<u64>),
+    /// Do not write anything to the header. The unpacked size needs to be stored elsewhere and
+    /// provided when reading the file. Note that this is a non-standard way of writing LZMA data,
+    /// but is used by certain libraries such as OpenCTM.
+    SkipWritingToHeader,
+}
+
+impl Default for UnpackedSize {
+    fn default() -> UnpackedSize {
+        UnpackedSize::WriteToHeader(None)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -10,11 +10,29 @@ pub mod error;
 use crate::decode::lzbuffer::LZBuffer;
 use std::io;
 
+pub mod compress {
+    pub use crate::encode::options::*;
+}
+
+pub mod decompress {
+    pub use crate::decode::options::*;
+}
+
+/// Decompress LZMA data with default [`Options`](decompress/struct.Options.html).
 pub fn lzma_decompress<R: io::BufRead, W: io::Write>(
     input: &mut R,
     output: &mut W,
 ) -> error::Result<()> {
-    let params = decode::lzma::LZMAParams::read_header(input)?;
+    lzma_decompress_with_options(input, output, &decompress::Options::default())
+}
+
+/// Decompress LZMA data with the provided [`Options`](decompress/struct.Options.html).
+pub fn lzma_decompress_with_options<R: io::BufRead, W: io::Write>(
+    input: &mut R,
+    output: &mut W,
+    options: &decompress::Options,
+) -> error::Result<()> {
+    let params = decode::lzma::LZMAParams::read_header(input, &options)?;
     let mut decoder = decode::lzma::new_circular(output, params)?;
     let mut rangecoder = decode::rangecoder::RangeDecoder::new(input).or_else(|e| {
         Err(error::Error::LZMAError(format!(
@@ -27,11 +45,20 @@ pub fn lzma_decompress<R: io::BufRead, W: io::Write>(
     Ok(())
 }
 
+/// Compresses the data with default [`Options`](compress/struct.Options.html).
 pub fn lzma_compress<R: io::BufRead, W: io::Write>(
     input: &mut R,
     output: &mut W,
 ) -> io::Result<()> {
-    let encoder = encode::dumbencoder::Encoder::from_stream(output)?;
+    lzma_compress_with_options(input, output, &compress::Options::default())
+}
+
+pub fn lzma_compress_with_options<R: io::BufRead, W: io::Write>(
+    input: &mut R,
+    output: &mut W,
+    options: &compress::Options,
+) -> io::Result<()> {
+    let encoder = encode::dumbencoder::Encoder::from_stream(output, options)?;
     encoder.process(input)
 }
 

diff --git a/tests/lzma.rs b/tests/lzma.rs
@@ -14,6 +14,26 @@ fn round_trip(x: &[u8]) {
     assert_eq!(decomp, x)
 }
 
+fn round_trip_with_options(
+    x: &[u8],
+    encode_options: &lzma_rs::compress::Options,
+    decode_options: &lzma_rs::decompress::Options,
+) {
+    let mut compressed: Vec<u8> = Vec::new();
+    lzma_rs::lzma_compress_with_options(
+        &mut std::io::BufReader::new(x),
+        &mut compressed,
+        encode_options,
+    )
+    .unwrap();
+    info!("Compressed {} -> {} bytes", x.len(), compressed.len());
+    debug!("Compressed content: {:?}", compressed);
+    let mut bf = std::io::BufReader::new(compressed.as_slice());
+    let mut decomp: Vec<u8> = Vec::new();
+    lzma_rs::lzma_decompress_with_options(&mut bf, &mut decomp, decode_options).unwrap();
+    assert_eq!(decomp, x)
+}
+
 fn round_trip_file(filename: &str) {
     use std::io::Read;
 
@@ -113,3 +133,61 @@ fn decompress_huge_dict() {
     lzma_rs::lzma_decompress(&mut x, &mut decomp).unwrap();
     assert_eq!(decomp, b"Hello world\x0a")
 }
+
+#[test]
+fn unpacked_size_write_to_header() {
+    let data = b"Some data";
+    let encode_options = lzma_rs::compress::Options {
+        unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(
+            Some(data.len() as u64),
+        ),
+    };
+    let decode_options = lzma_rs::decompress::Options {
+        unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderAndUseHeader,
+    };
+    round_trip_with_options(&data[..], &encode_options, &decode_options);
+}
+
+#[test]
+fn unpacked_size_provided_outside() {
+    let data = b"Some data";
+    let encode_options = lzma_rs::compress::Options {
+        unpacked_size: lzma_rs::compress::UnpackedSize::SkipWritingToHeader,
+    };
+    let decode_options = lzma_rs::decompress::Options {
+        unpacked_size: lzma_rs::decompress::UnpackedSize::SkipHeaderAndUseProvided(Some(
+            data.len() as u64,
+        )),
+    };
+    round_trip_with_options(&data[..], &encode_options, &decode_options);
+}
+
+#[test]
+fn unpacked_size_write_some_to_header_but_use_provided_on_read() {
+    let data = b"Some data";
+    let encode_options = lzma_rs::compress::Options {
+        unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(
+            Some(data.len() as u64),
+        ),
+    };
+    let decode_options = lzma_rs::decompress::Options {
+        unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some(
+            data.len() as u64,
+        )),
+    };
+    round_trip_with_options(&data[..], &encode_options, &decode_options);
+}
+
+#[test]
+fn unpacked_size_write_none_to_header_and_use_provided_on_read() {
+    let data = b"Some data";
+    let encode_options = lzma_rs::compress::Options {
+        unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(None),
+    };
+    let decode_options = lzma_rs::decompress::Options {
+        unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some(
+            data.len() as u64,
+        )),
+    };
+    round_trip_with_options(&data[..], &encode_options, &decode_options);
+}