Skip to content

Commit

Permalink
Merge pull request #50 from cccs-sadugas/memlimit
Browse files Browse the repository at this point in the history
lzbuffer: add memlimit option
  • Loading branch information
gendx committed Jul 14, 2020
2 parents acb788b + 52792cd commit 795bc5b
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 22 deletions.
49 changes: 38 additions & 11 deletions src/decode/lzbuffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub trait LZBuffer {
// Retrieve the n-th last byte
fn last_n(&self, dist: usize) -> error::Result<u8>;
// Append a literal
fn append_literal(&mut self, lit: u8) -> io::Result<()>;
fn append_literal(&mut self, lit: u8) -> error::Result<()>;
// Fetch an LZ sequence (length, distance) from inside the buffer
fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()>;
// Flush the buffer to the output
Expand All @@ -22,6 +22,7 @@ where
{
stream: &'a mut W, // Output sink
buf: Vec<u8>, // Buffer
memlimit: usize, // Buffer memory limit
len: usize, // Total number of bytes sent through the buffer
}

Expand All @@ -30,9 +31,14 @@ where
W: io::Write,
{
/// Wraps the output sink `stream` in a buffer whose memory limit is
/// `std::usize::MAX`, i.e. effectively unlimited.
pub fn from_stream(stream: &'a mut W) -> Self {
    let unlimited = std::usize::MAX;
    Self::from_stream_with_memlimit(stream, unlimited)
}

/// Wraps the output sink `stream` in an empty buffer that refuses to hold
/// more than `memlimit` bytes.
///
/// The internal storage starts empty and the byte counter starts at zero.
pub fn from_stream_with_memlimit(stream: &'a mut W, memlimit: usize) -> Self {
    let buf = Vec::new();
    let len = 0;
    Self {
        stream,
        buf,
        memlimit,
        len,
    }
}
Expand Down Expand Up @@ -84,10 +90,19 @@ where
}

// Append a literal
fn append_literal(&mut self, lit: u8) -> io::Result<()> {
self.buf.push(lit);
self.len += 1;
Ok(())
fn append_literal(&mut self, lit: u8) -> error::Result<()> {
let new_len = self.len + 1;

if new_len > self.memlimit {
Err(error::Error::LZMAError(format!(
"exceeded memory limit of {}",
self.memlimit
)))
} else {
self.buf.push(lit);
self.len = new_len;
Ok(())
}
}

// Fetch an LZ sequence (length, distance) from inside the buffer
Expand Down Expand Up @@ -127,6 +142,7 @@ where
stream: &'a mut W, // Output sink
buf: Vec<u8>, // Circular buffer
dict_size: usize, // Length of the buffer
memlimit: usize, // Buffer memory limit
cursor: usize, // Current position
len: usize, // Total number of bytes sent through the buffer
}
Expand All @@ -135,12 +151,13 @@ impl<'a, W> LZCircularBuffer<'a, W>
where
W: io::Write,
{
pub fn from_stream(stream: &'a mut W, dict_size: usize) -> Self {
pub fn from_stream_with_memlimit(stream: &'a mut W, dict_size: usize, memlimit: usize) -> Self {
lzma_info!("Dict size in LZ buffer: {}", dict_size);
Self {
stream,
buf: Vec::new(),
dict_size,
memlimit,
cursor: 0,
len: 0,
}
Expand All @@ -150,11 +167,21 @@ where
*self.buf.get(index).unwrap_or(&0)
}

fn set(&mut self, index: usize, value: u8) {
if self.buf.len() < index + 1 {
self.buf.resize(index + 1, 0);
/// Stores `value` at position `index`, zero-filling the buffer up to that
/// position first when it is currently too short.
///
/// Returns an `LZMAError` without modifying the buffer when the required
/// growth would make it longer than `memlimit`.
fn set(&mut self, index: usize, value: u8) -> error::Result<()> {
    let needed = index + 1;

    if needed > self.buf.len() {
        // Growth is the only path that can breach the limit; writes inside
        // the already-allocated region are always allowed.
        if needed > self.memlimit {
            return Err(error::Error::LZMAError(format!(
                "exceeded memory limit of {}",
                self.memlimit
            )));
        }
        self.buf.resize(needed, 0);
    }

    self.buf[index] = value;
    Ok(())
}
}

Expand Down Expand Up @@ -195,8 +222,8 @@ where
}

// Append a literal
fn append_literal(&mut self, lit: u8) -> io::Result<()> {
self.set(self.cursor, lit);
fn append_literal(&mut self, lit: u8) -> error::Result<()> {
self.set(self.cursor, lit)?;
self.cursor += 1;
self.len += 1;

Expand Down
18 changes: 17 additions & 1 deletion src/decode/lzma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,28 @@ pub fn new_circular<'a, W>(
output: &'a mut W,
params: LZMAParams,
) -> error::Result<DecoderState<lzbuffer::LZCircularBuffer<'a, W>>>
where
W: io::Write,
{
new_circular_with_memlimit(output, params, std::usize::MAX)
}

// Initialize decoder with circular buffer whose size is capped at `memlimit`
pub fn new_circular_with_memlimit<'a, W>(
output: &'a mut W,
params: LZMAParams,
memlimit: usize,
) -> error::Result<DecoderState<lzbuffer::LZCircularBuffer<'a, W>>>
where
W: io::Write,
{
// Decoder
let decoder = DecoderState {
output: lzbuffer::LZCircularBuffer::from_stream(output, params.dict_size as usize),
output: lzbuffer::LZCircularBuffer::from_stream_with_memlimit(
output,
params.dict_size as usize,
memlimit,
),
lc: params.lc,
lp: params.lp,
pb: params.pb,
Expand Down
4 changes: 4 additions & 0 deletions src/decode/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ pub struct Options {
/// The default is
/// [`UnpackedSize::ReadFromHeader`](enum.UnpackedSize.html#variant.ReadFromHeader).
pub unpacked_size: UnpackedSize,
/// Defines an optional limit, in bytes, on the dictionary's dynamic size during decompression.
///
/// `None` imposes no limit. The default is unlimited.
pub memlimit: Option<usize>,
}

/// Alternatives for defining the unpacked size of the decoded data.
Expand Down
7 changes: 6 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ pub fn lzma_decompress_with_options<R: io::BufRead, W: io::Write>(
options: &decompress::Options,
) -> error::Result<()> {
let params = decode::lzma::LZMAParams::read_header(input, options)?;
let mut decoder = decode::lzma::new_circular(output, params)?;
let mut decoder = if let Some(memlimit) = options.memlimit {
decode::lzma::new_circular_with_memlimit(output, params, memlimit)?
} else {
decode::lzma::new_circular(output, params)?
};

let mut rangecoder = decode::rangecoder::RangeDecoder::new(input).or_else(|e| {
Err(error::Error::LZMAError(format!(
"LZMA stream too short: {}",
Expand Down
49 changes: 40 additions & 9 deletions tests/lzma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ fn round_trip(x: &[u8]) {
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadFromHeader,
..Default::default()
};
round_trip_with_options(x, &encode_options, &decode_options);
assert_round_trip_with_options(x, &encode_options, &decode_options);
}

fn round_trip_no_options(x: &[u8]) {
Expand All @@ -31,7 +32,7 @@ fn round_trip_with_options(
x: &[u8],
encode_options: &lzma_rs::compress::Options,
decode_options: &lzma_rs::decompress::Options,
) {
) -> lzma_rs::error::Result<Vec<u8>> {
let mut compressed: Vec<u8> = Vec::new();
lzma_rs::lzma_compress_with_options(
&mut std::io::BufReader::new(x),
Expand All @@ -45,8 +46,19 @@ fn round_trip_with_options(
debug!("Compressed content: {:?}", compressed);
let mut bf = std::io::BufReader::new(compressed.as_slice());
let mut decomp: Vec<u8> = Vec::new();
lzma_rs::lzma_decompress_with_options(&mut bf, &mut decomp, decode_options).unwrap();
assert_eq!(decomp, x)
lzma_rs::lzma_decompress_with_options(&mut bf, &mut decomp, decode_options)?;
Ok(decomp)
}

// Compresses and decompresses `x` with the given options, panicking if the
// round trip returns an error or the decoded bytes differ from `x`.
fn assert_round_trip_with_options(
    x: &[u8],
    encode_options: &lzma_rs::compress::Options,
    decode_options: &lzma_rs::decompress::Options,
) {
    let decoded = round_trip_with_options(x, encode_options, decode_options).unwrap();
    assert_eq!(decoded, x)
}

fn round_trip_file(filename: &str) {
Expand Down Expand Up @@ -170,8 +182,9 @@ fn unpacked_size_write_to_header() {
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadFromHeader,
..Default::default()
};
round_trip_with_options(&data[..], &encode_options, &decode_options);
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

#[test]
Expand All @@ -182,8 +195,9 @@ fn unpacked_size_provided_outside() {
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::UseProvided(Some(data.len() as u64)),
..Default::default()
};
round_trip_with_options(&data[..], &encode_options, &decode_options);
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

#[test]
Expand All @@ -196,8 +210,9 @@ fn unpacked_size_write_some_to_header_but_use_provided_on_read() {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some(
data.len() as u64,
)),
..Default::default()
};
round_trip_with_options(&data[..], &encode_options, &decode_options);
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

#[test]
Expand All @@ -210,8 +225,9 @@ fn unpacked_size_write_none_to_header_and_use_provided_on_read() {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some(
data.len() as u64,
)),
..Default::default()
};
round_trip_with_options(&data[..], &encode_options, &decode_options);
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

#[test]
Expand All @@ -222,6 +238,21 @@ fn unpacked_size_write_none_to_header_and_use_provided_none_on_read() {
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(None),
..Default::default()
};
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

// Round-trips a small input with a decode memory limit of zero and expects
// the final `unwrap` to panic with the "exceeded memory limit of 0" message.
#[test]
#[should_panic(expected = "exceeded memory limit of 0")]
fn memlimit() {
let data = b"Some data";
let encode_options = lzma_rs::compress::Options {
unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(None),
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(None),
// A limit of zero leaves no room for even a single decoded byte.
memlimit: Some(0),
};
// NOTE(review): this call discards its Result; it appears to be the
// pre-change line shown by the diff view, superseded by the call below — confirm.
round_trip_with_options(&data[..], &encode_options, &decode_options);
// Unwrapping the error is what produces the panic the attribute expects.
round_trip_with_options(&data[..], &encode_options, &decode_options).unwrap();
}

0 comments on commit 795bc5b

Please sign in to comment.