
Commit

Merge pull request #321 from jamesmudd/issue-318
#318 Add handling in shuffle filter for mismatched length
jamesmudd committed Nov 11, 2021
2 parents e0732fc + a074a96 commit c2e2453
Showing 7 changed files with 161 additions and 14 deletions.
@@ -261,14 +261,17 @@ private ByteBuffer getDataBuffer(Chunk chunk) {
protected final class FilterPipelineLazyInitializer extends LazyInitializer<FilterPipeline> {
@Override
protected FilterPipeline initialize() {
logger.debug("Lazy initializing filter pipeline for '{}'", getPath());
logger.debug("Lazy initializing filter pipeline for [{}]", getPath());

// If the dataset has filters get the message
if (oh.hasMessageOfType(FilterPipelineMessage.class)) {
FilterPipelineMessage filterPipelineMessage = oh.getMessageOfType(FilterPipelineMessage.class);
return FilterManager.getPipeline(filterPipelineMessage);
FilterPipeline filterPipeline = FilterManager.getPipeline(filterPipelineMessage);
logger.info("Initialized filter pipeline [{}] for [{}]", filterPipeline, getPath());
return filterPipeline;
} else {
// No filters
logger.debug("No filters for [{}]", getPath());
return null;
}
}
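For context, FilterPipelineLazyInitializer extends Apache Commons Lang's LazyInitializer, so the pipeline is only built on first access and then cached; the extra logging above records both the lazy build and the resulting pipeline. A minimal sketch of that pattern, with made-up names rather than the real jhdf classes:

import org.apache.commons.lang3.concurrent.ConcurrentException;
import org.apache.commons.lang3.concurrent.LazyInitializer;

public class LazyInitializerSketch {

    // initialize() runs once, on the first get(); the result is cached and reused
    private static final LazyInitializer<String> PIPELINE = new LazyInitializer<String>() {
        @Override
        protected String initialize() {
            System.out.println("Building pipeline...");
            return "pipeline placeholder";
        }
    };

    public static void main(String[] args) throws ConcurrentException {
        System.out.println(PIPELINE.get()); // triggers initialize()
        System.out.println(PIPELINE.get()); // second call reuses the cached value
    }
}
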
7 changes: 7 additions & 0 deletions jhdf/src/main/java/io/jhdf/filter/ByteShuffleFilter.java
@@ -45,6 +45,13 @@ public byte[] decode(byte[] data, int[] filterData) {
}
}

// Very special case here for mismatched length, e.g. use deflate then shuffle
// Not sure if there is actually any good use case
if(pos < data.length) {
// In the overrun section no shuffle is done just a straight copy
System.arraycopy(data, pos, out, pos, data.length - pos);
}

return out;
}
}
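
The hunk above adds the overrun handling to the shuffle decode. The shuffle filter stores byte 0 of every element together, then byte 1, and so on, so decoding is a transpose; any trailing bytes that do not form a complete element were never shuffled and are copied straight across. A standalone sketch of that logic, consistent with the tests added below (illustrative names, not the exact jhdf source):

public class ShuffleDecodeSketch {

    static byte[] unshuffle(byte[] data, int elementSize) {
        final byte[] out = new byte[data.length];
        final int elements = data.length / elementSize; // complete elements only
        int pos = 0;
        // Transpose: shuffled layout is all first bytes, then all second bytes, ...
        for (int i = 0; i < elementSize; i++) {
            for (int j = 0; j < elements; j++) {
                out[j * elementSize + i] = data[pos++];
            }
        }
        // Trailing bytes that do not make up a whole element are copied unchanged
        // (the mismatched-length case this commit fixes)
        if (pos < data.length) {
            System.arraycopy(data, pos, out, pos, data.length - pos);
        }
        return out;
    }

    public static void main(String[] args) {
        // Two complete 4-byte elements plus two leftover bytes, as in the issue #318 tests
        byte[] shuffled = {1, 5, 2, 6, 3, 7, 4, 8, 9, 10};
        System.out.println(java.util.Arrays.toString(unshuffle(shuffled, 4)));
        // Prints [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    }
}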
26 changes: 14 additions & 12 deletions jhdf/src/main/java/io/jhdf/filter/DeflatePipelineFilter.java
@@ -14,6 +14,7 @@
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

@@ -33,21 +34,22 @@ public String getName() {

@Override
public byte[] decode(byte[] compressedData, int[] filterData) {
try {
final Inflater inflater = new Inflater();

// Make a guess that the decompressed data is 3 times larger than compressed.
// This is a performance optimisation to avoid resizing of the stream byte
// array.
try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(compressedData.length * 3)) {
// Setup the inflater
final Inflater inflater = new Inflater();
inflater.setInput(compressedData);

// Make a guess that the decompressed data is 3 times larger than compressed.
// This is a performance optimisation to avoid resizing of the stream byte
// array.
final ByteArrayOutputStream baos = new ByteArrayOutputStream(compressedData.length * 3);

final byte[] buffer = new byte[4096];

// Do the decompression
while (!inflater.finished()) {
int read = inflater.inflate(buffer);
if(read == 0) {
throw new HdfFilterException("Zero bytes inflated");
}
baos.write(buffer, 0, read);
}

Expand All @@ -57,13 +59,13 @@ public byte[] decode(byte[] compressedData, int[] filterData) {
inflater.getBytesWritten());
}

// Close the inflater
inflater.end();

return baos.toByteArray();

} catch (DataFormatException e) {
} catch (DataFormatException | IOException e) {
throw new HdfFilterException("Inflating failed", e);
} finally {
// Close the inflater
inflater.end();
}
}
}
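
The reworked method keeps the standard java.util.zip.Inflater loop but tightens it in three ways: the ByteArrayOutputStream moves into a try-with-resources (whose declared-but-never-thrown IOException from close() is why the catch now also lists IOException), a zero-byte read now fails fast instead of looping forever on truncated input, and the finally block releases the Inflater's native resources even when decoding fails. A condensed sketch of the same pattern (not a copy of the jhdf source; only HdfFilterException is a jhdf class):

import io.jhdf.exceptions.HdfFilterException;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

public class InflateSketch {

    static byte[] inflate(byte[] compressed) {
        final Inflater inflater = new Inflater(); // created outside the try so finally can reach it
        // Guess the output is ~3x the input to avoid repeated buffer resizing
        try (ByteArrayOutputStream baos = new ByteArrayOutputStream(compressed.length * 3)) {
            inflater.setInput(compressed);
            final byte[] buffer = new byte[4096];
            while (!inflater.finished()) {
                int read = inflater.inflate(buffer);
                if (read == 0) {
                    // No progress means truncated/corrupt input; fail instead of spinning
                    throw new HdfFilterException("Zero bytes inflated");
                }
                baos.write(buffer, 0, read);
            }
            return baos.toByteArray();
        } catch (DataFormatException | IOException e) {
            throw new HdfFilterException("Inflating failed", e);
        } finally {
            inflater.end(); // release native zlib memory on success and failure alike
        }
    }
}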
17 changes: 17 additions & 0 deletions jhdf/src/main/java/io/jhdf/filter/FilterPipeline.java
@@ -12,7 +12,10 @@
import io.jhdf.exceptions.HdfFilterException;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

/**
* A collection of filters making up an ordered pipeline to decode chunks.
@@ -34,6 +37,14 @@ private PipelineFilterWithData(Filter filter, int[] filterData) {
private byte[] decode(byte[] data) {
return filter.decode(data, filterData);
}

@Override
public String toString() {
return "{" +
"filter=" + filter.getName() +
", filterData=" + Arrays.toString(filterData) +
'}';
}
}

private final List<PipelineFilterWithData> filters = new ArrayList<>();
@@ -62,4 +73,10 @@ public byte[] decode(byte[] encodedData) {
return encodedData;
}

@Override
public String toString() {
return "FilterPipeline{" +
filters.stream().map(Objects::toString).collect(Collectors.joining(" -> ")) +
'}';
}
}
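
The two toString() additions compose: each PipelineFilterWithData prints its filter name and client data, and FilterPipeline joins the stages with " -> " so the log line from the lazy initializer shows the whole decode chain. A small sketch of the resulting string (the filter names and client data here are illustrative, not read from a real HDF5 file):

import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

public class PipelineToStringSketch {

    public static void main(String[] args) {
        // Stand-ins for PipelineFilterWithData.toString() output
        List<String> stages = Arrays.asList(
                "{filter=deflate, filterData=" + Arrays.toString(new int[]{}) + '}',
                "{filter=shuffle, filterData=" + Arrays.toString(new int[]{8}) + '}');

        String pipeline = "FilterPipeline{"
                + stages.stream().map(Objects::toString).collect(Collectors.joining(" -> "))
                + '}';

        System.out.println(pipeline);
        // FilterPipeline{{filter=deflate, filterData=[]} -> {filter=shuffle, filterData=[8]}}
    }
}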
60 changes: 60 additions & 0 deletions jhdf/src/test/java/io/jhdf/filter/ByteShuffleFilterTest.java
@@ -0,0 +1,60 @@
/*
* This file is part of jHDF. A pure Java library for accessing HDF5 files.
*
* http://jhdf.io
*
* Copyright (c) 2021 James Mudd
*
* MIT License see 'LICENSE' file
*/
package io.jhdf.filter;

import org.junit.jupiter.api.Test;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.is;

class ByteShuffleFilterTest {

@Test
void testShuffleElementSize2() {
byte[] input = new byte[]{1, 3, 5, 7, 9, 2, 4, 6, 8, 10};
ByteShuffleFilter byteShuffleFilter = new ByteShuffleFilter();

byte[] output = byteShuffleFilter.decode(input, new int[]{2});
assertThat(output, is(new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}));
}

// See https://github.com/jamesmudd/jhdf/issues/318
// When input length is not a multiple of the element size
@Test
void testShuffleElementSize4() {
byte[] input = new byte[]{1, 5, 2, 6, 3, 7, 4, 8, 9, 10};
ByteShuffleFilter byteShuffleFilter = new ByteShuffleFilter();

byte[] output = byteShuffleFilter.decode(input, new int[]{4});
assertThat(output, is(new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}));
}

@Test
void testShuffleWithSingleValue() {
byte[] input = new byte[]{1};
ByteShuffleFilter byteShuffleFilter = new ByteShuffleFilter();

byte[] output = byteShuffleFilter.decode(input, new int[]{2});
assertThat(output, is(new byte[]{1}));
}

// See https://github.com/jamesmudd/jhdf/issues/318
// When input length is not a multiple of the element size
@Test
void testShuffleWithMismatchedLength() {
byte[] input = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, -40, -50, -60, -70};
ByteShuffleFilter byteShuffleFilter = new ByteShuffleFilter();

byte[] output = byteShuffleFilter.decode(input, new int[]{8});
assertThat(output, is(new byte[]{1, 2, 3, 4, 5, 6, 7, 8, -40, -50, -60, -70}));
}


}
58 changes: 58 additions & 0 deletions jhdf/src/test/java/io/jhdf/filter/DeflatePipelineFilterTest.java
@@ -0,0 +1,58 @@
/*
* This file is part of jHDF. A pure Java library for accessing HDF5 files.
*
* http://jhdf.io
*
* Copyright (c) 2021 James Mudd
*
* MIT License see 'LICENSE' file
*/
package io.jhdf.filter;

import io.jhdf.exceptions.HdfFilterException;
import org.apache.commons.lang3.StringUtils;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;
import org.junit.jupiter.api.Test;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.zip.Deflater;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.is;
import static org.junit.jupiter.api.Assertions.assertThrows;

class DeflatePipelineFilterTest {

@Test
void decodeWorks() {
byte[] input = StringUtils.repeat( "TestString", 50).getBytes(StandardCharsets.UTF_8);
byte[] compressedBuffer = new byte[1024];

Deflater deflater = new Deflater();
deflater.setInput(input);
deflater.finish(); // This is all the input

// Do the compression
int size = deflater.deflate(compressedBuffer, 0, input.length);
byte[] compressed = Arrays.copyOf(compressedBuffer, size);

DeflatePipelineFilter deflatePipelineFilter = new DeflatePipelineFilter();
byte[] decoded = deflatePipelineFilter.decode(compressed, new int[0]);

assertThat(decoded, is(input));
}

@Test
void decodeZeroLengthThrows() {
DeflatePipelineFilter deflatePipelineFilter = new DeflatePipelineFilter();
assertThrows(HdfFilterException.class, () -> deflatePipelineFilter.decode(new byte[0], new int[0]));
}

@Test
void decodeMalformedThrows() {
DeflatePipelineFilter deflatePipelineFilter = new DeflatePipelineFilter();
assertThrows(HdfFilterException.class, () -> deflatePipelineFilter.decode(new byte[]{1,2,3}, new int[0]));
}
}
Binary file not shown.
