From ebcc95b705b35c723920cceeba832232a6b7d544 Mon Sep 17 00:00:00 2001 From: Andre F de Miranda Date: Sat, 14 Oct 2017 17:57:19 +1100 Subject: [PATCH] COMPRESS-423 - Add ZStandard decompression support using Zstd-JNI --- pom.xml | 6 + .../compressors/CompressorStreamFactory.java | 25 ++- .../zstandard/ZstdCompressorInputStream.java | 95 +++++++++++ .../compressors/zstandard/ZstdUtils.java | 88 ++++++++++ .../ZstdCompressorInputStreamTest.java | 157 ++++++++++++++++++ src/test/resources/bla.tar.zst | Bin 0 -> 473 bytes src/test/resources/zstandard.testdata | 3 + src/test/resources/zstandard.testdata.zst | Bin 0 -> 94 bytes 8 files changed, 372 insertions(+), 2 deletions(-) create mode 100644 src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java create mode 100644 src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java create mode 100644 src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java create mode 100644 src/test/resources/bla.tar.zst create mode 100644 src/test/resources/zstandard.testdata create mode 100644 src/test/resources/zstandard.testdata.zst diff --git a/pom.xml b/pom.xml index 7eca238994..2051463c1a 100644 --- a/pom.xml +++ b/pom.xml @@ -73,6 +73,12 @@ jar, tar, zip, dump, 7z, arj. 
4.12 test + + com.github.luben + zstd-jni + 1.3.1-1 + true + org.brotli dec diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java index 4bd22aac46..b4469635f6 100644 --- a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java +++ b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java @@ -55,6 +55,8 @@ import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; import org.apache.commons.compress.compressors.xz.XZUtils; import org.apache.commons.compress.compressors.z.ZCompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdUtils; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.compress.utils.Lists; import org.apache.commons.compress.utils.ServiceLoaderIterator; @@ -190,6 +192,14 @@ public class CompressorStreamFactory implements CompressorStreamProvider { */ public static final String LZ4_FRAMED = "lz4-framed"; + /** + * Constant (value {@value}) used to identify the ZStandard compression + * algorithm. Not supported as an output stream type. + * + * @since 1.15 + */ + public static final String ZSTANDARD = "zst"; + /** * Constructs a new sorted map from input stream provider names to provider * objects. 
@@ -279,7 +289,7 @@ private static ArrayList findCompressorStreamProviders public static String getBrotli() { return BROTLI; } - + public static String getBzip2() { return BZIP2; } @@ -328,6 +338,10 @@ public static String getLZ4Block() { return LZ4_BLOCK; } + public static String getZstandard() { + return ZSTANDARD; + } + static void putAll(final Set names, final CompressorStreamProvider provider, final TreeMap map) { for (final String name : names) { @@ -555,6 +569,13 @@ public CompressorInputStream createCompressorInputStream(final String name, fina return new XZCompressorInputStream(in, actualDecompressConcatenated, memoryLimitInKb); } + if (ZSTANDARD.equalsIgnoreCase(name)) { + if (!ZstdUtils.isZstdCompressionAvailable()) { + throw new CompressorException("Zstandard compression is not available."); + } + return new ZstdCompressorInputStream(in); + } + if (LZMA.equalsIgnoreCase(name)) { if (!LZMAUtils.isLZMACompressionAvailable()) { throw new CompressorException("LZMA compression is not available"); @@ -701,7 +722,7 @@ public Boolean getDecompressUntilEOF() { @Override public Set getInputStreamCompressorNames() { return Sets.newHashSet(GZIP, BROTLI, BZIP2, XZ, LZMA, PACK200, DEFLATE, SNAPPY_RAW, SNAPPY_FRAMED, Z, LZ4_BLOCK, - LZ4_FRAMED); + LZ4_FRAMED, ZSTANDARD); } @Override diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java new file mode 100644 index 0000000000..1e5dd8d86c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.compressors.zstandard; + + +import java.io.IOException; +import java.io.InputStream; + +import com.github.luben.zstd.ZstdInputStream; +import org.apache.commons.compress.compressors.CompressorInputStream; + +/** + * {@link CompressorInputStream} implementation to decode Zstandard encoded stream. + * Library relies on Zstandard JNI + * + * @since 1.15 + */ +public class ZstdCompressorInputStream extends CompressorInputStream { + + private final com.github.luben.zstd.ZstdInputStream decIS; + + public ZstdCompressorInputStream(final InputStream in) throws IOException { + this.decIS = new ZstdInputStream(in); + } + + @Override + public int available() throws IOException { + return decIS.available(); + } + + @Override + public void close() throws IOException { + decIS.close(); + } + + @Override + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); /* go through the three-argument overload so these bytes are passed to count(...) too */ + } + + @Override + public long skip(final long n) throws IOException { + return decIS.skip(n); + } + + @Override + public void mark(final int readlimit) { + decIS.mark(readlimit); + } + + @Override + public boolean markSupported() { + return decIS.markSupported(); + } + + @Override + public int read() throws IOException { + final int ret = decIS.read(); + count(ret == -1 ? 
0 : 1); + return ret; + } + + @Override + public int read(final byte[] buf, final int off, final int len) throws IOException { + final int ret = decIS.read(buf, off, len); + count(ret); + return ret; + } + + @Override + public String toString() { + return decIS.toString(); + } + + @Override + public void reset() throws IOException { + decIS.reset(); + } + +} \ No newline at end of file diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java new file mode 100644 index 0000000000..0eb8fa15f0 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.zstandard; + +/** + * Utility code for the Zstandard compression format. 
+ * @ThreadSafe + * @since 1.15 + */ +public class ZstdUtils { + + static enum CachedAvailability { + DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE + } + + private static volatile CachedAvailability cachedZstdAvailability; + + static { + cachedZstdAvailability = CachedAvailability.DONT_CACHE; + try { + Class.forName("org.osgi.framework.BundleEvent"); + } catch (final Exception ex) { // NOSONAR + setCacheZstdAvailablity(true); + } + } + + /** Private constructor to prevent instantiation of this utility class. */ + private ZstdUtils() { + } + + /** + * Are the classes required to support Zstandard compression available? + * @return true if the classes required to support Zstandard compression are available + */ + public static boolean isZstdCompressionAvailable() { + final CachedAvailability cachedResult = cachedZstdAvailability; + if (cachedResult != CachedAvailability.DONT_CACHE) { + return cachedResult == CachedAvailability.CACHED_AVAILABLE; + } + return internalIsZstdCompressionAvailable(); + } + + private static boolean internalIsZstdCompressionAvailable() { + try { + Class.forName("com.github.luben.zstd.ZstdInputStream"); + return true; + } catch (NoClassDefFoundError | Exception error) { + return false; + } + } + + /** + * Whether to cache the result of the Zstandard for Java check. + * + *

<p>This defaults to {@code false} in an OSGi environment and {@code true} otherwise.</p>

+ * @param doCache whether to cache the result + */ + public static void setCacheZstdAvailablity(final boolean doCache) { + if (!doCache) { + cachedZstdAvailability = CachedAvailability.DONT_CACHE; + } else if (cachedZstdAvailability == CachedAvailability.DONT_CACHE) { + final boolean hasZstd = internalIsZstdCompressionAvailable(); + cachedZstdAvailability = hasZstd ? CachedAvailability.CACHED_AVAILABLE + : CachedAvailability.CACHED_UNAVAILABLE; + } + } + + // only exists to support unit tests + static CachedAvailability getCachedZstdAvailability() { + return cachedZstdAvailability; + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java new file mode 100644 index 0000000000..5ed276c74b --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.compress.compressors.zstandard; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class ZstdCompressorInputStreamTest extends AbstractTestCase { + + /** + * Test bridge works fine + * @throws IOException if a test resource cannot be read or decoded + */ + @Test + public void testZstdDecode() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + final File expected = getFile("zstandard.testdata"); + try (InputStream inputStream = new FileInputStream(input); + InputStream expectedStream = new FileInputStream(expected); + ZstdCompressorInputStream zstdInputStream = new ZstdCompressorInputStream(inputStream)) { + final byte[] b = new byte[97]; + IOUtils.readFully(expectedStream, b); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int readByte = -1; + while((readByte = zstdInputStream.read()) != -1) { + bos.write(readByte); + } + Assert.assertArrayEquals(b, bos.toByteArray()); + } + } + + @Test + public void testCachingIsEnabledByDefaultAndZstdUtilsPresent() { + assertEquals(ZstdUtils.CachedAvailability.CACHED_AVAILABLE, ZstdUtils.getCachedZstdAvailability()); + assertTrue(ZstdUtils.isZstdCompressionAvailable()); + } + + @Test + public void testCanTurnOffCaching() { + try { + ZstdUtils.setCacheZstdAvailablity(false); + assertEquals(ZstdUtils.CachedAvailability.DONT_CACHE, ZstdUtils.getCachedZstdAvailability()); + 
assertTrue(ZstdUtils.isZstdCompressionAvailable()); + } finally { + ZstdUtils.setCacheZstdAvailablity(true); + } + } + + @Test + public void testTurningOnCachingReEvaluatesAvailability() { + try { + ZstdUtils.setCacheZstdAvailablity(false); + assertEquals(ZstdUtils.CachedAvailability.DONT_CACHE, ZstdUtils.getCachedZstdAvailability()); + ZstdUtils.setCacheZstdAvailablity(true); + assertEquals(ZstdUtils.CachedAvailability.CACHED_AVAILABLE, ZstdUtils.getCachedZstdAvailability()); + } finally { + ZstdUtils.setCacheZstdAvailablity(true); + } + } + + @Test + public void shouldBeAbleToSkipAByte() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + Assert.assertEquals(1, in.skip(1)); + in.close(); + } + } + + @Test + public void singleByteReadWorksAsExpected() throws IOException { + + final File input = getFile("zstandard.testdata.zst"); + + final File original = getFile("zstandard.testdata"); + final long originalFileLength = original.length(); + + byte[] originalFileContent = new byte[((int) originalFileLength)]; + + try (InputStream ois = new FileInputStream(original)) { + IOUtils.readFully(ois, originalFileContent); /* a single read() call may return fewer bytes than requested */ + } + + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + + Assert.assertEquals(originalFileContent[0], in.read()); + in.close(); + } + } + + @Test + public void singleByteReadReturnsMinusOneAtEof() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void testZstandardUnarchive() throws Exception { + final File input = getFile("bla.tar.zst"); + final File output = new File(dir, 
"bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("zst", is); + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + +} diff --git a/src/test/resources/bla.tar.zst b/src/test/resources/bla.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..d5fd6e06e6efeb3a29ca93e95f517c891e06edfc GIT binary patch literal 473 zcmV;~0Ve(^wJ-f-04H4z0LB(fCJ>Em1AD~YTtlu;1AigR)kd)MYBxo>U~JJ)%Lq#e zMg$J$Y%06_pk08_wh7W6=%9T#8&<~q&Rx9~Q(9L$T=%5@p$T3uUdW?DXvHzOYz zVU!-VGY@p{?kFP&Wp_8TO)idK`!@^9%vVD&lQKw(ptK|iiX4lgMIc28q1YW|86>SM zMogQ9t(ch`gXUGY!mj;WFfkknY`IRm*K=HVu>%4TX;1<5C)MLkOSKeAMeEzjn{T%& zqgd@ItgRpw-A4K8P5P9INaIa+cN#hpe4KXbXka`d4^+aN{zRt{2MQu-AWRFBIn8@q z|F;RyEa*15aN18~e6Icb=9`=*aj8V&U}!KLs5dDGb1n;;aDKb0+R^$JtxlJPExhRu zg@|^FP6`hQ(}3ltI{?NGdJyXr^mG!FVGe91P#cWNm)Wof%&C%dRM0pI0vf^s84gI& zL#nQzbL@cd)!VWmXz-N5p(cRC*lg2NEtvK}h0XirdNs#tcGF{;=z8$N99@Ue>(k_MyW2C;|;SVG~3mD$!it)c(qeTJ4Sa#)3!Qw3- z1aMd?Tj`N(sIl01w)7wLO+}( APXGV_ literal 0 HcmV?d00001