From 731edbd6306b36db0d70708101cbee3631b550c4 Mon Sep 17 00:00:00 2001 From: depryf Date: Mon, 24 Jul 2023 16:40:05 -0400 Subject: [PATCH] Fixed sonarcloud warnings --- build.gradle | 1 + .../com/imsweb/seerutils/SeerLRUCache.java | 1 + .../java/com/imsweb/seerutils/SeerUtils.java | 15 +- .../zip/ZipArchiveThresholdInputStream.java | 204 ++++++++++++++++++ .../zip/ZipEntryTooLargeException.java | 20 ++ .../ZipInvalidCompressionRatioException.java | 20 ++ .../imsweb/seerutils/zip/ZipSecureFile.java | 117 ++++++++++ .../com/imsweb/seerutils/SeerUtilsTest.java | 21 +- 8 files changed, 391 insertions(+), 8 deletions(-) create mode 100644 src/main/java/com/imsweb/seerutils/zip/ZipArchiveThresholdInputStream.java create mode 100644 src/main/java/com/imsweb/seerutils/zip/ZipEntryTooLargeException.java create mode 100644 src/main/java/com/imsweb/seerutils/zip/ZipInvalidCompressionRatioException.java create mode 100644 src/main/java/com/imsweb/seerutils/zip/ZipSecureFile.java diff --git a/build.gradle b/build.gradle index 3d4521f..c143560 100644 --- a/build.gradle +++ b/build.gradle @@ -22,6 +22,7 @@ repositories { dependencies { implementation 'org.apache.commons:commons-lang3:3.12.0' + implementation 'org.apache.commons:commons-compress:1.22' implementation 'commons-io:commons-io:2.13.0' testImplementation 'junit:junit:4.13.2' diff --git a/src/main/java/com/imsweb/seerutils/SeerLRUCache.java b/src/main/java/com/imsweb/seerutils/SeerLRUCache.java index 5a1124a..3cd9da4 100644 --- a/src/main/java/com/imsweb/seerutils/SeerLRUCache.java +++ b/src/main/java/com/imsweb/seerutils/SeerLRUCache.java @@ -11,6 +11,7 @@ * @param * @param */ +@SuppressWarnings({"java:S2160", "unused"}) // override equals, class not used public class SeerLRUCache extends LinkedHashMap { private static final long serialVersionUID = 4701170688038236784L; diff --git a/src/main/java/com/imsweb/seerutils/SeerUtils.java b/src/main/java/com/imsweb/seerutils/SeerUtils.java index ff79359..ac8877b 100644 --- a/src/main/java/com/imsweb/seerutils/SeerUtils.java +++ b/src/main/java/com/imsweb/seerutils/SeerUtils.java @@ -4,7 +4,6 @@ package com.imsweb.seerutils; import java.io.File; -import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; @@ -26,13 +25,15 @@ import java.util.zip.GZIPOutputStream; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; -import java.util.zip.ZipInputStream; import java.util.zip.ZipOutputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; +import com.imsweb.seerutils.zip.ZipSecureFile; + /** * This class provides shared functionality that can be used by all the other modules in SEER*Utils. */ @@ -727,9 +728,11 @@ public static void unzipFile(File from, File to) throws IOException { if (!to.isDirectory()) throw new IOException("Target is not a directory."); - try (ZipFile file = new ZipFile(from); FileInputStream fis = new FileInputStream(from); ZipInputStream zipInput = new ZipInputStream(fis)) { - ZipEntry entry = zipInput.getNextEntry(); - while (entry != null) { + try (ZipSecureFile file = new ZipSecureFile(from)) { + Enumeration entries = file.getEntries(); + while (entries.hasMoreElements()) { + ZipArchiveEntry entry = entries.nextElement(); + File target = new File(to, entry.getName()); if (!target.getParentFile().exists()) if (!target.getParentFile().mkdirs()) @@ -744,8 +747,6 @@ public static void unzipFile(File from, File to) throws IOException { copyInputStreamToOutputStream(file.getInputStream(entry), fos); } } - - entry = zipInput.getNextEntry(); } } } diff --git a/src/main/java/com/imsweb/seerutils/zip/ZipArchiveThresholdInputStream.java b/src/main/java/com/imsweb/seerutils/zip/ZipArchiveThresholdInputStream.java new file mode 100644 index 0000000..6ebe28e --- /dev/null +++ b/src/main/java/com/imsweb/seerutils/zip/ZipArchiveThresholdInputStream.java @@ -0,0 +1,204 @@ +package com.imsweb.seerutils.zip; + +import java.io.EOFException; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.ZipException; + +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.apache.commons.compress.utils.InputStreamStatistics; + +@SuppressWarnings("unused") +public class ZipArchiveThresholdInputStream extends FilterInputStream { + + // don't alert for expanded sizes smaller than 100k + private static final long _GRACE_ENTRY_SIZE = 100 * 1024L; + + private static final String _MAX_ENTRY_SIZE_MSG = "The file exceeded the maximum entry size allowed"; + + private static final String _MIN_INFLATE_RATIO_MSG = "The file exceeded the maximum compression ratio allowed"; + + private double _minInflateRatio; + private long _maxEntrySize; + + /** + * the reference to the current entry is only used for a more detailed log message in case of an error + */ + private ZipArchiveEntry _entry; + private boolean _guardState = true; + + public ZipArchiveThresholdInputStream(InputStream is) { + super(is); + + if (!(is instanceof InputStreamStatistics)) + throw new IllegalArgumentException("InputStream of class " + is.getClass() + " is not implementing InputStreamStatistics."); + + // set defaults but they will always be set by ZipSecureFile.getInputStream + _minInflateRatio = 0.01d; + _maxEntrySize = 0xFFFFFFFFL; + } + + /** + * Sets the zip entry for a detailed logging + * @param entry the entry + */ + void setEntry(ZipArchiveEntry entry) { + this._entry = entry; + } + + void setMaxEntrySize(long maxEntrySize) { + _maxEntrySize = maxEntrySize; + } + + void setMinInflateRatio(double ratio) { + _minInflateRatio = ratio; + } + + @Override + public int read() throws IOException { + int b = super.read(); + if (b > -1) + checkThreshold(); + return b; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int cnt = super.read(b, off, len); + if (cnt > -1) + checkThreshold(); + return cnt; + } + + @Override + public long skip(long n) throws IOException { + long cnt = skipFully(super.in, n); + if (cnt > 0) + checkThreshold(); + return cnt; + } + + /** + * De-/activate threshold check. + * A disabled guard might make sense, when POI is processing its own temporary data (see #59743) + * @param guardState {@code true} (= default) enables the threshold check + */ + public void setGuardState(boolean guardState) { + this._guardState = guardState; + } + + private void checkThreshold() throws IOException { + if (!_guardState) + return; + + final InputStreamStatistics stats = (InputStreamStatistics)in; + final long payloadSize = stats.getUncompressedCount(); + + long rawSize; + try { + rawSize = stats.getCompressedCount(); + } + catch (NullPointerException e) { + // this can happen with a very specially crafted file (see https://issues.apache.org/jira/browse/COMPRESS-598 for a related bug-report) + // therefore we try to handle this gracefully for now this try/catch can be removed when COMPRESS-598 is fixed + rawSize = 0; + } + + final String entryName = _entry == null ? "not set" : _entry.getName(); + + // check the file size first, in case we are working on uncompressed streams; only check is max entry size is greater than 0 + if (_maxEntrySize > 0 && payloadSize > _maxEntrySize) + throw new ZipEntryTooLargeException(_MAX_ENTRY_SIZE_MSG); + + // don't alert for small expanded size + if (payloadSize <= _GRACE_ENTRY_SIZE) + return; + + // check the inflate ratio if min inflate ratio is greater than zero + double ratio = rawSize / (double)payloadSize; + if (_minInflateRatio > 0.0d && ratio >= _minInflateRatio) + return; + + // one of the limits was reached, report it + throw new ZipInvalidCompressionRatioException(_MIN_INFLATE_RATIO_MSG); + } + + ZipArchiveEntry getNextEntry() throws IOException { + if (!(in instanceof ZipArchiveInputStream)) + throw new IllegalStateException("getNextEntry() is only allowed for stream based zip processing."); + + try { + _entry = ((ZipArchiveInputStream)in).getNextZipEntry(); + return _entry; + } + catch (ZipException ze) { + if (ze.getMessage().startsWith("Unexpected record signature")) + throw new IllegalStateException("No valid entries or contents found, this is not a valid file", ze); + + throw ze; + } + catch (EOFException e) { + return null; + } + } + + /** + * Skips bytes from an input byte stream. + * This implementation guarantees that it will read as many bytes + * as possible before giving up; this may not always be the case for + * skip() implementations in subclasses of {@link InputStream}. + *

+ * Note that the implementation uses {@link InputStream#read(byte[], int, int)} rather + * than delegating to {@link InputStream#skip(long)}. + * This means that the method may be considerably less efficient than using the actual skip implementation, + * this is done to guarantee that the correct number of bytes are skipped. + *

+ * This mimics POI's readFully(InputStream, byte[]). + *

+ * If the end of file is reached before any bytes are read, returns {@code -1}. If + * the end of the file is reached after some bytes are read, returns the + * number of bytes read. If the end of the file isn't reached before {@code len} + * bytes have been read, will return {@code len} bytes. + * + *

+ * Copied nearly verbatim from commons-io 41a3e9c + * @param input byte stream to skip + * @param toSkip number of bytes to skip. + * @return number of bytes actually skipped. + * @throws IOException if there is a problem reading the file + * @throws IllegalArgumentException if toSkip is negative + * @see InputStream#skip(long) + */ + static long skipFully(final InputStream input, final long toSkip) throws IOException { + final int skipBufferSize = 2048; + + if (toSkip < 0) + throw new IllegalArgumentException("Skip count must be non-negative, actual: " + toSkip); + if (toSkip == 0) + return 0L; + + /* + * N.B. no need to synchronize this because: - we don't care if the buffer is created multiple times (the data + * is ignored) - we always use the same size buffer, so if it it is recreated it will still be OK (if the buffer + * size were variable, we would need to synch. to ensure some other thread did not create a smaller one) + */ + byte[] skipByteBuffer = new byte[skipBufferSize]; + + long remain = toSkip; + while (remain > 0) { + // See https://issues.apache.org/jira/browse/IO-203 for why we use read() rather than delegating to skip() + final long n = input.read(skipByteBuffer, 0, (int)Math.min(remain, skipBufferSize)); + if (n < 0) // EOF + break; + + remain -= n; + } + + if (toSkip == remain) + return -1L; + + return toSkip - remain; + } +} diff --git a/src/main/java/com/imsweb/seerutils/zip/ZipEntryTooLargeException.java b/src/main/java/com/imsweb/seerutils/zip/ZipEntryTooLargeException.java new file mode 100644 index 0000000..e017d4b --- /dev/null +++ b/src/main/java/com/imsweb/seerutils/zip/ZipEntryTooLargeException.java @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2022 Information Management Services, Inc. + */ +package com.imsweb.seerutils.zip; + +import java.io.IOException; + +public class ZipEntryTooLargeException extends IOException { + + /** + * Constructs an {@code ZipEntryTooLargeException} with a given message + * {@code String}. No underlying cause is set; + * {@code getCause} will return {@code null}. + * @param message the error message. + * @see #getMessage + */ + public ZipEntryTooLargeException(String message) { + super(message); + } +} diff --git a/src/main/java/com/imsweb/seerutils/zip/ZipInvalidCompressionRatioException.java b/src/main/java/com/imsweb/seerutils/zip/ZipInvalidCompressionRatioException.java new file mode 100644 index 0000000..5d570ed --- /dev/null +++ b/src/main/java/com/imsweb/seerutils/zip/ZipInvalidCompressionRatioException.java @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2022 Information Management Services, Inc. + */ +package com.imsweb.seerutils.zip; + +import java.io.IOException; + +public class ZipInvalidCompressionRatioException extends IOException { + + /** + * Constructs an {@code ZipInvalidCompressionRatioException} with a given message + * {@code String}. No underlying cause is set; + * {@code getCause} will return {@code null}. + * @param message the error message. + * @see #getMessage + */ + public ZipInvalidCompressionRatioException(String message) { + super(message); + } +} diff --git a/src/main/java/com/imsweb/seerutils/zip/ZipSecureFile.java b/src/main/java/com/imsweb/seerutils/zip/ZipSecureFile.java new file mode 100644 index 0000000..708bad8 --- /dev/null +++ b/src/main/java/com/imsweb/seerutils/zip/ZipSecureFile.java @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2023 Information Management Services, Inc. + */ +package com.imsweb.seerutils.zip; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.Enumeration; + +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.io.IOUtils; + +/** + * This class wraps a {@link ZipFile} in order to check the entries for zip bombs + * while reading the archive. + */ +@SuppressWarnings("unused") +public class ZipSecureFile extends ZipFile { + + /** + * The ratio between de- and inflated bytes to detect zip-bomb. It defaults to 0.75% (= 0.0075d), i.e. when the compression is better than + * 1% for any given read package part, the parsing will fail indicating a Zip-Bomb. + */ + private final double _minInflateRatio; + + /** + * Sets the maximum file size of a single zip entry. It defaults to 4GB, i.e. the 32-bit zip format maximum. + */ + private final long _maxEntrySize; + + public ZipSecureFile(File file) throws IOException { + this(file, 0.0075, 4294967296L); + } + + public ZipSecureFile(File file, double minInflationRatio, long maxEntrySize) throws IOException { + super(file); + + _minInflateRatio = minInflationRatio; + _maxEntrySize = maxEntrySize; + } + + /** + * Returns the current minimum compression rate that is used. + * @return The min accepted compression-ratio. + */ + public double getMinInflateRatio() { + return _minInflateRatio; + } + + /** + * Returns the current maximum allowed uncompressed file size. + * @return The max accepted uncompressed file size. + */ + public long getMaxEntrySize() { + return _maxEntrySize; + } + + /** + * Returns an input stream for reading the contents of the specified zip file entry. + *

+ * Closing this ZIP file will, in turn, close all input streams that have been returned by invocations of this method. + * @param entry the zip file entry + * @return the input stream for reading the contents of the specified zip file entry. + * @throws IOException if an I/O error has occurred + * @throws IllegalStateException if the zip file has been closed + */ + @Override + public InputStream getInputStream(ZipArchiveEntry entry) throws IOException { + ZipArchiveThresholdInputStream is = new ZipArchiveThresholdInputStream(super.getInputStream(entry)); + + is.setEntry(entry); + is.setMinInflateRatio(_minInflateRatio); + is.setMaxEntrySize(_maxEntrySize); + + return is; + } + + /** + * In-memory test of a ZIP file to ensure it is not a zip-bomb + * @param url location of zip file + * @param maxEntries maximum number of entries allowed in teh ZIP file + * @return true if a possible zip-bomb + * @throws IOException if a problem with reading the file + */ + public static boolean isZipBomb(URL url, int maxEntries) throws IOException { + boolean detected = false; + long numEntries = 0; + + try (ZipSecureFile z = new ZipSecureFile(new File(url.getFile()))) { + Enumeration entries = z.getEntries(); + while (entries.hasMoreElements()) { + ZipArchiveEntry entry = entries.nextElement(); + + numEntries++; + if (numEntries > maxEntries) { + detected = true; + break; + } + + try (InputStream inputStream = z.getInputStream(entry)) { + if (IOUtils.toByteArray(inputStream).length == 0) + throw new IllegalStateException("Error processing file"); + } + } + } + catch (ZipEntryTooLargeException | ZipInvalidCompressionRatioException e) { + detected = true; + } + + return detected; + } + +} + diff --git a/src/test/java/com/imsweb/seerutils/SeerUtilsTest.java b/src/test/java/com/imsweb/seerutils/SeerUtilsTest.java index 07b7c9d..e878879 100644 --- a/src/test/java/com/imsweb/seerutils/SeerUtilsTest.java +++ b/src/test/java/com/imsweb/seerutils/SeerUtilsTest.java @@ -260,8 +260,27 @@ public void testCopyDirectory() throws IOException { Assert.assertEquals("TEST3", SeerUtils.readFile(new File(newSubDir2, "test3.txt"))); } + @Test + public void testUnzipFile() throws IOException { + File dir = new File(getTestingDirectory(), "test-zip"); + if (dir.exists()) + FileUtils.deleteDirectory(dir); + Assert.assertFalse(dir.exists()); + Assert.assertTrue(dir.mkdir()); + File sourceFile = new File(dir, "test-zip.txt"); + SeerUtils.writeFile("TEST", sourceFile); + File targetFile = new File(dir, "test-zip.zip"); + SeerUtils.zipFile(sourceFile, targetFile); + Assert.assertTrue(targetFile.exists()); + + sourceFile = targetFile; + File targetDir = new File(dir, "test-zip-2"); + SeerUtils.unzipFile(sourceFile, targetDir); + Assert.assertTrue(targetDir.exists()); + } + private File getTestingDirectory() { - File workingDir = new File(System.getProperty("user.dir").replace(".idea\\modules\\", "")); + File workingDir = new File(System.getProperty("user.dir")); if (!workingDir.exists()) throw new RuntimeException("Unable to find " + workingDir.getPath()); File file = new File(workingDir, "build/test-data");