From c0a05bf189144bdfbad40a6c686aa5da13084aad Mon Sep 17 00:00:00 2001 From: Jens Reimann Date: Mon, 9 Jul 2018 11:41:43 +0200 Subject: [PATCH 1/2] [COMPRESS-459] Fix reading of multibyte name entries This fixes COMPRESS-459 by using the name number of bytes from the field in the stream instead of relying on the assumption that each character is exactly one byte, which isn't true for UTF-8, UTF-16 or other multi-byte character encodings. --- .../archivers/cpio/CpioArchiveEntry.java | 17 ++++++++++++++++- .../archivers/cpio/CpioArchiveInputStream.java | 4 ++-- .../cpio/CpioArchiveInputStreamTest.java | 16 ++++++++++++++++ src/test/resources/COMPRESS-459.cpio | Bin 0 -> 512 bytes 4 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 src/test/resources/COMPRESS-459.cpio diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java index 28e58238cc..3ad7c87da8 100644 --- a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java @@ -469,10 +469,25 @@ public int getAlignmentBoundary() { * @return the number of bytes needed to pad the header (0,1,2,3) */ public int getHeaderPadCount(){ + long namesize = name != null ? name.length() : 0; + return getHeaderPadCount(namesize); + } + + /** + * Get the number of bytes needed to pad the header to the alignment boundary. + * + * @param namesize + * The length of the name in bytes, as read in the stream. + * Without the trailing zero byte. + * @return the number of bytes needed to pad the header (0,1,2,3) + * + * @since 1.18 + */ + public int getHeaderPadCount(long namesize){ if (this.alignmentBoundary == 0) { return 0; } int size = this.headerSize + 1; // Name has terminating null if (name != null) { - size += name.length(); + size += namesize; } final int remain = size % this.alignmentBoundary; if (remain > 0){ diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java index ad8e125a9b..b64d091045 100644 --- a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java @@ -393,7 +393,7 @@ private CpioArchiveEntry readNewEntry(final boolean hasCrc) + ArchiveUtils.sanitize(name) + " Occured at byte: " + getBytesRead()); } - skip(ret.getHeaderPadCount()); + skip(ret.getHeaderPadCount(namesize-1)); return ret; } @@ -449,7 +449,7 @@ private CpioArchiveEntry readOldBinaryEntry(final boolean swapHalfWord) + ArchiveUtils.sanitize(name) + "Occured at byte: " + getBytesRead()); } - skip(ret.getHeaderPadCount()); + skip(ret.getHeaderPadCount(namesize-1)); return ret; } diff --git a/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java index f1744053cb..762d4648e2 100644 --- a/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java @@ -65,4 +65,20 @@ public void testCpioUnarchiveCreatedByRedlineRpm() throws Exception { assertEquals(count, 1); } + + @Test + public void testCpioUnarchiveMultibyteCharName() throws Exception { + final CpioArchiveInputStream in = + new CpioArchiveInputStream(new FileInputStream(getFile("COMPRESS-459.cpio")), "UTF-8"); + CpioArchiveEntry entry= null; + + int count = 0; + while ((entry = (CpioArchiveEntry) in.getNextEntry()) != null) { + count++; + assertNotNull(entry); + } + in.close(); + + assertEquals(2, count); + } } diff --git a/src/test/resources/COMPRESS-459.cpio b/src/test/resources/COMPRESS-459.cpio new file mode 100644 index 0000000000000000000000000000000000000000..8ae1662a0be2e9c8ed26a71b7be372bb684b6916 GIT binary patch literal 512 zcmXpoH!wFaG%&C*wlH%tG5`V#Lnjj;W$5T?VvIzCqzp}+OpJ{zES*6d1aNbKvcdWh z;xLw>1&rnF?;8;08XT-^Vrr>>c-!G6hxh1}RFp6<)z8FjwNG!EVE=0=uLWOgJRS(bLB@NKsJ{= Date: Wed, 11 Jul 2018 09:20:44 +0200 Subject: [PATCH 2/2] [COMPRESS-459] Fix writing of multibyte name entries --- .../archivers/cpio/CpioArchiveEntry.java | 26 +++++++++++++++++-- .../cpio/CpioArchiveOutputStream.java | 5 ++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java index 3ad7c87da8..0266da1393 100644 --- a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java @@ -19,6 +19,7 @@ package org.apache.commons.compress.archivers.cpio; import java.io.File; +import java.nio.charset.Charset; import java.util.Date; import org.apache.commons.compress.archivers.ArchiveEntry; @@ -466,11 +467,32 @@ public int getAlignmentBoundary() { /** * Get the number of bytes needed to pad the header to the alignment boundary. * + * @deprecated This method doesn't properly work for multi-byte encodings. And + * creates corrupt archives. Use {@link #getHeaderPadCount(Charset)} + * or {@link #getHeaderPadCount(long)} in any case. * @return the number of bytes needed to pad the header (0,1,2,3) */ + @Deprecated public int getHeaderPadCount(){ - long namesize = name != null ? name.length() : 0; - return getHeaderPadCount(namesize); + return getHeaderPadCount(null); + } + + /** + * Get the number of bytes needed to pad the header to the alignment boundary. + * + * @param charset + * The character set used to encode the entry name in the stream. + * @return the number of bytes needed to pad the header (0,1,2,3) + * @since 1.18 + */ + public int getHeaderPadCount(Charset charset){ + if (name==null) { + return 0; + } + if (charset==null) { + return getHeaderPadCount(name.length()); + } + return getHeaderPadCount(name.getBytes(charset).length); } /** diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStream.java index c0e04c475b..07fd53a08b 100644 --- a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStream.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; +import java.nio.charset.Charset; import java.util.HashMap; import org.apache.commons.compress.archivers.ArchiveEntry; @@ -302,7 +303,7 @@ private void writeNewEntry(final CpioArchiveEntry entry) throws IOException { writeAsciiLong(entry.getName().length() + 1L, 8, 16); writeAsciiLong(entry.getChksum(), 8, 16); writeCString(entry.getName()); - pad(entry.getHeaderPadCount()); + pad(entry.getHeaderPadCount(Charset.forName(encoding))); } private void writeOldAsciiEntry(final CpioArchiveEntry entry) @@ -363,7 +364,7 @@ private void writeOldBinaryEntry(final CpioArchiveEntry entry, writeBinaryLong(entry.getName().length() + 1L, 2, swapHalfWord); writeBinaryLong(entry.getSize(), 4, swapHalfWord); writeCString(entry.getName()); - pad(entry.getHeaderPadCount()); + pad(entry.getHeaderPadCount(Charset.forName(encoding))); } /*(non-Javadoc)