diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java index 713211a631..6d6d085a23 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java @@ -405,7 +405,7 @@ public SevenZArchiveEntry getNextEntry() throws IOException { if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) { entry.setName(getDefaultName()); } - buildDecodingStream(currentEntryIndex); + buildDecodingStream(currentEntryIndex, false); uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; return entry; } @@ -1148,7 +1148,19 @@ private void calculateStreamMap(final Archive archive) throws IOException { archive.streamMap = streamMap; } - private void buildDecodingStream(int entryIndex) throws IOException { + /** + * Build the decoding stream for the entry to be read. + * This method may be called from a random access(getInputStream) or + * sequential access(getNextEntry). + * If this method is called from a random access, some entries may + * need to be skipped(we put them to the deferredBlockStreams and + * skip them when actually needed to improve the performance) + * + * @param entryIndex the index of the entry to be read + * @param isRandomAccess is this called in a random access + * @throws IOException if there are exceptions when reading the file + */ + private void buildDecodingStream(int entryIndex, boolean isRandomAccess) throws IOException { if (archive.streamMap == null) { throw new IOException("Archive doesn't contain stream information to read entries"); } @@ -1161,10 +1173,6 @@ private void buildDecodingStream(int entryIndex) throws IOException { } final SevenZArchiveEntry file = archive.files[entryIndex]; boolean isInSameFolder = false; - final Folder folder = archive.folders[folderIndex]; - final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; - final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + - archive.streamMap.packStreamOffsets[firstPackStreamIndex]; if (currentFolderIndex == folderIndex) { // (COMPRESS-320). // The current entry is within the same (potentially opened) folder. The @@ -1177,54 +1185,27 @@ private void buildDecodingStream(int entryIndex) throws IOException { // if this is called in a random access, then the content methods of previous entry may be null // the content methods should be set to methods of the first entry as it must not be null, // and the content methods would only be set if the content methods was not set - if(currentEntryIndex != entryIndex && file.getContentMethods() == null) { + if(isRandomAccess && file.getContentMethods() == null) { int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; file.setContentMethods(folderFirstFile.getContentMethods()); } isInSameFolder = true; } else { - // We're opening a new folder. Discard any queued streams/ folder stream. currentFolderIndex = folderIndex; - deferredBlockStreams.clear(); - if (currentFolderInputStream != null) { - currentFolderInputStream.close(); - currentFolderInputStream = null; - } - - currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); + // We're opening a new folder. Discard any queued streams/ folder stream. + reopenFolderInputStream(folderIndex, file); } - // if this mothod is called in a random access, then some entries need to be skipped, - // if the entry of entryIndex is in the same folder of the currentFolderIndex, - // then the entries between (currentEntryIndex + 1) and (entryIndex - 1) need to be skipped, - // otherwise it's a new folder, and the entries between firstFileInFolderIndex and (entryIndex - 1) need to be skipped - if(currentEntryIndex != entryIndex) { - int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; - if(isInSameFolder) { - if(currentEntryIndex < entryIndex) { - // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped - filesToSkipStartIndex = currentEntryIndex + 1; - } else { - // the entry is in the same folder of current entry, but it has already been read before, we need to reset - // the position of the currentFolderInputStream to the beginning of folder, and then skip the files - // from the start entry of the folder again - deferredBlockStreams.clear(); - channel.position(folderOffset); - } - } - - for(int i = filesToSkipStartIndex;i < entryIndex;i++) { - SevenZArchiveEntry fileToSkip = archive.files[i]; - InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); - if (fileToSkip.getHasCrc()) { - fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); - } - deferredBlockStreams.add(fileStreamToSkip); + boolean hasEntriesBeenSkipped = skipEntriesWhenNeeded(entryIndex, isRandomAccess, isInSameFolder, folderIndex); - // set the content methods as well, it equals to file.getContentMethods() because they are in same folder - fileToSkip.setContentMethods(file.getContentMethods()); - } + if (isRandomAccess && currentEntryIndex == entryIndex && !hasEntriesBeenSkipped) { + // we don't need to add another entry to the deferredBlockStreams when : + // 1. If this method is called in a random access and the entry index + // to be read equals to the current entry index, the input stream + // has already been put in the deferredBlockStreams + // 2. If this entry has not been read(which means no entries are skipped) + return; } InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); @@ -1235,6 +1216,113 @@ private void buildDecodingStream(int entryIndex) throws IOException { deferredBlockStreams.add(fileStream); } + /** + * Discard any queued streams/ folder stream, and reopen the current folder input stream. + * + * @param folderIndex the index of the folder to reopen + * @param file the 7z entry to read + * @throws IOException if exceptions occur when reading the 7z file + */ + private void reopenFolderInputStream(int folderIndex, SevenZArchiveEntry file) throws IOException { + deferredBlockStreams.clear(); + if (currentFolderInputStream != null) { + currentFolderInputStream.close(); + currentFolderInputStream = null; + } + final Folder folder = archive.folders[folderIndex]; + final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; + final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + + archive.streamMap.packStreamOffsets[firstPackStreamIndex]; + + currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); + } + + /** + * Skip all the entries if needed. + * Entries need to be skipped when: + *

+ * 1. it's a random access + * 2. one of these 2 condition is meet : + *

+ * 2.1 currentEntryIndex != entryIndex : this means there are some entries + * to be skipped(currentEntryIndex < entryIndex) or the entry has already + * been read(currentEntryIndex > entryIndex) + *

+ * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: + * if the entry to be read is the current entry, but some data of it has + * been read before, then we need to reopen the stream of the folder and + * skip all the entries before the current entries + * + * @param entryIndex the entry to be read + * @param isRandomAccess is this a random access + * @param isInSameFolder are the entry to be read and the current entry in the same folder + * @param folderIndex the index of the folder which contains the entry + * @return true if there are entries actually skipped + * @throws IOException there are exceptions when skipping entries + * @since 1.21 + */ + private boolean skipEntriesWhenNeeded(int entryIndex, boolean isRandomAccess, + boolean isInSameFolder, int folderIndex) throws IOException { + // entries will only need to be skipped if it's a random access + if (!isRandomAccess) { + return false; + } + + final SevenZArchiveEntry file = archive.files[entryIndex]; + boolean isNeedToSkipEntries; + boolean hasCurrentEntryBeenRead = false; + if (currentEntryIndex != entryIndex) { + // this means there are some entries to be skipped(currentEntryIndex < entryIndex) + // or the entry has already been read(currentEntryIndex > entryIndex) + isNeedToSkipEntries = true; + } else { + if (deferredBlockStreams.size() > 0) { + CRC32VerifyingInputStream currentEntryInputStream = (CRC32VerifyingInputStream) deferredBlockStreams.get(deferredBlockStreams.size() - 1); + hasCurrentEntryBeenRead = currentEntryInputStream.getBytesRemaining() != archive.files[currentEntryIndex].getSize(); + } + + // if the entry to be read is the current entry, but some data of it has + // been read before, then we need to reopen the stream of the folder and + // skip all the entries before the current entries + isNeedToSkipEntries = hasCurrentEntryBeenRead; + } + + if (!isNeedToSkipEntries) { + return false; + } + + // 1. if currentEntryIndex < entryIndex : + // this means there are some entries to be skipped(currentEntryIndex < entryIndex) + // 2. if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : + // this means the entry has already been read before, and we need to reopen the + // stream of the folder and skip all the entries before the current entries + int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; + if (isInSameFolder) { + if (currentEntryIndex < entryIndex) { + // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped + filesToSkipStartIndex = currentEntryIndex + 1; + } else { + // the entry is in the same folder of current entry, but it has already been read before, we need to reset + // the position of the currentFolderInputStream to the beginning of folder, and then skip the files + // from the start entry of the folder again + reopenFolderInputStream(folderIndex, file); + } + } + + for (int i = filesToSkipStartIndex; i < entryIndex; i++) { + SevenZArchiveEntry fileToSkip = archive.files[i]; + InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); + if (fileToSkip.getHasCrc()) { + fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); + } + deferredBlockStreams.add(fileStreamToSkip); + + // set the content methods as well, it equals to file.getContentMethods() because they are in same folder + fileToSkip.setContentMethods(file.getContentMethods()); + } + return isNeedToSkipEntries; + } + private InputStream buildDecoderStack(final Folder folder, final long folderOffset, final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { channel.position(folderOffset); @@ -1348,7 +1436,7 @@ public InputStream getInputStream(SevenZArchiveEntry entry) throws IOException { throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); } - buildDecodingStream(entryIndex); + buildDecodingStream(entryIndex, true); currentEntryIndex = entryIndex; currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; return getCurrentStream(); diff --git a/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java b/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java index 726acaabe0..18873343d0 100644 --- a/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java +++ b/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java @@ -109,4 +109,8 @@ public long skip(final long n) throws IOException { public void close() throws IOException { in.close(); } + + public long getBytesRemaining() { + return bytesRemaining; + } } diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java index 3fb99ced23..c47904b9f1 100644 --- a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java @@ -531,12 +531,20 @@ public void testRandomAccessWhenJumpingBackwards() throws Exception { } } - sevenZFile.getNextEntry(); - sevenZFile.getNextEntry(); - // skip all the entries and jump backwards + // read the next entry and jump back using random access + SevenZArchiveEntry entryAfterTestTxtEntry = sevenZFile.getNextEntry(); + byte[] entryAfterTestTxtEntryContents = new byte[(int) entryAfterTestTxtEntry.getSize()]; + int off = 0; + while (off < entryAfterTestTxtEntryContents.length) { + final int bytesRead = sevenZFile.read(entryAfterTestTxtEntryContents, off, entryAfterTestTxtEntryContents.length - off); + assert (bytesRead >= 0); + off += bytesRead; + } + + // jump backwards byte[] contents = new byte[(int) testTxtEntry.getSize()]; try (InputStream inputStream = sevenZFile.getInputStream(testTxtEntry)) { - int off = 0; + off = 0; while (off < contents.length) { final int bytesRead = inputStream.read(contents, off, contents.length - off); assert (bytesRead >= 0); @@ -545,6 +553,112 @@ public void testRandomAccessWhenJumpingBackwards() throws Exception { assertEquals(SevenZMethod.LZMA2, testTxtEntry.getContentMethods().iterator().next().getMethod()); assertEquals(testTxtContents, new String(contents, "UTF-8")); } + + // then read the next entry using getNextEntry + SevenZArchiveEntry nextTestTxtEntry = sevenZFile.getNextEntry(); + byte[] nextTestContents = new byte[(int) nextTestTxtEntry.getSize()]; + off = 0; + while (off < nextTestContents.length) { + final int bytesRead = sevenZFile.read(nextTestContents, off, nextTestContents.length - off); + assert (bytesRead >= 0); + off += bytesRead; + } + + assertEquals(nextTestTxtEntry.getName(), entryAfterTestTxtEntry.getName()); + assertEquals(nextTestTxtEntry.getSize(), entryAfterTestTxtEntry.getSize()); + assertArrayEquals(nextTestContents, entryAfterTestTxtEntryContents); + } + } + + @Test + public void testRandomAccessWhenJumpingForwards() throws Exception { + try (SevenZFile sevenZFile = new SevenZFile(getFile("COMPRESS-256.7z"))) { + final String testTxtContents = "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011"; + + SevenZArchiveEntry testTxtEntry = null; + Iterable entries = sevenZFile.getEntries(); + Iterator iter = entries.iterator(); + while(iter.hasNext()) { + testTxtEntry = iter.next(); + if (testTxtEntry.getName().equals("commons-compress-1.7-src/src/test/resources/test.txt")) { + break; + } + } + SevenZArchiveEntry firstEntry = sevenZFile.getNextEntry(); + // only read some of the data of the first entry + byte[] contents = new byte[(int) firstEntry.getSize()/2]; + sevenZFile.read(contents); + + // and the third entry + sevenZFile.getNextEntry(); + SevenZArchiveEntry thirdEntry = sevenZFile.getNextEntry(); + contents = new byte[(int) thirdEntry.getSize()/2]; + sevenZFile.read(contents); + + // and then read a file after the first entry using random access + contents = new byte[(int) testTxtEntry.getSize()]; + int numberOfReads = 10; + while (numberOfReads-- > 0) { + try (InputStream inputStream = sevenZFile.getInputStream(testTxtEntry)) { + int off = 0; + while (off < contents.length) { + final int bytesRead = inputStream.read(contents, off, contents.length - off); + assert (bytesRead >= 0); + off += bytesRead; + } + assertEquals(SevenZMethod.LZMA2, testTxtEntry.getContentMethods().iterator().next().getMethod()); + assertEquals(testTxtContents, new String(contents, "UTF-8")); + } + } + } + } + + @Test + public void testRandomAccessMultipleReadSameFile() throws Exception { + try (SevenZFile sevenZFile = new SevenZFile(getFile("COMPRESS-256.7z"))) { + final String testTxtContents = "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011\n" + + "111111111111111111111111111000101011"; + + SevenZArchiveEntry entry; + SevenZArchiveEntry testTxtEntry = null; + while ((entry = sevenZFile.getNextEntry()) != null) { + if (entry.getName().equals("commons-compress-1.7-src/src/test/resources/test.txt")) { + testTxtEntry = entry; + break; + } + } + + byte[] contents = new byte[(int) testTxtEntry.getSize()]; + int numberOfReads = 10; + while (numberOfReads-- > 0) { + try (InputStream inputStream = sevenZFile.getInputStream(testTxtEntry)) { + int off = 0; + while (off < contents.length) { + final int bytesRead = inputStream.read(contents, off, contents.length - off); + assert (bytesRead >= 0); + off += bytesRead; + } + assertEquals(SevenZMethod.LZMA2, testTxtEntry.getContentMethods().iterator().next().getMethod()); + assertEquals(testTxtContents, new String(contents, "UTF-8")); + } + } } }