Skip to content

Commit

Permalink
Merge pull request #95 from PeterAlfredLee/COMPRESS-505
Browse files Browse the repository at this point in the history
COMPRESS-505 : bug fix for random access of 7z
  • Loading branch information
PeterAlfredLee committed Mar 24, 2020
2 parents 5401f24 + ccf4f3d commit 9ea37ac
Show file tree
Hide file tree
Showing 3 changed files with 255 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ public SevenZArchiveEntry getNextEntry() throws IOException {
if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) {
entry.setName(getDefaultName());
}
buildDecodingStream(currentEntryIndex);
buildDecodingStream(currentEntryIndex, false);
uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0;
return entry;
}
Expand Down Expand Up @@ -1148,7 +1148,19 @@ private void calculateStreamMap(final Archive archive) throws IOException {
archive.streamMap = streamMap;
}

private void buildDecodingStream(int entryIndex) throws IOException {
/**
* Build the decoding stream for the entry to be read.
* This method may be called from a random access(getInputStream) or
* sequential access(getNextEntry).
* If this method is called from a random access, some entries may
* need to be skipped(we put them to the deferredBlockStreams and
* skip them when actually needed to improve the performance)
*
* @param entryIndex the index of the entry to be read
* @param isRandomAccess is this called in a random access
* @throws IOException if there are exceptions when reading the file
*/
private void buildDecodingStream(int entryIndex, boolean isRandomAccess) throws IOException {
if (archive.streamMap == null) {
throw new IOException("Archive doesn't contain stream information to read entries");
}
Expand All @@ -1161,10 +1173,6 @@ private void buildDecodingStream(int entryIndex) throws IOException {
}
final SevenZArchiveEntry file = archive.files[entryIndex];
boolean isInSameFolder = false;
final Folder folder = archive.folders[folderIndex];
final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex];
final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
archive.streamMap.packStreamOffsets[firstPackStreamIndex];
if (currentFolderIndex == folderIndex) {
// (COMPRESS-320).
// The current entry is within the same (potentially opened) folder. The
Expand All @@ -1177,54 +1185,27 @@ private void buildDecodingStream(int entryIndex) throws IOException {
// if this is called in a random access, then the content methods of previous entry may be null
// the content methods should be set to methods of the first entry as it must not be null,
// and the content methods would only be set if the content methods was not set
if(currentEntryIndex != entryIndex && file.getContentMethods() == null) {
if(isRandomAccess && file.getContentMethods() == null) {
int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex];
SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex];
file.setContentMethods(folderFirstFile.getContentMethods());
}
isInSameFolder = true;
} else {
// We're opening a new folder. Discard any queued streams/ folder stream.
currentFolderIndex = folderIndex;
deferredBlockStreams.clear();
if (currentFolderInputStream != null) {
currentFolderInputStream.close();
currentFolderInputStream = null;
}

currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file);
// We're opening a new folder. Discard any queued streams/ folder stream.
reopenFolderInputStream(folderIndex, file);
}

// if this mothod is called in a random access, then some entries need to be skipped,
// if the entry of entryIndex is in the same folder of the currentFolderIndex,
// then the entries between (currentEntryIndex + 1) and (entryIndex - 1) need to be skipped,
// otherwise it's a new folder, and the entries between firstFileInFolderIndex and (entryIndex - 1) need to be skipped
if(currentEntryIndex != entryIndex) {
int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[folderIndex];
if(isInSameFolder) {
if(currentEntryIndex < entryIndex) {
// the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped
filesToSkipStartIndex = currentEntryIndex + 1;
} else {
// the entry is in the same folder of current entry, but it has already been read before, we need to reset
// the position of the currentFolderInputStream to the beginning of folder, and then skip the files
// from the start entry of the folder again
deferredBlockStreams.clear();
channel.position(folderOffset);
}
}

for(int i = filesToSkipStartIndex;i < entryIndex;i++) {
SevenZArchiveEntry fileToSkip = archive.files[i];
InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize());
if (fileToSkip.getHasCrc()) {
fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue());
}
deferredBlockStreams.add(fileStreamToSkip);
boolean hasEntriesBeenSkipped = skipEntriesWhenNeeded(entryIndex, isRandomAccess, isInSameFolder, folderIndex);

// set the content methods as well, it equals to file.getContentMethods() because they are in same folder
fileToSkip.setContentMethods(file.getContentMethods());
}
if (isRandomAccess && currentEntryIndex == entryIndex && !hasEntriesBeenSkipped) {
// we don't need to add another entry to the deferredBlockStreams when :
// 1. If this method is called in a random access and the entry index
// to be read equals to the current entry index, the input stream
// has already been put in the deferredBlockStreams
// 2. If this entry has not been read(which means no entries are skipped)
return;
}

InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize());
Expand All @@ -1235,6 +1216,113 @@ private void buildDecodingStream(int entryIndex) throws IOException {
deferredBlockStreams.add(fileStream);
}

/**
* Discard any queued streams/ folder stream, and reopen the current folder input stream.
*
* @param folderIndex the index of the folder to reopen
* @param file the 7z entry to read
* @throws IOException if exceptions occur when reading the 7z file
*/
private void reopenFolderInputStream(int folderIndex, SevenZArchiveEntry file) throws IOException {
deferredBlockStreams.clear();
if (currentFolderInputStream != null) {
currentFolderInputStream.close();
currentFolderInputStream = null;
}
final Folder folder = archive.folders[folderIndex];
final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex];
final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
archive.streamMap.packStreamOffsets[firstPackStreamIndex];

currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file);
}

/**
* Skip all the entries if needed.
* Entries need to be skipped when:
* <p>
* 1. it's a random access
* 2. one of these 2 condition is meet :
* <p>
* 2.1 currentEntryIndex != entryIndex : this means there are some entries
* to be skipped(currentEntryIndex < entryIndex) or the entry has already
* been read(currentEntryIndex > entryIndex)
* <p>
* 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead:
* if the entry to be read is the current entry, but some data of it has
* been read before, then we need to reopen the stream of the folder and
* skip all the entries before the current entries
*
* @param entryIndex the entry to be read
* @param isRandomAccess is this a random access
* @param isInSameFolder are the entry to be read and the current entry in the same folder
* @param folderIndex the index of the folder which contains the entry
* @return true if there are entries actually skipped
* @throws IOException there are exceptions when skipping entries
* @since 1.21
*/
private boolean skipEntriesWhenNeeded(int entryIndex, boolean isRandomAccess,
boolean isInSameFolder, int folderIndex) throws IOException {
// entries will only need to be skipped if it's a random access
if (!isRandomAccess) {
return false;
}

final SevenZArchiveEntry file = archive.files[entryIndex];
boolean isNeedToSkipEntries;
boolean hasCurrentEntryBeenRead = false;
if (currentEntryIndex != entryIndex) {
// this means there are some entries to be skipped(currentEntryIndex < entryIndex)
// or the entry has already been read(currentEntryIndex > entryIndex)
isNeedToSkipEntries = true;
} else {
if (deferredBlockStreams.size() > 0) {
CRC32VerifyingInputStream currentEntryInputStream = (CRC32VerifyingInputStream) deferredBlockStreams.get(deferredBlockStreams.size() - 1);
hasCurrentEntryBeenRead = currentEntryInputStream.getBytesRemaining() != archive.files[currentEntryIndex].getSize();
}

// if the entry to be read is the current entry, but some data of it has
// been read before, then we need to reopen the stream of the folder and
// skip all the entries before the current entries
isNeedToSkipEntries = hasCurrentEntryBeenRead;
}

if (!isNeedToSkipEntries) {
return false;
}

// 1. if currentEntryIndex < entryIndex :
// this means there are some entries to be skipped(currentEntryIndex < entryIndex)
// 2. if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) :
// this means the entry has already been read before, and we need to reopen the
// stream of the folder and skip all the entries before the current entries
int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex];
if (isInSameFolder) {
if (currentEntryIndex < entryIndex) {
// the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped
filesToSkipStartIndex = currentEntryIndex + 1;
} else {
// the entry is in the same folder of current entry, but it has already been read before, we need to reset
// the position of the currentFolderInputStream to the beginning of folder, and then skip the files
// from the start entry of the folder again
reopenFolderInputStream(folderIndex, file);
}
}

for (int i = filesToSkipStartIndex; i < entryIndex; i++) {
SevenZArchiveEntry fileToSkip = archive.files[i];
InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize());
if (fileToSkip.getHasCrc()) {
fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue());
}
deferredBlockStreams.add(fileStreamToSkip);

// set the content methods as well, it equals to file.getContentMethods() because they are in same folder
fileToSkip.setContentMethods(file.getContentMethods());
}
return isNeedToSkipEntries;
}

private InputStream buildDecoderStack(final Folder folder, final long folderOffset,
final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException {
channel.position(folderOffset);
Expand Down Expand Up @@ -1348,7 +1436,7 @@ public InputStream getInputStream(SevenZArchiveEntry entry) throws IOException {
throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName);
}

buildDecodingStream(entryIndex);
buildDecodingStream(entryIndex, true);
currentEntryIndex = entryIndex;
currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex];
return getCurrentStream();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,8 @@ public long skip(final long n) throws IOException {
public void close() throws IOException {
in.close();
}

public long getBytesRemaining() {
return bytesRemaining;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -531,12 +531,20 @@ public void testRandomAccessWhenJumpingBackwards() throws Exception {
}
}

sevenZFile.getNextEntry();
sevenZFile.getNextEntry();
// skip all the entries and jump backwards
// read the next entry and jump back using random access
SevenZArchiveEntry entryAfterTestTxtEntry = sevenZFile.getNextEntry();
byte[] entryAfterTestTxtEntryContents = new byte[(int) entryAfterTestTxtEntry.getSize()];
int off = 0;
while (off < entryAfterTestTxtEntryContents.length) {
final int bytesRead = sevenZFile.read(entryAfterTestTxtEntryContents, off, entryAfterTestTxtEntryContents.length - off);
assert (bytesRead >= 0);
off += bytesRead;
}

// jump backwards
byte[] contents = new byte[(int) testTxtEntry.getSize()];
try (InputStream inputStream = sevenZFile.getInputStream(testTxtEntry)) {
int off = 0;
off = 0;
while (off < contents.length) {
final int bytesRead = inputStream.read(contents, off, contents.length - off);
assert (bytesRead >= 0);
Expand All @@ -545,6 +553,112 @@ public void testRandomAccessWhenJumpingBackwards() throws Exception {
assertEquals(SevenZMethod.LZMA2, testTxtEntry.getContentMethods().iterator().next().getMethod());
assertEquals(testTxtContents, new String(contents, "UTF-8"));
}

// then read the next entry using getNextEntry
SevenZArchiveEntry nextTestTxtEntry = sevenZFile.getNextEntry();
byte[] nextTestContents = new byte[(int) nextTestTxtEntry.getSize()];
off = 0;
while (off < nextTestContents.length) {
final int bytesRead = sevenZFile.read(nextTestContents, off, nextTestContents.length - off);
assert (bytesRead >= 0);
off += bytesRead;
}

assertEquals(nextTestTxtEntry.getName(), entryAfterTestTxtEntry.getName());
assertEquals(nextTestTxtEntry.getSize(), entryAfterTestTxtEntry.getSize());
assertArrayEquals(nextTestContents, entryAfterTestTxtEntryContents);
}
}

@Test
public void testRandomAccessWhenJumpingForwards() throws Exception {
try (SevenZFile sevenZFile = new SevenZFile(getFile("COMPRESS-256.7z"))) {
final String testTxtContents = "111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011";

SevenZArchiveEntry testTxtEntry = null;
Iterable<SevenZArchiveEntry> entries = sevenZFile.getEntries();
Iterator<SevenZArchiveEntry> iter = entries.iterator();
while(iter.hasNext()) {
testTxtEntry = iter.next();
if (testTxtEntry.getName().equals("commons-compress-1.7-src/src/test/resources/test.txt")) {
break;
}
}
SevenZArchiveEntry firstEntry = sevenZFile.getNextEntry();
// only read some of the data of the first entry
byte[] contents = new byte[(int) firstEntry.getSize()/2];
sevenZFile.read(contents);

// and the third entry
sevenZFile.getNextEntry();
SevenZArchiveEntry thirdEntry = sevenZFile.getNextEntry();
contents = new byte[(int) thirdEntry.getSize()/2];
sevenZFile.read(contents);

// and then read a file after the first entry using random access
contents = new byte[(int) testTxtEntry.getSize()];
int numberOfReads = 10;
while (numberOfReads-- > 0) {
try (InputStream inputStream = sevenZFile.getInputStream(testTxtEntry)) {
int off = 0;
while (off < contents.length) {
final int bytesRead = inputStream.read(contents, off, contents.length - off);
assert (bytesRead >= 0);
off += bytesRead;
}
assertEquals(SevenZMethod.LZMA2, testTxtEntry.getContentMethods().iterator().next().getMethod());
assertEquals(testTxtContents, new String(contents, "UTF-8"));
}
}
}
}

@Test
public void testRandomAccessMultipleReadSameFile() throws Exception {
try (SevenZFile sevenZFile = new SevenZFile(getFile("COMPRESS-256.7z"))) {
final String testTxtContents = "111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011\n" +
"111111111111111111111111111000101011";

SevenZArchiveEntry entry;
SevenZArchiveEntry testTxtEntry = null;
while ((entry = sevenZFile.getNextEntry()) != null) {
if (entry.getName().equals("commons-compress-1.7-src/src/test/resources/test.txt")) {
testTxtEntry = entry;
break;
}
}

byte[] contents = new byte[(int) testTxtEntry.getSize()];
int numberOfReads = 10;
while (numberOfReads-- > 0) {
try (InputStream inputStream = sevenZFile.getInputStream(testTxtEntry)) {
int off = 0;
while (off < contents.length) {
final int bytesRead = inputStream.read(contents, off, contents.length - off);
assert (bytesRead >= 0);
off += bytesRead;
}
assertEquals(SevenZMethod.LZMA2, testTxtEntry.getContentMethods().iterator().next().getMethod());
assertEquals(testTxtContents, new String(contents, "UTF-8"));
}
}
}
}

Expand Down

0 comments on commit 9ea37ac

Please sign in to comment.