Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[bug-57342] Excel compatible Zip64 implementation #154

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
124 changes: 124 additions & 0 deletions src/ooxml/java/org/apache/poi/xssf/streaming/OpcOutputStream.java
@@ -0,0 +1,124 @@
package org.apache.poi.xssf.streaming;
rzymek marked this conversation as resolved.
Show resolved Hide resolved


import org.apache.poi.xssf.streaming.Zip64Impl.Entry;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.*;

/**
 * ZIP64 OutputStream implementation compatible with MS Excel.
 * Drop in replacement for {@code java.util.zip.ZipOutputStream}.
 * <p>
 * Entry payloads are raw-deflated by this stream; the surrounding ZIP records
 * (local file header, data descriptor, central directory, end record) are
 * produced by {@link Zip64Impl} and written to the same target stream.
 * <p>
 * For more information see https://github.com/rzymek/opczip
 *
 * @author Krzysztof Rzymkowski
 */
class OpcOutputStream extends DeflaterOutputStream {

    private final Zip64Impl spec;
    private final List<Entry> entries = new ArrayList<>();
    private final CRC32 crc = new CRC32();
    private Entry current;
    // NOTE(review): this counter (and the narrowing cast in closeEntry) is an int,
    // so it wraps once more than Integer.MAX_VALUE bytes have been written.
    // Widening it requires widening Zip64Impl.Entry and Zip64Impl#writeEND as well.
    private int written = 0;
    private boolean finished = false;
    private boolean closed = false;

    /**
     * Creates ZIP64 output stream
     *
     * @param out target stream to write compressed data to
     */
    public OpcOutputStream(OutputStream out) {
        // nowrap = true: ZIP entries carry raw deflate data without a zlib wrapper
        super(out, new Deflater(Deflater.DEFAULT_COMPRESSION, true));
        this.spec = new Zip64Impl(out);
    }

    /**
     * Sets the compression level used for subsequently deflated data.
     *
     * @param level the new compression level
     * @see Deflater#setLevel(int)
     */
    public void setLevel(int level) {
        super.def.setLevel(level);
    }

    /**
     * Starts a new entry, closing the currently open one first (if any).
     *
     * @param name name of the entry inside the archive
     * @throws IOException if writing the local file header (or closing the previous entry) fails
     * @see ZipOutputStream#putNextEntry(ZipEntry)
     */
    public void putNextEntry(String name) throws IOException {
        if (current != null) {
            closeEntry();
        }
        current = new Entry(name);
        current.offset = written;
        written += spec.writeLFH(current);
        entries.add(current);
    }

    /**
     * Finishes the current entry: flushes the remaining deflated data, records
     * sizes and CRC, writes the data descriptor and resets the deflater and CRC
     * for the next entry.
     *
     * @throws IllegalStateException if no entry is currently open
     * @throws IOException           if writing to the target stream fails
     * @see ZipOutputStream#closeEntry()
     */
    public void closeEntry() throws IOException {
        if (current == null) {
            throw new IllegalStateException("no current zip entry");
        }
        def.finish();
        while (!def.finished()) {
            deflate();
        }

        current.size = def.getBytesRead();
        current.compressedSize = (int) def.getBytesWritten();
        current.crc = crc.getValue();

        written += current.compressedSize;
        written += spec.writeDAT(current);
        current = null;
        def.reset();
        crc.reset();
    }


    /**
     * Closes the open entry (if any) and writes the central directory followed
     * by the end-of-central-directory record. Calling it again is a no-op.
     *
     * @throws IOException if writing to the target stream fails
     * @see ZipOutputStream#finish()
     */
    @Override
    public void finish() throws IOException {
        if (finished) {
            return;
        }
        if (current != null) {
            closeEntry();
        }
        int offset = written;
        for (Entry entry : entries) {
            written += spec.writeCEN(entry);
        }
        written += spec.writeEND(entries.size(), offset, written - offset);
        finished = true;
    }

    /**
     * Compresses the given bytes into the current entry and folds them into its CRC.
     *
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not describe a range inside {@code b}
     * @throws ZipException              if no entry has been started with {@link #putNextEntry(String)}
     * @throws IOException               if writing to the target stream fails
     * @see ZipOutputStream#write(byte[], int, int)
     */
    @Override
    public synchronized void write(byte[] b, int off, int len) throws IOException {
        if (off < 0 || len < 0 || off > b.length - len) {
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return;
        }
        if (current == null) {
            // Data written outside an entry would land in the archive without a
            // local file header and corrupt it.
            throw new ZipException("no current ZIP entry");
        }
        super.write(b, off, len);
        crc.update(b, off, len);
    }

    /**
     * Finishes the archive, releases the deflater and closes the target stream.
     * The deflater and the target stream are released even when finishing fails.
     * Calling it again is a no-op.
     *
     * @throws IOException if finishing the archive or closing the target stream fails
     * @see ZipOutputStream#close()
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }
        closed = true;
        try {
            finish();
        } finally {
            // The Deflater was supplied by this class, so DeflaterOutputStream
            // will not end it for us; release its native resources explicitly.
            def.end();
            out.close();
        }
    }
}
@@ -0,0 +1,63 @@
package org.apache.poi.xssf.streaming;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;

import java.io.IOException;
import java.io.OutputStream;

/**
 * Adapter exposing an {@link OpcOutputStream} through the
 * {@link ZipArchiveOutputStream} API.
 * <p>
 * The superclass is constructed over the same target stream, but every method
 * overridden here forwards to the wrapped {@link OpcOutputStream} and never
 * calls the superclass implementation.
 */
class OpcZipArchiveOutputStream extends ZipArchiveOutputStream {
    /** Stream that actually produces the archive. */
    private final OpcOutputStream delegate;

    /**
     * @param out target stream the archive is written to
     */
    OpcZipArchiveOutputStream(OutputStream out) {
        super(out);
        delegate = new OpcOutputStream(out);
    }

    // ---- configuration ----

    /** Forwards the compression level to the wrapped stream. */
    @Override
    public void setLevel(int level) {
        delegate.setLevel(level);
    }

    // ---- entry lifecycle ----

    /** Starts a new entry; only the name of {@code archiveEntry} is used. */
    @Override
    public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException {
        final String entryName = archiveEntry.getName();
        delegate.putNextEntry(entryName);
    }

    /** Closes the entry currently being written. */
    @Override
    public void closeArchiveEntry() throws IOException {
        delegate.closeEntry();
    }

    // ---- payload ----

    /** Writes a single byte to the current entry. */
    @Override
    public void write(int b) throws IOException {
        delegate.write(b);
    }

    /** Writes the whole array to the current entry. */
    @Override
    public void write(byte[] b) throws IOException {
        delegate.write(b);
    }

    /** Writes {@code len} bytes of {@code b}, starting at {@code off}, to the current entry. */
    @Override
    public void write(byte[] b, int off, int len) throws IOException {
        delegate.write(b, off, len);
    }

    // ---- stream lifecycle ----

    /** Flushes the wrapped stream. */
    @Override
    public void flush() throws IOException {
        delegate.flush();
    }

    /** Finishes the archive on the wrapped stream. */
    @Override
    public void finish() throws IOException {
        delegate.finish();
    }

    /** Closes the wrapped stream. */
    @Override
    public void close() throws IOException {
        delegate.close();
    }
}
14 changes: 12 additions & 2 deletions src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java
Expand Up @@ -32,6 +32,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more
import java.util.Map;
import java.util.NoSuchElementException;

import org.apache.commons.compress.archivers.ArchiveOutputStream;
import org.apache.commons.compress.archivers.zip.Zip64Mode;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
Expand Down Expand Up @@ -385,8 +386,7 @@ private XSSFSheet getSheetFromZipEntryName(String sheetRef)
}

protected void injectData(ZipEntrySource zipEntrySource, OutputStream out) throws IOException {
ZipArchiveOutputStream zos = new ZipArchiveOutputStream(out);
zos.setUseZip64(zip64Mode);
ArchiveOutputStream zos = createArchiveOutputStream(out);
try {
Enumeration<? extends ZipArchiveEntry> en = zipEntrySource.getEntries();
while (en.hasMoreElements()) {
Expand Down Expand Up @@ -421,6 +421,16 @@ protected void injectData(ZipEntrySource zipEntrySource, OutputStream out) throw
}
}

/**
 * Creates the archive stream for the given target.
 * <p>
 * When {@code zip64Mode} is {@link Zip64Mode#Always} an
 * {@link OpcZipArchiveOutputStream} is returned; for every other mode a plain
 * {@link ZipArchiveOutputStream} configured with that mode is returned.
 *
 * @param out target stream the archive is written to
 * @return the archive stream to write entries to
 */
protected ZipArchiveOutputStream createArchiveOutputStream(OutputStream out) {
    if (zip64Mode == Zip64Mode.Always) {
        return new OpcZipArchiveOutputStream(out);
    }
    ZipArchiveOutputStream standardStream = new ZipArchiveOutputStream(out);
    standardStream.setUseZip64(zip64Mode);
    return standardStream;
}

private static void copyStreamAndInjectWorksheet(InputStream in, OutputStream out, InputStream worksheetData) throws IOException {
InputStreamReader inReader = new InputStreamReader(in, StandardCharsets.UTF_8);
OutputStreamWriter outWriter = new OutputStreamWriter(out, StandardCharsets.UTF_8);
Expand Down
167 changes: 167 additions & 0 deletions src/ooxml/java/org/apache/poi/xssf/streaming/Zip64Impl.java
@@ -0,0 +1,167 @@
package org.apache.poi.xssf.streaming;

import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.ZipEntry;

import static java.nio.charset.StandardCharsets.US_ASCII;

/**
 * Excel compatible Zip64 implementation.
 * <p>
 * Writes the individual ZIP records (local file header, data descriptor,
 * central directory header, end of central directory) to the wrapped stream.
 * Every {@code write*} method returns the number of bytes it emitted so the
 * caller can keep track of offsets.
 * <p>
 * For more information see https://github.com/rzymek/opczip
 *
 * @author Krzysztof Rzymkowski
 */
class Zip64Impl {
    private static final long PK0102 = 0x02014b50L;
    private static final long PK0304 = 0x04034b50L;
    private static final long PK0506 = 0x06054b50L;
    private static final long PK0708 = 0x08074b50L;

    private static final int VERSION_20 = 20;
    private static final int VERSION_45 = 45;
    private static final int DATA_DESCRIPTOR_USED = 0x08;
    private static final int ZIP64_FIELD = 0x0001;
    private static final long MAX32 = 0xffffffffL;

    private final OutputStream out;
    /** Bytes emitted by the writeShort/writeInt/writeLong helpers since the current record started. */
    private int written = 0;

    /** Bookkeeping for a single archive entry. */
    static class Entry {
        final String filename;
        long crc;
        long size;
        int compressedSize;
        int offset;

        Entry(String filename) {
            this.filename = filename;
        }
    }

    /**
     * @param out stream the records are written to
     */
    Zip64Impl(OutputStream out) {
        this.out = out;
    }

    /**
     * Write Local File Header
     *
     * @param entry entry the header describes
     * @return number of bytes written
     * @throws IOException if writing to the underlying stream fails
     */
    int writeLFH(Entry entry) throws IOException {
        written = 0;
        // Encode first: the length field must describe the bytes that are actually
        // written, and String.length() can differ from the encoded byte count
        // (a surrogate pair is encoded as a single replacement byte).
        byte[] filenameBytes = entry.filename.getBytes(US_ASCII);
        writeInt(PK0304);                        // "PK\003\004"
        writeShort(VERSION_45);                  // version required: 4.5
        writeShort(DATA_DESCRIPTOR_USED);        // flags: 8 = data descriptor used
        writeShort(ZipEntry.DEFLATED);           // compression method: 8 = deflate
        writeInt(0);                             // file modification time & date
        writeInt(entry.crc);                     // CRC-32
        writeInt(0);                             // compressed file size
        writeInt(0);                             // uncompressed file size
        writeShort(filenameBytes.length);        // filename length (in bytes)
        writeShort(0);                           // extra flags size
        out.write(filenameBytes);                // filename characters
        return written + filenameBytes.length;
    }

    /**
     * Write Data Descriptor
     *
     * @param entry entry whose CRC and sizes are recorded
     * @return number of bytes written
     * @throws IOException if writing to the underlying stream fails
     */
    int writeDAT(Entry entry) throws IOException {
        written = 0;
        writeInt(PK0708);                        // data descriptor signature "PK\007\008"
        writeInt(entry.crc);                     // crc-32
        writeLong(entry.compressedSize);         // compressed size (zip64)
        writeLong(entry.size);                   // uncompressed size (zip64)
        return written;
    }

    /**
     * Write Central directory file header
     * <p>
     * The ZIP64 extra field is only emitted when the uncompressed size does not
     * fit into 32 bits.
     *
     * @param entry entry the header describes
     * @return number of bytes written
     * @throws IOException if writing to the underlying stream fails
     */
    int writeCEN(Entry entry) throws IOException {
        written = 0;
        boolean useZip64 = entry.size > MAX32;
        // Encoded up front so the length field matches the bytes written below.
        byte[] filenameBytes = entry.filename.getBytes(US_ASCII);
        writeInt(PK0102);                              // "PK\001\002"
        writeShort(VERSION_45);                        // version made by: 4.5
        writeShort(useZip64 ? VERSION_45 : VERSION_20);// version required: 4.5
        writeShort(DATA_DESCRIPTOR_USED);              // flags: 8 = data descriptor used
        writeShort(ZipEntry.DEFLATED);                 // compression method: 8 = deflate
        writeInt(0);                                   // file modification time & date
        writeInt(entry.crc);                           // CRC-32
        writeInt(entry.compressedSize);                // compressed size
        writeInt(useZip64 ? MAX32 : entry.size);       // uncompressed size
        writeShort(filenameBytes.length);              // filename length (in bytes)
        writeShort(useZip64
                ? (2 + 2 + 8)  /* short + short + long*/
                : 0);                                  // extra field len
        writeShort(0);                                 // comment length
        writeShort(0);                                 // disk number where file starts
        writeShort(0);                                 // internal file attributes (unused)
        writeInt(0);                                   // external file attributes (unused)
        writeInt(entry.offset);                        // LFH offset
        out.write(filenameBytes);                      // filename characters
        if (useZip64) {
            // Extra field:
            writeShort(ZIP64_FIELD);                   // ZIP64 field signature
            writeShort(8);                             // size of extra field (below)
            writeLong(entry.size);                     // uncompressed size
        }
        return written + filenameBytes.length;
    }

    /**
     * Write End of central directory record (EOCD)
     *
     * @param entriesCount number of entries in the central directory
     * @param offset       offset of the start of the central directory
     * @param length       length of the central directory in bytes
     * @return number of bytes written
     * @throws IOException if writing to the underlying stream fails
     */
    int writeEND(int entriesCount, int offset, int length) throws IOException {
        written = 0;
        writeInt(PK0506);                        // "PK\005\006"
        writeShort(0);                           // number of this disk
        writeShort(0);                           // central directory start disk
        writeShort(entriesCount);                // number of directory entries on disk
        writeShort(entriesCount);                // total number of directory entries
        writeInt(length);                        // length of central directory
        writeInt(offset);                        // offset of central directory
        writeShort(0);                           // comment length
        return written;
    }

    /**
     * Writes a 16-bit short to the output stream in little-endian byte order.
     */
    private void writeShort(int v) throws IOException {
        OutputStream out = this.out;
        out.write((v >>> 0) & 0xff);
        out.write((v >>> 8) & 0xff);
        written += 2;
    }

    /**
     * Writes a 32-bit int to the output stream in little-endian byte order.
     * Only the low 32 bits of {@code v} are written.
     */
    private void writeInt(long v) throws IOException {
        OutputStream out = this.out;
        out.write((int) ((v >>> 0) & 0xff));
        out.write((int) ((v >>> 8) & 0xff));
        out.write((int) ((v >>> 16) & 0xff));
        out.write((int) ((v >>> 24) & 0xff));
        written += 4;
    }

    /**
     * Writes a 64-bit int to the output stream in little-endian byte order.
     */
    private void writeLong(long v) throws IOException {
        OutputStream out = this.out;
        out.write((int) ((v >>> 0) & 0xff));
        out.write((int) ((v >>> 8) & 0xff));
        out.write((int) ((v >>> 16) & 0xff));
        out.write((int) ((v >>> 24) & 0xff));
        out.write((int) ((v >>> 32) & 0xff));
        out.write((int) ((v >>> 40) & 0xff));
        out.write((int) ((v >>> 48) & 0xff));
        out.write((int) ((v >>> 56) & 0xff));
        written += 8;
    }

}