From 8cb78d307ae35ca52855873a049e4eaa383802b3 Mon Sep 17 00:00:00 2001 From: Krzysztof Rzymkowski Date: Wed, 12 Jun 2019 20:53:08 +0200 Subject: [PATCH 1/2] [bug-57342] Excel compatible Zip64 implementation For more information see https://github.com/rzymek/opczip --- .../poi/xssf/streaming/OpcOutputStream.java | 124 +++++++++++++ .../streaming/OpcZipArchiveOutputStream.java | 63 +++++++ .../poi/xssf/streaming/SXSSFWorkbook.java | 14 +- .../apache/poi/xssf/streaming/Zip64Impl.java | 167 ++++++++++++++++++ 4 files changed, 366 insertions(+), 2 deletions(-) create mode 100644 src/ooxml/java/org/apache/poi/xssf/streaming/OpcOutputStream.java create mode 100644 src/ooxml/java/org/apache/poi/xssf/streaming/OpcZipArchiveOutputStream.java create mode 100644 src/ooxml/java/org/apache/poi/xssf/streaming/Zip64Impl.java diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/OpcOutputStream.java b/src/ooxml/java/org/apache/poi/xssf/streaming/OpcOutputStream.java new file mode 100644 index 00000000000..50745ef4a62 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/streaming/OpcOutputStream.java @@ -0,0 +1,124 @@ +package org.apache.poi.xssf.streaming; + + +import org.apache.poi.xssf.streaming.Zip64Impl.Entry; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.*; + +/** + * ZIP64 OutputStream implementation compatible with MS Excel. + * Drop-in replacement for `java.util.zip.ZipOutputStream`. 
+ * + * For more information see https://github.com/rzymek/opczip + * + * @author Krzysztof Rzymkowski + */ +class OpcOutputStream extends DeflaterOutputStream { + + private final Zip64Impl spec; + private final List entries = new ArrayList<>(); + private final CRC32 crc = new CRC32(); + private Entry current; + private int written = 0; + private boolean finished = false; + + /** + * Creates ZIP64 output stream + * + * @param out target stream to write compressed data to + */ + public OpcOutputStream(OutputStream out) { + super(out, new Deflater(Deflater.DEFAULT_COMPRESSION, true)); + this.spec = new Zip64Impl(out); + } + + /** + * @see Deflater#setLevel(int) + */ + public void setLevel(int level) { + super.def.setLevel(level); + } + + /** + * @see ZipOutputStream#putNextEntry(ZipEntry) + */ + public void putNextEntry(String name) throws IOException { + if (current != null) { + closeEntry(); + } + current = new Entry(name); + current.offset = written; + written += spec.writeLFH(current); + entries.add(current); + } + + /** + * @see ZipOutputStream#closeEntry() + */ + public void closeEntry() throws IOException { + if (current == null) { + throw new IllegalStateException("not current zip current"); + } + def.finish(); + while (!def.finished()) { + deflate(); + } + + current.size = def.getBytesRead(); + current.compressedSize = (int) def.getBytesWritten(); + current.crc = crc.getValue(); + + written += current.compressedSize; + written += spec.writeDAT(current); + current = null; + def.reset(); + crc.reset(); + } + + + /** + * @see ZipOutputStream#finish() + */ + @Override + public void finish() throws IOException { + if(finished){ + return; + } + if(current != null) { + closeEntry(); + } + int offset = written; + for (Entry entry : entries) { + written += spec.writeCEN(entry); + } + written += spec.writeEND(entries.size(), offset, written - offset); + finished = true; + } + + /** + * @see ZipOutputStream#write(byte[], int, int) + */ + @Override + public synchronized 
void write(byte[] b, int off, int len) throws IOException { + if (off < 0 || len < 0 || off > b.length - len) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return; + } + super.write(b, off, len); + crc.update(b, off, len); + } + + /** + * @see ZipOutputStream#close() + */ + @Override + public void close() throws IOException { + finish(); + out.close(); + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/OpcZipArchiveOutputStream.java b/src/ooxml/java/org/apache/poi/xssf/streaming/OpcZipArchiveOutputStream.java new file mode 100644 index 00000000000..6a8ea497f0d --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/streaming/OpcZipArchiveOutputStream.java @@ -0,0 +1,63 @@ +package org.apache.poi.xssf.streaming; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; + +import java.io.IOException; +import java.io.OutputStream; + +class OpcZipArchiveOutputStream extends ZipArchiveOutputStream { + private final OpcOutputStream out; + + OpcZipArchiveOutputStream(OutputStream out) { + super(out); + this.out = new OpcOutputStream(out); + } + + @Override + public void setLevel(int level) { + out.setLevel(level); + } + + + @Override + public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException { + out.putNextEntry(archiveEntry.getName()); + } + + @Override + public void closeArchiveEntry() throws IOException { + out.closeEntry(); + } + + + @Override + public void finish() throws IOException { + out.finish(); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + } + + @Override + public void close() throws IOException { + out.close(); + } + + @Override + public void write(int b) throws IOException { + out.write(b); + } + + @Override + public void flush() throws IOException { + out.flush(); + } + + @Override + public void write(byte[] b) throws IOException { + out.write(b); + } +} diff 
--git a/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java b/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java index 57d1f4f5904..5cf22eead5b 100644 --- a/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java +++ b/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java @@ -32,6 +32,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more import java.util.Map; import java.util.NoSuchElementException; +import org.apache.commons.compress.archivers.ArchiveOutputStream; import org.apache.commons.compress.archivers.zip.Zip64Mode; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; @@ -385,8 +386,7 @@ private XSSFSheet getSheetFromZipEntryName(String sheetRef) } protected void injectData(ZipEntrySource zipEntrySource, OutputStream out) throws IOException { - ZipArchiveOutputStream zos = new ZipArchiveOutputStream(out); - zos.setUseZip64(zip64Mode); + ArchiveOutputStream zos = createArchiveOutputStream(out); try { Enumeration en = zipEntrySource.getEntries(); while (en.hasMoreElements()) { @@ -421,6 +421,16 @@ protected void injectData(ZipEntrySource zipEntrySource, OutputStream out) throw } } + protected ZipArchiveOutputStream createArchiveOutputStream(OutputStream out) { + if (Zip64Mode.Always.equals(zip64Mode)) { + return new OpcZipArchiveOutputStream(out); + } else { + ZipArchiveOutputStream zos = new ZipArchiveOutputStream(out); + zos.setUseZip64(zip64Mode); + return zos; + } + } + private static void copyStreamAndInjectWorksheet(InputStream in, OutputStream out, InputStream worksheetData) throws IOException { InputStreamReader inReader = new InputStreamReader(in, StandardCharsets.UTF_8); OutputStreamWriter outWriter = new OutputStreamWriter(out, StandardCharsets.UTF_8); diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/Zip64Impl.java b/src/ooxml/java/org/apache/poi/xssf/streaming/Zip64Impl.java new file mode 
100644 index 00000000000..cedbdb0ee49 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/streaming/Zip64Impl.java @@ -0,0 +1,167 @@ +package org.apache.poi.xssf.streaming; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.ZipEntry; + +import static java.nio.charset.StandardCharsets.US_ASCII; + +/** + * Excel compatible Zip64 implementation. + * For more information see https://github.com/rzymek/opczip + * + * @author Krzysztof Rzymkowski + */ +class Zip64Impl { + private static final long PK0102 = 0x02014b50L; + private static final long PK0304 = 0x04034b50L; + private static final long PK0506 = 0x06054b50L; + private static final long PK0708 = 0x08074b50L; + + private static final int VERSION_20 = 20; + private static final int VERSION_45 = 45; + private static final int DATA_DESCRIPTOR_USED = 0x08; + private static final int ZIP64_FIELD = 0x0001; + private static final long MAX32 = 0xffffffffL; + + private final OutputStream out; + private int written = 0; + + static class Entry { + final String filename; + long crc; + long size; + int compressedSize; + int offset; + + Entry(String filename) { + this.filename = filename; + } + } + + Zip64Impl(OutputStream out) { + this.out = out; + } + + /** + * Write Local File Header + */ + int writeLFH(Entry entry) throws IOException { + written = 0; + writeInt(PK0304); // "PK\003\004" + writeShort(VERSION_45); // version required: 4.5 + writeShort(DATA_DESCRIPTOR_USED); // flags: 8 = data descriptor used + writeShort(ZipEntry.DEFLATED); // compression method: 8 = deflate + writeInt(0); // file modification time & date + writeInt(entry.crc); // CRC-32 + writeInt(0); // compressed file size + writeInt(0); // uncompressed file size + writeShort(entry.filename.length()); // filename length + writeShort(0); // extra flags size + byte[] filenameBytes = entry.filename.getBytes(US_ASCII); + out.write(filenameBytes); // filename characters + return written + filenameBytes.length; + } + + /** + * 
Write Data Descriptor + */ + int writeDAT(Entry entry) throws IOException { + written = 0; + writeInt(PK0708); // data descriptor signature "PK\007\010" + writeInt(entry.crc); // crc-32 + writeLong(entry.compressedSize); // compressed size (zip64) + writeLong(entry.size); // uncompressed size (zip64) + return written; + } + + /** + * Write Central directory file header + */ + int writeCEN(Entry entry) throws IOException { + written = 0; + boolean useZip64 = entry.size > MAX32; + writeInt(PK0102); // "PK\001\002" + writeShort(VERSION_45); // version made by: 4.5 + writeShort(useZip64 ? VERSION_45 : VERSION_20);// version required: 4.5 (zip64) or 2.0 + writeShort(DATA_DESCRIPTOR_USED); // flags: 8 = data descriptor used + writeShort(ZipEntry.DEFLATED); // compression method: 8 = deflate + writeInt(0); // file modification time & date + writeInt(entry.crc); // CRC-32 + writeInt(entry.compressedSize); // compressed size + writeInt(useZip64 ? MAX32 : entry.size); // uncompressed size + writeShort(entry.filename.length()); // filename length + writeShort(useZip64 + ? 
(2 + 2 + 8) /* short + short + long*/ + : 0); // extra field len + writeShort(0); // comment length + writeShort(0); // disk number where file starts + writeShort(0); // internal file attributes (unused) + writeInt(0); // external file attributes (unused) + writeInt(entry.offset); // LFH offset + byte[] filenameBytes = entry.filename.getBytes(US_ASCII); + out.write(filenameBytes); // filename characters + if (useZip64) { + // Extra field: + writeShort(ZIP64_FIELD); // ZIP64 field signature + writeShort(8); // size of extra field (below) + writeLong(entry.size); // uncompressed size + } + return written + filenameBytes.length; + } + + /** + * Write End of central directory record (EOCD) + */ + int writeEND(int entriesCount, int offset, int length) throws IOException { + written = 0; + writeInt(PK0506); // "PK\005\006" + writeShort(0); // number of this disk + writeShort(0); // central directory start disk + writeShort(entriesCount); // number of directory entries on disk + writeShort(entriesCount); // total number of directory entries + writeInt(length); // length of central directory + writeInt(offset); // offset of central directory + writeShort(0); // comment length + return written; + } + + /** + * Writes a 16-bit short to the output stream in little-endian byte order. + */ + private void writeShort(int v) throws IOException { + OutputStream out = this.out; + out.write((v >>> 0) & 0xff); + out.write((v >>> 8) & 0xff); + written += 2; + } + + /** + * Writes a 32-bit int to the output stream in little-endian byte order. + */ + private void writeInt(long v) throws IOException { + OutputStream out = this.out; + out.write((int) ((v >>> 0) & 0xff)); + out.write((int) ((v >>> 8) & 0xff)); + out.write((int) ((v >>> 16) & 0xff)); + out.write((int) ((v >>> 24) & 0xff)); + written += 4; + } + + /** + * Writes a 64-bit int to the output stream in little-endian byte order. 
+ */ + private void writeLong(long v) throws IOException { + OutputStream out = this.out; + out.write((int) ((v >>> 0) & 0xff)); + out.write((int) ((v >>> 8) & 0xff)); + out.write((int) ((v >>> 16) & 0xff)); + out.write((int) ((v >>> 24) & 0xff)); + out.write((int) ((v >>> 32) & 0xff)); + out.write((int) ((v >>> 40) & 0xff)); + out.write((int) ((v >>> 48) & 0xff)); + out.write((int) ((v >>> 56) & 0xff)); + written += 8; + } + +} From e05e600df4dd7efd90b42d441baeaffc60affefd Mon Sep 17 00:00:00 2001 From: Krzysztof Rzymkowski Date: Wed, 12 Jun 2019 23:05:09 +0200 Subject: [PATCH 2/2] [bug-57342] apache licence header --- .../poi/xssf/streaming/OpcOutputStream.java | 18 +++++++++++++++++- .../streaming/OpcZipArchiveOutputStream.java | 17 +++++++++++++++++ .../apache/poi/xssf/streaming/Zip64Impl.java | 17 +++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/OpcOutputStream.java b/src/ooxml/java/org/apache/poi/xssf/streaming/OpcOutputStream.java index 50745ef4a62..35ddffc8d4a 100644 --- a/src/ooxml/java/org/apache/poi/xssf/streaming/OpcOutputStream.java +++ b/src/ooxml/java/org/apache/poi/xssf/streaming/OpcOutputStream.java @@ -1,5 +1,21 @@ -package org.apache.poi.xssf.streaming; +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.streaming; import org.apache.poi.xssf.streaming.Zip64Impl.Entry; diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/OpcZipArchiveOutputStream.java b/src/ooxml/java/org/apache/poi/xssf/streaming/OpcZipArchiveOutputStream.java index 6a8ea497f0d..7bdc3dcdcc8 100644 --- a/src/ooxml/java/org/apache/poi/xssf/streaming/OpcZipArchiveOutputStream.java +++ b/src/ooxml/java/org/apache/poi/xssf/streaming/OpcZipArchiveOutputStream.java @@ -1,3 +1,20 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + package org.apache.poi.xssf.streaming; import org.apache.commons.compress.archivers.ArchiveEntry; diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/Zip64Impl.java b/src/ooxml/java/org/apache/poi/xssf/streaming/Zip64Impl.java index cedbdb0ee49..4977e967baf 100644 --- a/src/ooxml/java/org/apache/poi/xssf/streaming/Zip64Impl.java +++ b/src/ooxml/java/org/apache/poi/xssf/streaming/Zip64Impl.java @@ -1,3 +1,20 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + package org.apache.poi.xssf.streaming; import java.io.IOException;