// zipinputstream.go

package siga
import (
"archive/zip"
"bytes"
"compress/flate"
"encoding/binary"
"io"
"io/ioutil"
"github.com/pkg/errors"
)
// forZipInputStream returns an io.Writer that accepts an ASiC-E container
// stream and forwards a rewritten copy of it to w.
//
// The rewrite targets the first file in the container ("mimetype"): its data
// descriptor is dropped and the values it carried are stored in the file's
// local file header instead. Every other file must either have no data
// descriptor at all or be compressed using DEFLATE. Offsets in the central
// directory that shift because of the removed descriptor are corrected.
//
// The purpose is to support ZIP parsers such as java.util.ZipInputStream,
// which read the archive sequentially without consulting the central
// directory and hence only handle a subset of ZIP-archives.
//
// Hardly any validation is done on the input: writing something that is not a
// valid ZIP-archive results in undefined behavior. Multi-disk archives and
// ZIP64 are not supported.
func forZipInputStream(w io.Writer) io.Writer {
	z := new(zipInputStream)
	z.output = w
	// The reader has no source yet; flushLocal points it at the buffered data
	// through flate.Resetter before every use.
	z.decomp = flate.NewReader(nil)
	return z
}
// Byte-level constants used while rewriting the container stream. They are
// strings so that they can be constants and be compared directly against
// string conversions of the buffered data.
const (
// Four-byte record signatures. Write inspects the first four buffered bytes
// and dispatches on the local file header, central directory entry and end
// of central directory signatures; the data descriptor signature is only used
// as part of asiceMimetypeDescriptor.
zipLocalSignature = "\x50\x4b\x03\x04"
zipDescriptorSignature = "\x50\x4b\x07\x08"
zipCentralSignature = "\x50\x4b\x01\x02"
zipEOCDSignature = "\x50\x4b\x05\x06"
// asiceMimetype is the exact content expected in the "mimetype" file.
asiceMimetype = "application/vnd.etsi.asic-e+zip"
// asiceMimetypeCRC32 is copied into the CRC-32 field of the mimetype local
// file header. Presumably the little-endian CRC-32 of asiceMimetype; the
// value is not recomputed anywhere in this file.
asiceMimetypeCRC32 = "\x8a\x21\xf9\x45"
// asiceMimetypeSize is len(asiceMimetype) (31) as a little-endian uint32. It
// is used for both the compressed and the uncompressed size fields.
asiceMimetypeSize = "\x1f\x00\x00\x00"
// asiceMimetypeDescriptor is the 16-byte data descriptor expected right
// after the mimetype data: signature, CRC-32, compressed size and
// uncompressed size.
asiceMimetypeDescriptor = zipDescriptorSignature +
asiceMimetypeCRC32 + asiceMimetypeSize + asiceMimetypeSize
)
// zipInputStream is the io.Writer implementation returned by
// forZipInputStream. It buffers incoming data until a complete ZIP record is
// available, optionally patches the record, and writes it to output.
type zipInputStream struct {
// buf holds data passed to Write that has not been flushed to output yet.
buf bytes.Buffer
// err is the error that stopped processing. Once it is set, every further
// call to Write returns it without consuming any data.
err error
// output is the destination of the modified stream.
output io.Writer
// written is the number of bytes written to output so far.
written int64
// decomp is a reusable DEFLATE reader. flushLocal resets it onto the
// buffered data to find out where a compressed entry ends.
decomp io.ReadCloser
// recalc is set once the mimetype data descriptor has been removed, which
// means that offsets in the central directory and in the end of central
// directory record must be reduced by the descriptor length.
recalc bool
}
// Write appends p to the internal buffer and then flushes every complete ZIP
// record found at the start of the buffer to the underlying writer. Data
// belonging to an incomplete record stays buffered until a later call.
//
// If a previous call failed, Write returns 0 and that same error without
// touching the buffer. Otherwise it always reports len(p) bytes as consumed,
// together with the error encountered during this call, if any.
func (z *zipInputStream) Write(p []byte) (int, error) {
	if z.err != nil {
		return 0, z.err
	}
	z.buf.Write(p)

	// Keep flushing records for as long as a signature can be read and the
	// previous record was flushed successfully.
	for z.buf.Len() >= 4 {
		var flushed bool
		switch sig := string(z.buf.Bytes()[:4]); sig {
		case zipLocalSignature:
			flushed = z.flushLocal()
		case zipCentralSignature:
			flushed = z.flushCentral()
		case zipEOCDSignature:
			flushed = z.flushEOCD()
		default:
			z.err = errors.Errorf("unknown signature: %x", sig)
		}
		if !flushed {
			// Either more data is needed or z.err was set.
			break
		}
	}
	return len(p), z.err
}
// flushLocal attempts to process and flush a single local file entry from the
// buffer. It returns false if it did not succeed.
//
// Note that it can return false without encountering an error (z.err == nil):
// this happens if the buffer does not have enough data.
//
// Three kinds of entries are accepted:
//   - entries without a data descriptor, which are flushed unchanged using the
//     compressed size from the header;
//   - DEFLATE-compressed entries with a data descriptor, whose end is found by
//     decompressing the buffered data, and which are flushed unchanged
//     together with a 16-byte descriptor;
//   - the uncompressed "mimetype" entry with a data descriptor, which must be
//     the first entry and is flushed with a patched header and without the
//     descriptor.
func (z *zipInputStream) flushLocal() bool {
	buf := z.buf.Bytes()
	if len(buf) < 30 {
		return false // Fixed-size part of the header is not complete yet.
	}

	descriptor := buf[6]&8 == 8 // Bit 3 of the general purpose flags.
	compression := binary.LittleEndian.Uint16(buf[8:10])
	size := binary.LittleEndian.Uint32(buf[18:22]) // Compressed size.
	// Widen the 16-bit lengths to int immediately: arithmetic performed in
	// uint16 (such as 30+name) silently wraps around for very long names and
	// would make the slice expression below panic.
	name := int(binary.LittleEndian.Uint16(buf[26:28]))
	extra := int(binary.LittleEndian.Uint16(buf[28:30]))

	header := 30 + name + extra
	if len(buf) < header {
		return false
	}

	// If no descriptor is used, then try to flush the header and data.
	if !descriptor {
		return z.flushBytes(header + int(size))
	}

	// If DEFLATE compression is used, then try to flush the header, data,
	// and descriptor.
	if compression == zip.Deflate {
		// Check if we have the entire compressed stream (DEFLATE
		// indicates which block is final). bytes.Reader implements
		// io.ByteReader, so the decompressor does not read past the
		// end of the stream and r.Len() tells how much was left over.
		r := bytes.NewReader(buf[header:])
		z.decomp.(flate.Resetter).Reset(r, nil) // Never fails.
		_, err := io.Copy(ioutil.Discard, z.decomp)
		if err == nil {
			err = z.decomp.Close()
		}
		if err == io.ErrUnexpectedEOF {
			return false // Not enough data yet.
		}
		if err != nil {
			z.err = errors.WithStack(err)
			return false
		}

		// Flush read portion of buf + 16 data descriptor bytes.
		return z.flushBytes(len(buf) - r.Len() + 16)
	}

	// Otherwise must be "mimetype".
	if string(buf[30:30+name]) != "mimetype" {
		z.err = errors.New("only mimetype may use a data descriptor and be uncompressed")
		return false
	}
	if z.written > 0 {
		z.err = errors.New("mimetype not first file in stream")
		return false
	}

	// Do not attempt to scan for the data descriptor signature over raw
	// data, compare against known value instead.
	end := header + len(asiceMimetype) + len(asiceMimetypeDescriptor)
	if len(buf) < end {
		return false
	}
	if data := string(buf[header:end]); data != asiceMimetype+asiceMimetypeDescriptor {
		z.err = errors.Errorf("unexpected mimetype data: %q", data)
		return false
	}

	// Update local file header and flush it with data. Skip descriptor.
	buf[6] &^= 8                       // Clear the data descriptor flag.
	copy(buf[14:], asiceMimetypeCRC32) // CRC-32.
	copy(buf[18:], asiceMimetypeSize)  // Compressed size.
	copy(buf[22:], asiceMimetypeSize)  // Uncompressed size.
	ok := z.flushBytes(header + len(asiceMimetype))
	if ok {
		z.buf.Next(len(asiceMimetypeDescriptor))
		z.recalc = true // Offsets need to be recalculated.
	}
	return ok
}
// flushCentral attempts to process and flush a single central directory file
// entry from the buffer. It returns false if it did not succeed.
//
// Note that it can return false without encountering an error (z.err == nil):
// this happens if the buffer does not have enough data.
//
// If the mimetype data descriptor was removed from the stream, the local
// header offset stored in the entry is reduced by the descriptor length. An
// offset that is non-zero but smaller than the descriptor length cannot be
// valid and is reported as an error instead of being wrapped around.
func (z *zipInputStream) flushCentral() bool {
	buf := z.buf.Bytes()
	if len(buf) < 46 {
		return false // Fixed-size part of the entry is not complete yet.
	}

	name := binary.LittleEndian.Uint16(buf[28:30])
	extra := binary.LittleEndian.Uint16(buf[30:32])
	comment := binary.LittleEndian.Uint16(buf[32:34])
	offset := binary.LittleEndian.Uint32(buf[42:46])

	// Ensure enough data for flushBytes before recalculating so it is only
	// done at most once.
	header := 46 + int(name) + int(extra) + int(comment)
	if len(buf) < header {
		return false
	}

	// If the mimetype data descriptor was removed and offsets need to be
	// recalculated, then do so for all entries except for the first one.
	if z.recalc && offset > 0 {
		removed := uint32(len(asiceMimetypeDescriptor))
		if offset < removed {
			// Unsigned subtraction would wrap around to a huge offset.
			z.err = errors.Errorf("central directory entry offset %d precedes removed data descriptor", offset)
			return false
		}
		binary.LittleEndian.PutUint32(buf[42:46], offset-removed)
	}
	return z.flushBytes(header)
}
// flushEOCD attempts to process and flush the end of central directory record
// from the buffer. It returns false if it did not succeed.
//
// Note that it can return false without encountering an error (z.err == nil):
// this happens if the buffer does not have enough data.
//
// If the mimetype data descriptor was removed from the stream, the start of
// central directory offset stored in the record is reduced by the descriptor
// length. An offset smaller than the descriptor length cannot be valid and is
// reported as an error instead of being wrapped around.
func (z *zipInputStream) flushEOCD() bool {
	buf := z.buf.Bytes()
	if len(buf) < 22 {
		return false // Fixed-size part of the record is not complete yet.
	}

	offset := binary.LittleEndian.Uint32(buf[16:20])
	comment := binary.LittleEndian.Uint16(buf[20:22])

	// Ensure enough data for flushBytes before recalculating so it is only
	// done at most once.
	header := 22 + int(comment)
	if len(buf) < header {
		return false
	}

	// If the mimetype data descriptor was removed and offsets need to be
	// recalculated, then do so for the start of central directory offset.
	if z.recalc {
		removed := uint32(len(asiceMimetypeDescriptor))
		if offset < removed {
			// Unsigned subtraction would wrap around to a huge offset.
			z.err = errors.Errorf("central directory offset %d precedes removed data descriptor", offset)
			return false
		}
		binary.LittleEndian.PutUint32(buf[16:20], offset-removed)
	}
	return z.flushBytes(header)
}
// flushBytes moves the first n buffered bytes to the underlying writer. It
// returns false without consuming anything if fewer than n bytes are buffered.
//
// The bytes reported as written by the underlying writer are added to
// z.written. If the underlying writer returns an error, it is stored in z.err
// and false is returned.
func (z *zipInputStream) flushBytes(n int) bool {
	if n > z.buf.Len() {
		return false
	}
	chunk := z.buf.Next(n)
	wrote, err := z.output.Write(chunk)
	z.written += int64(wrote)
	if err == nil {
		return true
	}
	z.err = errors.WithStack(err)
	return false
}