-
Notifications
You must be signed in to change notification settings - Fork 0
/
etag.go
330 lines (309 loc) · 10.2 KB
/
etag.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
// MinIO Cloud Storage, (C) 2021 MinIO, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package etag provides an implementation of S3 ETags.
//
// Each S3 object has an associated ETag that can be
// used to e.g. quickly compare objects or check whether
// the content of an object has changed.
//
// In general, an S3 ETag is an MD5 checksum of the object
// content. However, there are many exceptions to this rule.
//
//
// Single-part Upload
//
// In case of a basic single-part PUT operation - without server
// side encryption or object compression - the ETag of an object
// is its content MD5.
//
//
// Multi-part Upload
//
// The ETag of an object does not correspond to its content MD5
// when the object is uploaded in multiple parts via the S3
// multipart API. Instead, S3 first computes a MD5 of each part:
// e1 := MD5(part-1)
// e2 := MD5(part-2)
// ...
// eN := MD5(part-N)
//
// Then, the ETag of the object is computed as MD5 of all individual
// part checksums. S3 also encodes the number of parts into the ETag
// by appending a -<number-of-parts> at the end:
// ETag := MD5(e1 || e2 || e3 ... || eN) || -N
//
// For example: ceb8853ddc5086cc4ab9e149f8f09c88-5
//
// However, this scheme is only used for multipart objects that are
// not encrypted.
//
// Server-side Encryption
//
// S3 specifies three types of server-side-encryption - SSE-C, SSE-S3
// and SSE-KMS - with different semantics w.r.t. ETags.
// In case of SSE-S3, the ETag of an object is computed the same as
// for single resp. multipart plaintext objects. In particular,
// the ETag of a singlepart SSE-S3 object is its content MD5.
//
// In case of SSE-C and SSE-KMS, the ETag of an object is computed
// differently. For singlepart uploads the ETag is not the content
// MD5 of the object. For multipart uploads the ETag is also not
// the MD5 of the individual part checksums but it still contains
// the number of parts as suffix.
//
// Instead, the ETag is kind of unpredictable for S3 clients when
// an object is encrypted using SSE-C or SSE-KMS. Maybe AWS S3
// computes the ETag as MD5 of the encrypted content but there is
// no way to verify this assumption since the encryption happens
// inside AWS S3.
// Therefore, S3 clients must not make any assumption about ETags
// in case of SSE-C or SSE-KMS except that the ETag is well-formed.
//
// To put all of this into a simple rule:
// SSE-S3 : ETag == MD5
// SSE-C : ETag != MD5
// SSE-KMS: ETag != MD5
//
//
// Encrypted ETags
//
// An S3 implementation has to remember the content MD5 of objects
// in case of SSE-S3. However, storing the ETag of an encrypted
// object in plaintext may reveal some information about the object.
// For example, two objects with the same ETag are identical with
// a very high probability.
//
// Therefore, an S3 implementation may encrypt an ETag before storing
// it. In this case, the stored ETag may not be a well-formed S3 ETag.
// For example, it can be larger due to a checksum added by authenticated
// encryption schemes. Such an ETag must be decrypted before sent to an
// S3 client.
//
//
// S3 Clients
//
// There are many different S3 client implementations. Most of them
// access the ETag by looking for the HTTP response header key "Etag".
// However, some of them assume that the header key has to be "ETag"
// (case-sensitive) and will fail otherwise.
// Further, some clients require that the ETag value is a double-quoted
// string. Therefore, this package provides dedicated functions for
// adding and extracing the ETag to/from HTTP headers.
package etag
import (
"bytes"
"crypto/md5"
"encoding/base64"
"encoding/hex"
"errors"
"fmt"
"net/http"
"strconv"
"strings"
)
// ETag is a single S3 ETag.
//
// An S3 ETag sometimes corresponds to the MD5 of
// the S3 object content. However, when an object
// is encrypted, compressed or uploaded using
// the S3 multipart API then its ETag is not
// necessarily the MD5 of the object content.
//
// For a more detailed description of S3 ETags
// take a look at the package documentation.
type ETag []byte
// String returns the string representation of the ETag.
//
// The returned string is a hex representation of the
// binary ETag with an optional '-<part-number>' suffix.
func (e ETag) String() string {
if e.IsMultipart() {
return hex.EncodeToString(e[:16]) + string(e[16:])
}
return hex.EncodeToString(e)
}
// IsEncrypted reports whether the ETag is encrypted.
func (e ETag) IsEncrypted() bool {
return len(e) > 16 && !bytes.ContainsRune(e, '-')
}
// IsMultipart reports whether the ETag belongs to an
// object that has been uploaded using the S3 multipart
// API.
// An S3 multipart ETag has a -<part-number> suffix.
func (e ETag) IsMultipart() bool {
return len(e) > 16 && bytes.ContainsRune(e, '-')
}
// Parts returns the number of object parts that are
// referenced by this ETag. It returns 1 if the object
// has been uploaded using the S3 singlepart API.
//
// Parts may panic if the ETag is an invalid multipart
// ETag.
func (e ETag) Parts() int {
if !e.IsMultipart() {
return 1
}
n := bytes.IndexRune(e, '-')
parts, err := strconv.Atoi(string(e[n+1:]))
if err != nil {
panic(err) // malformed ETag
}
return parts
}
var _ Tagger = ETag{} // compiler check
// ETag returns the ETag itself.
//
// By providing this method ETag implements
// the Tagger interface.
func (e ETag) ETag() ETag { return e }
// FromContentMD5 decodes and returns the Content-MD5
// as ETag, if set. If no Content-MD5 header is set
// it returns an empty ETag and no error.
func FromContentMD5(h http.Header) (ETag, error) {
v, ok := h["Content-Md5"]
if !ok {
return nil, nil
}
if v[0] == "" {
return nil, errors.New("etag: content-md5 is set but contains no value")
}
b, err := base64.StdEncoding.Strict().DecodeString(v[0])
if err != nil {
return nil, err
}
if len(b) != md5.Size {
return nil, errors.New("etag: invalid content-md5")
}
return ETag(b), nil
}
// Multipart computes an S3 multipart ETag given a list of
// S3 singlepart ETags. It returns nil if the list of
// ETags is empty.
//
// Any encrypted or multipart ETag will be ignored and not
// used to compute the returned ETag.
func Multipart(etags ...ETag) ETag {
if len(etags) == 0 {
return nil
}
var n int64
h := md5.New()
for _, etag := range etags {
if !etag.IsMultipart() && !etag.IsEncrypted() {
h.Write(etag)
n++
}
}
etag := append(h.Sum(nil), '-')
return strconv.AppendInt(etag, n, 10)
}
// Set adds the ETag to the HTTP headers. It overwrites any
// existing ETag entry.
//
// Due to legacy S3 clients, that make incorrect assumptions
// about HTTP headers, Set should be used instead of
// http.Header.Set(...). Otherwise, some S3 clients will not
// able to extract the ETag.
func Set(etag ETag, h http.Header) {
// Some (broken) S3 clients expect the ETag header to
// literally "ETag" - not "Etag". Further, some clients
// expect an ETag in double quotes. Therefore, we set the
// ETag directly as map entry instead of using http.Header.Set
h["ETag"] = []string{`"` + etag.String() + `"`}
}
// Get extracts and parses an ETag from the given HTTP headers.
// It returns an error when the HTTP headers do not contain
// an ETag entry or when the ETag is malformed.
//
// Get only accepts AWS S3 compatible ETags - i.e. no
// encrypted ETags - and therefore is stricter than Parse.
func Get(h http.Header) (ETag, error) {
const strict = true
if v := h.Get("Etag"); v != "" {
return parse(v, strict)
}
v, ok := h["ETag"]
if !ok || len(v) == 0 {
return nil, errors.New("etag: HTTP header does not contain an ETag")
}
return parse(v[0], strict)
}
// Equal returns true if and only if the two ETags are
// identical.
func Equal(a, b ETag) bool { return bytes.Equal(a, b) }
// Parse parses s as an S3 ETag, returning the result.
// The string can be an encrypted, singlepart
// or multipart S3 ETag. It returns an error if s is
// not a valid textual representation of an ETag.
func Parse(s string) (ETag, error) {
const strict = false
return parse(s, strict)
}
// parse parse s as an S3 ETag, returning the result.
// It operates in one of two modes:
// - strict
// - non-strict
//
// In strict mode, parse only accepts ETags that
// are AWS S3 compatible. In particular, an AWS
// S3 ETag always consists of a 128 bit checksum
// value and an optional -<part-number> suffix.
// Therefore, s must have the following form in
// strict mode: <32-hex-characters>[-<integer>]
//
// In non-strict mode, parse also accepts ETags
// that are not AWS S3 compatible - e.g. encrypted
// ETags.
func parse(s string, strict bool) (ETag, error) {
// An S3 ETag may be a double-quoted string.
// Therefore, we remove double quotes at the
// start and end, if any.
if strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) {
s = s[1 : len(s)-1]
}
// An S3 ETag may be a multipart ETag that
// contains a '-' followed by a number.
// If the ETag does not a '-' is is either
// a singlepart or encrypted ETag.
n := strings.IndexRune(s, '-')
if n == -1 {
etag, err := hex.DecodeString(s)
if err != nil {
return nil, err
}
if strict && len(etag) != 16 { // AWS S3 ETags are always 128 bit long
return nil, fmt.Errorf("etag: invalid length %d", len(etag))
}
return ETag(etag), nil
}
prefix, suffix := s[:n], s[n:]
if len(prefix) != 32 {
return nil, fmt.Errorf("etag: invalid prefix length %d", len(prefix))
}
if len(suffix) <= 1 {
return nil, errors.New("etag: suffix is not a part number")
}
etag, err := hex.DecodeString(prefix)
if err != nil {
return nil, err
}
partNumber, err := strconv.Atoi(suffix[1:]) // suffix[0] == '-' Therefore, we start parsing at suffix[1]
if err != nil {
return nil, err
}
if strict && (partNumber == 0 || partNumber > 10000) {
return nil, fmt.Errorf("etag: invalid part number %d", partNumber)
}
return ETag(append(etag, suffix...)), nil
}