forked from graymeta/stow
/
container.go
314 lines (273 loc) · 9.84 KB
/
container.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
package s3
import (
"context"
"fmt"
"io"
"strconv"
"strings"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/flyteorg/stow"
"github.com/pkg/errors"
)
// container represents a single Amazon S3 bucket. It carries the bucket name,
// the client used to talk to S3 and the location information reported by
// Region.
type container struct {
// name is the bucket name; it is sent as the Bucket of every request and is
// therefore needed to retrieve items.
name string
// client is responsible for performing the requests.
client *s3.S3
// region describes where the S3 bucket lives (AWS region/availability zone);
// it is the value returned by Region.
region string
// customEndpoint is not read anywhere in this file.
// NOTE(review): presumably a non-default S3 endpoint set by whoever builds
// the container — confirm against the constructor.
customEndpoint string
}
// PreSignRequest builds a pre-signed URL for the object stored under id in
// this container.
//
// Supported client methods are stow.ClientMethodGet (GetObject) and
// stow.ClientMethodPut (PutObject); any other method is rejected with an
// error. For PUT, a non-empty params.ContentMD5 is set on the PutObject input
// and, when params.AddContentMD5Metadata is true, is additionally attached as
// object metadata under the stow.FlyteContentMD5 key. ctx is attached to the
// underlying request and params.ExpiresIn is passed to Presign as the expiry.
//
// On success the response carries the URL plus RequiredRequestHeaders, the
// headers assembled for the PUT case (nil for GET). On failure the zero
// stow.PresignResponse is returned together with the error, so callers never
// see a half-filled response.
func (c *container) PreSignRequest(ctx context.Context, clientMethod stow.ClientMethod, id string,
	params stow.PresignRequestParams) (response stow.PresignResponse, err error) {
	var req *request.Request
	var requestHeaders map[string]string

	switch clientMethod {
	case stow.ClientMethodGet:
		// The discarded second value is the request's output struct, not an error.
		req, _ = c.client.GetObjectRequest(&s3.GetObjectInput{
			Bucket: aws.String(c.name),
			Key:    aws.String(id),
		})
	case stow.ClientMethodPut:
		// Leave ContentMD5 nil when no digest was supplied so that it is not
		// made part of the request.
		var contentMD5 *string
		if len(params.ContentMD5) > 0 {
			contentMD5 = aws.String(params.ContentMD5)
		}

		metadata := make(map[string]*string)
		// NOTE(review): Content-Length is derived from the length of the MD5
		// string rather than from a payload size, and Content-MD5 is listed
		// even when it is empty. Both are preserved as-is; confirm that this
		// is what consumers of RequiredRequestHeaders expect.
		requestHeaders = map[string]string{
			"Content-Length": strconv.Itoa(len(params.ContentMD5)),
			"Content-MD5":    params.ContentMD5,
		}
		if params.AddContentMD5Metadata {
			metadata[stow.FlyteContentMD5] = aws.String(params.ContentMD5)
			requestHeaders[fmt.Sprintf("x-amz-meta-%s", stow.FlyteContentMD5)] = params.ContentMD5
		}

		// The discarded second value is the request's output struct, not an error.
		req, _ = c.client.PutObjectRequest(&s3.PutObjectInput{
			Bucket:     aws.String(c.name),
			Key:        aws.String(id),
			ContentMD5: contentMD5,
			Metadata:   metadata,
		})
	default:
		return stow.PresignResponse{}, fmt.Errorf("unsupported client method [%v]", clientMethod.String())
	}

	req.SetContext(ctx)
	url, err := req.Presign(params.ExpiresIn)
	if err != nil {
		// Do not hand back headers for a URL that could not be signed.
		return stow.PresignResponse{}, err
	}
	return stow.PresignResponse{Url: url, RequiredRequestHeaders: requestHeaders}, nil
}
// ID returns the identifier of the container. For S3 this is the bucket name
// stored in c.name, i.e. the same value that Name returns.
func (c *container) ID() string {
return c.name
}
// Name returns the name of the container, which is the bucket name used as the
// Bucket of the S3 requests issued by this type.
func (c *container) Name() string {
return c.name
}
// Item returns the stow.Item stored under id in this container.
//
// The lookup is delegated to getItem, which only issues a HeadObject request,
// so the returned item carries the object's metadata and none of its content.
//
// On failure the returned stow.Item is a true nil interface. Returning the
// *item from getItem directly would wrap a nil pointer in a non-nil interface
// value, which makes `item != nil` checks on the caller's side misleading.
func (c *container) Item(id string) (stow.Item, error) {
	it, err := c.getItem(id)
	if err != nil {
		return nil, err
	}
	return it, nil
}
// Items lists one page of objects in the container whose keys start with
// prefix.
//
// cursor is sent as StartAfter (listing resumes after that key; pass "" for
// the first page) and count is sent as MaxKeys. Objects whose storage class is
// "GLACIER" are left out of the result.
//
// The second return value is the cursor for the next page: the key of the last
// object S3 listed on this page when the listing is truncated, or "" when the
// listing is complete. Because the cursor is taken from the raw listing rather
// than from the filtered result, a page consisting only of GLACIER objects
// yields no items together with a non-empty cursor instead of panicking.
func (c *container) Items(prefix, cursor string, count int) ([]stow.Item, string, error) {
	itemLimit := int64(count)

	params := &s3.ListObjectsV2Input{
		Bucket:     aws.String(c.Name()),
		StartAfter: &cursor,
		MaxKeys:    &itemLimit,
		Prefix:     &prefix,
	}

	response, err := c.client.ListObjectsV2(params)
	if err != nil {
		return nil, "", errors.Wrap(err, "Items, listing objects")
	}

	var containerItems []stow.Item
	for _, object := range response.Contents {
		// aws.StringValue treats a nil pointer as "", so a listing entry
		// without a storage class or ETag cannot cause a nil dereference.
		if aws.StringValue(object.StorageClass) == "GLACIER" {
			continue
		}

		// The ETag arrives wrapped in quotes; keep only the bare value.
		etag := cleanEtag(aws.StringValue(object.ETag))

		containerItems = append(containerItems, &item{
			container: c,
			client:    c.client,
			properties: properties{
				ETag:         &etag,
				Key:          object.Key,
				LastModified: object.LastModified,
				Owner:        object.Owner,
				Size:         object.Size,
				StorageClass: object.StorageClass,
			},
		})
	}

	// When more results remain, the next page starts after the last key S3
	// returned on this one, whether or not that object was filtered out above.
	startAfter := ""
	if n := len(response.Contents); n > 0 && aws.BoolValue(response.IsTruncated) {
		startAfter = aws.StringValue(response.Contents[n-1].Key)
	}

	return containerItems, startAfter, nil
}
// RemoveItem deletes the object stored under id from this container by issuing
// a DeleteObject request. A failed request is returned wrapped together with
// the request input.
func (c *container) RemoveItem(id string) error {
	input := &s3.DeleteObjectInput{
		Bucket: aws.String(c.Name()),
		Key:    aws.String(id),
	}

	if _, err := c.client.DeleteObject(input); err != nil {
		return errors.Wrapf(err, "RemoveItem, deleting object %+v", input)
	}
	return nil
}
// Put uploads the content read from r to the container under the key name and
// returns an item describing the stored object.
//
// metadata is converted with prepMetadata, so every value must be a string.
// size is not sent to S3; it is only recorded as the size of the returned item.
// After the upload a HeadObject request is made to learn the object's ETag;
// that request is best-effort, and if it fails or carries no ETag the item is
// returned with an empty ETag rather than an error.
func (c *container) Put(name string, r io.Reader, size int64, metadata map[string]interface{}) (stow.Item, error) {
	// S3 expects map[string]*string, stow hands over map[string]interface{}.
	s3Metadata, err := prepMetadata(metadata)
	if err != nil {
		return nil, errors.Wrap(err, "unable to create or update item, preparing metadata")
	}

	upload := &s3manager.UploadInput{
		Bucket:   aws.String(c.name),
		Key:      aws.String(name),
		Body:     r,
		Metadata: s3Metadata,
	}
	if _, err = s3manager.NewUploaderWithClient(c.client).Upload(upload); err != nil {
		return nil, errors.Wrap(err, "PutObject, putting object")
	}

	// Best-effort lookup of the ETag of the object that was just written.
	var etag string
	head, headErr := c.client.HeadObject(&s3.HeadObjectInput{
		Key:    aws.String(name),
		Bucket: aws.String(c.name),
	})
	if headErr == nil && head.ETag != nil {
		etag = cleanEtag(*head.ETag)
	}

	// LastModified, Owner and StorageClass are left unset here; only the
	// fields known at this point are filled in.
	return &item{
		container: c,
		client:    c.client,
		properties: properties{
			ETag: &etag,
			Key:  &name,
			Size: &size,
		},
	}, nil
}
// Region returns the region/availability zone recorded for the container, i.e.
// the value of the region field.
func (c *container) Region() string {
return c.region
}
// getItem looks up the object stored under id with a HeadObject request and
// returns an item populated from the response: ETag, key, last-modified time,
// size, storage class and user metadata. The object's content is not
// downloaded. The owner is left nil because the response does not carry it.
//
// A HEAD response is more detailed than what Put has at hand, which is why
// this lookup is kept as a separate method instead of being folded into Put.
//
// It returns stow.ErrNotFound when S3 reports the "NotFound" error code and a
// wrapped error for any other request failure or for unparsable metadata.
func (c *container) getItem(id string) (*item, error) {
	params := &s3.HeadObjectInput{
		Bucket: aws.String(c.name),
		Key:    aws.String(id),
	}

	res, err := c.client.HeadObject(params)
	if err != nil {
		// stow needs ErrNotFound to pass the test but amazon returns an opaque error
		if aerr, ok := err.(awserr.Error); ok && aerr.Code() == "NotFound" {
			return nil, stow.ErrNotFound
		}
		return nil, errors.Wrap(err, "getItem, getting the object")
	}

	// The ETag value is wrapped in quotation marks; strip them. aws.StringValue
	// turns a missing ETag into "" instead of dereferencing a nil pointer.
	etag := cleanEtag(aws.StringValue(res.ETag))

	md, err := parseMetadata(res.Metadata)
	if err != nil {
		return nil, errors.Wrap(err, "unable to retrieve Item information, parsing metadata")
	}

	i := &item{
		container: c,
		client:    c.client,
		properties: properties{
			ETag:         &etag,
			Key:          &id,
			LastModified: res.LastModified,
			Owner:        nil, // not returned in the response.
			Size:         res.ContentLength,
			StorageClass: res.StorageClass,
			Metadata:     md,
		},
	}

	return i, nil
}
// cleanEtag reduces a raw ETag header value to the bare ETag value.
//
// Per RFC 7232 section 2.3 (https://tools.ietf.org/html/rfc7232#section-2.3)
// an entity tag looks like one of:
//   - W/"<ETAG VALUE>"
//   - "<ETAG VALUE>"
//   - ""
//
// and, because of HTTP quoting rules
// (https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2), the whole
// thing may arrive wrapped in another layer of plain or escaped quotes:
//   - "<FULL ETAG VALUE>"
//   - \"<FULL ETAG VALUE>\"
//
// The function therefore peels decoration off repeatedly until the value no
// longer starts with any of the three fillers:
//   - a leading `\"`: every `\` and `"` is trimmed from both ends,
//   - a leading `"`: every `"` is trimmed from both ends,
//   - a leading `W/`: that weak-validator prefix is dropped.
func cleanEtag(etag string) string {
	for {
		switch {
		case strings.HasPrefix(etag, `\"`):
			etag = strings.Trim(etag, `\"`)
		case strings.HasPrefix(etag, `"`):
			etag = strings.Trim(etag, `"`)
		case strings.HasPrefix(etag, `W/`):
			etag = strings.TrimPrefix(etag, `W/`)
		default:
			// Nothing left to strip.
			return etag
		}
	}
}
// prepMetadata parses a raw map into the native type required by S3 to set metadata (map[string]*string).
// TODO: validation for key values. This function also assumes that the value of a key value pair is a string.
func prepMetadata(md map[string]interface{}) (map[string]*string, error) {
m := make(map[string]*string, len(md))
for key, value := range md {
strValue, valid := value.(string)
if !valid {
return nil, errors.Errorf(`value of key '%s' in metadata must be of type string`, key)
}
m[key] = aws.String(strValue)
}
return m, nil
}
// parseMetadata converts metadata as returned by S3 (map[string]*string) into
// stow's generic map[string]interface{} form.
//
// S3 capitalizes the first letter of each dash-separated part of a key, so
// keys are lower-cased to stay consistent with how metadata keys are returned
// elsewhere. Values are stored as plain strings; a nil value pointer is stored
// as the empty string instead of being dereferenced.
//
// The returned error is currently always nil.
func parseMetadata(md map[string]*string) (map[string]interface{}, error) {
	m := make(map[string]interface{}, len(md))
	for key, value := range md {
		var s string
		if value != nil {
			s = *value
		}
		m[strings.ToLower(key)] = s
	}
	return m, nil
}