forked from pachyderm/pachyderm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
obj.go
387 lines (358 loc) · 11.8 KB
/
obj.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
package obj
import (
"fmt"
"io"
"io/ioutil"
"net"
"net/url"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/cenkalti/backoff"
"github.com/pachyderm/pachyderm/src/client"
"github.com/pachyderm/pachyderm/src/client/pkg/uuid"
log "github.com/sirupsen/logrus"
"golang.org/x/net/context"
)
// Client is an interface to object storage.
// The comments on each method describe the contract implementations are
// expected to honor.
type Client interface {
// Writer returns a writer which writes to the object called name.
// It should error if the object already exists or we don't have sufficient
// permissions to write it.
Writer(name string) (io.WriteCloser, error)
// Reader returns a reader which reads from the object called name, starting
// at offset.
// If `size == 0`, the reader should read from the offset till the end of the object.
// It should error if the object doesn't exist or we don't have sufficient
// permission to read it.
Reader(name string, offset uint64, size uint64) (io.ReadCloser, error)
// Delete deletes an object.
// It should error if the object doesn't exist or we don't have sufficient
// permission to delete it.
Delete(name string) error
// Walk calls `fn` with the names of objects which can be found under `prefix`.
Walk(prefix string, fn func(name string) error) error
// Exists checks if a given object already exists.
Exists(name string) bool
// IsRetryable determines if an operation should be retried given an error.
// The package-level IsRetryable consults this in addition to its own
// network-error check.
IsRetryable(err error) bool
// IsNotExist returns true if err is a non-existence error.
IsNotExist(err error) bool
// IsIgnorable returns true if the error can be ignored.
// BackoffWriteCloser.Close uses it to decide whether to suppress the error
// returned by closing the underlying writer.
IsIgnorable(err error) bool
}
// NewGoogleClient creates a google client with the given bucket name.
// It is the exported entry point for the package-private newGoogleClient:
// ctx and bucket are passed through unchanged and its result is returned
// as-is.
func NewGoogleClient(ctx context.Context, bucket string) (Client, error) {
return newGoogleClient(ctx, bucket)
}
// readSecretFile returns the contents of the file called name located under
// the directory "/" + client.StorageSecretName, with surrounding whitespace
// trimmed. If the file cannot be read, the read error is returned unchanged
// together with an empty string.
func readSecretFile(name string) (string, error) {
	secretPath := filepath.Join("/", client.StorageSecretName, name)
	raw, err := ioutil.ReadFile(secretPath)
	if err != nil {
		return "", err
	}
	trimmed := strings.TrimSpace(string(raw))
	return trimmed, nil
}
// NewGoogleClientFromSecret creates a google client by reading credentials
// from a mounted GoogleSecret. You may pass "" for bucket in which case it
// will read the bucket from the secret file "google-bucket".
//
// If the bucket has to be read from the secret and that read fails, the
// returned error starts with "google-bucket not found" and includes the
// underlying cause, so a permission or I/O problem is not mistaken for a
// missing file.
func NewGoogleClientFromSecret(ctx context.Context, bucket string) (Client, error) {
	if bucket == "" {
		var err error
		bucket, err = readSecretFile("/google-bucket")
		if err != nil {
			return nil, fmt.Errorf("google-bucket not found: %v", err)
		}
	}
	return NewGoogleClient(ctx, bucket)
}
// NewMicrosoftClient creates a microsoft client:
// container - Azure Blob Container name
// accountName - Azure Storage Account name
// accountKey - Azure Storage Account key
//
// It is the exported entry point for the package-private newMicrosoftClient;
// all three arguments are passed through unchanged and its result is returned
// as-is.
func NewMicrosoftClient(container string, accountName string, accountKey string) (Client, error) {
return newMicrosoftClient(container, accountName, accountKey)
}
// NewMicrosoftClientFromSecret creates a microsoft client by reading
// credentials from a mounted MicrosoftSecret. You may pass "" for container in
// which case it will read the container from the secret file
// "microsoft-container".
//
// The account name and key are always read from the secret files
// "microsoft-id" and "microsoft-secret". If any required file cannot be read,
// the returned error starts with "<file> not found" and includes the
// underlying cause, so a permission or I/O problem is not mistaken for a
// missing file.
func NewMicrosoftClientFromSecret(container string) (Client, error) {
	if container == "" {
		var err error
		container, err = readSecretFile("/microsoft-container")
		if err != nil {
			return nil, fmt.Errorf("microsoft-container not found: %v", err)
		}
	}
	id, err := readSecretFile("/microsoft-id")
	if err != nil {
		return nil, fmt.Errorf("microsoft-id not found: %v", err)
	}
	secret, err := readSecretFile("/microsoft-secret")
	if err != nil {
		return nil, fmt.Errorf("microsoft-secret not found: %v", err)
	}
	return NewMicrosoftClient(container, id, secret)
}
// NewMinioClient creates an s3 compatible client with the following credentials:
// endpoint - S3 compatible endpoint
// bucket - S3 bucket name
// id - AWS access key id
// secret - AWS secret access key
// secure - Set to true if connection is secure.
//
// It is the exported entry point for the package-private newMinioClient; all
// arguments are passed through unchanged and its result is returned as-is.
func NewMinioClient(endpoint, bucket, id, secret string, secure bool) (Client, error) {
return newMinioClient(endpoint, bucket, id, secret, secure)
}
// NewAmazonClient creates an amazon client with the following credentials:
// bucket - S3 bucket name
// distribution - cloudfront distribution ID
// id - AWS access key id
// secret - AWS secret access key
// token - AWS access token
// region - AWS region
//
// It is the exported entry point for the package-private newAmazonClient; all
// arguments are passed through unchanged and its result is returned as-is.
// NewAmazonClientFromSecret may call it with empty distribution, id, secret
// and token values when the corresponding secret files are absent.
func NewAmazonClient(bucket string, distribution string, id string, secret string, token string,
region string) (Client, error) {
return newAmazonClient(bucket, distribution, id, secret, token, region)
}
// NewMinioClientFromSecret constructs an s3 compatible client from the
// "minio-*" files of the mounted storage secret. You may pass "" for bucket,
// in which case the bucket is read from "minio-bucket".
//
// The endpoint, access key id, secret key and secure flag are always read
// from the secret, in that order; the first read that fails aborts
// construction and its error is returned unchanged. The connection is treated
// as secure only when the "minio-secure" file contains exactly "1".
func NewMinioClientFromSecret(bucket string) (Client, error) {
	if bucket == "" {
		fromSecret, err := readSecretFile("/minio-bucket")
		if err != nil {
			return nil, err
		}
		bucket = fromSecret
	}

	var endpoint, id, secret, secure string
	// Read order matters only for which error is reported first.
	required := []struct {
		file string
		dst  *string
	}{
		{"/minio-endpoint", &endpoint},
		{"/minio-id", &id},
		{"/minio-secret", &secret},
		{"/minio-secure", &secure},
	}
	for _, field := range required {
		value, err := readSecretFile(field.file)
		if err != nil {
			return nil, err
		}
		*field.dst = value
	}

	useTLS := secure == "1"
	return NewMinioClient(endpoint, bucket, id, secret, useTLS)
}
// NewAmazonClientFromSecret constructs an amazon client by reading credentials
// from a mounted AmazonSecret. You may pass "" for bucket in which case it
// will read the bucket from the secret.
//
// Behavior, in order:
//   - When bucket is "", the bucket is read from "amazon-bucket" (required)
//     and the cloudfront distribution from "amazon-distribution" (optional;
//     its absence is only logged). When bucket is supplied by the caller the
//     distribution is left empty.
//   - "amazon-id", "amazon-secret" and "amazon-token" are optional: a missing
//     file leaves the value empty, any other read error is returned.
//   - "amazon-region" is required; on failure the returned error starts with
//     "amazon-region not found" and includes the underlying cause.
func NewAmazonClientFromSecret(bucket string) (Client, error) {
	var distribution string
	if bucket == "" {
		var err error
		bucket, err = readSecretFile("/amazon-bucket")
		if err != nil {
			return nil, err
		}
		distribution, err = readSecretFile("/amazon-distribution")
		if err != nil {
			// Distribution is not required, but we can log a warning
			log.Warnln("AWS deployed without cloudfront distribution\n")
		} else {
			log.Infof("AWS deployed with cloudfront distribution at %v\n", distribution)
		}
	}
	// It's ok if we can't find static credentials; we will use IAM roles
	// in that case.
	id, err := readSecretFile("/amazon-id")
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}
	secret, err := readSecretFile("/amazon-secret")
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}
	token, err := readSecretFile("/amazon-token")
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}
	// region is required for constructing an AWS client
	region, err := readSecretFile("/amazon-region")
	if err != nil {
		return nil, fmt.Errorf("amazon-region not found: %v", err)
	}
	return NewAmazonClient(bucket, distribution, id, secret, token, region)
}
// NewClientFromURLAndSecret constructs a client for the object store named by
// url.Store, using url.Bucket as the bucket (or Azure container) and reading
// the remaining credentials from the mounted secret.
//
// Recognized stores are "s3" (Amazon), "gcs"/"gs" (Google) and "as"/"wasb"
// (Microsoft). Any other store yields an "unrecognized object store" error
// that names the offending store.
func NewClientFromURLAndSecret(ctx context.Context, url *ObjectStoreURL) (Client, error) {
	switch url.Store {
	case "s3":
		return NewAmazonClientFromSecret(url.Bucket)
	case "gcs", "gs":
		return NewGoogleClientFromSecret(ctx, url.Bucket)
	case "as", "wasb":
		// In Azure, the first part of the path is the container name.
		return NewMicrosoftClientFromSecret(url.Bucket)
	}
	// Report the store, not the bucket: the store is the value that failed to
	// match any case above.
	return nil, fmt.Errorf("unrecognized object store: %s", url.Store)
}
// ObjectStoreURL represents a parsed URL to an object in an object store.
type ObjectStoreURL struct {
	// Store is the object store, e.g. s3, gcs, as...
	Store string
	// Bucket is the "bucket" (in AWS parlance) or the "container" (in Azure
	// parlance).
	Bucket string
	// Object is the object itself, with leading and trailing slashes removed.
	Object string
}

// ParseURL parses an URL into ObjectStoreURL.
//
// For the "s3", "gcs" and "gs" schemes the URL host is the bucket and the
// path is the object. For the "as" and "wasb" schemes the first path element
// is the container and the rest of the path is the object; an Azure URL
// without a container is rejected as malformed.
//
// It returns an error if urlStr cannot be parsed, if an Azure URL has no
// container, or if the scheme is not one of the recognized stores.
func ParseURL(urlStr string) (*ObjectStoreURL, error) {
	// Named u rather than url so the net/url package is not shadowed.
	u, err := url.Parse(urlStr)
	if err != nil {
		return nil, fmt.Errorf("error parsing url %v: %v", urlStr, err)
	}
	switch u.Scheme {
	case "s3", "gcs", "gs":
		return &ObjectStoreURL{
			Store:  u.Scheme,
			Bucket: u.Host,
			Object: strings.Trim(u.Path, "/"),
		}, nil
	case "as", "wasb":
		// In Azure, the first part of the path is the container name.
		parts := strings.Split(strings.Trim(u.Path, "/"), "/")
		// strings.Split never returns an empty slice for a non-empty
		// separator, so a missing container shows up as an empty first
		// element rather than as len(parts) == 0.
		if parts[0] == "" {
			return nil, fmt.Errorf("malformed Azure URI: %v", urlStr)
		}
		return &ObjectStoreURL{
			Store:  u.Scheme,
			Bucket: parts[0],
			Object: strings.Trim(path.Join(parts[1:]...), "/"),
		}, nil
	}
	return nil, fmt.Errorf("unrecognized object store: %s", u.Scheme)
}
// NewExponentialBackOffConfig returns an exponential back-off configuration
// whose waits are longer than the backoff library's defaults: the first retry
// waits one second, each subsequent interval is multiplied by two, and no
// single interval exceeds fifteen minutes. All other settings keep the values
// chosen by backoff.NewExponentialBackOff.
func NewExponentialBackOffConfig() *backoff.ExponentialBackOff {
	cfg := backoff.NewExponentialBackOff()
	// We want to backoff more aggressively (i.e. wait longer) than the default
	cfg.MaxInterval = 15 * time.Minute
	cfg.Multiplier = 2
	cfg.InitialInterval = time.Second
	return cfg
}
// RetryError is used to log retry attempts.
// BackoffReadCloser.Read and BackoffWriteCloser.Write build one per retry
// notification and log it with %#v.
type RetryError struct {
// Err is the message of the error that triggered the retry.
Err string
// TimeTillNextRetry is the wait before the next attempt, formatted with
// time.Duration.String.
TimeTillNextRetry string
// BytesProcessed is the number of bytes read or written so far in the
// current Read/Write call.
BytesProcessed int
}
// BackoffReadCloser retries with exponential backoff in the case of failures
type BackoffReadCloser struct {
// client is consulted (via IsRetryable) to decide whether a read error
// should be retried.
client Client
// reader is the underlying reader that Read and Close delegate to.
reader io.ReadCloser
// backoffConfig is the retry schedule passed to backoff.RetryNotify.
backoffConfig *backoff.ExponentialBackOff
}
// newBackoffReadCloser wraps reader in a BackoffReadCloser that uses client
// to classify errors and a fresh NewExponentialBackOffConfig schedule for its
// retries.
func newBackoffReadCloser(client Client, reader io.ReadCloser) io.ReadCloser {
	rc := new(BackoffReadCloser)
	rc.client = client
	rc.reader = reader
	rc.backoffConfig = NewExponentialBackOffConfig()
	return rc
}
// Read reads from the underlying reader into data, retrying under the
// configured backoff schedule for as long as the error is retryable according
// to IsRetryable. Each retry continues filling data after the bytes already
// read and is logged together with the wait before the next attempt.
//
// It returns the total number of bytes read across all attempts and the error
// from the most recent attempt (nil if that attempt succeeded).
func (b *BackoffReadCloser) Read(data []byte) (int, error) {
	var (
		total   int
		lastErr error
	)
	attempt := func() error {
		var n int
		n, lastErr = b.reader.Read(data[total:])
		total += n
		// Only retryable errors are reported to the backoff loop; anything
		// else stops it and is surfaced through lastErr.
		if lastErr == nil || !IsRetryable(b.client, lastErr) {
			return nil
		}
		return lastErr
	}
	notify := func(err error, d time.Duration) {
		log.Infof("Error reading; retrying in %s: %#v", d, RetryError{
			Err:               err.Error(),
			TimeTillNextRetry: d.String(),
			BytesProcessed:    total,
		})
	}
	backoff.RetryNotify(attempt, b.backoffConfig, notify)
	return total, lastErr
}
// Close closes the ReadCloser contained in b and returns its error unchanged.
// No retry is attempted on close.
func (b *BackoffReadCloser) Close() error {
return b.reader.Close()
}
// BackoffWriteCloser retries with exponential backoff in the case of failures
type BackoffWriteCloser struct {
// client is consulted to decide whether a write error should be retried
// (via IsRetryable) and whether a close error may be ignored (IsIgnorable).
client Client
// writer is the underlying writer that Write and Close delegate to.
writer io.WriteCloser
// backoffConfig is the retry schedule passed to backoff.RetryNotify.
backoffConfig *backoff.ExponentialBackOff
}
// newBackoffWriteCloser wraps writer in a BackoffWriteCloser that uses client
// to classify errors and a fresh NewExponentialBackOffConfig schedule for its
// retries.
func newBackoffWriteCloser(client Client, writer io.WriteCloser) io.WriteCloser {
	wc := new(BackoffWriteCloser)
	wc.client = client
	wc.writer = writer
	wc.backoffConfig = NewExponentialBackOffConfig()
	return wc
}
// Write writes data to the underlying writer, retrying under the configured
// backoff schedule for as long as the error is retryable according to
// IsRetryable. Each retry resumes after the bytes already written and is
// logged together with the wait before the next attempt.
//
// It returns the total number of bytes written across all attempts and the
// error from the most recent attempt (nil if that attempt succeeded).
func (b *BackoffWriteCloser) Write(data []byte) (int, error) {
	var (
		total   int
		lastErr error
	)
	attempt := func() error {
		var n int
		n, lastErr = b.writer.Write(data[total:])
		total += n
		// Only retryable errors are reported to the backoff loop; anything
		// else stops it and is surfaced through lastErr.
		if lastErr == nil || !IsRetryable(b.client, lastErr) {
			return nil
		}
		return lastErr
	}
	notify := func(err error, d time.Duration) {
		log.Infof("Error writing; retrying in %s: %#v", d, RetryError{
			Err:               err.Error(),
			TimeTillNextRetry: d.String(),
			BytesProcessed:    total,
		})
	}
	backoff.RetryNotify(attempt, b.backoffConfig, notify)
	return total, lastErr
}
// Close closes the WriteCloser contained in b. The result of closing is
// handed to the client's IsIgnorable; when the client deems it ignorable
// Close reports success, otherwise the close result is returned as-is.
func (b *BackoffWriteCloser) Close() error {
	if closeErr := b.writer.Close(); !b.client.IsIgnorable(closeErr) {
		return closeErr
	}
	return nil
}
// IsRetryable reports whether an operation that failed with err should be
// retried. Temporary network errors are always retryable; for anything else
// the decision is delegated to client.IsRetryable, which is not consulted
// when the network check already succeeded.
func IsRetryable(client Client, err error) bool {
	if isNetRetryable(err) {
		return true
	}
	return client.IsRetryable(err)
}
// byteRange formats offset and size as a "first-last" byte range string with
// inclusive bounds. A size of 0 means "until the end": the result is
// "<offset>-" for a non-zero offset, and the empty string when offset is also
// 0 (i.e. the whole object, no range needed).
func byteRange(offset uint64, size uint64) string {
	switch {
	case size != 0:
		last := offset + size - 1
		return fmt.Sprintf("%d-%d", offset, last)
	case offset != 0:
		return fmt.Sprintf("%d-", offset)
	default:
		return ""
	}
}
// isNetRetryable reports whether err is a net.Error that describes itself as
// temporary. Errors of any other type, including nil, are not retryable.
func isNetRetryable(err error) bool {
	if netErr, ok := err.(net.Error); ok {
		return netErr.Temporary()
	}
	return false
}
// TestIsNotExist is a defensive method for checking to make sure IsNotExist is
// satisfying its semantics.
//
// It asks c for a reader on a freshly generated, dash-less UUID name and
// passes the resulting error to c.IsNotExist. It returns nil when the client
// classifies that error as NotExist, and a descriptive error otherwise. If
// the Reader call unexpectedly succeeds, the reader is closed and an error is
// returned rather than dereferencing a nil error.
func TestIsNotExist(c Client) error {
	r, err := c.Reader(uuid.NewWithoutDashes(), 0, 0)
	if c.IsNotExist(err) {
		return nil
	}
	if err == nil {
		// There is no error to quote, and the reader we were handed must not
		// be leaked. Closing is best-effort: the check has already failed.
		if r != nil {
			_ = r.Close()
		}
		return fmt.Errorf("storage is unable to discern NotExist errors, reading a randomly named object returned no error")
	}
	return fmt.Errorf("storage is unable to discern NotExist errors, \"%s\" should count as NotExist", err.Error())
}