/
gcs.go
135 lines (122 loc) · 5.18 KB
/
gcs.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package gcs contains helpers that facilitate data transfer of Resources into Google Cloud
// Storage.
package gcs
import (
"context"
"errors"
"io"
"net/http"
"strings"
"cloud.google.com/go/storage"
"google.golang.org/api/iterator"
"google.golang.org/api/option"
)
// DefaultCloudStorageEndpoint represents the default cloud storage API endpoint.
// This should be passed to NewClient unless in a test environment. NewClient
// compares its endpointURL argument against this value to decide how the
// underlying storage client is constructed.
const DefaultCloudStorageEndpoint = "https://storage.googleapis.com/"
// ErrInvalidGCSPath is an error indicating the GCS path is not valid. It is
// returned by PathComponents when the path lacks the gs:// prefix or does not
// contain a relative path after the bucket.
var ErrInvalidGCSPath = errors.New("the GCS path is not valid. a bucket and folder must be included, along with a gs:// prefix. For example gs://bucket/folder")
// Client represents a GCS API client belonging to some project.
//
// It embeds *storage.Client, so the storage client's methods are available
// directly on Client, and it remembers the endpoint and bucket supplied to
// NewClient.
type Client struct {
*storage.Client
// endpointURL is the endpoint URL that was passed to NewClient.
endpointURL string
// bucketName is the bucket used by GetFileWriter, GetFileReader and
// IsBucketInProject.
bucketName string
}
// NewClient creates and returns a new gcs client for use in writing resources to an existing GCS
// bucket. Note `bucketName` must belong to an existing bucket. See here for how to create a GCS
// bucket: https://cloud.google.com/storage/docs/creating-buckets.
//
// If endpointURL equals DefaultCloudStorageEndpoint the underlying storage client is created with
// no extra options. For any other endpoint, the client is pointed at endpointURL and given an
// empty http.Client.
//
// If the underlying storage client cannot be created, the zero Client is returned together with
// the error; the returned Client must not be used in that case.
// TODO(b/243677730): Add support for creating buckets.
func NewClient(ctx context.Context, bucketName, endpointURL string) (Client, error) {
	var opts []option.ClientOption
	if endpointURL != DefaultCloudStorageEndpoint {
		// When not using the default Cloud Storage endpoint, we provide an empty
		// http.Client. This case is generally used in the test, so that the
		// storage.Client doesn't complain about not being able to find
		// credentials in the test environment.
		// TODO(b/211028663): we should try to find a better way to handle this
		// case, perhaps we can set fake default creds in the test setup.
		opts = append(opts, option.WithHTTPClient(&http.Client{}), option.WithEndpoint(endpointURL))
	}

	storageClient, err := storage.NewClient(ctx, opts...)
	if err != nil {
		// Do not hand back a half-initialised Client alongside an error.
		return Client{}, err
	}
	return Client{Client: storageClient, endpointURL: endpointURL, bucketName: bucketName}, nil
}
// GetFileWriter opens the object named `fileName` in the client's configured GCS bucket for
// writing and returns it as a write closer.
// The written data is sent to GCS when the returned write closer is closed.
func (gcsClient Client) GetFileWriter(ctx context.Context, fileName string) io.WriteCloser {
	return gcsClient.Bucket(gcsClient.bucketName).Object(fileName).NewWriter(ctx)
}
// GetFileReader returns a reader for a file in GCS named `fileName`, looked up in the client's
// configured bucket.
// ErrObjectNotExist will be returned if the object is not found.
//
// On error the returned reader is a nil interface value, so callers may safely compare it
// against nil.
//
// The caller must call Close on the returned Reader when done reading.
func (gcsClient Client) GetFileReader(ctx context.Context, fileName string) (io.ReadCloser, error) {
	obj := gcsClient.Bucket(gcsClient.bucketName).Object(fileName)
	reader, err := obj.NewReader(ctx)
	if err != nil {
		// Return a literal nil: passing the nil *storage.Reader through would
		// produce a non-nil io.ReadCloser wrapping a nil pointer.
		return nil, err
	}
	return reader, nil
}
// IsBucketInProject reports whether the client's configured bucket is listed among the buckets
// of the GCP project `project`. Any error encountered while listing buckets is returned with a
// false result.
func (gcsClient Client) IsBucketInProject(ctx context.Context, project string) (bool, error) {
	want := gcsClient.bucketName

	buckets := gcsClient.Buckets(ctx, project)
	// Only list buckets whose names start with the one we are looking for.
	buckets.Prefix = want

	for {
		attrs, err := buckets.Next()
		switch {
		case err == iterator.Done:
			// Listing exhausted without an exact name match.
			return false, nil
		case err != nil:
			return false, err
		case attrs.Name == want:
			return true, nil
		}
	}
}
// JoinPath is roughly equivalent to path/filepath.Join, except that it always
// uses forward slashes regardless of platform (because GCS does not recognize
// backslashes used by windows).
//
// Each path element has its backslashes converted to forward slashes and its
// leading and trailing slashes removed. Elements that are empty after this
// cleaning (for example "" or "/") are ignored, as in path/filepath.Join, so
// the result never gains a leading, trailing or doubled slash from them. The
// remaining elements are then joined with forward slashes. Slashes in the
// interior of an element are left untouched.
//
// Warning: this may not be fully compatible with how directory paths are
// supposed to work, and should not be used except for writing to GCS. For
// writing files to a local filesystem, use path/filepath.Join.
func JoinPath(elems ...string) string {
	cleaned := make([]string, 0, len(elems))
	for _, e := range elems {
		e = strings.Trim(strings.ReplaceAll(e, `\`, `/`), `/`)
		if e == "" {
			// Skip empty elements so they do not introduce stray separators.
			continue
		}
		cleaned = append(cleaned, e)
	}
	return strings.Join(cleaned, `/`)
}
// PathComponents takes a GCS path (e.g. gs://some_bucket/relative/path) and returns the bucket name
// and the relative path. For example, gs://some_bucket/relative/path would return some_bucket and
// relative/path. At least a bucket and a folder must be included.
//
// ErrInvalidGCSPath is returned, with empty bucket and relative path, when the gs:// prefix is
// missing, when the bucket name is empty (e.g. gs:///folder), or when nothing follows the bucket
// (e.g. gs://bucket or gs://bucket/).
func PathComponents(uri string) (bucket, relativePath string, err error) {
	const scheme = "gs://"
	if !strings.HasPrefix(uri, scheme) {
		return "", "", ErrInvalidGCSPath
	}
	// Split once on the first slash after the scheme: bucket before, object path after.
	bucket, relativePath, ok := strings.Cut(strings.TrimPrefix(uri, scheme), "/")
	if !ok || bucket == "" || relativePath == "" {
		return "", "", ErrInvalidGCSPath
	}
	return bucket, relativePath, nil
}