From 3e671a326ba3c3a77eb0e4d7dd36d6bbb7d51ec8 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Thu, 15 Feb 2024 16:00:56 -0800 Subject: [PATCH] feat(storage): make it possible to disable Content-Type sniffing As described in https://github.com/google/go-cloud/issues/3298#issuecomment-1856821618, we want to disable automatic `Content-Type` detection when inserting an object to Google Cloud Storage (GCS). Previously it wasn't possible to disable this auto-detection, even though `googleapi.MediaOptions` provides a `ForceEmptyContentType` option (https://github.com/googleapis/google-api-go-client/blob/v0.165.0/googleapi/googleapi.go#L283). We enable this by adding a `Writer` option to set this value. Closes https://github.com/googleapis/google-cloud-go/issues/9430 --- storage/client.go | 3 +++ storage/grpc_client.go | 2 +- storage/http_client.go | 2 +- storage/integration_test.go | 34 +++++++++++++++++++++++++++------- storage/writer.go | 30 ++++++++++++++++++------------ 5 files changed, 50 insertions(+), 21 deletions(-) diff --git a/storage/client.go b/storage/client.go index 4906b1d1f704..70b2a280e3b3 100644 --- a/storage/client.go +++ b/storage/client.go @@ -254,6 +254,9 @@ type openWriterParams struct { // attrs - see `Writer.ObjectAttrs`. // Required. attrs *ObjectAttrs + // forceEmptyContentType - Disables auto-detect of Content-Type + // Optional. + forceEmptyContentType bool // conds - see `Writer.o.conds`. // Optional. conds *Conditions diff --git a/storage/grpc_client.go b/storage/grpc_client.go index e9e95993011b..8f22adcc3309 100644 --- a/storage/grpc_client.go +++ b/storage/grpc_client.go @@ -550,7 +550,7 @@ func (c *grpcStorageClient) UpdateObject(ctx context.Context, params *updateObje if uattrs.TemporaryHold != nil { fieldMask.Paths = append(fieldMask.Paths, "temporary_hold") } - if uattrs.ContentType != nil { + if uattrs.ContentType != nil || params.forceEmptyContentType { fieldMask.Paths = append(fieldMask.Paths, "content_type") } if uattrs.ContentLanguage != nil { diff --git a/storage/http_client.go b/storage/http_client.go index a0f3c00a7373..e3e0d761bb08 100644 --- a/storage/http_client.go +++ b/storage/http_client.go @@ -885,7 +885,7 @@ func (c *httpStorageClient) OpenWriter(params *openWriterParams, opts ...storage mediaOpts := []googleapi.MediaOption{ googleapi.ChunkSize(params.chunkSize), } - if c := attrs.ContentType; c != "" { + if c := attrs.ContentType; c != "" || params.forceEmptyContentType { mediaOpts = append(mediaOpts, googleapi.ContentType(c)) } if params.chunkRetryDeadline != 0 { diff --git a/storage/integration_test.go b/storage/integration_test.go index 0278b7005078..18f2b5e89bd1 100644 --- a/storage/integration_test.go +++ b/storage/integration_test.go @@ -2415,8 +2415,9 @@ func TestIntegration_WriterContentType(t *testing.T) { multiTransportTest(ctx, t, func(t *testing.T, ctx context.Context, bucket, _ string, client *Client) { obj := client.Bucket(bucket).Object("content") testCases := []struct { - content string - setType, wantType string + content string + setType, wantType string + forceEmptyContentType bool }{ { // Sniffed content type. @@ -2438,9 +2439,17 @@ func TestIntegration_WriterContentType(t *testing.T) { setType: "image/jpeg", wantType: "image/jpeg", }, + { + // Content type sniffing disabled. + content: "My first page", + setType: "", + wantType: "", + forceEmptyContentType: true, + }, } for i, tt := range testCases { - if err := writeObject(ctx, obj, tt.setType, []byte(tt.content)); err != nil { + writer := newWriter(ctx, obj, tt.setType, tt.forceEmptyContentType) + if err := writeContents(writer, []byte(tt.content)); err != nil { t.Errorf("writing #%d: %v", i, err) } attrs, err := obj.Attrs(ctx) @@ -5649,10 +5658,7 @@ func deleteObjectIfExists(o *ObjectHandle, retryOpts ...RetryOption) error { return nil } -func writeObject(ctx context.Context, obj *ObjectHandle, contentType string, contents []byte) error { - w := obj.Retryer(WithPolicy(RetryAlways)).NewWriter(ctx) - w.ContentType = contentType - +func writeContents(w *Writer, contents []byte) error { if contents != nil { if _, err := w.Write(contents); err != nil { _ = w.Close() @@ -5662,6 +5668,20 @@ func writeObject(ctx context.Context, obj *ObjectHandle, contentType string, con return w.Close() } +func writeObject(ctx context.Context, obj *ObjectHandle, contentType string, contents []byte) error { + w := newWriter(ctx, obj, contentType, false) + + return writeContents(w, contents) +} + +func newWriter(ctx context.Context, obj *ObjectHandle, contentType string, forceEmptyContentType bool) *Writer { + w := obj.Retryer(WithPolicy(RetryAlways)).NewWriter(ctx) + w.ContentType = contentType + w.ForceEmptyContentType = forceEmptyContentType + + return w +} + func readObject(ctx context.Context, obj *ObjectHandle) ([]byte, error) { r, err := obj.NewReader(ctx) if err != nil { diff --git a/storage/writer.go b/storage/writer.go index aeb7ed418c8d..43a0f0d10937 100644 --- a/storage/writer.go +++ b/storage/writer.go @@ -88,6 +88,11 @@ type Writer struct { // cancellation. ChunkRetryDeadline time.Duration + // ForceEmptyContentType is an optional parameter that is used to disable + // auto-detection of Content-Type. By default, if a blank Content-Type + // is provided, then gax.DetermineContentType is called to sniff the type. + ForceEmptyContentType bool + // ProgressFunc can be used to monitor the progress of a large write // operation. If ProgressFunc is not nil and writing requires multiple // calls to the underlying service (see @@ -180,18 +185,19 @@ func (w *Writer) openWriter() (err error) { isIdempotent := w.o.conds != nil && (w.o.conds.GenerationMatch >= 0 || w.o.conds.DoesNotExist == true) opts := makeStorageOpts(isIdempotent, w.o.retry, w.o.userProject) params := &openWriterParams{ - ctx: w.ctx, - chunkSize: w.ChunkSize, - chunkRetryDeadline: w.ChunkRetryDeadline, - bucket: w.o.bucket, - attrs: &w.ObjectAttrs, - conds: w.o.conds, - encryptionKey: w.o.encryptionKey, - sendCRC32C: w.SendCRC32C, - donec: w.donec, - setError: w.error, - progress: w.progress, - setObj: func(o *ObjectAttrs) { w.obj = o }, + ctx: w.ctx, + chunkSize: w.ChunkSize, + chunkRetryDeadline: w.ChunkRetryDeadline, + bucket: w.o.bucket, + attrs: &w.ObjectAttrs, + conds: w.o.conds, + encryptionKey: w.o.encryptionKey, + sendCRC32C: w.SendCRC32C, + donec: w.donec, + setError: w.error, + progress: w.progress, + setObj: func(o *ObjectAttrs) { w.obj = o }, + forceEmptyContentType: w.ForceEmptyContentType, } if err := w.ctx.Err(); err != nil { return err // short-circuit