From 1c610eabd4bbb53f88d8428aa4a546402b5997e7 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sat, 9 Jul 2022 07:32:12 +1000 Subject: [PATCH 1/2] Correctly set ContentType and ContentEncoding for resumable uploads These were being set in multipart uploads, but not in resumable uploads. I opted for the query param to override the body, but I'm not sure if this is the same behaviour as real GCS. --- fakestorage/upload.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fakestorage/upload.go b/fakestorage/upload.go index a50860e27a..dff8a8f276 100644 --- a/fakestorage/upload.go +++ b/fakestorage/upload.go @@ -383,10 +383,14 @@ func (s *Server) resumableUpload(bucketName string, r *http.Request) jsonRespons if objName == "" { objName = metadata.Name } + if contentEncoding == "" { + contentEncoding = metadata.ContentEncoding + } obj := Object{ ObjectAttrs: ObjectAttrs{ BucketName: bucketName, Name: objName, + ContentType: metadata.ContentType, ContentEncoding: contentEncoding, ACL: getObjectACL(predefinedACL), Metadata: metadata.Metadata, From 87997717412e176826396fdb559544010acad179 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sat, 9 Jul 2022 08:35:03 +1000 Subject: [PATCH 2/2] Add support for gzip transcoding When an object has Content-Encoding: gzip, and an object download request does not have Accept-Encoding: gzip, GCS will decompress the object before serving it. We now replicate this behaviour. --- fakestorage/object.go | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/fakestorage/object.go b/fakestorage/object.go index 35687fa77b..cac7efe231 100644 --- a/fakestorage/object.go +++ b/fakestorage/object.go @@ -5,6 +5,8 @@ package fakestorage import ( + "compress/gzip" + "bytes" "encoding/json" "errors" "fmt" @@ -647,7 +649,7 @@ func (s *Server) downloadObject(w http.ResponseWriter, r *http.Request) { } status := http.StatusOK - ranged, start, lastByte, content, satisfiable := s.handleRange(obj, r) + ranged, start, lastByte, content, satisfiable, transcoded := s.handleRange(obj, r) if ranged && satisfiable { status = http.StatusPartialContent @@ -670,7 +672,8 @@ func (s *Server) downloadObject(w http.ResponseWriter, r *http.Request) { if obj.ContentType != "" { w.Header().Set(contentTypeHeader, obj.ContentType) } - if obj.ContentEncoding != "" { + // If content was transcoded, the underlying encoding was removed so we shouldn't report it. + if obj.ContentEncoding != "" && !transcoded { w.Header().Set("Content-Encoding", obj.ContentEncoding) } } @@ -681,12 +684,31 @@ func (s *Server) downloadObject(w http.ResponseWriter, r *http.Request) { } } -func (s *Server) handleRange(obj Object, r *http.Request) (ranged bool, start int64, lastByte int64, content []byte, satisfiable bool) { +func (s *Server) handleRange(obj Object, r *http.Request) (ranged bool, start int64, lastByte int64, content []byte, satisfiable bool, transcoded bool) { + // This should also be false if the Cache-Control metadata field == "no-transform", + // but we don't currently support that field. + // See https://cloud.google.com/storage/docs/transcoding + if obj.ContentEncoding == "gzip" && r.Header.Get("accept-encoding") != "gzip" { + // GCS will transparently decompress gzipped content, see + // https://cloud.google.com/storage/docs/transcoding + // In this case, any Range header is ignored and the full content is returned. + + // If the content is not a valid gzip file, ignore errors and continue + // without transcoding. Otherwise, return decompressed content. + gzipReader, err := gzip.NewReader(bytes.NewReader(obj.Content)) + if err == nil { + content, err := io.ReadAll(gzipReader) + if err == nil { + return false, 0, 0, content, false, true + } + } + } + contentLength := int64(len(obj.Content)) start, end, err := parseRange(r.Header.Get("Range"), contentLength) if err != nil { // If the range isn't valid, GCS returns all content. - return false, 0, 0, obj.Content, false + return false, 0, 0, obj.Content, false, false } // GCS is pretty flexible when it comes to invalid ranges. A 416 http // response is only returned when the range start is beyond the length of @@ -698,12 +720,12 @@ func (s *Server) handleRange(obj Object, r *http.Request) (ranged bool, start in // Length: 40, Range: bytes=50- case start >= contentLength: // This IS a ranged request, but it ISN'T satisfiable. - return true, 0, 0, []byte{}, false + return true, 0, 0, []byte{}, false, false // Negative range, ignore range and return all content. // Examples: // Length: 40, Range: bytes=30-20 case end < start: - return false, 0, 0, obj.Content, false + return false, 0, 0, obj.Content, false, false // Return range. Clamp start and end. // Examples: // Length: 40, Range: bytes=-100 @@ -715,7 +737,7 @@ func (s *Server) handleRange(obj Object, r *http.Request) (ranged bool, start in if end >= contentLength { end = contentLength - 1 } - return true, start, end, obj.Content[start : end+1], true + return true, start, end, obj.Content[start : end+1], true, false } }