diff --git a/openapi/Swarm.yaml b/openapi/Swarm.yaml index 6bf0e89fc3c..6e9088cc45d 100644 --- a/openapi/Swarm.yaml +++ b/openapi/Swarm.yaml @@ -342,7 +342,7 @@ paths: post: summary: "Upload a file or collection of files" description: - "Upload single files or collections of files. To upload a collection, send a multipart request with files in the form data with appropriate headers. Tar files can be uploaded with the `swarm-collection` header to extract and upload the directory structure. Without the `swarm-collection` header, requests are treated as single file uploads. Multipart requests are always treated as collections; use the `swarm-index-document` header to specify a single file to serve." + "Upload single files or collections of files. For a single file, the `Content-Type` header is optional: when present, its value (with surrounding whitespace trimmed) is stored as metadata as-is and is not validated against the body; when absent or empty, the server infers a type from up to the first 512 bytes of the body. To upload a collection, send a multipart request with files in the form data with appropriate headers. Tar files can be uploaded with the `swarm-collection` header to extract and upload the directory structure. Without the `swarm-collection` header, requests are treated as single file uploads. Multipart requests are always treated as collections; use the `swarm-index-document` header to specify a single file to serve." tags: - BZZ parameters: diff --git a/openapi/SwarmCommon.yaml b/openapi/SwarmCommon.yaml index ab39fa48555..5bf0a0fbc81 100644 --- a/openapi/SwarmCommon.yaml +++ b/openapi/SwarmCommon.yaml @@ -1176,7 +1176,7 @@ components: name: Content-Type schema: type: string - description: The specified content-type is preserved for download of the asset + description: "Single file: the Content-Type value, trimmed of surrounding whitespace, is stored as-is without being validated against the body; if the header is omitted or empty, the type is inferred from up to the first 512 bytes of the body. Tar (`swarm-collection`) and multipart collection uploads still require a complete Content-Type header (e.g. 
`application/x-tar` or `multipart/form-data` with boundary) so the request can be parsed." SwarmIndexDocumentParameter: in: header diff --git a/pkg/api/api.go b/pkg/api/api.go index 93595168cc3..799f62f0053 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -16,7 +16,6 @@ import ( "io" "math" "math/big" - "mime" "net/http" "reflect" "strconv" @@ -302,10 +301,6 @@ func New( s.chainBackend = chainBackend s.metricsRegistry = newDebugMetrics() s.preMapHooks = map[string]func(v string) (string, error){ - "mimeMediaType": func(v string) (string, error) { - typ, _, err := mime.ParseMediaType(v) - return typ, err - }, "decBase64url": func(v string) (string, error) { buf, err := base64.URLEncoding.DecodeString(v) return string(buf), err diff --git a/pkg/api/bzz.go b/pkg/api/bzz.go index d99594bc682..dbd1ad8a423 100644 --- a/pkg/api/bzz.go +++ b/pkg/api/bzz.go @@ -5,10 +5,13 @@ package api import ( + "bytes" "context" "encoding/hex" "errors" "fmt" + "io" + "mime" "net/http" "path" "path/filepath" @@ -51,6 +54,8 @@ const ( largeFileBufferSize = 16 * 32 * 1024 largeBufferFilesizeThreshold = 10 * 1000000 // ten megs + + contentTypeSniffLen = 512 ) func lookaheadBufferSize(size int64) int { @@ -65,7 +70,7 @@ func (s *Service) bzzUploadHandler(w http.ResponseWriter, r *http.Request) { defer span.Finish() headers := struct { - ContentType string `map:"Content-Type,mimeMediaType" validate:"required"` + ContentType string `map:"Content-Type"` BatchID []byte `map:"Swarm-Postage-Batch-Id" validate:"required"` SwarmTag uint64 `map:"Swarm-Tag"` Pin bool `map:"Swarm-Pin"` @@ -137,11 +142,24 @@ func (s *Service) bzzUploadHandler(w http.ResponseWriter, r *http.Request) { logger: logger, } - if headers.IsDir || headers.ContentType == multiPartFormData { - s.dirUploadHandler(ctx, logger, span, ow, r, putter, r.Header.Get(ContentTypeHeader), headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress) + contentTypeHdr := strings.TrimSpace(headers.ContentType) + 
r.Header.Set(ContentTypeHeader, contentTypeHdr) + mt, _, errParseCT := mime.ParseMediaType(contentTypeHdr) + isMultipart := errParseCT == nil && mt == multiPartFormData + + isDirUpload := headers.IsDir || isMultipart + if !isDirUpload { + s.fileUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress) + return + } + + if contentTypeHdr == "" { + logger.Error(nil, "content-type required for directory upload") + jsonhttp.BadRequest(w, errInvalidContentType) return } - s.fileUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress) + + s.dirUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress) } // bzzUploadResponse is returned when an HTTP request to upload a file is successful @@ -174,8 +192,24 @@ func (s *Service) fileUploadHandler( p := requestPipelineFn(putter, encrypt, rLevel) + var body io.Reader = r.Body + if r.Header.Get(ContentTypeHeader) == "" { + sniffBuf := make([]byte, contentTypeSniffLen) + n, err := io.ReadFull(r.Body, sniffBuf) + sniffBuf = sniffBuf[:n] + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + logger.Debug("body read failed", "file_name", queries.FileName, "error", err) + logger.Error(nil, "body read failed", "file_name", queries.FileName) + jsonhttp.BadRequest(w, "failed to read request body") + return + } + + r.Header.Set(ContentTypeHeader, http.DetectContentType(sniffBuf)) + body = io.MultiReader(bytes.NewReader(sniffBuf), r.Body) + } + // first store the file and get its reference - fr, err := p(ctx, r.Body) + fr, err := p(ctx, body) if err != nil { logger.Debug("file store failed", "file_name", queries.FileName, "error", err) logger.Error(nil, "file store failed", "file_name", queries.FileName) @@ -240,7 +274,7 @@ func (s *Service) fileUploadHandler( } fileMtdt := map[string]string{ - 
manifest.EntryMetadataContentTypeKey: r.Header.Get(ContentTypeHeader), // Content-Type has already been validated. + manifest.EntryMetadataContentTypeKey: r.Header.Get(ContentTypeHeader), manifest.EntryMetadataFilenameKey: queries.FileName, } diff --git a/pkg/api/bzz_test.go b/pkg/api/bzz_test.go index 3f444e6cb07..08052b97bbf 100644 --- a/pkg/api/bzz_test.go +++ b/pkg/api/bzz_test.go @@ -465,6 +465,44 @@ func TestBzzFiles(t *testing.T) { ) }) + t.Run("omit-content-type-uses-sniff", func(t *testing.T) { + fileName := "plain.txt" + var resp api.BzzUploadResponse + jsonhttptest.Request(t, client, http.MethodPost, fileUploadResource+"?name="+fileName, http.StatusCreated, + jsonhttptest.WithRequestHeader(api.SwarmDeferredUploadHeader, "true"), + jsonhttptest.WithRequestHeader(api.SwarmPostageBatchIdHeader, batchOkStr), + jsonhttptest.WithRequestBody(bytes.NewReader(simpleData)), + jsonhttptest.WithUnmarshalJSONResponse(&resp), + ) + rootHash := resp.Reference.String() + jsonhttptest.Request(t, client, http.MethodGet, fileDownloadResource(rootHash), http.StatusOK, + jsonhttptest.WithExpectedResponse(simpleData), + jsonhttptest.WithExpectedContentLength(len(simpleData)), + jsonhttptest.WithExpectedResponseHeader(api.ContentTypeHeader, "text/plain; charset=utf-8"), + jsonhttptest.WithExpectedResponseHeader(api.ContentDispositionHeader, fmt.Sprintf(`inline; filename="%s"`, fileName)), + ) + }) + + t.Run("image-content-type-preserved", func(t *testing.T) { + ct := "image/png" + fileName := "test.txt" + var resp api.BzzUploadResponse + jsonhttptest.Request(t, client, http.MethodPost, fileUploadResource+"?name="+fileName, http.StatusCreated, + jsonhttptest.WithRequestHeader(api.SwarmDeferredUploadHeader, "true"), + jsonhttptest.WithRequestHeader(api.SwarmPostageBatchIdHeader, batchOkStr), + jsonhttptest.WithRequestHeader(api.ContentTypeHeader, ct), + jsonhttptest.WithRequestBody(bytes.NewReader(simpleData)), + jsonhttptest.WithUnmarshalJSONResponse(&resp), + ) + rootHash := 
resp.Reference.String() + jsonhttptest.Request(t, client, http.MethodGet, fileDownloadResource(rootHash), http.StatusOK, + jsonhttptest.WithExpectedResponse(simpleData), + jsonhttptest.WithExpectedContentLength(len(simpleData)), + jsonhttptest.WithExpectedResponseHeader(api.ContentTypeHeader, ct), + jsonhttptest.WithExpectedResponseHeader(api.ContentDispositionHeader, fmt.Sprintf(`inline; filename="%s"`, fileName)), + ) + }) + t.Run("upload-then-download-and-check-data", func(t *testing.T) { fileName := "sample.html" rootHash := "36e6c1bbdfee6ac21485d5f970479fd1df458d36df9ef4e8179708ed46da557f" diff --git a/pkg/api/dirs.go b/pkg/api/dirs.go index 58160790857..bfa9f764a2a 100644 --- a/pkg/api/dirs.go +++ b/pkg/api/dirs.go @@ -44,7 +44,6 @@ func (s *Service) dirUploadHandler( w http.ResponseWriter, r *http.Request, putter storer.PutterSession, - contentTypeString string, encrypt bool, tag uint64, rLevel redundancy.Level, @@ -58,7 +57,7 @@ func (s *Service) dirUploadHandler( } - // The error is ignored because the header was already validated by the caller. - mediaType, params, _ := mime.ParseMediaType(contentTypeString) + // The parse error is deliberately ignored here. The caller only rejects an empty Content-Type and no longer validates its syntax, so mediaType and params may be empty or partial for a malformed header. NOTE(review): confirm the switch below rejects such values. + mediaType, params, _ := mime.ParseMediaType(r.Header.Get(ContentTypeHeader)) var dReader dirReader switch mediaType {