Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openapi/Swarm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ paths:
post:
summary: "Upload a file or collection of files"
description:
"Upload single files or collections of files. To upload a collection, send a multipart request with files in the form data with appropriate headers. Tar files can be uploaded with the `swarm-collection` header to extract and upload the directory structure. Without the `swarm-collection` header, requests are treated as single file uploads. Multipart requests are always treated as collections; use the `swarm-index-document` header to specify a single file to serve."
"Upload single files or collections of files. For a single file, `Content-Type` is optional: when present it is stored as metadata as-is; when absent the server infers a type from the start of the body. To upload a collection, send a multipart request with files in the form data with appropriate headers. Tar files can be uploaded with the `swarm-collection` header to extract and upload the directory structure. Without the `swarm-collection` header, requests are treated as single file uploads. Multipart requests are always treated as collections; use the `swarm-index-document` header to specify a single file to serve."
tags:
- BZZ
parameters:
Expand Down
2 changes: 1 addition & 1 deletion openapi/SwarmCommon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,7 @@ components:
name: Content-Type
schema:
type: string
description: The specified content-type is preserved for download of the asset
description: "Single file: trimmed Content-Type is stored as-is or, if omitted or empty, inferred from the first bytes without validating against the body; tar (`swarm-collection`) and multipart collection uploads still need a full-body Content-Type (e.g. `application/x-tar` or `multipart/form-data` with boundary) so the request can be parsed."

SwarmIndexDocumentParameter:
in: header
Expand Down
5 changes: 0 additions & 5 deletions pkg/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"io"
"math"
"math/big"
"mime"
"net/http"
"reflect"
"strconv"
Expand Down Expand Up @@ -302,10 +301,6 @@ func New(
s.chainBackend = chainBackend
s.metricsRegistry = newDebugMetrics()
s.preMapHooks = map[string]func(v string) (string, error){
"mimeMediaType": func(v string) (string, error) {
typ, _, err := mime.ParseMediaType(v)
return typ, err
},
"decBase64url": func(v string) (string, error) {
buf, err := base64.URLEncoding.DecodeString(v)
return string(buf), err
Expand Down
46 changes: 40 additions & 6 deletions pkg/api/bzz.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
package api

import (
"bytes"
"context"
"encoding/hex"
"errors"
"fmt"
"io"
"mime"
"net/http"
"path"
"path/filepath"
Expand Down Expand Up @@ -51,6 +54,8 @@ const (
largeFileBufferSize = 16 * 32 * 1024

largeBufferFilesizeThreshold = 10 * 1000000 // ten megs

contentTypeSniffLen = 512
)

func lookaheadBufferSize(size int64) int {
Expand All @@ -65,7 +70,7 @@ func (s *Service) bzzUploadHandler(w http.ResponseWriter, r *http.Request) {
defer span.Finish()

headers := struct {
ContentType string `map:"Content-Type,mimeMediaType" validate:"required"`
ContentType string `map:"Content-Type"`
BatchID []byte `map:"Swarm-Postage-Batch-Id" validate:"required"`
SwarmTag uint64 `map:"Swarm-Tag"`
Pin bool `map:"Swarm-Pin"`
Expand Down Expand Up @@ -137,11 +142,24 @@ func (s *Service) bzzUploadHandler(w http.ResponseWriter, r *http.Request) {
logger: logger,
}

if headers.IsDir || headers.ContentType == multiPartFormData {
s.dirUploadHandler(ctx, logger, span, ow, r, putter, r.Header.Get(ContentTypeHeader), headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)
contentTypeHdr := strings.TrimSpace(headers.ContentType)
r.Header.Set(ContentTypeHeader, contentTypeHdr)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this necessary?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before my changes, dirUploadHandler accepted a separate contentTypeString argument (although the value is already present in the request).
So I removed that parameter; the main handler now trims the header value and sets it back on the request.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @martinconic that it is not really clear why this is needed. If you're already reading, cleaning, parsing and checking the content type header, it does not make sense to also update the value in the Header type.

mt, _, errParseCT := mime.ParseMediaType(contentTypeHdr)
isMultipart := errParseCT == nil && mt == multiPartFormData

isDirUpload := headers.IsDir || isMultipart
if !isDirUpload {
s.fileUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)
return
}

if contentTypeHdr == "" {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a test for an empty content type, if there isn't one already? And another for the case with no content type and no body?

logger.Error(nil, "content-type required for directory upload")
jsonhttp.BadRequest(w, errInvalidContentType)
return
}
s.fileUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)

s.dirUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)
}

// bzzUploadResponse is returned when an HTTP request to upload a file is successful
Expand Down Expand Up @@ -174,8 +192,24 @@ func (s *Service) fileUploadHandler(

p := requestPipelineFn(putter, encrypt, rLevel)

var body io.Reader = r.Body
if r.Header.Get(ContentTypeHeader) == "" {
sniffBuf := make([]byte, contentTypeSniffLen)
n, err := io.ReadFull(r.Body, sniffBuf)
sniffBuf = sniffBuf[:n]
if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
logger.Debug("body read failed", "file_name", queries.FileName, "error", err)
logger.Error(nil, "body read failed", "file_name", queries.FileName)
jsonhttp.BadRequest(w, "failed to read request body")
return
}

r.Header.Set(ContentTypeHeader, http.DetectContentType(sniffBuf))
body = io.MultiReader(bytes.NewReader(sniffBuf), r.Body)
}

// first store the file and get its reference
fr, err := p(ctx, r.Body)
fr, err := p(ctx, body)
if err != nil {
logger.Debug("file store failed", "file_name", queries.FileName, "error", err)
logger.Error(nil, "file store failed", "file_name", queries.FileName)
Expand Down Expand Up @@ -240,7 +274,7 @@ func (s *Service) fileUploadHandler(
}

fileMtdt := map[string]string{
manifest.EntryMetadataContentTypeKey: r.Header.Get(ContentTypeHeader), // Content-Type has already been validated.
manifest.EntryMetadataContentTypeKey: r.Header.Get(ContentTypeHeader),
manifest.EntryMetadataFilenameKey: queries.FileName,
}

Expand Down
38 changes: 38 additions & 0 deletions pkg/api/bzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,44 @@ func TestBzzFiles(t *testing.T) {
)
})

// Uploads simpleData as a single file named "plain.txt" without passing a
// Content-Type request option, then downloads it by the returned reference.
// The download must return the same bytes and length, a Content-Type of
// "text/plain; charset=utf-8", and an inline Content-Disposition carrying
// the uploaded file name.
// NOTE(review): assumes jsonhttptest does not add a default Content-Type
// header to the request — confirm against the helper.
t.Run("omit-content-type-uses-sniff", func(t *testing.T) {
fileName := "plain.txt"
var resp api.BzzUploadResponse
// Upload: only the deferred-upload and postage batch headers are set.
jsonhttptest.Request(t, client, http.MethodPost, fileUploadResource+"?name="+fileName, http.StatusCreated,
jsonhttptest.WithRequestHeader(api.SwarmDeferredUploadHeader, "true"),
jsonhttptest.WithRequestHeader(api.SwarmPostageBatchIdHeader, batchOkStr),
jsonhttptest.WithRequestBody(bytes.NewReader(simpleData)),
jsonhttptest.WithUnmarshalJSONResponse(&resp),
)
rootHash := resp.Reference.String()
// Download: verify body, length, and the response headers.
jsonhttptest.Request(t, client, http.MethodGet, fileDownloadResource(rootHash), http.StatusOK,
jsonhttptest.WithExpectedResponse(simpleData),
jsonhttptest.WithExpectedContentLength(len(simpleData)),
jsonhttptest.WithExpectedResponseHeader(api.ContentTypeHeader, "text/plain; charset=utf-8"),
jsonhttptest.WithExpectedResponseHeader(api.ContentDispositionHeader, fmt.Sprintf(`inline; filename="%s"`, fileName)),
)
})

// Uploads simpleData as a single file named "test.txt" with an explicit
// Content-Type of "image/png", then downloads it by the returned reference.
// The download must return the same bytes and length and echo back exactly
// the Content-Type that was sent, even though it differs from what the
// ".txt" file name suggests.
t.Run("image-content-type-preserved", func(t *testing.T) {
ct := "image/png"
fileName := "test.txt"
var resp api.BzzUploadResponse
// Upload with the caller-supplied Content-Type header.
jsonhttptest.Request(t, client, http.MethodPost, fileUploadResource+"?name="+fileName, http.StatusCreated,
jsonhttptest.WithRequestHeader(api.SwarmDeferredUploadHeader, "true"),
jsonhttptest.WithRequestHeader(api.SwarmPostageBatchIdHeader, batchOkStr),
jsonhttptest.WithRequestHeader(api.ContentTypeHeader, ct),
jsonhttptest.WithRequestBody(bytes.NewReader(simpleData)),
jsonhttptest.WithUnmarshalJSONResponse(&resp),
)
rootHash := resp.Reference.String()
// Download: the stored Content-Type must match the uploaded one.
jsonhttptest.Request(t, client, http.MethodGet, fileDownloadResource(rootHash), http.StatusOK,
jsonhttptest.WithExpectedResponse(simpleData),
jsonhttptest.WithExpectedContentLength(len(simpleData)),
jsonhttptest.WithExpectedResponseHeader(api.ContentTypeHeader, ct),
jsonhttptest.WithExpectedResponseHeader(api.ContentDispositionHeader, fmt.Sprintf(`inline; filename="%s"`, fileName)),
)
})

t.Run("upload-then-download-and-check-data", func(t *testing.T) {
fileName := "sample.html"
rootHash := "36e6c1bbdfee6ac21485d5f970479fd1df458d36df9ef4e8179708ed46da557f"
Expand Down
3 changes: 1 addition & 2 deletions pkg/api/dirs.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ func (s *Service) dirUploadHandler(
w http.ResponseWriter,
r *http.Request,
putter storer.PutterSession,
contentTypeString string,
encrypt bool,
tag uint64,
rLevel redundancy.Level,
Expand All @@ -58,7 +57,7 @@ func (s *Service) dirUploadHandler(
}

// The error is ignored because the header was already validated by the caller.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment is not relevant anymore, right? The header is no longer validated by the caller. Maybe something like this:

// Parse error is ignored; unsupported media types are caught by the default case below.

, or do we want to validate here?

mediaType, params, _ := mime.ParseMediaType(contentTypeString)
mediaType, params, _ := mime.ParseMediaType(r.Header.Get(ContentTypeHeader))

var dReader dirReader
switch mediaType {
Expand Down
Loading