diff --git a/CHANGELOG.md b/CHANGELOG.md index 8565c5676..bb864a5a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,11 @@ The following emojis are used to highlight certain changes: ### Changed +* `boxo/gateway` + * 🛠 The `IPFSBackend` interface was updated to make the responses of the + `Head` method more explicit. It now returns a `HeadResponse` instead of a + `files.Node`. + ### Removed ### Fixed diff --git a/gateway/blocks_backend.go b/gateway/blocks_backend.go index 208c92062..fc0685d62 100644 --- a/gateway/blocks_backend.go +++ b/gateway/blocks_backend.go @@ -153,11 +153,34 @@ func (bb *BlocksBackend) Get(ctx context.Context, path ImmutablePath, ranges ... return md, nil, err } + // Only a single range is supported in responses to HTTP Range Requests. + // When more than one is passed in the Range header, this library returns + // a response for the first one and ignores the remaining ones. + var ra *ByteRange + if len(ranges) > 0 { + ra = &ranges[0] + } + rootCodec := nd.Cid().Prefix().GetCodec() + // This covers both Raw blocks and terminal IPLD codecs like dag-cbor and dag-json // Note: while only cbor, json, dag-cbor, and dag-json are currently supported by gateways this could change + // Note: For the raw codec we return just the relevant range rather than the entire block if rootCodec != uint64(mc.DagPb) { - return md, NewGetResponseFromFile(files.NewBytesFile(nd.RawData())), nil + f := files.NewBytesFile(nd.RawData()) + + fileSize, err := f.Size() + if err != nil { + return ContentPathMetadata{}, nil, err + } + + if rootCodec == uint64(mc.Raw) { + if err := seekToRangeStart(f, ra); err != nil { + return ContentPathMetadata{}, nil, err + } + } + + return md, NewGetResponseFromReader(f, fileSize), nil } // This code path covers full graph, single file/directory, and range requests @@ -179,10 +202,23 @@ func (bb *BlocksBackend) Get(ctx context.Context, path ImmutablePath, ranges ... if sz < 0 { return ContentPathMetadata{}, nil, fmt.Errorf("directory cumulative DAG size cannot be negative") } - return md, NewGetResponseFromDirectoryListing(uint64(sz), dir.EnumLinksAsync(ctx)), nil + return md, NewGetResponseFromDirectoryListing(uint64(sz), dir.EnumLinksAsync(ctx), nil), nil } if file, ok := f.(files.File); ok { - return md, NewGetResponseFromFile(file), nil + fileSize, err := f.Size() + if err != nil { + return ContentPathMetadata{}, nil, err + } + + if err := seekToRangeStart(file, ra); err != nil { + return ContentPathMetadata{}, nil, err + } + + if s, ok := f.(*files.Symlink); ok { + return md, NewGetResponseFromSymlink(s, fileSize), nil + } + + return md, NewGetResponseFromReader(file, fileSize), nil } return ContentPathMetadata{}, nil, fmt.Errorf("data was not a valid file or directory: %w", ErrInternalServerError) // TODO: should there be a gateway invalid content type to abstract over the various IPLD error types?
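To make the range behavior introduced in `BlocksBackend.Get` concrete, here is a minimal, hypothetical sketch (not part of this change set) of the contract the handler now relies on: the reader wrapped by `NewGetResponseFromReader` is already positioned at the start of the first, and only honored, byte range. `seekToRangeStart` and `ByteRange` are the helpers used above; the function name and sample bytes are invented.

```go
package gateway

import (
	"bytes"
	"fmt"
	"io"
)

// exampleRangePositioning is illustrative only: after seekToRangeStart the
// reader begins at ByteRange.From, which is the state Get leaves the reader
// in before wrapping it with NewGetResponseFromReader.
func exampleRangePositioning() error {
	block := []byte("0123456789abcdef") // stand-in for nd.RawData()
	r := bytes.NewReader(block)

	ra := &ByteRange{From: 10} // only the first requested range is honored
	if err := seekToRangeStart(r, ra); err != nil {
		return err
	}

	rest, err := io.ReadAll(r)
	if err != nil {
		return err
	}
	fmt.Printf("%s\n", rest) // prints "abcdef": the bytes from offset 10 onward
	return nil
}
```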
@@ -211,7 +247,7 @@ func (bb *BlocksBackend) GetBlock(ctx context.Context, path ImmutablePath) (Cont return md, files.NewBytesFile(nd.RawData()), nil } -func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) { +func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { md, nd, err := bb.getNode(ctx, path) if err != nil { return md, nil, err @@ -219,7 +255,7 @@ func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentP rootCodec := nd.Cid().Prefix().GetCodec() if rootCodec != uint64(mc.DagPb) { - return md, files.NewBytesFile(nd.RawData()), nil + return md, NewHeadResponseForFile(files.NewBytesFile(nd.RawData()), int64(len(nd.RawData()))), nil } // TODO: We're not handling non-UnixFS dag-pb. There's a bit of a discrepancy @@ -229,7 +265,24 @@ func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentP return ContentPathMetadata{}, nil, err } - return md, fileNode, nil + sz, err := fileNode.Size() + if err != nil { + return ContentPathMetadata{}, nil, err + } + + if _, ok := fileNode.(files.Directory); ok { + return md, NewHeadResponseForDirectory(sz), nil + } + + if _, ok := fileNode.(*files.Symlink); ok { + return md, NewHeadResponseForSymlink(sz), nil + } + + if f, ok := fileNode.(files.File); ok { + return md, NewHeadResponseForFile(f, sz), nil + } + + return ContentPathMetadata{}, nil, fmt.Errorf("unsupported UnixFS file type") } // emptyRoot is a CAR root with the empty identity CID. CAR files are recommended diff --git a/gateway/gateway.go b/gateway/gateway.go index 19b801bba..089dd236c 100644 --- a/gateway/gateway.go +++ b/gateway/gateway.go @@ -260,21 +260,74 @@ type ByteRange struct { } type GetResponse struct { - bytes files.File + bytes io.ReadCloser + bytesSize int64 + symlink *files.Symlink directoryMetadata *directoryMetadata } +func (r *GetResponse) Close() error { + if r.bytes != nil { + return r.bytes.Close() + } + if r.symlink != nil { + return r.symlink.Close() + } + if r.directoryMetadata != nil { + if r.directoryMetadata.closeFn == nil { + return nil + } + return r.directoryMetadata.closeFn() + } + // Should be unreachable + return nil +} + +var _ io.Closer = (*GetResponse)(nil) + type directoryMetadata struct { dagSize uint64 entries <-chan unixfs.LinkResult + closeFn func() error +} + +func NewGetResponseFromReader(file io.ReadCloser, fullFileSize int64) *GetResponse { + return &GetResponse{bytes: file, bytesSize: fullFileSize} +} + +func NewGetResponseFromSymlink(symlink *files.Symlink, size int64) *GetResponse { + return &GetResponse{symlink: symlink, bytesSize: size} } -func NewGetResponseFromFile(file files.File) *GetResponse { - return &GetResponse{bytes: file} +func NewGetResponseFromDirectoryListing(dagSize uint64, entries <-chan unixfs.LinkResult, closeFn func() error) *GetResponse { + return &GetResponse{directoryMetadata: &directoryMetadata{dagSize: dagSize, entries: entries, closeFn: closeFn}} } -func NewGetResponseFromDirectoryListing(dagSize uint64, entries <-chan unixfs.LinkResult) *GetResponse { - return &GetResponse{directoryMetadata: &directoryMetadata{dagSize, entries}} +type HeadResponse struct { + bytesSize int64 + startingBytes io.ReadCloser + isFile bool + isSymLink bool + isDir bool +} + +func (r *HeadResponse) Close() error { + if r.startingBytes != nil { + return r.startingBytes.Close() + } + return nil +} + +func NewHeadResponseForFile(startingBytes io.ReadCloser, size int64) 
*HeadResponse { + return &HeadResponse{startingBytes: startingBytes, isFile: true, bytesSize: size} +} + +func NewHeadResponseForSymlink(symlinkSize int64) *HeadResponse { + return &HeadResponse{isSymLink: true, bytesSize: symlinkSize} +} + +func NewHeadResponseForDirectory(dagSize int64) *HeadResponse { + return &HeadResponse{isDir: true, bytesSize: dagSize} } // IPFSBackend is the required set of functionality used to implement the IPFS @@ -305,6 +358,9 @@ type IPFSBackend interface { // file will still need magic bytes from the very beginning for content // type sniffing). // - A range request for a directory currently holds no semantic meaning. + // - For non-UnixFS, non-raw data (e.g. terminal IPLD blocks such as dag-cbor or dag-json), the returned + // response bytes should be the complete block, returned as an [io.ReadSeekCloser] that starts at the beginning + // of the block rather than as an [io.ReadCloser] that starts at the beginning of the range request. + // // [HTTP Byte Ranges]: https://httpwg.org/specs/rfc9110.html#rfc.section.14.1.2 Get(context.Context, ImmutablePath, ...ByteRange) (ContentPathMetadata, *GetResponse, error) @@ -316,12 +372,16 @@ type IPFSBackend interface { // GetBlock returns a single block of data GetBlock(context.Context, ImmutablePath) (ContentPathMetadata, files.File, error) - // Head returns a file or directory depending on what the path is that has been requested. - // For UnixFS files should return a file which has the correct file size and either returns the ContentType in ContentPathMetadata or - // enough data (e.g. 3kiB) such that the content type can be determined by sniffing. - // For all other data types returning just size information is sufficient - // TODO: give function more explicit return types - Head(context.Context, ImmutablePath) (ContentPathMetadata, files.Node, error) + // Head returns a [HeadResponse] depending on what the requested path resolves to. + // For UnixFS files (and raw blocks) it should return the size of the file and either set the ContentType in + // ContentPathMetadata or send back a reader from the beginning of the file with enough data (e.g. 3kiB) such that + // the content type can be determined by sniffing. + // + // For UnixFS directories and symlinks, only setting the size and type is necessary. + // + // For all other data types (e.g. (DAG-)CBOR/JSON blocks), returning the size information and setting + // the content-type is sufficient. + Head(context.Context, ImmutablePath) (ContentPathMetadata, *HeadResponse, error)
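For implementers of `IPFSBackend`, the following is a hedged sketch of what the new `Head` contract can look like in practice; it is not boxo's `BlocksBackend`. The `myBackend` type and its fields are assumptions made for illustration, while `NewHeadResponseForFile`, `NewHeadResponseForDirectory`, and `NewHeadResponseForSymlink` are the constructors added in this change.

```go
package gateway

import (
	"bytes"
	"context"
	"io"
)

// myBackend is a hypothetical backend serving a single in-memory file; it
// sketches only Head, not the rest of the IPFSBackend interface.
type myBackend struct {
	data []byte              // pretend file content
	md   ContentPathMetadata // pretend resolution metadata
}

func (b *myBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {
	size := int64(len(b.data))

	// Hand back the leading bytes (a few KiB is plenty) so the handler can
	// sniff the Content-Type when md.ContentType is not set. A directory
	// would instead return NewHeadResponseForDirectory(dagSize), and a
	// symlink NewHeadResponseForSymlink(size); neither needs a reader.
	prefix := b.data
	if size > 3072 {
		prefix = b.data[:3072]
	}
	return b.md, NewHeadResponseForFile(io.NopCloser(bytes.NewReader(prefix)), size), nil
}
```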
// ResolvePath resolves the path using UnixFS resolver. If the path does not exist due to a missing link, it should return an error of type: diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index 98996acb3..d041cad55 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -719,7 +719,7 @@ func (mb *errorMockBackend) GetBlock(ctx context.Context, path ImmutablePath) (C return ContentPathMetadata{}, nil, mb.err } -func (mb *errorMockBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) { +func (mb *errorMockBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { return ContentPathMetadata{}, nil, mb.err } @@ -803,7 +803,7 @@ func (mb *panicMockBackend) GetBlock(ctx context.Context, immutablePath Immutabl panic("i am panicking") } -func (mb *panicMockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, files.Node, error) { +func (mb *panicMockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { panic("i am panicking") } diff --git a/gateway/handler.go b/gateway/handler.go index ecf505617..af20e2b6e 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -92,13 +92,19 @@ func NewHandler(c Config, backend IPFSBackend) http.Handler { return newHandlerWithMetrics(&c, backend) } -// serveContent replies to the request using the content in the provided ReadSeeker +// serveContent replies to the request using the content in the provided Reader // and returns the status code written and any error encountered during a write. -// It wraps http.serveContent which takes care of If-None-Match+Etag, +// It wraps httpServeContent (a close clone of http.ServeContent) which takes care of If-None-Match+Etag, // Content-Length and range requests. -func serveContent(w http.ResponseWriter, req *http.Request, name string, modtime time.Time, content io.ReadSeeker) (int, bool, error) { +// +// Notes: +// 1. For HEAD requests the io.Reader may be nil +// 2. When the io.Reader is needed, it must start at the beginning of the first Range Request component, if one exists +// 3. Only a single HTTP Range Request is supported; if more than one is requested, only the first is honored +// 4. The Content-Type header must already be set +func serveContent(w http.ResponseWriter, req *http.Request, modtime time.Time, size int64, content io.Reader) (int, bool, error) { ew := &errRecordingResponseWriter{ResponseWriter: w} - http.ServeContent(ew, req, name, modtime, content) + httpServeContent(ew, req, modtime, size, content) // When we calculate some metrics we want a flag that lets us to ignore // errors and 304 Not Modified, and only care when requested data @@ -554,40 +560,6 @@ func etagMatch(ifNoneMatchHeader string, etagsToCheck ...string) bool { return false } -// scanETag determines if a syntactically valid ETag is present at s. If so, -// the ETag and remaining text after consuming ETag is returned. Otherwise, -// it returns "", "". -// (This is the same logic as one executed inside of http.ServeContent) -func scanETag(s string) (etag string, remain string) { - s = textproto.TrimString(s) - start := 0 - if strings.HasPrefix(s, "W/") { - start = 2 - } - if len(s[start:]) < 2 || s[start] != '"' { - return "", "" - } - // ETag is either W/"text" or "text". - // See RFC 7232 2.3. - for i := start + 1; i < len(s); i++ { - c := s[i] - switch { - // Character values allowed in ETags.
- case c == 0x21 || c >= 0x23 && c <= 0x7E || c >= 0x80: - case c == '"': - return s[:i+1], s[i+1:] - default: - return "", "" - } - } - return "", "" -} - -// etagWeakMatch reports whether a and b match using weak ETag comparison. -func etagWeakMatch(a, b string) bool { - return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/") -} - // getEtag generates an ETag value based on an HTTP Request, a CID and a response // format. This function DOES NOT generate ETags for CARs or IPNS Records. func getEtag(r *http.Request, cid cid.Cid, responseFormat string) string { @@ -776,6 +748,13 @@ func (i *handler) handleWebRequestErrors(w http.ResponseWriter, r *http.Request, return ImmutablePath{}, false } + // If the error is not an IPLD traversal error then we should not be looking for _redirects or legacy 404s + if !isErrNotFound(err) { + err = fmt.Errorf("failed to resolve %s: %w", debugStr(contentPath.String()), err) + i.webError(w, r, err, http.StatusInternalServerError) + return ImmutablePath{}, false + } + // If we have origin isolation (subdomain gw, DNSLink website), // and response type is UnixFS (default for website hosting) // we can leverage the presence of an _redirects file and apply rules defined there. diff --git a/gateway/handler_block.go b/gateway/handler_block.go index dbff9a7ad..9d2a93b38 100644 --- a/gateway/handler_block.go +++ b/gateway/handler_block.go @@ -38,9 +38,19 @@ func (i *handler) serveRawBlock(ctx context.Context, w http.ResponseWriter, r *h w.Header().Set("Content-Type", rawResponseFormat) w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + sz, err := data.Size() + if err != nil { + i.handleRequestErrors(w, r, rq.contentPath, err) + return false + } + + if !i.seekToStartOfFirstRange(w, r, data) { + return false + } + // ServeContent will take care of // If-None-Match+Etag, Content-Length and range requests - _, dataSent, _ := serveContent(w, r, name, modtime, data) + _, dataSent, _ := serveContent(w, r, modtime, sz, data) if dataSent { // Update metrics diff --git a/gateway/handler_codec.go b/gateway/handler_codec.go index 007a52fda..97dfaad0a 100644 --- a/gateway/handler_codec.go +++ b/gateway/handler_codec.go @@ -69,10 +69,16 @@ func (i *handler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http defer data.Close() setIpfsRootsHeader(w, rq, &pathMetadata) - return i.renderCodec(ctx, w, r, rq, data) + + blockSize, err := data.Size() + if !i.handleRequestErrors(w, r, rq.contentPath, err) { + return false + } + + return i.renderCodec(ctx, w, r, rq, blockSize, data) } -func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, rq *requestData, blockData io.ReadSeekCloser) bool { +func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, rq *requestData, blockSize int64, blockData io.ReadSeekCloser) bool { resolvedPath := rq.pathMetadata.LastSegment ctx, span := spanTrace(ctx, "Handler.RenderCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", rq.responseFormat))) defer span.End() @@ -105,7 +111,7 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt // Set HTTP headers (for caching, etc). Etag will be replaced if handled by serveCodecHTML. 
modtime := addCacheControlHeaders(w, r, rq.contentPath, resolvedPath.Cid(), responseContentType) - name := setCodecContentDisposition(w, r, resolvedPath, responseContentType) + _ = setCodecContentDisposition(w, r, resolvedPath, responseContentType) w.Header().Set("Content-Type", responseContentType) w.Header().Set("X-Content-Type-Options", "nosniff") @@ -121,7 +127,7 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt } else { // This covers CIDs with codec 'json' and 'cbor' as those do not have // an explicit requested content type. - return i.serveCodecRaw(ctx, w, r, blockData, rq.contentPath, name, modtime, rq.begin) + return i.serveCodecRaw(ctx, w, r, blockSize, blockData, rq.contentPath, modtime, rq.begin) } } @@ -131,7 +137,7 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt if ok { for _, skipCodec := range skipCodecs { if skipCodec == cidCodec { - return i.serveCodecRaw(ctx, w, r, blockData, rq.contentPath, name, modtime, rq.begin) + return i.serveCodecRaw(ctx, w, r, blockSize, blockData, rq.contentPath, modtime, rq.begin) } } } @@ -149,7 +155,7 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt return i.serveCodecConverted(ctx, w, r, blockCid, blockData, rq.contentPath, toCodec, modtime, rq.begin) } -func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, blockCid cid.Cid, blockData io.ReadSeekCloser, resolvedPath ipath.Resolved, contentPath ipath.Path) bool { +func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, blockCid cid.Cid, blockData io.Reader, resolvedPath ipath.Resolved, contentPath ipath.Path) bool { // WithHostname may have constructed an IPFS (or IPNS) path using the Host header. // In this case, we need the original path for constructing the redirect. requestURI, err := url.ParseRequestURI(r.RequestURI) @@ -207,7 +213,7 @@ func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r * // parseNode does a best effort attempt to parse this request's block such that // a preview can be displayed in the gateway. If something fails along the way, // returns nil, therefore not displaying the preview. 
-func parseNode(blockCid cid.Cid, blockData io.ReadSeekCloser) *assets.ParsedNode { +func parseNode(blockCid cid.Cid, blockData io.Reader) *assets.ParsedNode { codec := blockCid.Prefix().Codec decoder, err := multicodec.LookupDecoder(codec) if err != nil { @@ -229,10 +235,14 @@ func parseNode(blockCid cid.Cid, blockData io.ReadSeekCloser) *assets.ParsedNode } // serveCodecRaw returns the raw block without any conversion -func (i *handler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, blockData io.ReadSeekCloser, contentPath ipath.Path, name string, modtime, begin time.Time) bool { +func (i *handler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, blockSize int64, blockData io.ReadSeekCloser, contentPath ipath.Path, modtime, begin time.Time) bool { // ServeContent will take care of - // If-None-Match+Etag, Content-Length and range requests - _, dataSent, _ := serveContent(w, r, name, modtime, blockData) + // If-None-Match+Etag, Content-Length and setting range request headers after we've already seeked to the start of + // the first range + if !i.seekToStartOfFirstRange(w, r, blockData) { + return false + } + _, dataSent, _ := serveContent(w, r, modtime, blockSize, blockData) if dataSent { // Update metrics @@ -243,7 +253,7 @@ func (i *handler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *h } // serveCodecConverted returns payload converted to codec specified in toCodec -func (i *handler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, blockCid cid.Cid, blockData io.ReadSeekCloser, contentPath ipath.Path, toCodec mc.Code, modtime, begin time.Time) bool { +func (i *handler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, blockCid cid.Cid, blockData io.ReadCloser, contentPath ipath.Path, toCodec mc.Code, modtime, begin time.Time) bool { codec := blockCid.Prefix().Codec decoder, err := multicodec.LookupDecoder(codec) if err != nil { diff --git a/gateway/handler_defaults.go b/gateway/handler_defaults.go index de31c1fc1..22c397ade 100644 --- a/gateway/handler_defaults.go +++ b/gateway/handler_defaults.go @@ -4,12 +4,12 @@ import ( "context" "errors" "fmt" + "io" "net/http" "net/textproto" "strconv" "strings" - "github.com/ipfs/boxo/files" mc "github.com/multiformats/go-multicodec" "go.opentelemetry.io/otel/attribute" @@ -21,25 +21,23 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h defer span.End() var ( - pathMetadata ContentPathMetadata - bytesResponse files.File - isDirectoryHeadRequest bool - directoryMetadata *directoryMetadata - err error - ranges []ByteRange + pathMetadata ContentPathMetadata + err error + ranges []ByteRange + headResp *HeadResponse + getResp *GetResponse ) switch r.Method { case http.MethodHead: - var data files.Node - pathMetadata, data, err = i.backend.Head(ctx, rq.mostlyResolvedPath()) + pathMetadata, headResp, err = i.backend.Head(ctx, rq.mostlyResolvedPath()) if err != nil { if isWebRequest(rq.responseFormat) { forwardedPath, continueProcessing := i.handleWebRequestErrors(w, r, rq.mostlyResolvedPath(), rq.immutablePath, rq.contentPath, err, rq.logger) if !continueProcessing { return false } - pathMetadata, data, err = i.backend.Head(ctx, forwardedPath) + pathMetadata, headResp, err = i.backend.Head(ctx, forwardedPath) if err != nil { err = fmt.Errorf("failed to resolve %s: %w", debugStr(rq.contentPath.String()), err) i.webError(w, r, err, http.StatusInternalServerError) @@ -51,30 +49,21 @@ func 
(i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h } } } - defer data.Close() - if _, ok := data.(files.Directory); ok { - isDirectoryHeadRequest = true - } else if f, ok := data.(files.File); ok { - bytesResponse = f - } else { - i.webError(w, r, fmt.Errorf("unsupported response type"), http.StatusInternalServerError) - return false - } + defer headResp.Close() case http.MethodGet: rangeHeader := r.Header.Get("Range") if rangeHeader != "" { // TODO: Add tests for range parsing - ranges, err = parseRange(rangeHeader) + ranges, err = parseRangeWithoutLength(rangeHeader) if err != nil { i.webError(w, r, fmt.Errorf("invalid range request: %w", err), http.StatusBadRequest) return false } } - var getResp *GetResponse // TODO: passing only resolved path here, instead of contentPath is // harming content routing. Knowing original immutableContentPath will - // allow backend to find providers for parents, even when internal + // allow backend to find providers for parents, even when internal // CIDs are not announced, and will provide better key for caching // related DAGs. pathMetadata, getResp, err = i.backend.Get(ctx, rq.mostlyResolvedPath(), ranges...) @@ -96,13 +85,7 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h } } } - if getResp.bytes != nil { - bytesResponse = getResp.bytes - defer bytesResponse.Close() - } else { - directoryMetadata = getResp.directoryMetadata - } - + defer getResp.Close() default: // This shouldn't be possible to reach which is why it is a 500 rather than 4XX error i.webError(w, r, fmt.Errorf("invalid method: cannot use this HTTP method with the given request"), http.StatusInternalServerError) @@ -114,27 +97,58 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h resolvedPath := pathMetadata.LastSegment switch mc.Code(resolvedPath.Cid().Prefix().Codec) { case mc.Json, mc.DagJson, mc.Cbor, mc.DagCbor: - if bytesResponse == nil { // This should never happen - i.webError(w, r, fmt.Errorf("decoding error: data not usable as a file"), http.StatusInternalServerError) - return false - } rq.logger.Debugw("serving codec", "path", rq.contentPath) - return i.renderCodec(r.Context(), w, r, rq, bytesResponse) + var blockSize int64 + var dataToRender io.ReadSeekCloser + if headResp != nil { + blockSize = headResp.bytesSize + dataToRender = nil + } else { + blockSize = getResp.bytesSize + dataAsReadSeekCloser, ok := getResp.bytes.(io.ReadSeekCloser) + if !ok { + i.webError(w, r, fmt.Errorf("expected returned non-UnixFS data to be seekable"), http.StatusInternalServerError) + } + dataToRender = dataAsReadSeekCloser + } + + return i.renderCodec(r.Context(), w, r, rq, blockSize, dataToRender) default: rq.logger.Debugw("serving unixfs", "path", rq.contentPath) ctx, span := spanTrace(ctx, "Handler.ServeUnixFS", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() - // Handling Unixfs file - if bytesResponse != nil { - rq.logger.Debugw("serving unixfs file", "path", rq.contentPath) - return i.serveFile(ctx, w, r, resolvedPath, rq.contentPath, bytesResponse, pathMetadata.ContentType, rq.begin) - } - - // Handling Unixfs directory - if directoryMetadata != nil || isDirectoryHeadRequest { - rq.logger.Debugw("serving unixfs directory", "path", rq.contentPath) - return i.serveDirectory(ctx, w, r, resolvedPath, rq.contentPath, isDirectoryHeadRequest, directoryMetadata, ranges, rq.begin, rq.logger) + // Handle UnixFS HEAD requests + if headResp != nil { + if 
headResp.isFile { + rq.logger.Debugw("serving unixfs file", "path", rq.contentPath) + return i.serveFile(ctx, w, r, resolvedPath, rq.contentPath, headResp.bytesSize, headResp.startingBytes, false, true, pathMetadata.ContentType, rq.begin) + } else if headResp.isSymLink { + rq.logger.Debugw("serving unixfs file", "path", rq.contentPath) + return i.serveFile(ctx, w, r, resolvedPath, rq.contentPath, headResp.bytesSize, nil, true, true, pathMetadata.ContentType, rq.begin) + } else if headResp.isDir { + rq.logger.Debugw("serving unixfs directory", "path", rq.contentPath) + return i.serveDirectory(ctx, w, r, resolvedPath, rq.contentPath, true, nil, ranges, rq.begin, rq.logger) + } + } else { + if getResp.bytes != nil { + rq.logger.Debugw("serving unixfs file", "path", rq.contentPath) + rangeRequestStartsAtZero := true + if len(ranges) > 0 { + ra := ranges[0] + if ra.From != 0 { + rangeRequestStartsAtZero = false + } + } + return i.serveFile(ctx, w, r, resolvedPath, rq.contentPath, getResp.bytesSize, getResp.bytes, false, rangeRequestStartsAtZero, pathMetadata.ContentType, rq.begin) + } else if getResp.symlink != nil { + rq.logger.Debugw("serving unixfs file", "path", rq.contentPath) + // Note: this ignores range requests against symlinks + return i.serveFile(ctx, w, r, resolvedPath, rq.contentPath, getResp.bytesSize, getResp.symlink, true, true, pathMetadata.ContentType, rq.begin) + } else if getResp.directoryMetadata != nil { + rq.logger.Debugw("serving unixfs directory", "path", rq.contentPath) + return i.serveDirectory(ctx, w, r, resolvedPath, rq.contentPath, false, getResp.directoryMetadata, ranges, rq.begin, rq.logger) + } } i.webError(w, r, fmt.Errorf("unsupported UnixFS type"), http.StatusInternalServerError) @@ -142,8 +156,8 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h } } -// parseRange parses a Range header string as per RFC 7233. -func parseRange(s string) ([]ByteRange, error) { +// parseRangeWithoutLength parses a Range header string as per RFC 7233. 
+func parseRangeWithoutLength(s string) ([]ByteRange, error) { if s == "" { return nil, nil // header not present } diff --git a/gateway/handler_unixfs__redirects.go b/gateway/handler_unixfs__redirects.go index a96b87d36..978f55647 100644 --- a/gateway/handler_unixfs__redirects.go +++ b/gateway/handler_unixfs__redirects.go @@ -11,8 +11,6 @@ import ( "go.uber.org/zap" ipath "github.com/ipfs/boxo/coreiface/path" - "github.com/ipfs/boxo/files" - redirects "github.com/ipfs/go-ipfs-redirects-file" ) @@ -160,12 +158,12 @@ func (i *handler) getRedirectRules(r *http.Request, redirectsPath ImmutablePath) } return false, nil, err } + defer redirectsFileGetResp.Close() if redirectsFileGetResp.bytes == nil { return false, nil, fmt.Errorf(" _redirects is not a file") } f := redirectsFileGetResp.bytes - defer f.Close() // Parse redirect rules from file redirectRules, err := redirects.Parse(f) @@ -186,19 +184,16 @@ func (i *handler) serve4xx(w http.ResponseWriter, r *http.Request, content4xxPat if err != nil { return err } + defer getresp.Close() if getresp.bytes == nil { return fmt.Errorf("could not convert node for %d page to file", status) } content4xxFile := getresp.bytes - defer content4xxFile.Close() content4xxCid := pathMetadata.LastSegment.Cid() - size, err := content4xxFile.Size() - if err != nil { - return fmt.Errorf("could not get size of %d page", status) - } + size := getresp.bytesSize logger.Debugf("using _redirects: custom %d file at %q", status, content4xxPath) w.Header().Set("Content-Type", "text/html") @@ -224,29 +219,24 @@ func hasOriginIsolation(r *http.Request) bool { // This is provided only for backward-compatibility, until websites migrate // to 404s managed via _redirects file (https://github.com/ipfs/specs/pull/290) func (i *handler) serveLegacy404IfPresent(w http.ResponseWriter, r *http.Request, imPath ImmutablePath, logger *zap.SugaredLogger) bool { - resolved404File, ctype, err := i.searchUpTreeFor404(r, imPath) + resolved404File, resolved404FileSize, ctype, err := i.searchUpTreeFor404(r, imPath) if err != nil { return false } defer resolved404File.Close() - size, err := resolved404File.Size() - if err != nil { - return false - } - logger.Debugw("using pretty 404 file", "path", imPath) w.Header().Set("Content-Type", ctype) - w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) + w.Header().Set("Content-Length", strconv.FormatInt(resolved404FileSize, 10)) w.WriteHeader(http.StatusNotFound) - _, err = io.CopyN(w, resolved404File, size) + _, err = io.CopyN(w, resolved404File, resolved404FileSize) return err == nil } -func (i *handler) searchUpTreeFor404(r *http.Request, imPath ImmutablePath) (files.File, string, error) { +func (i *handler) searchUpTreeFor404(r *http.Request, imPath ImmutablePath) (io.ReadCloser, int64, string, error) { filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) if err != nil { - return nil, "", err + return nil, 0, "", err } pathComponents := strings.Split(imPath.String(), "/") @@ -267,12 +257,14 @@ func (i *handler) searchUpTreeFor404(r *http.Request, imPath ImmutablePath) (fil continue } if getResp.bytes == nil { - return nil, "", fmt.Errorf("found a pretty 404 but it was not a file") + // Close the response here if not returning bytes, otherwise it's the caller's responsibility to close the io.ReadCloser + getResp.Close() + return nil, 0, "", fmt.Errorf("found a pretty 404 but it was not a file") } - return getResp.bytes, ctype, nil + return getResp.bytes, getResp.bytesSize, ctype, nil } - return nil, "", 
fmt.Errorf("no pretty 404 in any parent folder") + return nil, 0, "", fmt.Errorf("no pretty 404 in any parent folder") } func preferred404Filename(acceptHeaders []string) (string, string, error) { diff --git a/gateway/handler_unixfs_dir.go b/gateway/handler_unixfs_dir.go index 2808cfdc4..1ece9c96d 100644 --- a/gateway/handler_unixfs_dir.go +++ b/gateway/handler_unixfs_dir.go @@ -3,6 +3,7 @@ package gateway import ( "context" "fmt" + "io" "net/http" "net/url" gopath "path" @@ -59,42 +60,60 @@ func (i *handler) serveDirectory(ctx context.Context, w http.ResponseWriter, r * } // Check if directory has index.html, if so, serveFile - idxPath := ipath.Join(contentPath, "index.html") - imIndexPath, err := NewImmutablePath(ipath.Join(resolvedPath, "index.html")) + appendIndexHtml := func(p ipath.Path) ipath.Path { + basePath := p.String() + if basePath[len(basePath)-1] != '/' { + basePath += "/" + } + return ipath.New(basePath + "index.html") + } + + idxPath := appendIndexHtml(contentPath) + imIndexPath, err := NewImmutablePath(appendIndexHtml(resolvedPath)) if err != nil { i.webError(w, r, err, http.StatusInternalServerError) return false } // TODO: could/should this all be skipped to have HEAD requests just return html content type and save the complexity? If so can we skip the above code as well? - var idxFile files.File + var idxFileBytes io.ReadCloser + var idxFileSize int64 + var returnRangeStartsAtZero bool if isHeadRequest { - var idx files.Node - _, idx, err = i.backend.Head(ctx, imIndexPath) + var idxHeadResp *HeadResponse + _, idxHeadResp, err = i.backend.Head(ctx, imIndexPath) if err == nil { - f, ok := idx.(files.File) - if !ok { + defer idxHeadResp.Close() + if !idxHeadResp.isFile { i.webError(w, r, fmt.Errorf("%q could not be read: %w", imIndexPath, files.ErrNotReader), http.StatusUnprocessableEntity) return false } - idxFile = f + returnRangeStartsAtZero = true + idxFileBytes = idxHeadResp.startingBytes + idxFileSize = idxHeadResp.bytesSize } } else { - var getResp *GetResponse - _, getResp, err = i.backend.Get(ctx, imIndexPath, ranges...) + var idxGetResp *GetResponse + _, idxGetResp, err = i.backend.Get(ctx, imIndexPath, ranges...) 
if err == nil { - if getResp.bytes == nil { + defer idxGetResp.Close() + if idxGetResp.bytes == nil { i.webError(w, r, fmt.Errorf("%q could not be read: %w", imIndexPath, files.ErrNotReader), http.StatusUnprocessableEntity) return false } - idxFile = getResp.bytes + if len(ranges) > 0 { + ra := ranges[0] + returnRangeStartsAtZero = ra.From == 0 + } + idxFileBytes = idxGetResp.bytes + idxFileSize = idxGetResp.bytesSize } } if err == nil { logger.Debugw("serving index.html file", "path", idxPath) // write to request - success := i.serveFile(ctx, w, r, resolvedPath, idxPath, idxFile, "text/html", begin) + success := i.serveFile(ctx, w, r, resolvedPath, idxPath, idxFileSize, idxFileBytes, false, returnRangeStartsAtZero, "text/html", begin) if success { i.unixfsDirIndexGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) } diff --git a/gateway/handler_unixfs_file.go b/gateway/handler_unixfs_file.go index cd924e5aa..8375bfcd2 100644 --- a/gateway/handler_unixfs_file.go +++ b/gateway/handler_unixfs_file.go @@ -1,6 +1,7 @@ package gateway import ( + "bytes" "context" "fmt" "io" @@ -12,14 +13,13 @@ import ( "github.com/gabriel-vasile/mimetype" ipath "github.com/ipfs/boxo/coreiface/path" - "github.com/ipfs/boxo/files" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) // serveFile returns data behind a file along with HTTP headers based on // the file itself, its CID and the contentPath used for accessing it. -func (i *handler) serveFile(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, file files.File, fileContentType string, begin time.Time) bool { +func (i *handler) serveFile(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, fileSize int64, fileBytes io.ReadCloser, isSymlink bool, returnRangeStartsAtZero bool, fileContentType string, begin time.Time) bool { _, span := spanTrace(ctx, "Handler.ServeFile", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() @@ -29,14 +29,7 @@ func (i *handler) serveFile(ctx context.Context, w http.ResponseWriter, r *http. // Set Content-Disposition name := addContentDispositionHeader(w, r, contentPath) - // Prepare size value for Content-Length HTTP header (set inside of http.ServeContent) - size, err := file.Size() - if err != nil { - http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway) - return false - } - - if size == 0 { + if fileSize == 0 { // We override null files to 200 to avoid issues with fragment caching reverse proxies. // Also whatever you are asking for, it's cheaper to just give you the complete file (nothing). // TODO: remove this if clause once https://github.com/golang/go/issues/54794 is fixed in two latest releases of go @@ -45,16 +38,11 @@ func (i *handler) serveFile(ctx context.Context, w http.ResponseWriter, r *http. return true } - // Lazy seeker enables efficient range-requests and HTTP HEAD responses - content := &lazySeeker{ - size: size, - reader: file, - } - + var content io.Reader = fileBytes // Calculate deterministic value for Content-Type HTTP header // (we prefer to do it here, rather than using implicit sniffing in http.ServeContent) var ctype string - if _, isSymlink := file.(*files.Symlink); isSymlink { + if isSymlink { // We should be smarter about resolving symlinks but this is the // "most correct" we can be without doing that. 
ctype = "inode/symlink" @@ -63,21 +51,24 @@ func (i *handler) serveFile(ctx context.Context, w http.ResponseWriter, r *http. if ctype == "" { ctype = fileContentType } - if ctype == "" { + if ctype == "" && returnRangeStartsAtZero { // uses https://github.com/gabriel-vasile/mimetype library to determine the content type. // Fixes https://github.com/ipfs/kubo/issues/7252 - mimeType, err := mimetype.DetectReader(content) + + // We read from a TeeReader into a buffer and then put the buffer in front of the original reader to + // simulate the behavior of being able to read from the start and then seek back to the beginning while + // only having a Reader and not a ReadSeeker + var buf bytes.Buffer + tr := io.TeeReader(fileBytes, &buf) + + mimeType, err := mimetype.DetectReader(tr) if err != nil { http.Error(w, fmt.Sprintf("cannot detect content-type: %s", err.Error()), http.StatusInternalServerError) return false } ctype = mimeType.String() - _, err = content.Seek(0, io.SeekStart) - if err != nil { - http.Error(w, "seeker can't seek", http.StatusInternalServerError) - return false - } + content = io.MultiReader(&buf, fileBytes) } // Strip the encoding from the HTML Content-Type header and let the // browser figure it out. @@ -93,7 +84,7 @@ func (i *handler) serveFile(ctx context.Context, w http.ResponseWriter, r *http. // ServeContent will take care of // If-None-Match+Etag, Content-Length and range requests - _, dataSent, _ := serveContent(w, r, name, modtime, content) + _, dataSent, _ := serveContent(w, r, modtime, fileSize, content) // Was response successful? if dataSent { diff --git a/gateway/metrics.go b/gateway/metrics.go index 69e81425f..6035c74b5 100644 --- a/gateway/metrics.go +++ b/gateway/metrics.go @@ -96,7 +96,7 @@ func (b *ipfsBackendWithMetrics) GetBlock(ctx context.Context, path ImmutablePat return md, n, err } -func (b *ipfsBackendWithMetrics) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) { +func (b *ipfsBackendWithMetrics) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { begin := time.Now() name := "IPFSBackend.Head" ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) diff --git a/gateway/serve_http_content.go b/gateway/serve_http_content.go new file mode 100644 index 000000000..2bb27ae04 --- /dev/null +++ b/gateway/serve_http_content.go @@ -0,0 +1,472 @@ +package gateway + +import ( + "errors" + "fmt" + "io" + "net/http" + "net/textproto" + "strconv" + "strings" + "time" +) + +// errNoOverlap is returned by serveContent's parseRange if first-byte-pos of +// all of the byte-range-spec values is greater than the content size. +var errNoOverlap = errors.New("invalid range: failed to overlap") + +func headerGetExact(h http.Header, key string) string { + if v := h[key]; len(v) > 0 { + return v[0] + } + return "" +} + +// httpServeContent replies to the request using the content in the +// provided Reader. +// +// The main benefit of httpServeContent over io.Copy is that it handles Range requests properly, +// handles If-Match, If-Unmodified-Since, If-None-Match, If-Modified-Since, and If-Range requests. +// +// If modtime is not the zero time or Unix epoch, ServeContent +// includes it in a Last-Modified header in the response. If the +// request includes an If-Modified-Since header, ServeContent uses +// modtime to decide whether the content needs to be sent at all. 
+// +// If the caller has set w's ETag header formatted per RFC 7232, section 2.3, +// ServeContent uses it to handle requests using If-Match, If-None-Match, or If-Range. +// +// Notable differences from http.ServeContent: +// 1. Takes an io.Reader instead of an io.ReadSeeker +// 2. Requires the size to be passed in explicitly instead of discovered via Seeker behavior +// 3. Only handles a single HTTP Range; if multiple are requested, it returns the first +// 4. The passed io.Reader must start at wherever the HTTP Range Request will start +// 5. Requires the Content-Type header to already be set +// 6. Does not require the name to be passed in for content sniffing +// 7. content may be nil for HEAD requests +func httpServeContent(w http.ResponseWriter, r *http.Request, modtime time.Time, size int64, content io.Reader) { + setLastModified(w, modtime) + done, rangeReq := checkPreconditions(w, r, modtime) + if done { + return + } + + code := http.StatusOK + + // handle Content-Range header. + sendSize := size + if size >= 0 { + ranges, err := parseRange(rangeReq, size) + if err != nil { + if err == errNoOverlap { + w.Header().Set("Content-Range", fmt.Sprintf("bytes */%d", size)) + } + http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable) + return + } + if sumRangesSize(ranges) > size { + // The total number of bytes in all the ranges + // is larger than the size of the file by + // itself, so this is probably an attack, or a + // dumb client. Ignore the range request. + ranges = nil + } + + // We only support a single range request; if more than one is submitted, we just send back the first + if len(ranges) > 0 { + ra := ranges[0] + // RFC 7233, Section 4.1: + // "If a single part is being transferred, the server + // generating the 206 response MUST generate a + // Content-Range header field, describing what range + // of the selected representation is enclosed, and a + // payload consisting of the range. + // ... + // A server MUST NOT generate a multipart response to + // a request for a single range, since a client that + // does not request multiple parts might not support + // multipart responses." + + sendSize = ra.length + code = http.StatusPartialContent + w.Header().Set("Content-Range", ra.contentRange(size)) + } + + w.Header().Set("Accept-Ranges", "bytes") + if w.Header().Get("Content-Encoding") == "" { + w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10)) + } + } + + w.WriteHeader(code) + + if r.Method != "HEAD" { + var sendContent io.Reader = content + io.CopyN(w, sendContent, sendSize) + } +} + +// scanETag determines if a syntactically valid ETag is present at s. If so, +// the ETag and remaining text after consuming ETag is returned. Otherwise, +// it returns "", "". +func scanETag(s string) (etag string, remain string) { + s = textproto.TrimString(s) + start := 0 + if strings.HasPrefix(s, "W/") { + start = 2 + } + if len(s[start:]) < 2 || s[start] != '"' { + return "", "" + } + // ETag is either W/"text" or "text". + // See RFC 7232 2.3. + for i := start + 1; i < len(s); i++ { + c := s[i] + switch { + // Character values allowed in ETags. + case c == 0x21 || c >= 0x23 && c <= 0x7E || c >= 0x80: + case c == '"': + return s[:i+1], s[i+1:] + default: + return "", "" + } + } + return "", "" +} + +// etagStrongMatch reports whether a and b match using strong ETag comparison. +// Assumes a and b are valid ETags.
+func etagStrongMatch(a, b string) bool { + return a == b && a != "" && a[0] == '"' +} + +// etagWeakMatch reports whether a and b match using weak ETag comparison. +// Assumes a and b are valid ETags. +func etagWeakMatch(a, b string) bool { + return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/") +} + +// condResult is the result of an HTTP request precondition check. +// See https://tools.ietf.org/html/rfc7232 section 3. +type condResult int + +const ( + condNone condResult = iota + condTrue + condFalse +) + +func checkIfMatch(w http.ResponseWriter, r *http.Request) condResult { + im := r.Header.Get("If-Match") + if im == "" { + return condNone + } + for { + im = textproto.TrimString(im) + if len(im) == 0 { + break + } + if im[0] == ',' { + im = im[1:] + continue + } + if im[0] == '*' { + return condTrue + } + etag, remain := scanETag(im) + if etag == "" { + break + } + if etagStrongMatch(etag, headerGetExact(w.Header(), "Etag")) { + return condTrue + } + im = remain + } + + return condFalse +} + +func checkIfUnmodifiedSince(r *http.Request, modtime time.Time) condResult { + ius := r.Header.Get("If-Unmodified-Since") + if ius == "" || isZeroTime(modtime) { + return condNone + } + t, err := http.ParseTime(ius) + if err != nil { + return condNone + } + + // The Last-Modified header truncates sub-second precision so + // the modtime needs to be truncated too. + modtime = modtime.Truncate(time.Second) + if modtime.Before(t) || modtime.Equal(t) { + return condTrue + } + return condFalse +} + +func checkIfNoneMatch(w http.ResponseWriter, r *http.Request) condResult { + inm := headerGetExact(r.Header, "If-None-Match") + if inm == "" { + return condNone + } + buf := inm + for { + buf = textproto.TrimString(buf) + if len(buf) == 0 { + break + } + if buf[0] == ',' { + buf = buf[1:] + continue + } + if buf[0] == '*' { + return condFalse + } + etag, remain := scanETag(buf) + if etag == "" { + break + } + if etagWeakMatch(etag, headerGetExact(w.Header(), "Etag")) { + return condFalse + } + buf = remain + } + return condTrue +} + +func checkIfModifiedSince(r *http.Request, modtime time.Time) condResult { + if r.Method != "GET" && r.Method != "HEAD" { + return condNone + } + ims := r.Header.Get("If-Modified-Since") + if ims == "" || isZeroTime(modtime) { + return condNone + } + t, err := http.ParseTime(ims) + if err != nil { + return condNone + } + // The Last-Modified header truncates sub-second precision so + // the modtime needs to be truncated too. + modtime = modtime.Truncate(time.Second) + if modtime.Before(t) || modtime.Equal(t) { + return condFalse + } + return condTrue +} + +func checkIfRange(w http.ResponseWriter, r *http.Request, modtime time.Time) condResult { + if r.Method != "GET" && r.Method != "HEAD" { + return condNone + } + ir := headerGetExact(r.Header, "If-Range") + if ir == "" { + return condNone + } + etag, _ := scanETag(ir) + if etag != "" { + if etagStrongMatch(etag, w.Header().Get("Etag")) { + return condTrue + } else { + return condFalse + } + } + // The If-Range value is typically the ETag value, but it may also be + // the modtime date. See golang.org/issue/8367. + if modtime.IsZero() { + return condFalse + } + t, err := http.ParseTime(ir) + if err != nil { + return condFalse + } + if t.Unix() == modtime.Unix() { + return condTrue + } + return condFalse +} + +// isZeroTime reports whether t is obviously unspecified (either zero or Unix()=0). 
+func isZeroTime(t time.Time) bool { + return t.IsZero() || t.Equal(unixEpochTime) +} + +func setLastModified(w http.ResponseWriter, modtime time.Time) { + if !isZeroTime(modtime) { + w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat)) + } +} + +func writeNotModified(w http.ResponseWriter) { + // RFC 7232 section 4.1: + // a sender SHOULD NOT generate representation metadata other than the + // above listed fields unless said metadata exists for the purpose of + // guiding cache updates (e.g., Last-Modified might be useful if the + // response does not have an ETag field). + h := w.Header() + delete(h, "Content-Type") + delete(h, "Content-Length") + delete(h, "Content-Encoding") + if h.Get("Etag") != "" { + delete(h, "Last-Modified") + } + w.WriteHeader(http.StatusNotModified) +} + +// checkPreconditions evaluates request preconditions and reports whether a precondition +// resulted in sending StatusNotModified or StatusPreconditionFailed. +func checkPreconditions(w http.ResponseWriter, r *http.Request, modtime time.Time) (done bool, rangeHeader string) { + // This function carefully follows RFC 7232 section 6. + ch := checkIfMatch(w, r) + if ch == condNone { + ch = checkIfUnmodifiedSince(r, modtime) + } + if ch == condFalse { + w.WriteHeader(http.StatusPreconditionFailed) + return true, "" + } + switch checkIfNoneMatch(w, r) { + case condFalse: + if r.Method == "GET" || r.Method == "HEAD" { + writeNotModified(w) + return true, "" + } else { + w.WriteHeader(http.StatusPreconditionFailed) + return true, "" + } + case condNone: + if checkIfModifiedSince(r, modtime) == condFalse { + writeNotModified(w) + return true, "" + } + } + + rangeHeader = headerGetExact(r.Header, "Range") + if rangeHeader != "" && checkIfRange(w, r, modtime) == condFalse { + rangeHeader = "" + } + return false, rangeHeader +} + +// httpRange specifies the byte range to be sent to the client. +type httpRange struct { + start, length int64 +} + +func (r httpRange) contentRange(size int64) string { + return fmt.Sprintf("bytes %d-%d/%d", r.start, r.start+r.length-1, size) +} + +// parseRange parses a Range header string as per RFC 7233. +// errNoOverlap is returned if none of the ranges overlap. +func parseRange(s string, size int64) ([]httpRange, error) { + if s == "" { + return nil, nil // header not present + } + const b = "bytes=" + if !strings.HasPrefix(s, b) { + return nil, errors.New("invalid range") + } + var ranges []httpRange + noOverlap := false + for _, ra := range strings.Split(s[len(b):], ",") { + ra = textproto.TrimString(ra) + if ra == "" { + continue + } + start, end, ok := strings.Cut(ra, "-") + if !ok { + return nil, errors.New("invalid range") + } + start, end = textproto.TrimString(start), textproto.TrimString(end) + var r httpRange + if start == "" { + // If no start is specified, end specifies the + // range start relative to the end of the file, + // and we are dealing with + // which has to be a non-negative integer as per + // RFC 7233 Section 2.1 "Byte-Ranges". + if end == "" || end[0] == '-' { + return nil, errors.New("invalid range") + } + i, err := strconv.ParseInt(end, 10, 64) + if i < 0 || err != nil { + return nil, errors.New("invalid range") + } + if i > size { + i = size + } + r.start = size - i + r.length = size - r.start + } else { + i, err := strconv.ParseInt(start, 10, 64) + if err != nil || i < 0 { + return nil, errors.New("invalid range") + } + if i >= size { + // If the range begins after the size of the content, + // then it does not overlap. 
+ noOverlap = true + continue + } + r.start = i + if end == "" { + // If no end is specified, range extends to end of the file. + r.length = size - r.start + } else { + i, err := strconv.ParseInt(end, 10, 64) + if err != nil || r.start > i { + return nil, errors.New("invalid range") + } + if i >= size { + i = size - 1 + } + r.length = i - r.start + 1 + } + } + ranges = append(ranges, r) + } + if noOverlap && len(ranges) == 0 { + // The specified ranges did not overlap with the content. + return nil, errNoOverlap + } + return ranges, nil +} + +func sumRangesSize(ranges []httpRange) (size int64) { + for _, ra := range ranges { + size += ra.length + } + return +} + +// seekToStartOfFirstRange seeks to the start of the first Range if the request is an HTTP Range Request +func (i *handler) seekToStartOfFirstRange(w http.ResponseWriter, r *http.Request, data io.Seeker) bool { + rangeHeader := r.Header.Get("Range") + if rangeHeader != "" { + ranges, err := parseRangeWithoutLength(rangeHeader) + if err != nil { + i.webError(w, r, fmt.Errorf("invalid range request: %w", err), http.StatusBadRequest) + return false + } + if len(ranges) > 0 { + ra := &ranges[0] + err = seekToRangeStart(data, ra) + if err != nil { + i.webError(w, r, fmt.Errorf("could not seek to location in range request: %w", err), http.StatusBadRequest) + return false + } + } + } + return true +} + +func seekToRangeStart(data io.Seeker, ra *ByteRange) error { + if ra != nil && ra.From != 0 { + if _, err := data.Seek(int64(ra.From), io.SeekStart); err != nil { + return err + } + } + return nil +} diff --git a/gateway/utilities_test.go b/gateway/utilities_test.go index 27ba43a14..9d58d8c11 100644 --- a/gateway/utilities_test.go +++ b/gateway/utilities_test.go @@ -145,7 +145,7 @@ func (mb *mockBackend) GetBlock(ctx context.Context, immutablePath ImmutablePath return mb.gw.GetBlock(ctx, immutablePath) } -func (mb *mockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, files.Node, error) { +func (mb *mockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { return mb.gw.Head(ctx, immutablePath) }
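To close, a test-style sketch of how the pieces above fit together: the caller positions the reader at the start of the first range (here via `seekToRangeStart`), sets Content-Type itself, and passes the full content size, and `httpServeContent` produces the 206 response with the matching `Content-Range` and `Content-Length`. The function name and sample data are invented; this is an illustration, not a test from this change set.

```go
package gateway

import (
	"bytes"
	"fmt"
	"net/http"
	"net/http/httptest"
	"time"
)

// exampleSingleRange exercises httpServeContent's single-range contract.
func exampleSingleRange() {
	data := []byte("0123456789abcdef")

	r := httptest.NewRequest(http.MethodGet, "/example", nil)
	r.Header.Set("Range", "bytes=10-")
	w := httptest.NewRecorder()

	// httpServeContent does no sniffing, so Content-Type is set up front.
	w.Header().Set("Content-Type", "text/plain")

	rdr := bytes.NewReader(data)
	// Position the reader at the start of the first (and only honored) range.
	_ = seekToRangeStart(rdr, &ByteRange{From: 10})

	httpServeContent(w, r, time.Time{}, int64(len(data)), rdr)

	fmt.Println(w.Code)                          // 206
	fmt.Println(w.Header().Get("Content-Range")) // bytes 10-15/16
	fmt.Println(w.Body.String())                 // abcdef
}
```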