Skip to content

Commit

Permalink
feat(gateway)!: return HeadResponse instead of files.Node, no multi-r…
Browse files Browse the repository at this point in the history
…ange

Co-authored-by: Henrique Dias <hacdias@gmail.com>
Co-authored-by: Marcin Rataj <lidel@lidel.org>
  • Loading branch information
3 people committed Oct 2, 2023
1 parent 76f8e3f commit c28c847
Show file tree
Hide file tree
Showing 14 changed files with 781 additions and 176 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ The following emojis are used to highlight certain changes:

### Changed

* `boxo/gateway`
* 🛠 The `IPFSBackend` interface was updated to make the responses of the
`Head` method more explicit. It now returns a `HeadResponse` instead of a
`files.Node`.

### Removed

### Fixed
Expand Down
65 changes: 59 additions & 6 deletions gateway/blocks_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,34 @@ func (bb *BlocksBackend) Get(ctx context.Context, path ImmutablePath, ranges ...
return md, nil, err
}

// Only a single range is supported in responses to HTTP Range Requests.
// When more than one is passed in the Range header, this library will
// return a response for the first one and ignore the remaining ones.
var ra *ByteRange
if len(ranges) > 0 {
ra = &ranges[0]
}

rootCodec := nd.Cid().Prefix().GetCodec()

// This covers both Raw blocks and terminal IPLD codecs like dag-cbor and dag-json
// Note: while only cbor, json, dag-cbor, and dag-json are currently supported by gateways this could change
// Note: For the raw codec we return just the relevant range rather than the entire block
if rootCodec != uint64(mc.DagPb) {
return md, NewGetResponseFromFile(files.NewBytesFile(nd.RawData())), nil
f := files.NewBytesFile(nd.RawData())

fileSize, err := f.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
}

if rootCodec == uint64(mc.Raw) {
if err := seekToRangeStart(f, ra); err != nil {
return ContentPathMetadata{}, nil, err
}
}

return md, NewGetResponseFromReader(f, fileSize), nil
}

// This code path covers full graph, single file/directory, and range requests
Expand All @@ -179,10 +202,23 @@ func (bb *BlocksBackend) Get(ctx context.Context, path ImmutablePath, ranges ...
if sz < 0 {
return ContentPathMetadata{}, nil, fmt.Errorf("directory cumulative DAG size cannot be negative")
}
return md, NewGetResponseFromDirectoryListing(uint64(sz), dir.EnumLinksAsync(ctx)), nil
return md, NewGetResponseFromDirectoryListing(uint64(sz), dir.EnumLinksAsync(ctx), nil), nil
}
if file, ok := f.(files.File); ok {
return md, NewGetResponseFromFile(file), nil
fileSize, err := f.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
}

if err := seekToRangeStart(file, ra); err != nil {
return ContentPathMetadata{}, nil, err
}

if s, ok := f.(*files.Symlink); ok {
return md, NewGetResponseFromSymlink(s, fileSize), nil
}

return md, NewGetResponseFromReader(file, fileSize), nil
}

return ContentPathMetadata{}, nil, fmt.Errorf("data was not a valid file or directory: %w", ErrInternalServerError) // TODO: should there be a gateway invalid content type to abstract over the various IPLD error types?
Expand Down Expand Up @@ -211,15 +247,15 @@ func (bb *BlocksBackend) GetBlock(ctx context.Context, path ImmutablePath) (Cont
return md, files.NewBytesFile(nd.RawData()), nil
}

func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) {
func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {
md, nd, err := bb.getNode(ctx, path)
if err != nil {
return md, nil, err
}

rootCodec := nd.Cid().Prefix().GetCodec()
if rootCodec != uint64(mc.DagPb) {
return md, files.NewBytesFile(nd.RawData()), nil
return md, NewHeadResponseForFile(files.NewBytesFile(nd.RawData()), int64(len(nd.RawData()))), nil
}

// TODO: We're not handling non-UnixFS dag-pb. There's a bit of a discrepancy
Expand All @@ -229,7 +265,24 @@ func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentP
return ContentPathMetadata{}, nil, err
}

return md, fileNode, nil
sz, err := fileNode.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
}

if _, ok := fileNode.(files.Directory); ok {
return md, NewHeadResponseForDirectory(sz), nil
}

if _, ok := fileNode.(*files.Symlink); ok {
return md, NewHeadResponseForSymlink(sz), nil
}

if f, ok := fileNode.(files.File); ok {
return md, NewHeadResponseForFile(f, sz), nil
}

return ContentPathMetadata{}, nil, fmt.Errorf("unsupported UnixFS file type")
}

// emptyRoot is a CAR root with the empty identity CID. CAR files are recommended
Expand Down
82 changes: 71 additions & 11 deletions gateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,21 +260,74 @@ type ByteRange struct {
}

type GetResponse struct {
bytes files.File
bytes io.ReadCloser
bytesSize int64
symlink *files.Symlink
directoryMetadata *directoryMetadata
}

// Close releases whichever underlying resource this response carries.
// The byte stream is checked first, then the symlink, then the optional
// close hook of the directory listing. It returns nil when there is
// nothing to close.
func (r *GetResponse) Close() error {
	switch {
	case r.bytes != nil:
		return r.bytes.Close()
	case r.symlink != nil:
		return r.symlink.Close()
	case r.directoryMetadata != nil && r.directoryMetadata.closeFn != nil:
		return r.directoryMetadata.closeFn()
	default:
		// Either the directory listing has no close hook, or no payload
		// was set at all (the latter should be unreachable).
		return nil
	}
}

var _ io.Closer = (*GetResponse)(nil)

// directoryMetadata is the directory-listing payload carried by a GetResponse.
// It is populated by NewGetResponseFromDirectoryListing.
type directoryMetadata struct {
// dagSize is the size value supplied by the caller when the listing
// response is constructed.
dagSize uint64
// entries is the channel from which the directory's link results are read.
entries <-chan unixfs.LinkResult
// closeFn is an optional hook; when non-nil it is invoked by
// GetResponse.Close for directory responses.
closeFn func() error
}

// NewGetResponseFromReader builds a GetResponse whose payload is the given
// byte stream. fullFileSize is recorded as the response's byte size.
func NewGetResponseFromReader(file io.ReadCloser, fullFileSize int64) *GetResponse {
	resp := &GetResponse{}
	resp.bytes = file
	resp.bytesSize = fullFileSize
	return resp
}

// NewGetResponseFromSymlink builds a GetResponse whose payload is the given
// symlink. size is recorded as the response's byte size.
func NewGetResponseFromSymlink(symlink *files.Symlink, size int64) *GetResponse {
	resp := &GetResponse{}
	resp.symlink = symlink
	resp.bytesSize = size
	return resp
}

func NewGetResponseFromFile(file files.File) *GetResponse {
return &GetResponse{bytes: file}
// NewGetResponseFromDirectoryListing builds a GetResponse for a directory.
// dagSize and entries describe the listing; closeFn may be nil and, when
// set, is run by GetResponse.Close.
func NewGetResponseFromDirectoryListing(dagSize uint64, entries <-chan unixfs.LinkResult, closeFn func() error) *GetResponse {
	listing := &directoryMetadata{
		dagSize: dagSize,
		entries: entries,
		closeFn: closeFn,
	}
	return &GetResponse{directoryMetadata: listing}
}

func NewGetResponseFromDirectoryListing(dagSize uint64, entries <-chan unixfs.LinkResult) *GetResponse {
return &GetResponse{directoryMetadata: &directoryMetadata{dagSize, entries}}
// HeadResponse is the value returned by IPFSBackend.Head. Each
// NewHeadResponseFor* constructor sets exactly one of the isFile, isSymLink
// and isDir flags along with the size.
type HeadResponse struct {
// bytesSize is the size passed to the constructor (file size, symlink
// size, or directory DAG size).
bytesSize int64
// startingBytes is an optional reader, set only by NewHeadResponseForFile
// and closed by Close. Presumably it is used for content type sniffing —
// confirm against the handler.
startingBytes io.ReadCloser
// isFile is set by NewHeadResponseForFile.
isFile bool
// isSymLink is set by NewHeadResponseForSymlink.
isSymLink bool
// isDir is set by NewHeadResponseForDirectory.
isDir bool
}

// Close closes the starting-bytes reader if one was attached and returns
// its error. Responses without a reader close cleanly.
func (r *HeadResponse) Close() error {
	if r.startingBytes == nil {
		return nil
	}
	return r.startingBytes.Close()
}

// NewHeadResponseForFile builds a HeadResponse marked as a file.
// startingBytes is stored as the response's reader and size as its byte size.
func NewHeadResponseForFile(startingBytes io.ReadCloser, size int64) *HeadResponse {
	resp := &HeadResponse{isFile: true}
	resp.startingBytes = startingBytes
	resp.bytesSize = size
	return resp
}

// NewHeadResponseForSymlink builds a HeadResponse marked as a symlink with
// symlinkSize as its byte size. No reader is attached.
func NewHeadResponseForSymlink(symlinkSize int64) *HeadResponse {
	resp := &HeadResponse{}
	resp.isSymLink = true
	resp.bytesSize = symlinkSize
	return resp
}

// NewHeadResponseForDirectory builds a HeadResponse marked as a directory
// with dagSize as its byte size. No reader is attached.
func NewHeadResponseForDirectory(dagSize int64) *HeadResponse {
	resp := &HeadResponse{}
	resp.isDir = true
	resp.bytesSize = dagSize
	return resp
}

// IPFSBackend is the required set of functionality used to implement the IPFS
Expand Down Expand Up @@ -305,6 +358,9 @@ type IPFSBackend interface {
// file will still need magic bytes from the very beginning for content
// type sniffing).
// - A range request for a directory currently holds no semantic meaning.
// - For non-UnixFS (and non-raw data) such as terminal IPLD dag-cbor/json, etc. blocks the returned response
// bytes should be the complete block and returned as an [io.ReadSeekCloser] starting at the beginning of the
// block rather than as an [io.ReadCloser] that starts at the beginning of the range request.
//
// [HTTP Byte Ranges]: https://httpwg.org/specs/rfc9110.html#rfc.section.14.1.2
Get(context.Context, ImmutablePath, ...ByteRange) (ContentPathMetadata, *GetResponse, error)
Expand All @@ -316,12 +372,16 @@ type IPFSBackend interface {
// GetBlock returns a single block of data
GetBlock(context.Context, ImmutablePath) (ContentPathMetadata, files.File, error)

// Head returns a file or directory depending on what the path is that has been requested.
// For UnixFS files should return a file which has the correct file size and either returns the ContentType in ContentPathMetadata or
// enough data (e.g. 3kiB) such that the content type can be determined by sniffing.
// For all other data types returning just size information is sufficient
// TODO: give function more explicit return types
Head(context.Context, ImmutablePath) (ContentPathMetadata, files.Node, error)
// Head returns a [HeadResponse] depending on what the path is that has been requested.
// For UnixFS files (and raw blocks) it should return the size of the file and either set the ContentType in
// ContentPathMetadata or send back a reader from the beginning of the file with enough data (e.g. 3kiB) such that
// the content type can be determined by sniffing.
//
// For UnixFS directories and symlinks only setting the size and type are necessary.
//
// For all other data types (e.g. (DAG-)CBOR/JSON blocks) returning the size information as a file while setting
// the content-type is sufficient.
Head(context.Context, ImmutablePath) (ContentPathMetadata, *HeadResponse, error)

// ResolvePath resolves the path using UnixFS resolver. If the path does not
// exist due to a missing link, it should return an error of type:
Expand Down
4 changes: 2 additions & 2 deletions gateway/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ func (mb *errorMockBackend) GetBlock(ctx context.Context, path ImmutablePath) (C
return ContentPathMetadata{}, nil, mb.err
}

func (mb *errorMockBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) {
func (mb *errorMockBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {
return ContentPathMetadata{}, nil, mb.err
}

Expand Down Expand Up @@ -803,7 +803,7 @@ func (mb *panicMockBackend) GetBlock(ctx context.Context, immutablePath Immutabl
panic("i am panicking")
}

func (mb *panicMockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, files.Node, error) {
func (mb *panicMockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {
panic("i am panicking")
}

Expand Down
55 changes: 17 additions & 38 deletions gateway/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,19 @@ func NewHandler(c Config, backend IPFSBackend) http.Handler {
return newHandlerWithMetrics(&c, backend)
}

// serveContent replies to the request using the content in the provided ReadSeeker
// serveContent replies to the request using the content in the provided Reader
// and returns the status code written and any error encountered during a write.
// It wraps http.serveContent which takes care of If-None-Match+Etag,
// It wraps httpServeContent (a close clone of http.ServeContent) which takes care of If-None-Match+Etag,
// Content-Length and range requests.
func serveContent(w http.ResponseWriter, req *http.Request, name string, modtime time.Time, content io.ReadSeeker) (int, bool, error) {
//
// Notes:
// 1. For HEAD requests the io.Reader may be nil/undefined
// 2. When the io.Reader is needed it must start at the beginning of the first Range Request component if it exists
// 3. Only a single HTTP Range Request is supported, if more than one are requested only the first will be honored
// 4. The Content-Type header must already be set
func serveContent(w http.ResponseWriter, req *http.Request, modtime time.Time, size int64, content io.Reader) (int, bool, error) {
ew := &errRecordingResponseWriter{ResponseWriter: w}
http.ServeContent(ew, req, name, modtime, content)
httpServeContent(ew, req, modtime, size, content)

// When we calculate some metrics we want a flag that lets us ignore
// errors and 304 Not Modified, and only care when requested data
Expand Down Expand Up @@ -554,40 +560,6 @@ func etagMatch(ifNoneMatchHeader string, etagsToCheck ...string) bool {
return false
}

// scanETag determines if a syntactically valid ETag is present at s. If so,
// the ETag and remaining text after consuming ETag is returned. Otherwise,
// it returns "", "".
// (This is the same logic as one executed inside of http.ServeContent)
func scanETag(s string) (etag string, remain string) {
s = textproto.TrimString(s)
start := 0
if strings.HasPrefix(s, "W/") {
start = 2
}
if len(s[start:]) < 2 || s[start] != '"' {
return "", ""
}
// ETag is either W/"text" or "text".
// See RFC 7232 2.3.
for i := start + 1; i < len(s); i++ {
c := s[i]
switch {
// Character values allowed in ETags.
case c == 0x21 || c >= 0x23 && c <= 0x7E || c >= 0x80:
case c == '"':
return s[:i+1], s[i+1:]
default:
return "", ""
}
}
return "", ""
}

// etagWeakMatch reports whether a and b match using weak ETag comparison.
func etagWeakMatch(a, b string) bool {
return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/")
}

// getEtag generates an ETag value based on an HTTP Request, a CID and a response
// format. This function DOES NOT generate ETags for CARs or IPNS Records.
func getEtag(r *http.Request, cid cid.Cid, responseFormat string) string {
Expand Down Expand Up @@ -776,6 +748,13 @@ func (i *handler) handleWebRequestErrors(w http.ResponseWriter, r *http.Request,
return ImmutablePath{}, false
}

// If the error is not an IPLD traversal error then we should not be looking for _redirects or legacy 404s
if !isErrNotFound(err) {
err = fmt.Errorf("failed to resolve %s: %w", debugStr(contentPath.String()), err)
i.webError(w, r, err, http.StatusInternalServerError)
return ImmutablePath{}, false
}

// If we have origin isolation (subdomain gw, DNSLink website),
// and response type is UnixFS (default for website hosting)
// we can leverage the presence of an _redirects file and apply rules defined there.
Expand Down
12 changes: 11 additions & 1 deletion gateway/handler_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,19 @@ func (i *handler) serveRawBlock(ctx context.Context, w http.ResponseWriter, r *h
w.Header().Set("Content-Type", rawResponseFormat)
w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^)

sz, err := data.Size()
if err != nil {
i.handleRequestErrors(w, r, rq.contentPath, err)
return false
}

if !i.seekToStartOfFirstRange(w, r, data) {
return false
}

// ServeContent will take care of
// If-None-Match+Etag, Content-Length and range requests
_, dataSent, _ := serveContent(w, r, name, modtime, data)
_, dataSent, _ := serveContent(w, r, modtime, sz, data)

if dataSent {
// Update metrics
Expand Down
Loading

0 comments on commit c28c847

Please sign in to comment.