Skip to content

Commit

Permalink
feat(gateway): JSON and CBOR response formats (IPIP-328) (#9335)
Browse files Browse the repository at this point in the history
ipfs/kubo#9335
ipfs/specs#328

Co-authored-by: Marcin Rataj <lidel@lidel.org>

This commit was moved from ipfs/kubo@fdd1965
  • Loading branch information
hacdias committed Dec 5, 2022
1 parent 53e5c20 commit 79843b4
Show file tree
Hide file tree
Showing 3 changed files with 288 additions and 8 deletions.
30 changes: 26 additions & 4 deletions gateway/core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
coreiface "github.com/ipfs/interface-go-ipfs-core"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
routing "github.com/libp2p/go-libp2p/core/routing"
mc "github.com/multiformats/go-multicodec"
prometheus "github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
Expand Down Expand Up @@ -417,9 +418,15 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request

// Support custom response formats passed via ?format or Accept HTTP header
switch responseFormat {
case "": // The implicit response format is UnixFS
logger.Debugw("serving unixfs", "path", contentPath)
i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
case "":
switch resolvedPath.Cid().Prefix().Codec {
case uint64(mc.Json), uint64(mc.DagJson), uint64(mc.Cbor), uint64(mc.DagCbor):
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
default:
logger.Debugw("serving unixfs", "path", contentPath)
i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
}
return
case "application/vnd.ipld.raw":
logger.Debugw("serving raw block", "path", contentPath)
Expand All @@ -434,6 +441,11 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
logger.Debugw("serving tar file", "path", contentPath)
i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
return
case "application/json", "application/vnd.ipld.dag-json",
"application/cbor", "application/vnd.ipld.dag-cbor":
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
return
default: // catch-all for unsuported application/vnd.*
err := fmt.Errorf("unsupported format %q", responseFormat)
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
Expand Down Expand Up @@ -866,6 +878,14 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
return "application/vnd.ipld.car", nil, nil
case "tar":
return "application/x-tar", nil, nil
case "dag-json":
return "application/vnd.ipld.dag-json", nil, nil
case "json":
return "application/json", nil, nil
case "dag-cbor":
return "application/vnd.ipld.dag-cbor", nil, nil
case "cbor":
return "application/cbor", nil, nil
}
}
// Browsers and other user agents will send Accept header with generic types like:
Expand All @@ -874,7 +894,9 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
for _, accept := range r.Header.Values("Accept") {
// respond to the very first ipld content type
if strings.HasPrefix(accept, "application/vnd.ipld") ||
strings.HasPrefix(accept, "application/x-tar") {
strings.HasPrefix(accept, "application/x-tar") ||
strings.HasPrefix(accept, "application/json") ||
strings.HasPrefix(accept, "application/cbor") {
mediatype, params, err := mime.ParseMediaType(accept)
if err != nil {
return "", nil, err
Expand Down
258 changes: 258 additions & 0 deletions gateway/core/corehttp/gateway_handler_codec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
package corehttp

import (
"bytes"
"context"
"fmt"
"html"
"io"
"net/http"
"strings"
"time"

cid "github.com/ipfs/go-cid"
ipldlegacy "github.com/ipfs/go-ipld-legacy"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"github.com/ipfs/kubo/assets"
dih "github.com/ipfs/kubo/assets/dag-index-html"
"github.com/ipfs/kubo/tracing"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/multicodec"
mc "github.com/multiformats/go-multicodec"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
)

// codecToContentType maps the supported IPLD codecs to the HTTP Content
// Type they should have.
var codecToContentType = map[uint64]string{
uint64(mc.Json): "application/json",
uint64(mc.Cbor): "application/cbor",
uint64(mc.DagJson): "application/vnd.ipld.dag-json",
uint64(mc.DagCbor): "application/vnd.ipld.dag-cbor",
}

// contentTypeToCodecs maps the HTTP Content Type to the respective
// possible codecs. If the original data is in one of those codecs,
// we stream the raw bytes. Otherwise, we encode in the last codec
// of the list.
var contentTypeToCodecs = map[string][]uint64{
"application/json": {uint64(mc.Json), uint64(mc.DagJson)},
"application/vnd.ipld.dag-json": {uint64(mc.DagJson)},
"application/cbor": {uint64(mc.Cbor), uint64(mc.DagCbor)},
"application/vnd.ipld.dag-cbor": {uint64(mc.DagCbor)},
}

// contentTypeToExtension maps the HTTP Content Type to the respective file
// extension, used in Content-Disposition header when downloading the file.
var contentTypeToExtension = map[string]string{
"application/json": ".json",
"application/vnd.ipld.dag-json": ".json",
"application/cbor": ".cbor",
"application/vnd.ipld.dag-cbor": ".cbor",
}

func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string) {
ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType)))
defer span.End()

cidCodec := resolvedPath.Cid().Prefix().Codec
responseContentType := requestedContentType

// If the resolved path still has some remainder, return error for now.
// TODO: handle this when we have IPLD Patch (https://ipld.io/specs/patch/) via HTTP PUT
// TODO: (depends on https://github.com/ipfs/kubo/issues/4801 and https://github.com/ipfs/kubo/issues/4782)
if resolvedPath.Remainder() != "" {
path := strings.TrimSuffix(resolvedPath.String(), resolvedPath.Remainder())
err := fmt.Errorf("%q of %q could not be returned: reading IPLD Kinds other than Links (CBOR Tag 42) is not implemented: try reading %q instead", resolvedPath.Remainder(), resolvedPath.String(), path)
webError(w, "unsupported pathing", err, http.StatusNotImplemented)
return
}

// If no explicit content type was requested, the response will have one based on the codec from the CID
if requestedContentType == "" {
cidContentType, ok := codecToContentType[cidCodec]
if !ok {
// Should not happen unless function is called with wrong parameters.
err := fmt.Errorf("content type not found for codec: %v", cidCodec)
webError(w, "internal error", err, http.StatusInternalServerError)
return
}
responseContentType = cidContentType
}

// Set HTTP headers (for caching etc)
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
name := setCodecContentDisposition(w, r, resolvedPath, responseContentType)
w.Header().Set("Content-Type", responseContentType)
w.Header().Set("X-Content-Type-Options", "nosniff")

// No content type is specified by the user (via Accept, or format=). However,
// we support this format. Let's handle it.
if requestedContentType == "" {
isDAG := cidCodec == uint64(mc.DagJson) || cidCodec == uint64(mc.DagCbor)
acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html")
download := r.URL.Query().Get("download") == "true"

if isDAG && acceptsHTML && !download {
i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath)
} else {
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
}

return
}

// Otherwise, the user has requested a specific content type. Let's first get
// the codecs that can be used with this content type.
codecs, ok := contentTypeToCodecs[requestedContentType]
if !ok {
// This is never supposed to happen unless function is called with wrong parameters.
err := fmt.Errorf("unsupported content type: %s", requestedContentType)
webError(w, err.Error(), err, http.StatusInternalServerError)
return
}

// If we need to convert, use the last codec (strict dag- variant)
toCodec := codecs[len(codecs)-1]

// If the requested content type has "dag-", ALWAYS go through the encoding
// process in order to validate the content.
if strings.Contains(requestedContentType, "dag-") {
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
return
}

// Otherwise, check if the data is encoded with the requested content type.
// If so, we can directly stream the raw data. serveRawBlock cannot be directly
// used here as it sets different headers.
for _, codec := range codecs {
if resolvedPath.Cid().Prefix().Codec == codec {
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
return
}
}

// Finally, if nothing of the above is true, we have to actually convert the codec.
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
}

func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) {
// A HTML directory index will be presented, be sure to set the correct
// type instead of relying on autodetection (which may fail).
w.Header().Set("Content-Type", "text/html")

// Clear Content-Disposition -- we want HTML to be rendered inline
w.Header().Del("Content-Disposition")

// Generated index requires custom Etag (output may change between Kubo versions)
dagEtag := getDagIndexEtag(resolvedPath.Cid())
w.Header().Set("Etag", dagEtag)

// Remove Cache-Control for now to match UnixFS dir-index-html responses
// (we don't want browser to cache HTML forever)
// TODO: if we ever change behavior for UnixFS dir listings, same changes should be applied here
w.Header().Del("Cache-Control")

cidCodec := mc.Code(resolvedPath.Cid().Prefix().Codec)
if err := dih.DagIndexTemplate.Execute(w, dih.DagIndexTemplateData{
Path: contentPath.String(),
CID: resolvedPath.Cid().String(),
CodecName: cidCodec.String(),
CodecHex: fmt.Sprintf("0x%x", uint64(cidCodec)),
}); err != nil {
webError(w, "failed to generate HTML listing for this DAG: try fetching raw block with ?format=raw", err, http.StatusInternalServerError)
}
}

func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, name string, modtime time.Time) {
blockCid := resolvedPath.Cid()
blockReader, err := i.api.Block().Get(ctx, resolvedPath)
if err != nil {
webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError)
return
}
block, err := io.ReadAll(blockReader)
if err != nil {
webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError)
return
}
content := bytes.NewReader(block)

// ServeContent will take care of
// If-None-Match+Etag, Content-Length and range requests
_, _, _ = ServeContent(w, r, name, modtime, content)
}

func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec uint64, modtime time.Time) {
obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid())
if err != nil {
webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError)
return
}

universal, ok := obj.(ipldlegacy.UniversalNode)
if !ok {
err = fmt.Errorf("%T is not a valid IPLD node", obj)
webError(w, err.Error(), err, http.StatusInternalServerError)
return
}
finalNode := universal.(ipld.Node)

encoder, err := multicodec.LookupEncoder(toCodec)
if err != nil {
webError(w, err.Error(), err, http.StatusInternalServerError)
return
}

// Ensure IPLD node conforms to the codec specification.
var buf bytes.Buffer
err = encoder(finalNode, &buf)
if err != nil {
webError(w, err.Error(), err, http.StatusInternalServerError)
return
}

// Sets correct Last-Modified header. This code is borrowed from the standard
// library (net/http/server.go) as we cannot use serveFile.
if !(modtime.IsZero() || modtime.Equal(unixEpochTime)) {
w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat))
}

_, _ = w.Write(buf.Bytes())
}

func setCodecContentDisposition(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentType string) string {
var dispType, name string

ext, ok := contentTypeToExtension[contentType]
if !ok {
// Should never happen.
ext = ".bin"
}

if urlFilename := r.URL.Query().Get("filename"); urlFilename != "" {
name = urlFilename
} else {
name = resolvedPath.Cid().String() + ext
}

// JSON should be inlined, but ?download=true should still override
if r.URL.Query().Get("download") == "true" {
dispType = "attachment"
} else {
switch ext {
case ".json": // codecs that serialize to JSON can be rendered by browsers
dispType = "inline"
default: // everything else is assumed binary / opaque bytes
dispType = "attachment"
}
}

setContentDispositionHeader(w, name, dispType)
return name
}

func getDagIndexEtag(dagCid cid.Cid) string {
return `"DagIndex-` + assets.AssetHash + `_CID-` + dagCid.String() + `"`
}
8 changes: 4 additions & 4 deletions gateway/core/corehttp/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -498,9 +498,9 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/file.txt\">") {
t.Fatalf("expected file in directory listing")
}
if !strings.Contains(s, "<a class=\"ipfs-hash\" translate=\"no\" href=\"https://cid.ipfs.io/#") {
if !strings.Contains(s, "<a class=\"ipfs-hash\" translate=\"no\" href=\"https://cid.ipfs.tech/#") {
// https://github.com/ipfs/dir-index-html/issues/42
t.Fatalf("expected links to cid.ipfs.io in CID column when on DNSLink website")
t.Fatalf("expected links to cid.ipfs.tech in CID column when on DNSLink website")
}
if !strings.Contains(s, k2.Cid().String()) {
t.Fatalf("expected hash in directory listing")
Expand Down Expand Up @@ -535,9 +535,9 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !strings.Contains(s, "<a href=\"/file.txt\">") {
t.Fatalf("expected file in directory listing")
}
if !strings.Contains(s, "<a class=\"ipfs-hash\" translate=\"no\" href=\"https://cid.ipfs.io/#") {
if !strings.Contains(s, "<a class=\"ipfs-hash\" translate=\"no\" href=\"https://cid.ipfs.tech/#") {
// https://github.com/ipfs/dir-index-html/issues/42
t.Fatalf("expected links to cid.ipfs.io in CID column when on DNSLink website")
t.Fatalf("expected links to cid.ipfs.tech in CID column when on DNSLink website")
}
if !strings.Contains(s, k.Cid().String()) {
t.Fatalf("expected hash in directory listing")
Expand Down

0 comments on commit 79843b4

Please sign in to comment.