Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(gateway): undesired conversions to dag-json and friends #9566

Merged
merged 9 commits into from
Jan 21, 2023
21 changes: 11 additions & 10 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request

// Support custom response formats passed via ?format or Accept HTTP header
switch responseFormat {
case "":
switch resolvedPath.Cid().Prefix().Codec {
case uint64(mc.Json), uint64(mc.DagJson), uint64(mc.Cbor), uint64(mc.DagCbor):
case "", "application/json", "application/cbor":
switch mc.Code(resolvedPath.Cid().Prefix().Codec) {
case mc.Json, mc.DagJson, mc.Cbor, mc.DagCbor:
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
default:
Expand All @@ -441,14 +441,13 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
logger.Debugw("serving tar file", "path", contentPath)
i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
return
case "application/json", "application/vnd.ipld.dag-json",
"application/cbor", "application/vnd.ipld.dag-cbor":
case "application/vnd.ipld.dag-json", "application/vnd.ipld.dag-cbor":
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
return
default: // catch-all for unsupported application/vnd.*
err := fmt.Errorf("unsupported format %q", responseFormat)
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
webError(w, "failed to respond with requested content type", err, http.StatusBadRequest)
return
}
}
Expand Down Expand Up @@ -878,14 +877,14 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
return "application/vnd.ipld.car", nil, nil
case "tar":
return "application/x-tar", nil, nil
case "dag-json":
return "application/vnd.ipld.dag-json", nil, nil
case "json":
return "application/json", nil, nil
case "dag-cbor":
return "application/vnd.ipld.dag-cbor", nil, nil
case "cbor":
return "application/cbor", nil, nil
case "dag-json":
return "application/vnd.ipld.dag-json", nil, nil
case "dag-cbor":
return "application/vnd.ipld.dag-cbor", nil, nil
}
}
// Browsers and other user agents will send Accept header with generic types like:
Expand All @@ -908,6 +907,8 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
}
}
}
// If none of the special-cased content types is found, return an empty string
// to indicate that the default, implicit UnixFS response should be prepared.
return "", nil, nil
}

Expand Down
83 changes: 41 additions & 42 deletions core/corehttp/gateway_handler_codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,25 @@ import (

// codecToContentType maps the supported IPLD codecs to the HTTP Content
// Type they should have.
var codecToContentType = map[uint64]string{
uint64(mc.Json): "application/json",
uint64(mc.Cbor): "application/cbor",
uint64(mc.DagJson): "application/vnd.ipld.dag-json",
uint64(mc.DagCbor): "application/vnd.ipld.dag-cbor",
var codecToContentType = map[mc.Code]string{
mc.Json: "application/json",
mc.Cbor: "application/cbor",
mc.DagJson: "application/vnd.ipld.dag-json",
mc.DagCbor: "application/vnd.ipld.dag-cbor",
}

// contentTypeToCodecs maps the HTTP Content Type to the respective
// possible codecs. If the original data is in one of those codecs,
// we stream the raw bytes. Otherwise, we encode in the last codec
// of the list.
var contentTypeToCodecs = map[string][]uint64{
"application/json": {uint64(mc.Json), uint64(mc.DagJson)},
"application/vnd.ipld.dag-json": {uint64(mc.DagJson)},
"application/cbor": {uint64(mc.Cbor), uint64(mc.DagCbor)},
"application/vnd.ipld.dag-cbor": {uint64(mc.DagCbor)},
// contentTypeToRaw maps the HTTP Content Type to the codecs that can be
// served as a raw response, without any conversion.
var contentTypeToRaw = map[string][]mc.Code{
"application/json": {mc.Json, mc.DagJson},
"application/cbor": {mc.Cbor, mc.DagCbor},
}

// contentTypeToCodec maps the HTTP Content Type to the respective codec. Only
// the codecs that we want to convert to and from are listed here.
var contentTypeToCodec = map[string]mc.Code{
"application/vnd.ipld.dag-json": mc.DagJson,
"application/vnd.ipld.dag-cbor": mc.DagCbor,
}

// contentTypeToExtension maps the HTTP Content Type to the respective file
Expand All @@ -56,7 +59,7 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType)))
defer span.End()

cidCodec := resolvedPath.Cid().Prefix().Codec
cidCodec := mc.Code(resolvedPath.Cid().Prefix().Codec)
responseContentType := requestedContentType

// If the resolved path still has some remainder, return error for now.
Expand Down Expand Up @@ -90,50 +93,44 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
// No content type is specified by the user (via Accept, or format=). However,
// we support this format. Let's handle it.
if requestedContentType == "" {
isDAG := cidCodec == uint64(mc.DagJson) || cidCodec == uint64(mc.DagCbor)
isDAG := cidCodec == mc.DagJson || cidCodec == mc.DagCbor
acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html")
download := r.URL.Query().Get("download") == "true"

if isDAG && acceptsHTML && !download {
i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath)
} else {
// This covers CIDs with codec 'json' and 'cbor' as those do not have
// an explicit requested content type.
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
}

return
}

// Otherwise, the user has requested a specific content type. Let's first get
// the codecs that can be used with this content type.
codecs, ok := contentTypeToCodecs[requestedContentType]
// If DAG-JSON or DAG-CBOR was requested using the corresponding plain content
// type, return the raw block as-is, without conversion.
skipCodecs, ok := contentTypeToRaw[requestedContentType]
if ok {
for _, skipCodec := range skipCodecs {
if skipCodec == cidCodec {
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
return
}
}
}

// Otherwise, the user has requested a specific content type (a DAG-* variant).
// Let's first get the codecs that can be used with this content type.
toCodec, ok := contentTypeToCodec[requestedContentType]
if !ok {
// This is never supposed to happen unless the function is called with the wrong parameters.
err := fmt.Errorf("unsupported content type: %s", requestedContentType)
webError(w, err.Error(), err, http.StatusInternalServerError)
return
}

// If we need to convert, use the last codec (strict dag- variant)
toCodec := codecs[len(codecs)-1]

// If the requested content type has "dag-", ALWAYS go through the encoding
// process in order to validate the content.
if strings.Contains(requestedContentType, "dag-") {
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
return
}

// Otherwise, check if the data is encoded with the requested content type.
// If so, we can directly stream the raw data. serveRawBlock cannot be directly
// used here as it sets different headers.
for _, codec := range codecs {
if resolvedPath.Cid().Prefix().Codec == codec {
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
return
}
}

// Finally, if nothing of the above is true, we have to actually convert the codec.
// This handles DAG-* conversions and validations.
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
}

Expand Down Expand Up @@ -165,6 +162,7 @@ func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWrit
}
}

// serveCodecRaw returns the raw block without any conversion
func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, name string, modtime time.Time) {
blockCid := resolvedPath.Cid()
blockReader, err := i.api.Block().Get(ctx, resolvedPath)
Expand All @@ -184,7 +182,8 @@ func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWrite
_, _, _ = ServeContent(w, r, name, modtime, content)
}

func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec uint64, modtime time.Time) {
// serveCodecConverted returns payload converted to codec specified in toCodec
func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec mc.Code, modtime time.Time) {
obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid())
if err != nil {
webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError)
Expand All @@ -199,7 +198,7 @@ func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.Respons
}
finalNode := universal.(ipld.Node)

encoder, err := multicodec.LookupEncoder(toCodec)
encoder, err := multicodec.LookupEncoder(uint64(toCodec))
if err != nil {
webError(w, err.Error(), err, http.StatusInternalServerError)
return
Expand Down
79 changes: 68 additions & 11 deletions docs/changelogs/v0.18.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,74 @@ Learn more in the [`Reprovider` config](https://github.com/ipfs/go-ipfs/blob/mas

##### (DAG-)JSON and (DAG-)CBOR response formats

Implemented [IPIP-328](https://github.com/ipfs/specs/pull/328) which adds support
for DAG-JSON and DAG-CBOR, as well as their non-DAG variants, to the gateway. Now,
CIDs that encode JSON, CBOR, DAG-JSON and DAG-CBOR objects can be retrieved, and
traversed thanks to the [special meaning of CBOR Tag 42](https://github.com/ipld/cid-cbor/).
The IPFS project has reserved the corresponding media types at IANA:
- [`application/vnd.ipld.dag-json`](https://www.iana.org/assignments/media-types/application/vnd.ipld.dag-json)
- [`application/vnd.ipld.dag-cbor`](https://www.iana.org/assignments/media-types/application/vnd.ipld.dag-cbor)

HTTP clients can request JSON, CBOR, DAG-JSON, and DAG-CBOR responses by either
passing the query parameter `?format` or setting the `Accept` HTTP header to the
following values:
This release implements them as part of [IPIP-328](https://github.com/ipfs/specs/pull/328)
and adds Gateway support for CIDs with `json` (0x0200), `cbor` (0x51),
[`dag-json`](https://ipld.io/specs/codecs/dag-json/) (0x0129)
and [`dag-cbor`](https://ipld.io/specs/codecs/dag-cbor/spec/) (0x71) codecs.

- JSON: `?format=json`, or `Accept: application/json`
- CBOR: `?format=cbor`, or `Accept: application/cbor`
- DAG-JSON: `?format=dag-json`, or `Accept: application/vnd.ipld.dag-json`
- DAG-CBOR: `?format=dag-cbor`, or `Accept: application/vnd.ipld.dag-cbor`
To specify the response `Content-Type` explicitly, the HTTP client can override
the codec present in the CID by using the `format` parameter
or setting the `Accept` HTTP header:

- Plain JSON: `?format=json` or `Accept: application/json`
- Plain CBOR: `?format=cbor` or `Accept: application/cbor`
- DAG-JSON: `?format=dag-json` or `Accept: application/vnd.ipld.dag-json`
- DAG-CBOR: `?format=dag-cbor` or `Accept: application/vnd.ipld.dag-cbor`

In addition, when DAG-JSON or DAG-CBOR is requested with the `Accept` header
set to `text/html`, the Gateway will return a basic HTML page with download
options, improving the user experience in web browsers.

###### Example 1: DAG-CBOR and DAG-JSON Conversion on Gateway

The Gateway supports conversion between DAG-CBOR and DAG-JSON for efficient
end-to-end data structure management: author in CBOR or JSON, store as binary
CBOR and retrieve as JSON via HTTP:

```console
$ echo '{"test": "json"}' | ipfs dag put # implicit --input-codec dag-json --store-codec dag-cbor
bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4

$ ipfs block get bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4 | xxd
00000000: a164 7465 7374 646a 736f 6e .dtestdjson

$ ipfs dag get bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4 # implicit --output-codec dag-json
{"test":"json"}

$ curl "http://127.0.0.1:8080/ipfs/bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4?format=dag-json"
{"test":"json"}
```

###### Example 2: Traversing CBOR DAGs

Placing a CID in [CBOR Tag 42](https://github.com/ipld/cid-cbor/) enables the
creation of arbitrary DAGs. The equivalent DAG-JSON notation for linking
to different blocks is represented by `{ "/": "cid" }`.

The Gateway supports traversing these links, enabling access to data
referenced by structures other than regular UnixFS directories:

```console
$ echo '{"test.jpg": {"/": "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"}}' | ipfs dag put
bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4 # dag-cbor document linking to unixfs file

$ ipfs resolve /ipfs/bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4/test.jpg
/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi

$ ipfs dag stat bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4
Size: 119827, NumBlocks: 2

$ curl "http://127.0.0.1:8080/ipfs/bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4/test.jpg" > test.jpg
```

###### Example 3: UnixFS directory listing as JSON

Finally, the Gateway now supports the same [logical format projection](https://ipld.io/specs/codecs/dag-pb/spec/#logical-format) from
DAG-PB to DAG-JSON as the `ipfs dag get` command, enabling the retrieval of directory listings as JSON instead of HTML:

```console
$ export DIR_CID=bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq
Expand Down Expand Up @@ -112,6 +167,8 @@ $ curl "http://127.0.0.1:8080/ipfs/$DIR_CID?format=dag-json" | jq
}
]
}
$ ipfs dag get $DIR_CID
{"Data":{"/":{"bytes":"CAE"}},"Links":[{"Hash":{"/":"Qmc3zqKcwzbbvw3MQm3hXdg8BQoFjGdZiGdAfXAyAGGdLi"},"Name":"1 - Barrel - Part 1 - alt.txt","Tsize":21},{"Hash":{"/":"QmdMxMx29KVYhHnaCc1icWYxQqXwUNCae6t1wS2NqruiHd"},"Name":"1 - Barrel - Part 1 - transcript.txt","Tsize":195},{"Hash":{"/":"QmawceGscqN4o8Y8Fv26UUmB454kn2bnkXV5tEQYc4jBd6"},"Name":"1 - Barrel - Part 1.png","Tsize":24862}]}
```

##### 🐎 Fast directory listings with DAG sizes
Expand Down
Loading