Skip to content

Commit

Permalink
fix: set cache-control header correctly (#19)
Browse files Browse the repository at this point in the history
* fix: set cache-control header conditionally

Fixes #17

* chore: bump deps

* feat: implement new ipns record&answer properties (#23)

* feat: implement new ipns record&answer properties

* fix: parseUrlString response includes defined ttl, set ttl if ipnsCached

* test: fix firefox failure

* feat: support http range header (#10)

* chore: limit body parameters to the types used

* chore: add response-header helper and tests

* feat: add range header parsing support

* feat: verified-fetch supports range-requests

* test: fix dns test asserting test failure since we are catching it now

* fix: return 500 error when streaming unixfs content throws

* fix: cleanup code and unexecuting tests hiding errors

* chore: some cleanup and code coverage

* tmp: most things working

* fix: stream slicing and test correctness

* chore: fixed some ByteRangeContext tests

* test: add back header helpers

* fix: unixfs tests are passing

* fix: range-requests on raw content

* feat: tests are passing

moved transform stream over to https://github.com/SgtPooki/streams

* chore: log string casing

* chore: use 502 response instead of 500

* chore: use libp2p/interface for types in src

* chore: failing to create range resp logs error

* chore: Apply suggestions from code review

* chore: fix broken tests from github PR patches (my own)

* chore: re-enable stream tests for ByteRangeContext

* chore: clean up getBody a bit

* chore: ByteRangeContext getBody cleanup

* chore: apply suggestions from code review

Co-authored-by: Alex Potsides <alex@achingbrain.net>

* fix: getSlicedBody uses correct types

* chore: remove extra stat call

* chore: fix jsdoc with '*/'

* chore: fileSize is public property, but should not be used

* test: fix blob comparisons that broke or were never worjing properly

* chore: Update byte-range-context.ts

Co-authored-by: Alex Potsides <alex@achingbrain.net>

* chore: jsdoc cleanup

* Revert "chore: fileSize is public property, but should not be used"

This reverts commit 46dc133.

* chore: jsdoc comments explaining .fileSize use

* chore: isRangeRequest is public

* chore: getters/setters update

* chore: remove unnecessary _contentRangeHeaderValue

* chore: ByteRangeContext uses setFileSize and getFileSize

* chore: remove .stat changes that are no longer needed

---------

Co-authored-by: Alex Potsides <alex@achingbrain.net>

* chore(release): 1.2.0 [skip ci]

## @helia/verified-fetch [1.2.0](https://github.com/ipfs/helia-verified-fetch/compare/@helia/verified-fetch-1.1.3...@helia/verified-fetch-1.2.0) (2024-03-15)

### Features

* support http range header ([#10](#10)) ([9f5078a](9f5078a))

### Trivial Changes

* fix build ([#22](#22)) ([01261fe](01261fe))

* chore(release): 1.7.0 [skip ci]

## @helia/verified-fetch-interop [1.7.0](https://github.com/ipfs/helia-verified-fetch/compare/@helia/verified-fetch-interop-1.6.0...@helia/verified-fetch-interop-1.7.0) (2024-03-15)

### Dependencies

* **@helia/verified-fetch:** upgraded to 1.2.0

* chore: apply pr comments

* fix: some ipns ttl precision cleanup

---------

Co-authored-by: Alex Potsides <alex@achingbrain.net>
Co-authored-by: semantic-release-bot <semantic-release-bot@martynus.net>

* chore: add matchUrlGroups typeguard

* chore: remove unnecessary headerValue != null check

* test: remove unnecessary redefinition of verifiedFetch

---------

Co-authored-by: Daniel N <2color@users.noreply.github.com>
Co-authored-by: Russell Dempsey <1173416+SgtPooki@users.noreply.github.com>
Co-authored-by: Alex Potsides <alex@achingbrain.net>
Co-authored-by: semantic-release-bot <semantic-release-bot@martynus.net>
  • Loading branch information
5 people committed Mar 21, 2024
1 parent a240056 commit 114f3a4
Show file tree
Hide file tree
Showing 7 changed files with 284 additions and 36 deletions.
5 changes: 3 additions & 2 deletions packages/verified-fetch/src/utils/parse-resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ export async function parseResource (resource: Resource, { ipns, logger }: Parse
cid,
protocol: 'ipfs',
path: '',
query: {}
}
query: {},
ttl: 29030400 // 1 year for ipfs content
} satisfies ParsedUrlStringResults
}

throw new TypeError(`Invalid resource. Cannot determine CID from resource: ${resource}`)
Expand Down
100 changes: 81 additions & 19 deletions packages/verified-fetch/src/utils/parse-url-string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ import { peerIdFromString } from '@libp2p/peer-id'
import { CID } from 'multiformats/cid'
import { TLRU } from './tlru.js'
import type { RequestFormatShorthand } from '../types.js'
import type { IPNS, ResolveDNSLinkProgressEvents, ResolveResult } from '@helia/ipns'
import type { DNSLinkResolveResult, IPNS, IPNSResolveResult, ResolveDNSLinkProgressEvents, ResolveResult } from '@helia/ipns'
import type { ComponentLogger } from '@libp2p/interface'
import type { ProgressOptions } from 'progress-events'

const ipnsCache = new TLRU<ResolveResult>(1000)
const ipnsCache = new TLRU<DNSLinkResolveResult | IPNSResolveResult>(1000)

export interface ParseUrlStringInput {
urlString: string
Expand All @@ -23,30 +23,80 @@ export interface ParsedUrlQuery extends Record<string, string | unknown> {
filename?: string
}

export interface ParsedUrlStringResults {
protocol: string
path: string
cid: CID
interface ParsedUrlStringResultsBase extends ResolveResult {
protocol: 'ipfs' | 'ipns'
query: ParsedUrlQuery

/**
* seconds as a number
*/
ttl?: number
}

export type ParsedUrlStringResults = ParsedUrlStringResultsBase

const URL_REGEX = /^(?<protocol>ip[fn]s):\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
const PATH_REGEX = /^\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
const PATH_GATEWAY_REGEX = /^https?:\/\/(.*[^/])\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
const SUBDOMAIN_GATEWAY_REGEX = /^https?:\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\.(?<protocol>ip[fn]s)\.([^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/

function matchURLString (urlString: string): Record<string, string> {
interface MatchUrlGroups {
protocol: 'ipfs' | 'ipns'
cidOrPeerIdOrDnsLink: string
path?: string
queryString?: string
}

function matchUrlGroupsGuard (groups?: null | { [key in string]: string; } | MatchUrlGroups): groups is MatchUrlGroups {
const protocol = groups?.protocol
if (protocol == null) return false
const cidOrPeerIdOrDnsLink = groups?.cidOrPeerIdOrDnsLink
if (cidOrPeerIdOrDnsLink == null) return false
const path = groups?.path
const queryString = groups?.queryString

return ['ipns', 'ipfs'].includes(protocol) &&
typeof cidOrPeerIdOrDnsLink === 'string' &&
(path == null || typeof path === 'string') &&
(queryString == null || typeof queryString === 'string')
}

function matchURLString (urlString: string): MatchUrlGroups {
for (const pattern of [URL_REGEX, PATH_REGEX, PATH_GATEWAY_REGEX, SUBDOMAIN_GATEWAY_REGEX]) {
const match = urlString.match(pattern)

if (match?.groups != null) {
return match.groups
if (matchUrlGroupsGuard(match?.groups)) {
return match.groups satisfies MatchUrlGroups
}
}

throw new TypeError(`Invalid URL: ${urlString}, please use ipfs://, ipns://, or gateway URLs only`)
}

/**
* determines the TTL for the resolved resource that will be used for the `Cache-Control` header's `max-age` directive.
* max-age is in seconds
*
* @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
*
* If we have ipnsTtlNs, it will be a BigInt representing "nanoseconds". We need to convert it back to seconds.
*
* For more TTL nuances:
*
* @see https://github.com/ipfs/js-ipns/blob/16e0e10682fa9a663e0bb493a44d3e99a5200944/src/index.ts#L200
* @see https://github.com/ipfs/js-ipns/pull/308
*/
function calculateTtl (resolveResult?: IPNSResolveResult | DNSLinkResolveResult): number | undefined {
if (resolveResult == null) {
return undefined
}
const dnsLinkTtl = (resolveResult as DNSLinkResolveResult).answer?.TTL
const ipnsTtlNs = (resolveResult as IPNSResolveResult).record?.ttl
// For some reason, ipns "nanoseconds" are 1e-8 of a second, instead of 1e-9.
const ipnsTtl = ipnsTtlNs != null ? Number(ipnsTtlNs / BigInt(1e8)) : undefined
return dnsLinkTtl ?? ipnsTtl
}

/**
* For dnslinks see https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header
* DNSLink names include . which means they must be inlined into a single DNS label to provide unique origin and work with wildcard TLS certificates.
Expand Down Expand Up @@ -89,32 +139,36 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
let cid: CID | undefined
let resolvedPath: string | undefined
const errors: Error[] = []
let resolveResult: IPNSResolveResult | DNSLinkResolveResult | undefined

if (protocol === 'ipfs') {
try {
cid = CID.parse(cidOrPeerIdOrDnsLink)
/**
* no ttl set. @link {setCacheControlHeader}
*/
} catch (err) {
log.error(err)
errors.push(new TypeError('Invalid CID for ipfs://<cid> URL'))
}
} else {
let resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink)
// protocol is ipns
resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink)

if (resolveResult != null) {
cid = resolveResult.cid
resolvedPath = resolveResult.path
log.trace('resolved %s to %c from cache', cidOrPeerIdOrDnsLink, cid)
} else {
// protocol is ipns
log.trace('attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink)
log.trace('Attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink)
let peerId = null
try {
// try resolving as an IPNS name
peerId = peerIdFromString(cidOrPeerIdOrDnsLink)
resolveResult = await ipns.resolve(peerId, { onProgress: options?.onProgress })
cid = resolveResult?.cid
resolvedPath = resolveResult?.path
cid = resolveResult.cid
resolvedPath = resolveResult.path
log.trace('resolved %s to %c', cidOrPeerIdOrDnsLink, cid)
ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, 60 * 1000 * 2)
} catch (err) {
if (peerId == null) {
log.error('could not parse PeerId string "%s"', cidOrPeerIdOrDnsLink, err)
Expand All @@ -126,6 +180,7 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
}

if (cid == null) {
// cid is still null, try resolving as a DNSLink
let decodedDnsLinkLabel = cidOrPeerIdOrDnsLink
if (isInlinedDnsLink(cidOrPeerIdOrDnsLink)) {
decodedDnsLinkLabel = dnsLinkLabelDecoder(cidOrPeerIdOrDnsLink)
Expand All @@ -138,7 +193,6 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
cid = resolveResult?.cid
resolvedPath = resolveResult?.path
log.trace('resolved %s to %c', decodedDnsLinkLabel, cid)
ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, 60 * 1000 * 2)
} catch (err: any) {
log.error('could not resolve DnsLink for "%s"', cidOrPeerIdOrDnsLink, err)
errors.push(err)
Expand All @@ -155,6 +209,13 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
throw new AggregateError(errors, `Invalid resource. Cannot determine CID from URL "${urlString}"`)
}

const ttl = calculateTtl(resolveResult)

if (resolveResult != null) {
// use the ttl for the resolved resouce for the cache, but fallback to 2 minutes if not available
ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, ttl ?? 60 * 1000 * 2)
}

// parse query string
const query: Record<string, any> = {}

Expand All @@ -177,9 +238,10 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
return {
protocol,
cid,
path: joinPaths(resolvedPath, urlPath),
query
}
path: joinPaths(resolvedPath, urlPath ?? ''),
query,
ttl
} satisfies ParsedUrlStringResults
}

/**
Expand Down
36 changes: 36 additions & 0 deletions packages/verified-fetch/src/utils/response-headers.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,39 @@
interface CacheControlHeaderOptions {
/**
* This should be seconds as a number.
*
* See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
*/
ttl?: number
protocol: 'ipfs' | 'ipns'
response: Response
}

/**
* Implementations may place an upper bound on any TTL received, as noted in Section 8 of [rfc2181].
* If TTL value is unknown, implementations should not send a Cache-Control
* No matter if TTL value is known or not, implementations should always send a Last-Modified header with the timestamp of the record resolution.
*
* @see https://specs.ipfs.tech/http-gateways/path-gateway/#cache-control-response-header
*/
export function setCacheControlHeader ({ ttl, protocol, response }: CacheControlHeaderOptions): void {
let headerValue: string
if (protocol === 'ipfs') {
headerValue = 'public, max-age=29030400, immutable'
} else if (ttl == null) {
/**
* default limit for unknown TTL: "use 5 minute as default fallback when it is not available."
*
* @see https://github.com/ipfs/boxo/issues/329#issuecomment-1995236409
*/
headerValue = 'public, max-age=300'
} else {
headerValue = `public, max-age=${ttl}`
}

response.headers.set('cache-control', headerValue)
}

/**
* This function returns the value of the `Content-Range` header for a given range.
* If you know the total size of the body, pass it as `byteSize`
Expand Down
8 changes: 7 additions & 1 deletion packages/verified-fetch/src/verified-fetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { getETag } from './utils/get-e-tag.js'
import { getStreamFromAsyncIterable } from './utils/get-stream-from-async-iterable.js'
import { tarStream } from './utils/get-tar-stream.js'
import { parseResource } from './utils/parse-resource.js'
import { setCacheControlHeader } from './utils/response-headers.js'
import { badRequestResponse, movedPermanentlyResponse, notAcceptableResponse, notSupportedResponse, okResponse, badRangeResponse, okRangeResponse, badGatewayResponse } from './utils/responses.js'
import { selectOutputType, queryFormatToAcceptHeader } from './utils/select-output-type.js'
import { walkPath } from './utils/walk-path.js'
Expand Down Expand Up @@ -441,11 +442,15 @@ export class VerifiedFetch {
let cid: ParsedUrlStringResults['cid']
let path: ParsedUrlStringResults['path']
let query: ParsedUrlStringResults['query']
let ttl: ParsedUrlStringResults['ttl']
let protocol: ParsedUrlStringResults['protocol']
try {
const result = await parseResource(resource, { ipns: this.ipns, logger: this.helia.logger }, options)
cid = result.cid
path = result.path
query = result.query
ttl = result.ttl
protocol = result.protocol
} catch (err) {
this.log.error('error parsing resource %s', resource, err)

Expand Down Expand Up @@ -516,7 +521,8 @@ export class VerifiedFetch {
}

response.headers.set('etag', getETag({ cid, reqFormat, weak: false }))
response.headers.set('cache-control', 'public, max-age=29030400, immutable')

setCacheControlHeader({ response, ttl, protocol })
// https://specs.ipfs.tech/http-gateways/path-gateway/#x-ipfs-path-response-header
response.headers.set('X-Ipfs-Path', resource.toString())

Expand Down
Loading

0 comments on commit 114f3a4

Please sign in to comment.