From c17971a2d8e424cc7f10c071d97c07c08aa319cf Mon Sep 17 00:00:00 2001 From: bnewbold Date: Thu, 30 Nov 2023 16:19:06 -0800 Subject: [PATCH] harden datetime verification (#1702) * syntax: add datetime validator (and interop tests) * syntax: improve datetime normalization * lexicon: stronger datetime validation (from syntax package) * syntax: make datetime syntax norm test more flexible * make fmt * datetime: docs, normalize and always variant * bsky replace toSimplifiedISOSafe with normalizeDatetimeAlways * more rigorous datetime parsing on record creation * handle negative dates * syntax: disallow datetimes before year 0010 * syntax: datetime normalization functions validate output --------- Co-authored-by: dholms --- .../syntax/datetime_parse_invalid.txt | 7 ++ .../syntax/datetime_syntax_invalid.txt | 68 ++++++++++ .../syntax/datetime_syntax_valid.txt | 40 ++++++ .../src/services/indexing/plugins/block.ts | 5 +- .../indexing/plugins/feed-generator.ts | 5 +- .../src/services/indexing/plugins/follow.ts | 5 +- .../src/services/indexing/plugins/like.ts | 5 +- .../services/indexing/plugins/list-block.ts | 5 +- .../services/indexing/plugins/list-item.ts | 5 +- .../src/services/indexing/plugins/list.ts | 5 +- .../src/services/indexing/plugins/post.ts | 5 +- .../src/services/indexing/plugins/repost.ts | 5 +- .../services/indexing/plugins/thread-gate.ts | 5 +- packages/bsky/src/services/label/index.ts | 5 +- packages/lexicon/src/validators/formats.ts | 2 +- packages/lexicon/tests/general.test.ts | 4 +- packages/pds/src/repo/prepare.ts | 20 ++- packages/pds/tests/crud.test.ts | 20 ++- packages/syntax/src/datetime.ts | 112 +++++++++++++++++ packages/syntax/src/index.ts | 1 + packages/syntax/tests/datetime.test.ts | 118 ++++++++++++++++++ .../interop-files/datetime_parse_invalid.txt | 1 + .../interop-files/datetime_syntax_invalid.txt | 1 + .../interop-files/datetime_syntax_valid.txt | 1 + 24 files changed, 413 insertions(+), 37 deletions(-) create mode 100644 
interop-test-files/syntax/datetime_parse_invalid.txt create mode 100644 interop-test-files/syntax/datetime_syntax_invalid.txt create mode 100644 interop-test-files/syntax/datetime_syntax_valid.txt create mode 100644 packages/syntax/src/datetime.ts create mode 100644 packages/syntax/tests/datetime.test.ts create mode 120000 packages/syntax/tests/interop-files/datetime_parse_invalid.txt create mode 120000 packages/syntax/tests/interop-files/datetime_syntax_invalid.txt create mode 120000 packages/syntax/tests/interop-files/datetime_syntax_valid.txt diff --git a/interop-test-files/syntax/datetime_parse_invalid.txt b/interop-test-files/syntax/datetime_parse_invalid.txt new file mode 100644 index 0000000000..3672453a29 --- /dev/null +++ b/interop-test-files/syntax/datetime_parse_invalid.txt @@ -0,0 +1,7 @@ +# superficial syntax parses ok, but are not valid datetimes for semantic reasons (eg, "month zero") +1985-00-12T23:20:50.123Z +1985-04-00T23:20:50.123Z +1985-13-12T23:20:50.123Z +1985-04-12T25:20:50.123Z +1985-04-12T23:99:50.123Z +1985-04-12T23:20:61.123Z diff --git a/interop-test-files/syntax/datetime_syntax_invalid.txt b/interop-test-files/syntax/datetime_syntax_invalid.txt new file mode 100644 index 0000000000..6702e43e8e --- /dev/null +++ b/interop-test-files/syntax/datetime_syntax_invalid.txt @@ -0,0 +1,68 @@ + +# subtle changes to: 1985-04-12T23:20:50.123Z +1985-04-12T23:20:50.123z +01985-04-12T23:20:50.123Z +985-04-12T23:20:50.123Z +1985-04-12T23:20:50.Z +1985-04-32T23;20:50.123Z +1985-04-32T23;20:50.123Z + +# en-dash and em-dash +1985—04-32T23;20:50.123Z +1985–04-32T23;20:50.123Z + +# whitespace + 1985-04-12T23:20:50.123Z +1985-04-12T23:20:50.123Z +1985-04-12T 23:20:50.123Z + +# not enough zero padding +1985-4-12T23:20:50.123Z +1985-04-2T23:20:50.123Z +1985-04-12T3:20:50.123Z +1985-04-12T23:0:50.123Z +1985-04-12T23:20:5.123Z + +# too much zero padding +01985-04-12T23:20:50.123Z +1985-004-12T23:20:50.123Z +1985-04-012T23:20:50.123Z +1985-04-12T023:20:50.123Z 
+1985-04-12T23:020:50.123Z +1985-04-12T23:20:050.123Z + +# strict capitalization (ISO-8601) +1985-04-12t23:20:50.123Z +1985-04-12T23:20:50.123z + +# RFC-3339, but not ISO-8601 +1985-04-12T23:20:50.123-00:00 +1985-04-12_23:20:50.123Z +1985-04-12 23:20:50.123Z + +# ISO-8601, but weird +1985-04-274T23:20:50.123Z + +# timezone is required +1985-04-12T23:20:50.123 +1985-04-12T23:20:50 + +1985-04-12 +1985-04-12T23:20Z +1985-04-12T23:20:5Z +1985-04-12T23:20:50.123 ++001985-04-12T23:20:50.123Z +23:20:50.123Z + +1985-04-12T23:20:50.123+00 +1985-04-12T23:20:50.123+00:0 +1985-04-12T23:20:50.123+0:00 +1985-04-12T23:20:50.123 +1985-04-12T23:20:50.123+0000 +1985-04-12T23:20:50.123+00 +1985-04-12T23:20:50.123+ +1985-04-12T23:20:50.123- + +# ISO-8601, but normalizes to a negative time +0000-01-01T00:00:00+01:00 +-000001-12-31T23:00:00.000Z diff --git a/interop-test-files/syntax/datetime_syntax_valid.txt b/interop-test-files/syntax/datetime_syntax_valid.txt new file mode 100644 index 0000000000..f47d539c2f --- /dev/null +++ b/interop-test-files/syntax/datetime_syntax_valid.txt @@ -0,0 +1,40 @@ +# "preferred" +1985-04-12T23:20:50.123Z +1985-04-12T23:20:50.000Z +2000-01-01T00:00:00.000Z +1985-04-12T23:20:50.123456Z +1985-04-12T23:20:50.120Z +1985-04-12T23:20:50.120000Z + +# "supported" +1985-04-12T23:20:50.1235678912345Z +1985-04-12T23:20:50.100Z +1985-04-12T23:20:50Z +1985-04-12T23:20:50.0Z +1985-04-12T23:20:50.123+00:00 +1985-04-12T23:20:50.123-07:00 +1985-04-12T23:20:50.123+07:00 +1985-04-12T23:20:50.123+01:45 +0985-04-12T23:20:50.123-07:00 +1985-04-12T23:20:50.123-07:00 +0123-01-01T00:00:00.000Z + +# various precisions, up through at least 12 digits +1985-04-12T23:20:50.1Z +1985-04-12T23:20:50.12Z +1985-04-12T23:20:50.123Z +1985-04-12T23:20:50.1234Z +1985-04-12T23:20:50.12345Z +1985-04-12T23:20:50.123456Z +1985-04-12T23:20:50.1234567Z +1985-04-12T23:20:50.12345678Z +1985-04-12T23:20:50.123456789Z +1985-04-12T23:20:50.1234567890Z +1985-04-12T23:20:50.12345678901Z 
+1985-04-12T23:20:50.123456789012Z + +# extreme but currently allowed +0010-12-31T23:00:00.000Z +1000-12-31T23:00:00.000Z +1900-12-31T23:00:00.000Z +3001-12-31T23:00:00.000Z diff --git a/packages/bsky/src/services/indexing/plugins/block.ts b/packages/bsky/src/services/indexing/plugins/block.ts index bf8ae9e502..88e62b6f5a 100644 --- a/packages/bsky/src/services/indexing/plugins/block.ts +++ b/packages/bsky/src/services/indexing/plugins/block.ts @@ -1,6 +1,5 @@ import { Selectable } from 'kysely' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { CID } from 'multiformats/cid' import * as Block from '../../../lexicon/types/app/bsky/graph/block' import * as lex from '../../../lexicon/lexicons' @@ -27,7 +26,7 @@ const insertFn = async ( cid: cid.toString(), creator: uri.host, subjectDid: obj.subject, - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: normalizeDatetimeAlways(obj.createdAt), indexedAt: timestamp, }) .onConflict((oc) => oc.doNothing()) diff --git a/packages/bsky/src/services/indexing/plugins/feed-generator.ts b/packages/bsky/src/services/indexing/plugins/feed-generator.ts index e4ae5eb4f5..be5435966f 100644 --- a/packages/bsky/src/services/indexing/plugins/feed-generator.ts +++ b/packages/bsky/src/services/indexing/plugins/feed-generator.ts @@ -1,6 +1,5 @@ import { Selectable } from 'kysely' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { CID } from 'multiformats/cid' import * as FeedGenerator from '../../../lexicon/types/app/bsky/feed/generator' import * as lex from '../../../lexicon/lexicons' @@ -33,7 +32,7 @@ const insertFn = async ( ? 
JSON.stringify(obj.descriptionFacets) : undefined, avatarCid: obj.avatar?.ref.toString(), - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: normalizeDatetimeAlways(obj.createdAt), indexedAt: timestamp, }) .onConflict((oc) => oc.doNothing()) diff --git a/packages/bsky/src/services/indexing/plugins/follow.ts b/packages/bsky/src/services/indexing/plugins/follow.ts index e9a344db2f..8655c7eba7 100644 --- a/packages/bsky/src/services/indexing/plugins/follow.ts +++ b/packages/bsky/src/services/indexing/plugins/follow.ts @@ -1,6 +1,5 @@ import { Selectable } from 'kysely' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { CID } from 'multiformats/cid' import * as Follow from '../../../lexicon/types/app/bsky/graph/follow' import * as lex from '../../../lexicon/lexicons' @@ -28,7 +27,7 @@ const insertFn = async ( cid: cid.toString(), creator: uri.host, subjectDid: obj.subject, - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: normalizeDatetimeAlways(obj.createdAt), indexedAt: timestamp, }) .onConflict((oc) => oc.doNothing()) diff --git a/packages/bsky/src/services/indexing/plugins/like.ts b/packages/bsky/src/services/indexing/plugins/like.ts index 01e0fa5c4f..703800f67c 100644 --- a/packages/bsky/src/services/indexing/plugins/like.ts +++ b/packages/bsky/src/services/indexing/plugins/like.ts @@ -1,6 +1,5 @@ import { Selectable } from 'kysely' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { CID } from 'multiformats/cid' import * as Like from '../../../lexicon/types/app/bsky/feed/like' import * as lex from '../../../lexicon/lexicons' @@ -29,7 +28,7 @@ const insertFn = async ( creator: uri.host, subject: obj.subject.uri, subjectCid: obj.subject.cid, - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: 
normalizeDatetimeAlways(obj.createdAt), indexedAt: timestamp, }) .onConflict((oc) => oc.doNothing()) diff --git a/packages/bsky/src/services/indexing/plugins/list-block.ts b/packages/bsky/src/services/indexing/plugins/list-block.ts index 33dc7cfc51..3040f1aa3f 100644 --- a/packages/bsky/src/services/indexing/plugins/list-block.ts +++ b/packages/bsky/src/services/indexing/plugins/list-block.ts @@ -1,6 +1,5 @@ import { Selectable } from 'kysely' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { CID } from 'multiformats/cid' import * as ListBlock from '../../../lexicon/types/app/bsky/graph/listblock' import * as lex from '../../../lexicon/lexicons' @@ -27,7 +26,7 @@ const insertFn = async ( cid: cid.toString(), creator: uri.host, subjectUri: obj.subject, - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: normalizeDatetimeAlways(obj.createdAt), indexedAt: timestamp, }) .onConflict((oc) => oc.doNothing()) diff --git a/packages/bsky/src/services/indexing/plugins/list-item.ts b/packages/bsky/src/services/indexing/plugins/list-item.ts index 2ab125062a..9e08145b23 100644 --- a/packages/bsky/src/services/indexing/plugins/list-item.ts +++ b/packages/bsky/src/services/indexing/plugins/list-item.ts @@ -1,6 +1,5 @@ import { Selectable } from 'kysely' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { CID } from 'multiformats/cid' import * as ListItem from '../../../lexicon/types/app/bsky/graph/listitem' import * as lex from '../../../lexicon/lexicons' @@ -35,7 +34,7 @@ const insertFn = async ( creator: uri.host, subjectDid: obj.subject, listUri: obj.list, - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: normalizeDatetimeAlways(obj.createdAt), indexedAt: timestamp, }) .onConflict((oc) => oc.doNothing()) diff --git 
a/packages/bsky/src/services/indexing/plugins/list.ts b/packages/bsky/src/services/indexing/plugins/list.ts index 293c457c4f..0d07857250 100644 --- a/packages/bsky/src/services/indexing/plugins/list.ts +++ b/packages/bsky/src/services/indexing/plugins/list.ts @@ -1,6 +1,5 @@ import { Selectable } from 'kysely' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { CID } from 'multiformats/cid' import * as List from '../../../lexicon/types/app/bsky/graph/list' import * as lex from '../../../lexicon/lexicons' @@ -33,7 +32,7 @@ const insertFn = async ( ? JSON.stringify(obj.descriptionFacets) : undefined, avatarCid: obj.avatar?.ref.toString(), - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: normalizeDatetimeAlways(obj.createdAt), indexedAt: timestamp, }) .onConflict((oc) => oc.doNothing()) diff --git a/packages/bsky/src/services/indexing/plugins/post.ts b/packages/bsky/src/services/indexing/plugins/post.ts index 396544b8f2..5f2fca934c 100644 --- a/packages/bsky/src/services/indexing/plugins/post.ts +++ b/packages/bsky/src/services/indexing/plugins/post.ts @@ -1,7 +1,6 @@ import { Insertable, Selectable, sql } from 'kysely' import { CID } from 'multiformats/cid' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { jsonStringToLex } from '@atproto/lexicon' import { Record as PostRecord, @@ -68,7 +67,7 @@ const insertFn = async ( cid: cid.toString(), creator: uri.host, text: obj.text, - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: normalizeDatetimeAlways(obj.createdAt), replyRoot: obj.reply?.root?.uri || null, replyRootCid: obj.reply?.root?.cid || null, replyParent: obj.reply?.parent?.uri || null, diff --git a/packages/bsky/src/services/indexing/plugins/repost.ts 
b/packages/bsky/src/services/indexing/plugins/repost.ts index 9c46b9b337..ea8d517dc5 100644 --- a/packages/bsky/src/services/indexing/plugins/repost.ts +++ b/packages/bsky/src/services/indexing/plugins/repost.ts @@ -1,7 +1,6 @@ import { Selectable } from 'kysely' import { CID } from 'multiformats/cid' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import * as Repost from '../../../lexicon/types/app/bsky/feed/repost' import * as lex from '../../../lexicon/lexicons' import { DatabaseSchema, DatabaseSchemaType } from '../../../db/database-schema' @@ -27,7 +26,7 @@ const insertFn = async ( creator: uri.host, subject: obj.subject.uri, subjectCid: obj.subject.cid, - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: normalizeDatetimeAlways(obj.createdAt), indexedAt: timestamp, } const [inserted] = await Promise.all([ diff --git a/packages/bsky/src/services/indexing/plugins/thread-gate.ts b/packages/bsky/src/services/indexing/plugins/thread-gate.ts index 37f3ddb062..9a58547f2d 100644 --- a/packages/bsky/src/services/indexing/plugins/thread-gate.ts +++ b/packages/bsky/src/services/indexing/plugins/thread-gate.ts @@ -1,6 +1,5 @@ -import { AtUri } from '@atproto/syntax' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { InvalidRequestError } from '@atproto/xrpc-server' -import { toSimplifiedISOSafe } from '@atproto/common' import { CID } from 'multiformats/cid' import * as Threadgate from '../../../lexicon/types/app/bsky/feed/threadgate' import * as lex from '../../../lexicon/lexicons' @@ -33,7 +32,7 @@ const insertFn = async ( cid: cid.toString(), creator: uri.host, postUri: obj.post, - createdAt: toSimplifiedISOSafe(obj.createdAt), + createdAt: normalizeDatetimeAlways(obj.createdAt), indexedAt: timestamp, }) .onConflict((oc) => oc.doNothing()) diff --git a/packages/bsky/src/services/label/index.ts 
b/packages/bsky/src/services/label/index.ts index 7d351b9501..f44b0439dd 100644 --- a/packages/bsky/src/services/label/index.ts +++ b/packages/bsky/src/services/label/index.ts @@ -1,6 +1,5 @@ import { sql } from 'kysely' -import { AtUri } from '@atproto/syntax' -import { toSimplifiedISOSafe } from '@atproto/common' +import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax' import { Database } from '../../db' import { Label, isSelfLabels } from '../../lexicon/types/com/atproto/label/defs' import { ids } from '../../lexicon/lexicons' @@ -166,7 +165,7 @@ export function getSelfLabels(details: { const src = new AtUri(uri).host // record creator const cts = typeof record.createdAt === 'string' - ? toSimplifiedISOSafe(record.createdAt) + ? normalizeDatetimeAlways(record.createdAt) : new Date(0).toISOString() return record.labels.values.map(({ val }) => { return { src, uri, cid, val, cts, neg: false } diff --git a/packages/lexicon/src/validators/formats.ts b/packages/lexicon/src/validators/formats.ts index 63fc941628..b786c68281 100644 --- a/packages/lexicon/src/validators/formats.ts +++ b/packages/lexicon/src/validators/formats.ts @@ -18,7 +18,7 @@ export function datetime(path: string, value: string): ValidationResult { return { success: false, error: new ValidationError( - `${path} must be an iso8601 formatted datetime`, + `${path} must be an valid atproto datetime (both RFC-3339 and ISO-8601)`, ), } } diff --git a/packages/lexicon/tests/general.test.ts b/packages/lexicon/tests/general.test.ts index 685c99f40e..ca9cb44dc3 100644 --- a/packages/lexicon/tests/general.test.ts +++ b/packages/lexicon/tests/general.test.ts @@ -659,7 +659,9 @@ describe('Record validation', () => { $type: 'com.example.datetime', datetime: 'bad date', }), - ).toThrow('Record/datetime must be an iso8601 formatted datetime') + ).toThrow( + 'Record/datetime must be an valid atproto datetime (both RFC-3339 and ISO-8601)', + ) }) it('Applies uri formatting constraint', () => { diff --git 
a/packages/pds/src/repo/prepare.ts b/packages/pds/src/repo/prepare.ts index 8820145530..06b1da9599 100644 --- a/packages/pds/src/repo/prepare.ts +++ b/packages/pds/src/repo/prepare.ts @@ -1,9 +1,10 @@ import { CID } from 'multiformats/cid' -import { AtUri } from '@atproto/syntax' +import { AtUri, ensureValidDatetime } from '@atproto/syntax' import { MINUTE, TID, dataToCborBlock } from '@atproto/common' import { LexiconDefNotFoundError, RepoRecord, + ValidationError, lexToIpld, } from '@atproto/lexicon' import { @@ -115,6 +116,7 @@ export const assertValidRecord = (record: Record) => { } try { lex.lexicons.assertValidRecord(record.$type, record) + assertValidCreatedAt(record) } catch (e) { if (e instanceof LexiconDefNotFoundError) { throw new InvalidRecordError(e.message) @@ -127,6 +129,22 @@ export const assertValidRecord = (record: Record) => { } } +// additional more rigorous check on datetimes +// this check will eventually be in the lex sdk, but this will stop the bleed until then +export const assertValidCreatedAt = (record: Record) => { + const createdAt = record['createdAt'] + if (typeof createdAt !== 'string') { + return + } + try { + ensureValidDatetime(createdAt) + } catch { + throw new ValidationError( + 'createdAt must be an valid atproto datetime (both RFC-3339 and ISO-8601)', + ) + } +} + export const setCollectionName = ( collection: string, record: RepoRecord, diff --git a/packages/pds/tests/crud.test.ts b/packages/pds/tests/crud.test.ts index 65544677ff..f8f855ce04 100644 --- a/packages/pds/tests/crud.test.ts +++ b/packages/pds/tests/crud.test.ts @@ -13,7 +13,7 @@ import { defaultFetchHandler } from '@atproto/xrpc' import * as Post from '../src/lexicon/types/app/bsky/feed/post' import { paginateAll } from './_util' import AppContext from '../src/context' -import { ids } from '../src/lexicon/lexicons' +import { ids, lexicons } from '../src/lexicon/lexicons' const alice = { email: 'alice@test.com', @@ -579,6 +579,24 @@ describe('crud operations', () 
=> { ) }) + it('validates datetimes more rigorously than lex sdk', async () => { + const postRecord = { + $type: 'app.bsky.feed.post', + text: 'test', + createdAt: '1985-04-12T23:20:50.123', + } + lexicons.assertValidRecord('app.bsky.feed.post', postRecord) + await expect( + aliceAgent.api.com.atproto.repo.createRecord({ + repo: alice.did, + collection: 'app.bsky.feed.post', + record: postRecord, + }), + ).rejects.toThrow( + 'Invalid app.bsky.feed.post record: createdAt must be an valid atproto datetime (both RFC-3339 and ISO-8601)', + ) + }) + describe('compare-and-swap', () => { let recordCount = 0 // Ensures unique cids const postRecord = () => ({ diff --git a/packages/syntax/src/datetime.ts b/packages/syntax/src/datetime.ts new file mode 100644 index 0000000000..96643271c8 --- /dev/null +++ b/packages/syntax/src/datetime.ts @@ -0,0 +1,112 @@ +/* Validates datetime string against atproto Lexicon 'datetime' format; throws InvalidDatetimeError on the first failed check and returns nothing on success. + * Syntax is described at: https://atproto.com/specs/lexicon#datetime + */ +export const ensureValidDatetime = (dtStr: string): void => { + const date = new Date(dtStr) + // must parse as ISO 8601; this also verifies semantics like month is not 13 or 00 + if (isNaN(date.getTime())) { + throw new InvalidDatetimeError('datetime did not parse as ISO 8601') + } + if (date.toISOString().startsWith('-')) { + throw new InvalidDatetimeError('datetime normalized to a negative time') + } + // regex and other checks for RFC-3339; the fractional-seconds separator must be a literal '.', so it is escaped + if ( + !/^[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9](\.[0-9]{1,20})?(Z|([+-][0-2][0-9]:[0-5][0-9]))$/.test( + dtStr, + ) + ) { + throw new InvalidDatetimeError("datetime didn't validate via regex") + } + if (dtStr.length > 64) { + throw new InvalidDatetimeError('datetime is too long (64 chars max)') + } + if (dtStr.endsWith('-00:00')) { + throw new InvalidDatetimeError( + 'datetime can not use "-00:00" for UTC timezone', + ) + } + if (dtStr.startsWith('000')) { + throw new InvalidDatetimeError('datetime so close to 
year zero not allowed') + } +} + +/* Same logic as ensureValidDatetime(), but returns a boolean instead of throwing an exception. + */ +export const isValidDatetime = (dtStr: string): boolean => { + try { + ensureValidDatetime(dtStr) + } catch (err) { + if (err instanceof InvalidDatetimeError) { + return false + } + throw err + } + + return true +} + +/* Takes a flexible datetime string and normalizes representation. + * + * This function will work with any valid atproto datetime (eg, anything which isValidDatetime() is true for). It *additionally* is more flexible about accepting datetimes that don't comply to RFC 3339, or are missing timezone information, and normalizing them to a valid datetime. + * + * One use-case is a consistent, sortable string. Another is to work with older invalid createdAt datetimes. + * + * Successful output will be a valid atproto datetime with millisecond precision (3 sub-second digits) and UTC timezone with trailing 'Z' syntax. Throws `InvalidDatetimeError` if the input string could not be parsed as a datetime, even with permissive parsing. 
+ * + * Expected output format: YYYY-MM-DDTHH:mm:ss.sssZ + */ +export const normalizeDatetime = (dtStr: string): string => { + if (isValidDatetime(dtStr)) { + const outStr = new Date(dtStr).toISOString() + if (isValidDatetime(outStr)) { + return outStr + } + } + + // check if this permissive datetime is missing a timezone + if (!/.*(([+-]\d\d:?\d\d)|[a-zA-Z])$/.test(dtStr)) { + const date = new Date(dtStr + 'Z') + if (!isNaN(date.getTime())) { + const tzStr = date.toISOString() + if (isValidDatetime(tzStr)) { + return tzStr + } + } + } + + // finally try parsing as simple datetime + const date = new Date(dtStr) + if (isNaN(date.getTime())) { + throw new InvalidDatetimeError( + 'datetime did not parse as any timestamp format', + ) + } + const isoStr = date.toISOString() + if (isValidDatetime(isoStr)) { + return isoStr + } else { + throw new InvalidDatetimeError( + 'datetime normalized to invalid timestamp string', + ) + } +} + +/* Variant of normalizeDatetime() which always returns a valid datetime strings. + * + * If a InvalidDatetimeError is encountered, returns the UNIX epoch time as a UTC datetime (1970-01-01T00:00:00.000Z). + */ +export const normalizeDatetimeAlways = (dtStr: string): string => { + try { + return normalizeDatetime(dtStr) + } catch (err) { + if (err instanceof InvalidDatetimeError) { + return new Date(0).toISOString() + } + throw err + } +} + +/* Indicates a datetime string did not pass full atproto Lexicon datetime string format checks. 
+ */ +export class InvalidDatetimeError extends Error {} diff --git a/packages/syntax/src/index.ts b/packages/syntax/src/index.ts index 0b056b995a..2a108e5379 100644 --- a/packages/syntax/src/index.ts +++ b/packages/syntax/src/index.ts @@ -2,3 +2,4 @@ export * from './handle' export * from './did' export * from './nsid' export * from './aturi' +export * from './datetime' diff --git a/packages/syntax/tests/datetime.test.ts b/packages/syntax/tests/datetime.test.ts new file mode 100644 index 0000000000..15fdc8dc6e --- /dev/null +++ b/packages/syntax/tests/datetime.test.ts @@ -0,0 +1,118 @@ +import { + isValidDatetime, + ensureValidDatetime, + normalizeDatetime, + normalizeDatetimeAlways, + InvalidDatetimeError, +} from '../src' +import * as readline from 'readline' +import * as fs from 'fs' + +describe('datetime validation', () => { + const expectValid = (h: string) => { + ensureValidDatetime(h) + normalizeDatetime(h) + normalizeDatetimeAlways(h) + } + const expectInvalid = (h: string) => { + expect(() => ensureValidDatetime(h)).toThrow(InvalidDatetimeError) + } + + it('conforms to interop valid datetimes', () => { + const lineReader = readline.createInterface({ + input: fs.createReadStream( + `${__dirname}/interop-files/datetime_syntax_valid.txt`, + ), + terminal: false, + }) + lineReader.on('line', (line) => { + if (line.startsWith('#') || line.length == 0) { + return + } + if (!isValidDatetime(line)) { + console.log(line) + } + expectValid(line) + }) + }) + + it('conforms to interop invalid datetimes', () => { + const lineReader = readline.createInterface({ + input: fs.createReadStream( + `${__dirname}/interop-files/datetime_syntax_invalid.txt`, + ), + terminal: false, + }) + lineReader.on('line', (line) => { + if (line.startsWith('#') || line.length == 0) { + return + } + expectInvalid(line) + }) + }) + + it('conforms to interop invalid parse (semantics) datetimes', () => { + const lineReader = readline.createInterface({ + input: fs.createReadStream( + 
`${__dirname}/interop-files/datetime_parse_invalid.txt`, + ), + terminal: false, + }) + lineReader.on('line', (line) => { + if (line.startsWith('#') || line.length == 0) { + return + } + expectInvalid(line) + }) + }) +}) + +describe('normalization', () => { + it('normalizes datetimes', () => { + expect(normalizeDatetime('1234-04-12T23:20:50Z')).toEqual( + '1234-04-12T23:20:50.000Z', + ) + expect(normalizeDatetime('1985-04-12T23:20:50Z')).toEqual( + '1985-04-12T23:20:50.000Z', + ) + expect(normalizeDatetime('1985-04-12T23:20:50.123')).toEqual( + '1985-04-12T23:20:50.123Z', + ) + expect(normalizeDatetime('1985-04-12 23:20:50.123')).toEqual( + '1985-04-12T23:20:50.123Z', + ) + expect(normalizeDatetime('1985-04-12T10:20:50.1+01:00')).toEqual( + '1985-04-12T09:20:50.100Z', + ) + expect(normalizeDatetime('Fri, 02 Jan 1999 12:34:56 GMT')).toEqual( + '1999-01-02T12:34:56.000Z', + ) + }) + + it('throws on invalid normalized datetimes', () => { + expect(() => normalizeDatetime('')).toThrow(InvalidDatetimeError) + expect(() => normalizeDatetime('blah')).toThrow(InvalidDatetimeError) + expect(() => normalizeDatetime('1999-19-39T23:20:50.123Z')).toThrow( + InvalidDatetimeError, + ) + expect(() => normalizeDatetime('-000001-12-31T23:00:00.000Z')).toThrow( + InvalidDatetimeError, + ) + expect(() => normalizeDatetime('0000-01-01T00:00:00+01:00')).toThrow( + InvalidDatetimeError, + ) + expect(() => normalizeDatetime('0001-01-01T00:00:00+01:00')).toThrow( + InvalidDatetimeError, + ) + }) + + it('normalizes datetimes always', () => { + expect(normalizeDatetimeAlways('1985-04-12T23:20:50Z')).toEqual( + '1985-04-12T23:20:50.000Z', + ) + expect(normalizeDatetimeAlways('blah')).toEqual('1970-01-01T00:00:00.000Z') + expect(normalizeDatetimeAlways('0000-01-01T00:00:00+01:00')).toEqual( + '1970-01-01T00:00:00.000Z', + ) + }) +}) diff --git a/packages/syntax/tests/interop-files/datetime_parse_invalid.txt b/packages/syntax/tests/interop-files/datetime_parse_invalid.txt new file mode 120000 
index 0000000000..ef8782df26 --- /dev/null +++ b/packages/syntax/tests/interop-files/datetime_parse_invalid.txt @@ -0,0 +1 @@ +../../../../interop-test-files/syntax/datetime_parse_invalid.txt \ No newline at end of file diff --git a/packages/syntax/tests/interop-files/datetime_syntax_invalid.txt b/packages/syntax/tests/interop-files/datetime_syntax_invalid.txt new file mode 120000 index 0000000000..948c647c88 --- /dev/null +++ b/packages/syntax/tests/interop-files/datetime_syntax_invalid.txt @@ -0,0 +1 @@ +../../../../interop-test-files/syntax/datetime_syntax_invalid.txt \ No newline at end of file diff --git a/packages/syntax/tests/interop-files/datetime_syntax_valid.txt b/packages/syntax/tests/interop-files/datetime_syntax_valid.txt new file mode 120000 index 0000000000..9c74ded2ed --- /dev/null +++ b/packages/syntax/tests/interop-files/datetime_syntax_valid.txt @@ -0,0 +1 @@ +../../../../interop-test-files/syntax/datetime_syntax_valid.txt \ No newline at end of file