Skip to content

Commit

Permalink
harden datetime verification (#1702)
Browse files Browse the repository at this point in the history
* syntax: add datetime validator (and interop tests)

* syntax: improve datetime normalization

* lexicon: stronger datetime validation (from syntax package)

* syntax: make datetime syntax norm test more flexible

* make fmt

* datetime: docs, normalize and always variant

* bsky replace toSimplifiedISOSafe with normalizeDatetimeAlways

* more rigorous datetime parsing on record creation

* handle negative dates

* syntax: disallow datetimes before year 0010

* syntax: datetime normalization functions validate output

---------

Co-authored-by: dholms <dtholmgren@gmail.com>
  • Loading branch information
bnewbold and dholms committed Dec 1, 2023
1 parent 691511b commit c17971a
Show file tree
Hide file tree
Showing 24 changed files with 413 additions and 37 deletions.
7 changes: 7 additions & 0 deletions interop-test-files/syntax/datetime_parse_invalid.txt
@@ -0,0 +1,7 @@
# superficial syntax parses ok, but are not valid datetimes for semantic reasons (eg, "month zero")
1985-00-12T23:20:50.123Z
1985-04-00T23:20:50.123Z
1985-13-12T23:20:50.123Z
1985-04-12T25:20:50.123Z
1985-04-12T23:99:50.123Z
1985-04-12T23:20:61.123Z
68 changes: 68 additions & 0 deletions interop-test-files/syntax/datetime_syntax_invalid.txt
@@ -0,0 +1,68 @@

# subtle changes to: 1985-04-12T23:20:50.123Z
1985-04-12T23:20:50.123z
01985-04-12T23:20:50.123Z
985-04-12T23:20:50.123Z
1985-04-12T23:20:50.Z
1985-04-32T23;20:50.123Z
1985-04-32T23;20:50.123Z

# en-dash and em-dash
1985—04-32T23;20:50.123Z
1985–04-32T23;20:50.123Z

# whitespace
1985-04-12T23:20:50.123Z
1985-04-12T23:20:50.123Z
1985-04-12T 23:20:50.123Z

# not enough zero padding
1985-4-12T23:20:50.123Z
1985-04-2T23:20:50.123Z
1985-04-12T3:20:50.123Z
1985-04-12T23:0:50.123Z
1985-04-12T23:20:5.123Z

# too much zero padding
01985-04-12T23:20:50.123Z
1985-004-12T23:20:50.123Z
1985-04-012T23:20:50.123Z
1985-04-12T023:20:50.123Z
1985-04-12T23:020:50.123Z
1985-04-12T23:20:050.123Z

# strict capitalization (ISO-8601)
1985-04-12t23:20:50.123Z
1985-04-12T23:20:50.123z

# RFC-3339, but not ISO-8601
1985-04-12T23:20:50.123-00:00
1985-04-12_23:20:50.123Z
1985-04-12 23:20:50.123Z

# ISO-8601, but weird
1985-04-274T23:20:50.123Z

# timezone is required
1985-04-12T23:20:50.123
1985-04-12T23:20:50

1985-04-12
1985-04-12T23:20Z
1985-04-12T23:20:5Z
1985-04-12T23:20:50.123
+001985-04-12T23:20:50.123Z
23:20:50.123Z

1985-04-12T23:20:50.123+00
1985-04-12T23:20:50.123+00:0
1985-04-12T23:20:50.123+0:00
1985-04-12T23:20:50.123
1985-04-12T23:20:50.123+0000
1985-04-12T23:20:50.123+00
1985-04-12T23:20:50.123+
1985-04-12T23:20:50.123-

# ISO-8601, but normalizes to a negative time
0000-01-01T00:00:00+01:00
-000001-12-31T23:00:00.000Z
40 changes: 40 additions & 0 deletions interop-test-files/syntax/datetime_syntax_valid.txt
@@ -0,0 +1,40 @@
# "preferred"
1985-04-12T23:20:50.123Z
1985-04-12T23:20:50.000Z
2000-01-01T00:00:00.000Z
1985-04-12T23:20:50.123456Z
1985-04-12T23:20:50.120Z
1985-04-12T23:20:50.120000Z

# "supported"
1985-04-12T23:20:50.1235678912345Z
1985-04-12T23:20:50.100Z
1985-04-12T23:20:50Z
1985-04-12T23:20:50.0Z
1985-04-12T23:20:50.123+00:00
1985-04-12T23:20:50.123-07:00
1985-04-12T23:20:50.123+07:00
1985-04-12T23:20:50.123+01:45
0985-04-12T23:20:50.123-07:00
1985-04-12T23:20:50.123-07:00
0123-01-01T00:00:00.000Z

# various precisions, up through at least 12 digits
1985-04-12T23:20:50.1Z
1985-04-12T23:20:50.12Z
1985-04-12T23:20:50.123Z
1985-04-12T23:20:50.1234Z
1985-04-12T23:20:50.12345Z
1985-04-12T23:20:50.123456Z
1985-04-12T23:20:50.1234567Z
1985-04-12T23:20:50.12345678Z
1985-04-12T23:20:50.123456789Z
1985-04-12T23:20:50.1234567890Z
1985-04-12T23:20:50.12345678901Z
1985-04-12T23:20:50.123456789012Z

# extreme but currently allowed
0010-12-31T23:00:00.000Z
1000-12-31T23:00:00.000Z
1900-12-31T23:00:00.000Z
3001-12-31T23:00:00.000Z
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/block.ts
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as Block from '../../../lexicon/types/app/bsky/graph/block'
import * as lex from '../../../lexicon/lexicons'
Expand All @@ -27,7 +26,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
subjectDid: obj.subject,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/feed-generator.ts
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as FeedGenerator from '../../../lexicon/types/app/bsky/feed/generator'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -33,7 +32,7 @@ const insertFn = async (
? JSON.stringify(obj.descriptionFacets)
: undefined,
avatarCid: obj.avatar?.ref.toString(),
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/follow.ts
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as Follow from '../../../lexicon/types/app/bsky/graph/follow'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -28,7 +27,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
subjectDid: obj.subject,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/like.ts
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as Like from '../../../lexicon/types/app/bsky/feed/like'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -29,7 +28,7 @@ const insertFn = async (
creator: uri.host,
subject: obj.subject.uri,
subjectCid: obj.subject.cid,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/list-block.ts
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as ListBlock from '../../../lexicon/types/app/bsky/graph/listblock'
import * as lex from '../../../lexicon/lexicons'
Expand All @@ -27,7 +26,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
subjectUri: obj.subject,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/list-item.ts
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as ListItem from '../../../lexicon/types/app/bsky/graph/listitem'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -35,7 +34,7 @@ const insertFn = async (
creator: uri.host,
subjectDid: obj.subject,
listUri: obj.list,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/list.ts
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as List from '../../../lexicon/types/app/bsky/graph/list'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -33,7 +32,7 @@ const insertFn = async (
? JSON.stringify(obj.descriptionFacets)
: undefined,
avatarCid: obj.avatar?.ref.toString(),
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/post.ts
@@ -1,7 +1,6 @@
import { Insertable, Selectable, sql } from 'kysely'
import { CID } from 'multiformats/cid'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { jsonStringToLex } from '@atproto/lexicon'
import {
Record as PostRecord,
Expand Down Expand Up @@ -68,7 +67,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
text: obj.text,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
replyRoot: obj.reply?.root?.uri || null,
replyRootCid: obj.reply?.root?.cid || null,
replyParent: obj.reply?.parent?.uri || null,
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/repost.ts
@@ -1,7 +1,6 @@
import { Selectable } from 'kysely'
import { CID } from 'multiformats/cid'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import * as Repost from '../../../lexicon/types/app/bsky/feed/repost'
import * as lex from '../../../lexicon/lexicons'
import { DatabaseSchema, DatabaseSchemaType } from '../../../db/database-schema'
Expand All @@ -27,7 +26,7 @@ const insertFn = async (
creator: uri.host,
subject: obj.subject.uri,
subjectCid: obj.subject.cid,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
}
const [inserted] = await Promise.all([
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/thread-gate.ts
@@ -1,6 +1,5 @@
import { AtUri } from '@atproto/syntax'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { InvalidRequestError } from '@atproto/xrpc-server'
import { toSimplifiedISOSafe } from '@atproto/common'
import { CID } from 'multiformats/cid'
import * as Threadgate from '../../../lexicon/types/app/bsky/feed/threadgate'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -33,7 +32,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
postUri: obj.post,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/label/index.ts
@@ -1,6 +1,5 @@
import { sql } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { Database } from '../../db'
import { Label, isSelfLabels } from '../../lexicon/types/com/atproto/label/defs'
import { ids } from '../../lexicon/lexicons'
Expand Down Expand Up @@ -166,7 +165,7 @@ export function getSelfLabels(details: {
const src = new AtUri(uri).host // record creator
const cts =
typeof record.createdAt === 'string'
? toSimplifiedISOSafe(record.createdAt)
? normalizeDatetimeAlways(record.createdAt)
: new Date(0).toISOString()
return record.labels.values.map(({ val }) => {
return { src, uri, cid, val, cts, neg: false }
Expand Down
2 changes: 1 addition & 1 deletion packages/lexicon/src/validators/formats.ts
Expand Up @@ -18,7 +18,7 @@ export function datetime(path: string, value: string): ValidationResult {
return {
success: false,
error: new ValidationError(
`${path} must be an iso8601 formatted datetime`,
`${path} must be an valid atproto datetime (both RFC-3339 and ISO-8601)`,
),
}
}
Expand Down
4 changes: 3 additions & 1 deletion packages/lexicon/tests/general.test.ts
Expand Up @@ -659,7 +659,9 @@ describe('Record validation', () => {
$type: 'com.example.datetime',
datetime: 'bad date',
}),
).toThrow('Record/datetime must be an iso8601 formatted datetime')
).toThrow(
'Record/datetime must be an valid atproto datetime (both RFC-3339 and ISO-8601)',
)
})

it('Applies uri formatting constraint', () => {
Expand Down
20 changes: 19 additions & 1 deletion packages/pds/src/repo/prepare.ts
@@ -1,9 +1,10 @@
import { CID } from 'multiformats/cid'
import { AtUri } from '@atproto/syntax'
import { AtUri, ensureValidDatetime } from '@atproto/syntax'
import { MINUTE, TID, dataToCborBlock } from '@atproto/common'
import {
LexiconDefNotFoundError,
RepoRecord,
ValidationError,
lexToIpld,
} from '@atproto/lexicon'
import {
Expand Down Expand Up @@ -115,6 +116,7 @@ export const assertValidRecord = (record: Record<string, unknown>) => {
}
try {
lex.lexicons.assertValidRecord(record.$type, record)
assertValidCreatedAt(record)
} catch (e) {
if (e instanceof LexiconDefNotFoundError) {
throw new InvalidRecordError(e.message)
Expand All @@ -127,6 +129,22 @@ export const assertValidRecord = (record: Record<string, unknown>) => {
}
}

// additional more rigorous check on datetimes
// this check will eventually be in the lex sdk, but this will stop the bleed until then
export const assertValidCreatedAt = (record: Record<string, unknown>) => {
const createdAt = record['createdAt']
if (typeof createdAt !== 'string') {
return
}
try {
ensureValidDatetime(createdAt)
} catch {
throw new ValidationError(
'createdAt must be an valid atproto datetime (both RFC-3339 and ISO-8601)',
)
}
}

export const setCollectionName = (
collection: string,
record: RepoRecord,
Expand Down

0 comments on commit c17971a

Please sign in to comment.