Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Grapheme counting for lex + increase post size #671

Merged
merged 5 commits into from
Mar 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lexicons/app/bsky/feed/post.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"type": "object",
"required": ["text", "createdAt"],
"properties": {
"text": {"type": "string", "maxLength": 256},
"text": {"type": "string", "maxLength": 3000, "maxGraphemes": 300},
"entities": {
"type": "array",
"items": {"type": "ref", "ref": "#entity"}
Expand Down
3 changes: 2 additions & 1 deletion packages/api/src/client/lexicons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3524,7 +3524,8 @@ export const schemaDict = {
properties: {
text: {
type: 'string',
maxLength: 256,
maxLength: 3000,
maxGraphemes: 300,
},
entities: {
type: 'array',
Expand Down
1 change: 1 addition & 0 deletions packages/common/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ export * from './logger'
export * from './types'
export * from './streams'
export * from './times'
export * from './strings'
9 changes: 9 additions & 0 deletions packages/common/src/strings.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/**
 * Returns the size of `str` in bytes once it has been encoded as UTF-8.
 *
 * @param str - the string to measure
 * @returns the byte length of the UTF-8 encoding of `str`
 */
export const utf8Len = (str: string): number => {
  const encoded = new TextEncoder().encode(str)
  return encoded.byteLength
}

/**
 * Returns how many graphemes (user-displayed characters) `str` contains.
 *
 * Segmentation is delegated to an `Intl.Segmenter` constructed with no
 * arguments, so its default locale and granularity apply.
 *
 * @param str - the string to measure
 * @returns the number of segments the segmenter yields for `str`
 */
export const graphemeLen = (str: string): number => {
  const segments = new Intl.Segmenter().segment(str)
  let total = 0
  // Only the number of segments matters, so walk the iterator and count.
  for (const _segment of segments) {
    total += 1
  }
  return total
}
30 changes: 30 additions & 0 deletions packages/common/tests/strings.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { graphemeLen, utf8Len } from '../src'

// Unit tests for the string length helpers: each sample is checked once for
// its UTF-8 byte length and once for its grapheme count.
describe('string', () => {
  it('calculates utf8 string length', () => {
    // boundary: the empty string encodes to zero bytes
    expect(utf8Len('')).toBe(0)
    expect(utf8Len('a')).toBe(1)
    expect(utf8Len('~')).toBe(1)
    expect(utf8Len('ö')).toBe(2)
    expect(utf8Len('ñ')).toBe(2)
    expect(utf8Len('©')).toBe(2)
    expect(utf8Len('⽘')).toBe(3)
    expect(utf8Len('☎')).toBe(3)
    expect(utf8Len('𓋓')).toBe(4)
    expect(utf8Len('😀')).toBe(4)
    expect(utf8Len('👨‍👩‍👧‍👧')).toBe(25)
  })

  it('calculates grapheme length', () => {
    // boundary: the empty string contains no graphemes
    expect(graphemeLen('')).toBe(0)
    expect(graphemeLen('a')).toBe(1)
    expect(graphemeLen('~')).toBe(1)
    expect(graphemeLen('ö')).toBe(1)
    expect(graphemeLen('ñ')).toBe(1)
    expect(graphemeLen('©')).toBe(1)
    expect(graphemeLen('⽘')).toBe(1)
    expect(graphemeLen('☎')).toBe(1)
    expect(graphemeLen('𓋓')).toBe(1)
    expect(graphemeLen('😀')).toBe(1)
    // 25 UTF-8 bytes (see the byte-length test above) but one grapheme
    expect(graphemeLen('👨‍👩‍👧‍👧')).toBe(1)
    expect(graphemeLen('a~öñ©⽘☎𓋓😀👨‍👩‍👧‍👧')).toBe(10)
  })
})
2 changes: 2 additions & 0 deletions packages/lexicon/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
},
"license": "MIT",
"dependencies": {
"@atproto/common": "*",
"@atproto/nsid": "*",
"@atproto/uri": "*",
"iso-datestring-validator": "^2.2.2",
"zod": "^3.14.2"
}
Expand Down
2 changes: 2 additions & 0 deletions packages/lexicon/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ export const lexString = z.object({
default: z.string().optional(),
minLength: z.number().int().optional(),
maxLength: z.number().int().optional(),
minGraphemes: z.number().int().optional(),
maxGraphemes: z.number().int().optional(),
enum: z.string().array().optional(),
const: z.string().optional(),
knownValues: z.string().array().optional(),
Expand Down
29 changes: 27 additions & 2 deletions packages/lexicon/src/validators/primitives.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { AtUri } from '@atproto/uri'
import * as common from '@atproto/common'
import { isValidISODateString } from 'iso-datestring-validator'
import { CID } from 'multiformats/cid'
import { Lexicons } from '../lexicons'
Expand Down Expand Up @@ -224,7 +225,7 @@ export function string(

// maxLength
if (typeof def.maxLength === 'number') {
if ((value as string).length > def.maxLength) {
if (common.utf8Len(value) > def.maxLength) {
return {
success: false,
error: new ValidationError(
Expand All @@ -236,7 +237,7 @@ export function string(

// minLength
if (typeof def.minLength === 'number') {
if ((value as string).length < def.minLength) {
if (common.utf8Len(value) < def.minLength) {
return {
success: false,
error: new ValidationError(
Expand All @@ -246,6 +247,30 @@ export function string(
}
}

// maxGraphemes
if (typeof def.maxGraphemes === 'number') {
if (common.graphemeLen(value) > def.maxGraphemes) {
return {
success: false,
error: new ValidationError(
`${path} must not be longer than ${def.maxGraphemes} graphemes`,
),
}
}
}

// minGraphemes
if (typeof def.minGraphemes === 'number') {
if (common.graphemeLen(value) < def.minGraphemes) {
return {
success: false,
error: new ValidationError(
`${path} must not be shorter than ${def.minGraphemes} graphemes`,
),
}
}
}

if (typeof def.format === 'string') {
switch (def.format) {
case 'datetime':
Expand Down
19 changes: 19 additions & 0 deletions packages/lexicon/tests/_scaffolds/lexicons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,25 @@ export default [
},
},
},
{
lexicon: 1,
id: 'com.example.stringLengthGrapheme',
defs: {
main: {
type: 'record',
record: {
type: 'object',
properties: {
string: {
type: 'string',
minGraphemes: 2,
maxGraphemes: 4,
},
},
},
},
},
},
{
lexicon: 1,
id: 'com.example.stringEnum',
Expand Down
25 changes: 25 additions & 0 deletions packages/lexicon/tests/general.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,31 @@ describe('Record validation', () => {
string: '12345',
}),
).toThrow('Record/string must not be longer than 4 characters')
expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '👨‍👩‍👧‍👧',
}),
).toThrow('Record/string must not be longer than 4 characters')
})

// Checks the grapheme-count bounds on 'com.example.stringLengthGrapheme':
// per the expected error messages below, the lower bound is 2 graphemes and
// the upper bound is 4 graphemes.
it('Applies grapheme string length constraint', () => {
// Two digits plus one emoji sequence: expected to validate without throwing.
lex.assertValidRecord('com.example.stringLengthGrapheme', {
$type: 'com.example.stringLengthGrapheme',
string: '12👨‍👩‍👧‍👧',
})
// The emoji sequence alone is expected to be rejected as too short, i.e. it
// is counted as fewer than 2 graphemes.
expect(() =>
lex.assertValidRecord('com.example.stringLengthGrapheme', {
$type: 'com.example.stringLengthGrapheme',
string: '👨‍👩‍👧‍👧',
}),
).toThrow('Record/string must not be shorter than 2 graphemes')
// Five single-character graphemes exceed the upper bound of 4.
expect(() =>
lex.assertValidRecord('com.example.stringLengthGrapheme', {
$type: 'com.example.stringLengthGrapheme',
string: '12345',
}),
).toThrow('Record/string must not be longer than 4 graphemes')
})

it('Applies string enum constraint', () => {
Expand Down
4 changes: 3 additions & 1 deletion packages/lexicon/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
},
"include": ["./src"],
"references": [
{ "path": "../nsid/tsconfig.build.json" }
{ "path": "../common/tsconfig.build.json" },
{ "path": "../nsid/tsconfig.build.json" },
{ "path": "../uri/tsconfig.build.json" }
]
}
3 changes: 2 additions & 1 deletion packages/pds/src/lexicon/lexicons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3524,7 +3524,8 @@ export const schemaDict = {
properties: {
text: {
type: 'string',
maxLength: 256,
maxLength: 3000,
maxGraphemes: 300,
},
entities: {
type: 'array',
Expand Down