Skip to content

Commit

Permalink
Merge 79f1562 into a9a36a5
Browse files Browse the repository at this point in the history
  • Loading branch information
epoberezkin committed Sep 29, 2020
2 parents a9a36a5 + 79f1562 commit b0b1ac5
Show file tree
Hide file tree
Showing 30 changed files with 1,000 additions and 319 deletions.
1 change: 1 addition & 0 deletions .eslintrc.js
Expand Up @@ -19,6 +19,7 @@ module.exports = {
"@typescript-eslint/no-floating-promises": "off",
"@typescript-eslint/no-implied-eval": "off",
"@typescript-eslint/no-invalid-this": "off",
"@typescript-eslint/no-parameter-properties": "off",
"@typescript-eslint/no-unnecessary-condition": "warn",
"@typescript-eslint/no-unsafe-assignment": "off",
"@typescript-eslint/no-unsafe-member-access": "off",
Expand Down
14 changes: 13 additions & 1 deletion docs/api.md
Expand Up @@ -281,7 +281,13 @@ const defaultOptions = {
ownProperties: false,
multipleOfPrecision: false,
messages: true,
code: {es5: false, lines: false},
code: {
es5: false,
lines: false,
source: false,
process: undefined, // (code: string) => string
optimize: true,
},
}
```

Expand Down Expand Up @@ -366,6 +372,12 @@ type CodeOptions = {
process?: (code: string, schema?: SchemaEnv) => string // an optional function to process generated code
// before it is passed to Function constructor.
// It can be used to either beautify or to transpile code.
optimize?: boolean | number // code optimization flag or number of passes, 1 pass by default,
// code optimizations reduce the size of the generated code (bytes, based on the tests) by over 10%,
// the number of code tree nodes by nearly 17%.
// You would almost never need more than one optimization pass, unless you have some really complex schemas -
// the second pass in the tests (it has quite complex schemas) only improves optimization by less than 0.1%.
// See [Code optimization](./codegen.md#code-optimization) for details.
}

type Source = {
Expand Down
19 changes: 18 additions & 1 deletion docs/codegen.md
Expand Up @@ -58,7 +58,24 @@ These methods only accept instances of private class `_Code`, other values will

If a string is used in template literals, it will be wrapped in quotes - the generated code could be invalid, but it prevents the risk of code execution that an attacker could pass via an untrusted schema as a string value that will be interpolated. Also see the comment in the example.

Currently CodeGen class does safe append-only string concatenation (without any code substitutions that would present risks of malicious code execution). In the next Ajv versions CodeGen class API will allow implementing code optimizations (e.g., removing empty branches and unused variable declarations) without changing the main Ajv code, purely by switching to lightweight syntax trees in the internal class code.
## Code optimization

CodeGen class generates code trees and performs several optimizations before the code is rendered:

1. removes empty and unreachable branches (e.g. `else` branch after `if(true)`, etc.).
2. removes unused variable declarations.
3. replaces variables that are used only once and assigned expressions that are explicitly marked as "constant" (i.e. having referential transparency) with the expressions themselves.

These optimizations assume that the expressions in `if` conditions, `for` loop headers and assignments are free of any side effects - this is the case for all pre-defined validation keywords. You can either use the same approach in user-defined keywords, or you may need to disable optimization.

See [these tests](../spec/codegen.spec.ts) for examples.

By default Ajv does 1-pass optimization - based on the test suite it achieves 10.5% code size reduction and 16.7% tree nodes reduction (TODO benchmark the validation time). The second optimization pass would only change it marginally, by less than 0.1%, so you won't need it unless you have really complex schemas or if you generate standalone code and want it to pass relevant eslint rules.

Optimization mode can be changed with options:

- `{code: {optimize: false}}` - to disable,
- `{code: {optimize: 2}}` - 2-pass optimization.

## User-defined keywords

Expand Down
28 changes: 18 additions & 10 deletions lib/ajv.ts
Expand Up @@ -118,13 +118,20 @@ interface CurrentOptions {
ownProperties?: boolean
multipleOfPrecision?: boolean | number
messages?: boolean
code?: {
es5?: boolean
lines?: boolean
formats?: Code // code to require (or construct) map of available formats - for standalone code
source?: boolean
process?: (code: string, schema?: SchemaEnv) => string
}
code?: CodeOptions
}

// Code generation options - the type of the `code` property of Ajv options.
export interface CodeOptions {
es5?: boolean
lines?: boolean
// code optimization flag or number of optimization passes
optimize?: boolean | number
formats?: Code // code to require (or construct) map of available formats - for standalone code
source?: boolean
// optional function to process generated code before it is passed to Function constructor
process?: (code: string, schema?: SchemaEnv) => string
}

// CodeOptions in which `optimize` is required and is always a number
// (the boolean form allowed by CodeOptions is not accepted here).
interface InstanceCodeOptions extends CodeOptions {
optimize: number
}

interface DeprecatedOptions {
Expand Down Expand Up @@ -189,7 +196,6 @@ type RequiredInstanceOptions = {
| "strict"
| "strictTypes"
| "strictTuples"
| "code"
| "inlineRefs"
| "loopRequired"
| "loopEnum"
Expand All @@ -198,18 +204,20 @@ type RequiredInstanceOptions = {
| "addUsedSchema"
| "validateSchema"
| "validateFormats"]: NonNullable<Options[K]>
}
} & {code: InstanceCodeOptions}

export type InstanceOptions = Options & RequiredInstanceOptions

function requiredOptions(o: Options): RequiredInstanceOptions {
const strict = o.strict ?? true
const strictLog = strict ? "log" : false
const _optz = o.code?.optimize
const optimize = _optz === true || _optz === undefined ? 1 : _optz || 0
return {
strict,
strictTypes: o.strictTypes ?? strictLog,
strictTuples: o.strictTuples ?? strictLog,
code: o.code ?? {},
code: o.code ? {...o.code, optimize} : {optimize},
loopRequired: o.loopRequired ?? Infinity,
loopEnum: o.loopEnum ?? Infinity,
meta: o.meta ?? true,
Expand Down
144 changes: 105 additions & 39 deletions lib/compile/codegen/code.ts
@@ -1,78 +1,144 @@
export class _Code {
private readonly _str: string
// Abstract base class for code fragments; both `Name` and `_Code` extend it.
export abstract class _CodeOrName {
// the fragment rendered as a string
abstract readonly str: string
// map from name string to the number of times that name occurs in the fragment
abstract readonly names: UsedNames
abstract toString(): string
// whether the fragment is an empty string (`_Code` treats both `` and `""` as empty; `Name` is never empty)
abstract emptyStr(): boolean
}

export const IDENTIFIER = /^[a-z$_][a-z$_0-9]*$/i

export class Name extends _CodeOrName {
readonly str: string
constructor(s: string) {
this._str = s
super()
if (!IDENTIFIER.test(s)) throw new Error("CodeGen: name must be a valid identifier")
this.str = s
}

toString(): string {
return this._str
return this.str
}

emptyStr(): boolean {
return this._str === "" || this._str === '""'
return false
}

get names(): UsedNames {
return {[this.str]: 1}
}
}

export const IDENTIFIER = /^[a-z$_][a-z$_0-9]*$/i
export class _Code extends _CodeOrName {
readonly _items: readonly CodeItem[]
private _str?: string
private _names?: UsedNames

export class Name extends _Code {
constructor(s: string) {
super(s)
if (!IDENTIFIER.test(s)) throw new Error("CodeGen: name must be a valid identifier")
constructor(code: string | readonly CodeItem[]) {
super()
this._items = typeof code === "string" ? [code] : code
}

toString(): string {
return this.str
}

emptyStr(): boolean {
return false
if (this._items.length > 1) return false
const item = this._items[0]
return item === "" || item === '""'
}

get str(): string {
return (this._str ??= this._items.reduce((s: string, c: CodeItem) => `${s}${c}`, ""))
}

get names(): UsedNames {
return (this._names ??= this._items.reduce((names: UsedNames, c) => {
if (c instanceof Name) names[c.str] = (names[c.str] || 0) + 1
return names
}, {}))
}
}

export type CodeItem = Name | string | number | boolean | null

export type UsedNames = Record<string, number | undefined>

export type Code = _Code | Name

export type SafeExpr = Code | number | boolean | null

export const nil = new _Code("")

type TemplateArg = SafeExpr | string | undefined
type CodeArg = SafeExpr | string | undefined

export function _(strs: TemplateStringsArray, ...args: TemplateArg[]): _Code {
return new _Code(strs.reduce((res, s, i) => `${res}${interpolate(args[i - 1])}${s}`))
/**
 * Template tag that builds a `_Code` instance from a template literal.
 *
 * The literal parts of the template are added to the item list as they are;
 * every interpolated argument is added via `addCodeArg`, so the resulting
 * list alternates: literal, argument item(s), literal, ...
 *
 * @param strs - literal parts of the template
 * @param args - interpolated values
 * @returns code object wrapping the collected items
 */
export function _(strs: TemplateStringsArray, ...args: CodeArg[]): _Code {
  const items: CodeItem[] = [strs[0]]
  for (let k = 0; k < args.length; k++) {
    addCodeArg(items, args[k])
    // the literal part that follows the k-th argument
    items.push(strs[k + 1])
  }
  return new _Code(items)
}

export function str(strs: TemplateStringsArray, ...args: (TemplateArg | string[])[]): _Code {
return new _Code(
strs
.map(safeStringify)
.reduce((res, s, i) => concat(concat(res, interpolateStr(args[i - 1])), s))
)
const plus = new _Code("+")

/**
 * Template tag that builds a `_Code` string-concatenation expression.
 *
 * Each literal part of the template is passed through `safeStringify`, each
 * interpolated argument is added via `addCodeArg`, and the `plus` marker is
 * placed between neighbours. The item list is then passed to `optimize`,
 * which merges neighbours where possible and turns the remaining markers
 * into "+" strings.
 *
 * @param strs - literal parts of the template
 * @param args - interpolated values
 * @returns code object wrapping the optimized expression items
 */
export function str(strs: TemplateStringsArray, ...args: (CodeArg | string[])[]): _Code {
  const parts: CodeItem[] = [safeStringify(strs[0])]
  for (let k = 0; k < args.length; k++) {
    parts.push(plus)
    addCodeArg(parts, args[k])
    parts.push(plus, safeStringify(strs[k + 1]))
  }
  optimize(parts)
  return new _Code(parts)
}

function concat(s: string, a: string | number | boolean | null | undefined): string {
return a === '""'
? s
: s === '""'
? `${a}`
: typeof a != "string"
? `${s.slice(0, -1)}${a}"`
: s.endsWith('"') && a[0] === '"'
? s.slice(0, -1) + a.slice(1)
: `${s} + ${a}`
/**
 * Appends a single template argument to a list of code items (in place).
 *
 * - a `_Code` instance contributes all of its own items (it is flattened),
 * - a `Name` instance is appended as is,
 * - any other value is appended after being passed through `interpolate`.
 *
 * @param code - item list to append to; it is mutated
 * @param arg - argument to append
 */
export function addCodeArg(code: CodeItem[], arg: CodeArg | string[]): void {
  if (arg instanceof _Code) {
    code.push(...arg._items)
    return
  }
  if (arg instanceof Name) {
    code.push(arg)
    return
  }
  code.push(interpolate(arg))
}

/**
 * Simplifies, in place, an expression item list in which operands are
 * separated by the `plus` marker object.
 *
 * For every marker found, the items on both sides are passed to
 * `mergeExprItems`. When it returns a merged item, the three items
 * (left, marker, right) are replaced with it and the same position is
 * examined again. Otherwise the marker is replaced with the string "+"
 * and scanning resumes after the right operand.
 *
 * @param expr - item list to simplify; it is mutated
 */
function optimize(expr: CodeItem[]): void {
  let pos = 1
  while (pos < expr.length - 1) {
    if (expr[pos] !== plus) {
      pos += 1
      continue
    }
    const merged = mergeExprItems(expr[pos - 1], expr[pos + 1])
    if (merged === undefined) {
      // operands cannot be merged - keep the concatenation, skip the right operand
      expr[pos] = "+"
      pos += 2
    } else {
      // the list got shorter by two items, so `pos` now points at the next item
      expr.splice(pos - 1, 3, merged)
    }
  }
}

/**
 * Tries to merge two operands of a string concatenation into one item.
 *
 * - if either operand is the empty string literal `""`, the other one is returned;
 * - a string ending with a double quote followed by a number, boolean or null
 *   gets the value inserted before its closing quote;
 * - a string ending with a double quote followed by a string starting with a
 *   double quote are joined, dropping the two adjacent quotes;
 * - a number, boolean or null followed by a string starting with a double
 *   quote is inserted after the opening quote.
 *
 * @param a - left operand
 * @param b - right operand
 * @returns the merged item, or undefined when the operands cannot be merged
 */
function mergeExprItems(a: CodeItem, b: CodeItem): CodeItem | undefined {
  if (b === '""') return a
  if (a === '""') return b
  if (typeof a != "string") {
    // left operand is not a string: it can only be merged into a quoted string on the right
    if (typeof b != "string" || b[0] !== '"' || a instanceof Name) return undefined
    return `"${a}${b.slice(1)}`
  }
  // left operand is a string: it must end with a quote, and a Name on the right is never merged
  if (b instanceof Name || !a.endsWith('"')) return undefined
  if (typeof b == "string") {
    return b[0] === '"' ? a.slice(0, -1) + b.slice(1) : undefined
  }
  return `${a.slice(0, -1)}${b}"`
}

/**
 * Concatenates two code fragments as strings.
 *
 * If one of the fragments is an empty string (per `emptyStr()`), the other
 * fragment is returned unchanged; otherwise the result is built with the
 * `str` template tag.
 *
 * @param c1 - left fragment
 * @param c2 - right fragment
 * @returns the concatenated code
 */
export function strConcat(c1: Code, c2: Code): Code {
  if (c2.emptyStr()) return c1
  if (c1.emptyStr()) return c2
  return str`${c1}${c2}`
}

function interpolate(x: TemplateArg): TemplateArg {
return x instanceof _Code || typeof x == "number" || typeof x == "boolean" || x === null
// TODO do not allow arrays here
function interpolate(x?: string | string[] | number | boolean | null): SafeExpr | string {
return typeof x == "number" || typeof x == "boolean" || x === null
? x
: safeStringify(x)
}

function interpolateStr(x: TemplateArg | string[]): string | number | boolean | null | undefined {
if (Array.isArray(x)) x = x.join(",")
x = interpolate(x)
return x instanceof _Code ? x.toString() : x
: safeStringify(Array.isArray(x) ? x.join(",") : x)
}

export function stringify(x: unknown): Code {
Expand Down

0 comments on commit b0b1ac5

Please sign in to comment.