diff --git a/README.md b/README.md index 247f6d39..bc6be5df 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ compile-json-stringify date format x 1,086,187 ops/sec ±0.16% (99 runs sampled) - `Long integers` - `Integers` - `Nullable` + - `Large Arrays` - `Security Notice` - `Acknowledgements` - `License` @@ -117,6 +118,8 @@ const stringify = fastJson(mySchema, { - `schema`: external schemas references by $ref property. [More details](#ref) - `ajv`: [ajv v8 instance's settings](https://ajv.js.org/options.html) for those properties that require `ajv`. [More details](#anyof) - `rounding`: setup how the `integer` types will be rounded when not integers. [More details](#integer) +- `largeArrayMechanism`: set the mechanism that should be used to handle large +(by default `20000` or more items) arrays. [More details](#largearrays) @@ -582,6 +585,59 @@ Otherwise, instead of raising an error, null values will be coerced as follows: - `string` -> `""` - `boolean` -> `false` + +#### Large Arrays + +Large arrays are, for the scope of this document, defined as arrays containing, +by default, `20000` elements or more. That value can be adjusted via the option +parameter `largeArraySize`. + +At some point the overhead caused by the default mechanism used by +`fast-json-stringify` to handle arrays starts increasing exponentially, leading +to slow overall execution. + +##### Settings + +To mitigate this, the user can set the `largeArrayMechanism` and +`largeArraySize` options. + +`largeArrayMechanism`'s default value is `default`. Valid values for it are: + +- `default` - This option is a compromise between performance and feature set by +still providing the expected functionality out of this lib but giving up some +possible performance gain. 
With this option set, **large arrays** are +stringified by joining their stringified elements using `Array.join` instead of +string concatenation for better performance. +- `json-stringify` - This option will remove support for schema validation +within **large arrays** completely. By doing so the overhead previously +mentioned is eliminated, greatly improving execution time. Note that there is no change +in behavior for arrays not considered _large_. + +`largeArraySize`'s default value is `20000`. Valid values for it are +integer-like values, such as: + +- `20000` +- `2e4` +- `'20000'` +- `'2e4'` - _note this will be converted to `2`, not `20000`_ +- `1.5` - _note this will be converted to `1`_ + +##### Benchmarks + +For reference, here are some benchmarks comparing the two mechanisms +with other serializers. Benchmarks were conducted on an old machine. + +- Machine: `ST1000LM024 HN-M 1TB HDD, Intel Core i7-3610QM @ 2.3GHz, 12GB RAM, 4C/8T`. +- Node.js `v16.13.1` + +``` +JSON.stringify large array x 157 ops/sec ±0.73% (86 runs sampled) +fast-json-stringify large array default x 48.72 ops/sec ±4.92% (48 runs sampled) +fast-json-stringify large array json-stringify x 157 ops/sec ±0.76% (86 runs sampled) +compile-json-stringify large array x 175 ops/sec ±4.47% (79 runs sampled) +AJV Serialize large array x 58.76 ops/sec ±4.59% (60 runs sampled) +``` + ## Security notice diff --git a/bench.js b/bench.js index 2b353297..c51032c0 100644 --- a/bench.js +++ b/bench.js @@ -3,6 +3,10 @@ const benchmark = require('benchmark') const suite = new benchmark.Suite() +const STR_LEN = 1e4 +const LARGE_ARRAY_SIZE = 2e4 +const MULTI_ARRAY_LENGHT = 1e3 + const schema = { title: 'Example Schema', type: 'object', @@ -89,7 +93,8 @@ const obj = { const date = new Date() -const multiArray = [] +const multiArray = new Array(MULTI_ARRAY_LENGHT) +const largeArray = new Array(LARGE_ARRAY_SIZE) const CJS = require('compile-json-stringify') const CJSStringify = CJS(schemaCJS) @@ -99,7 +104,10 @@ const 
CJSStringifyString = CJS({ type: 'string' }) const FJS = require('.') const stringify = FJS(schema) -const stringifyArray = FJS(arraySchema) +const stringifyArrayDefault = FJS(arraySchema) +const stringifyArrayJSONStringify = FJS(arraySchema, { + largeArrayMechanism: 'json-stringify' +}) const stringifyDate = FJS(dateFormatSchema) const stringifyString = FJS({ type: 'string' }) let str = '' @@ -110,18 +118,48 @@ const ajvSerialize = ajv.compileSerializer(schemaAJVJTD) const ajvSerializeArray = ajv.compileSerializer(arraySchemaAJVJTD) const ajvSerializeString = ajv.compileSerializer({ type: 'string' }) +const getRandomString = (length) => { + if (!Number.isInteger(length)) { + throw new Error('Expected integer length') + } + + const validCharacters = 'abcdefghijklmnopqrstuvwxyz' + const nValidCharacters = 26 + + let result = '' + for (let i = 0; i < length; ++i) { + result += validCharacters[Math.floor(Math.random() * nValidCharacters)] + } + + return result[0].toUpperCase() + result.slice(1) +} + // eslint-disable-next-line -for (var i = 0; i < 10000; i++) { +for (let i = 0; i < STR_LEN; i++) { + largeArray[i] = { + firstName: getRandomString(8), + lastName: getRandomString(6), + age: Math.ceil(Math.random() * 99) + } + str += i if (i % 100 === 0) { str += '"' } } +for (let i = STR_LEN; i < LARGE_ARRAY_SIZE; ++i) { + largeArray[i] = { + firstName: getRandomString(10), + lastName: getRandomString(4), + age: Math.ceil(Math.random() * 99) + } +} + Number(str) -for (i = 0; i < 1000; i++) { - multiArray.push(obj) +for (let i = 0; i < MULTI_ARRAY_LENGHT; i++) { + multiArray[i] = obj } suite.add('FJS creation', function () { @@ -138,8 +176,12 @@ suite.add('JSON.stringify array', function () { JSON.stringify(multiArray) }) -suite.add('fast-json-stringify array', function () { - stringifyArray(multiArray) +suite.add('fast-json-stringify array default', function () { + stringifyArrayDefault(multiArray) +}) + +suite.add('fast-json-stringify array json-stringify', function () 
{ + stringifyArrayJSONStringify(multiArray) }) suite.add('compile-json-stringify array', function () { @@ -150,6 +192,26 @@ suite.add('AJV Serialize array', function () { ajvSerializeArray(multiArray) }) +suite.add('JSON.stringify large array', function () { + JSON.stringify(largeArray) +}) + +suite.add('fast-json-stringify large array default', function () { + stringifyArrayDefault(largeArray) +}) + +suite.add('fast-json-stringify large array json-stringify', function () { + stringifyArrayJSONStringify(largeArray) +}) + +suite.add('compile-json-stringify large array', function () { + CJSStringifyArray(largeArray) +}) + +suite.add('AJV Serialize large array', function () { + ajvSerializeArray(largeArray) +}) + suite.add('JSON.stringify long string', function () { JSON.stringify(str) }) diff --git a/index.js b/index.js index 502f3cf5..436fd7e0 100644 --- a/index.js +++ b/index.js @@ -11,7 +11,14 @@ const fjsCloned = Symbol('fast-json-stringify.cloned') const { randomUUID } = require('crypto') const validate = require('./schema-validator') + +let largeArraySize = 2e4 let stringSimilarity = null +let largeArrayMechanism = 'default' +const validLargeArrayMechanisms = [ + 'default', + 'json-stringify' +] const addComma = ` if (addComma) { @@ -73,6 +80,22 @@ function build (schema, options) { } } + if (options.largeArrayMechanism) { + if (validLargeArrayMechanisms.includes(options.largeArrayMechanism)) { + largeArrayMechanism = options.largeArrayMechanism + } else { + throw new Error(`Unsupported large array mechanism ${options.largeArrayMechanism}`) + } + } + + if (options.largeArraySize) { + if (!Number.isNaN(Number.parseInt(options.largeArraySize, 10))) { + largeArraySize = Number.parseInt(options.largeArraySize, 10) + } else { + throw new Error(`Unsupported large array size. 
Expected integer-like, got ${options.largeArraySize}`) + } + } + /* eslint no-new-func: "off" */ let code = ` 'use strict' @@ -1029,6 +1052,11 @@ function buildArray (location, code, name, key = null) { code += ` var l = obj.length + if (l && l >= ${largeArraySize}) {` + + const concatSnippet = ` + } + var jsonOutput= '' for (var i = 0; i < l; i++) { var json = '' @@ -1040,7 +1068,25 @@ function buildArray (location, code, name, key = null) { } } return \`[\${jsonOutput}]\` + }` + + switch (largeArrayMechanism) { + case 'default': + code += ` + return \`[\${obj.map(${result.mapFnName}).join(',')}]\`` + break + + case 'json-stringify': + code += ` + return JSON.stringify(obj)` + break + + default: + throw new Error(`Unsupported large array mechanism ${largeArrayMechanism}`) } + + code += ` + ${concatSnippet} ${result.laterCode} ` @@ -1148,22 +1194,27 @@ function nested (laterCode, name, key, location, subKey, isArray) { switch (type) { case 'null': + funcName = '$asNull' code += ` json += $asNull() ` break case 'string': { const stringSerializer = getStringSerializer(schema.format) + funcName = stringSerializer code += nullable ? `json += obj${accessor} === null ? null : ${stringSerializer}(obj${accessor})` : `json += ${stringSerializer}(obj${accessor})` break } case 'integer': + funcName = '$asInteger' code += nullable ? `json += obj${accessor} === null ? null : $asInteger(obj${accessor})` : `json += $asInteger(obj${accessor})` break case 'number': + funcName = '$asNumber' code += nullable ? `json += obj${accessor} === null ? null : $asNumber(obj${accessor})` : `json += $asNumber(obj${accessor})` break case 'boolean': + funcName = '$asBoolean' code += nullable ? `json += obj${accessor} === null ? 
null : $asBoolean(obj${accessor})` : `json += $asBoolean(obj${accessor})` break case 'object': @@ -1181,6 +1232,7 @@ function nested (laterCode, name, key, location, subKey, isArray) { ` break case undefined: + funcName = '$asNull' if ('anyOf' in schema) { // beware: dereferenceOfRefs has side effects and changes schema.anyOf const anyOfLocations = dereferenceOfRefs(location, 'anyOf') @@ -1319,7 +1371,8 @@ function nested (laterCode, name, key, location, subKey, isArray) { return { code, - laterCode + laterCode, + mapFnName: funcName } } @@ -1335,6 +1388,8 @@ function isEmpty (schema) { module.exports = build +module.exports.validLargeArrayMechanisms = validLargeArrayMechanisms + module.exports.restore = function ({ code, ajv }) { // eslint-disable-next-line return (Function.apply(null, ['ajv', code]) diff --git a/test/array.test.js b/test/array.test.js index 92431f88..72bf72a8 100644 --- a/test/array.test.js +++ b/test/array.test.js @@ -5,12 +5,12 @@ const test = require('tap').test const validator = require('is-my-json-valid') const build = require('..') -function buildTest (schema, toStringify) { +function buildTest (schema, toStringify, options) { test(`render a ${schema.title} as JSON`, (t) => { t.plan(3) const validate = validator(schema) - const stringify = build(schema) + const stringify = build(schema, options) const output = stringify(toStringify) t.same(JSON.parse(output), toStringify) @@ -319,3 +319,47 @@ test('object array with anyOf and symbol', (t) => { ]) t.equal(value, '[{"name":"name-0","option":"Foo"},{"name":"name-1","option":"Bar"}]') }) + +const largeArray = new Array(2e4).fill({ a: 'test', b: 1 }) +buildTest({ + title: 'large array with default mechanism', + type: 'object', + properties: { + ids: { + type: 'array', + items: { + type: 'object', + properties: { + a: { type: 'string' }, + b: { type: 'number' } + } + } + } + } +}, { + ids: largeArray +}, { + largeArraySize: 2e4, + largeArrayMechanism: 'default' +}) + +buildTest({ + title: 'large 
array with json-stringify mechanism', + type: 'object', + properties: { + ids: { + type: 'array', + items: { + type: 'object', + properties: { + a: { type: 'string' }, + b: { type: 'number' } + } + } + } + } +}, { + ids: largeArray +}, { + largeArrayMechanism: 'json-stringify' +})