From 43b2dd78a2cb96c810299d40ac86fae911545018 Mon Sep 17 00:00:00 2001 From: Nigro Simone Date: Sat, 27 Sep 2025 13:39:04 +0200 Subject: [PATCH 1/4] fix: isWellFormed --- lib/serializer.js | 6 +++++- test/string.test.js | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/lib/serializer.js b/lib/serializer.js index 0f4dde76..1f2ab201 100644 --- a/lib/serializer.js +++ b/lib/serializer.js @@ -1,5 +1,8 @@ 'use strict' +// eslint-disable-next-line +const ASCII_ESCAPE = /[\u0000-\u001f\u0022\u005c]/ + module.exports = class Serializer { constructor (options) { switch (options && options.rounding) { @@ -89,6 +92,7 @@ module.exports = class Serializer { asString (str) { const len = str.length + if (len === 0) { return '""' } else if (len < 42) { @@ -116,7 +120,7 @@ module.exports = class Serializer { } } return (last === -1 && ('"' + str + '"')) || ('"' + result + str.slice(last) + '"') - } else if (len < 5000 && str.isWellFormed()) { + } else if (len < 5000 && str.isWellFormed() && ASCII_ESCAPE.test(str) === false) { // Only use the regular expression for shorter input. The overhead is otherwise too much. return '"' + str + '"' } else { diff --git a/test/string.test.js b/test/string.test.js index 518513da..0b3a76c8 100644 --- a/test/string.test.js +++ b/test/string.test.js @@ -34,6 +34,21 @@ test('serialize short string', (t) => { t.assert.equal(JSON.parse(output), input) }) +test('serialize medium string', (t) => { + t.plan(2) + + const schema = { + type: 'string' + } + + const input = new Array(150).fill('\x00').join('') + const stringify = build(schema) + const output = stringify(input) + + t.assert.equal(output, `"${new Array(150).fill('\\u0000').join('')}"`) + t.assert.equal(JSON.parse(output), input) +}) + test('serialize long string', (t) => { t.plan(2) From b287ce4c874f5f491b549dab5982eb32703fa534 Mon Sep 17 00:00:00 2001 From: Nigro Simone Date: Sat, 27 Sep 2025 14:53:00 +0200 Subject: [PATCH 2/4] fix: enhance string serialization with well-formed check --- lib/serializer.js | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/serializer.js b/lib/serializer.js index 1f2ab201..8e017a73 100644 --- a/lib/serializer.js +++ b/lib/serializer.js @@ -95,7 +95,13 @@ module.exports = class Serializer { if (len === 0) { return '""' - } else if (len < 42) { + } + + if (!str.isWellFormed()) { + return JSON.stringify(str) + } + + if (len < 42) { // magically escape strings for json // relying on their charCodeAt // everything below 32 needs JSON.stringify() @@ -114,13 +120,13 @@ module.exports = class Serializer { last === -1 && (last = 0) result += str.slice(last, i) + '\\' last = i - } else if (point < 32 || (point >= 0xD800 && point <= 0xDFFF)) { + } else if (point < 32) { // The current character is non-printable characters or a surrogate. return JSON.stringify(str) } } return (last === -1 && ('"' + str + '"')) || ('"' + result + str.slice(last) + '"') - } else if (len < 5000 && str.isWellFormed() && ASCII_ESCAPE.test(str) === false) { + } else if (len < 5000 && ASCII_ESCAPE.test(str) === false) { // Only use the regular expression for shorter input. The overhead is otherwise too much. return '"' + str + '"' } else { From bfc2beab255c701d75a74af6ffee7e0b7d486cdd Mon Sep 17 00:00:00 2001 From: Nigro Simone Date: Sun, 28 Sep 2025 08:43:10 +0200 Subject: [PATCH 3/4] fix: revert to Uzlopak proposal, is more stable and faster on more cases on node 24 --- lib/serializer.js | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lib/serializer.js b/lib/serializer.js index 8e017a73..eadaa074 100644 --- a/lib/serializer.js +++ b/lib/serializer.js @@ -95,13 +95,7 @@ module.exports = class Serializer { if (len === 0) { return '""' - } - - if (!str.isWellFormed()) { - return JSON.stringify(str) - } - - if (len < 42) { + } else if (len < 42) { // magically escape strings for json // relying on their charCodeAt // everything below 32 needs JSON.stringify() @@ -120,13 +114,13 @@ module.exports = class Serializer { last === -1 && (last = 0) result += str.slice(last, i) + '\\' last = i - } else if (point < 32) { + } else if (point < 32 || (point >= 0xd800 && point <= 0xdfff)) { // The current character is non-printable characters or a surrogate. return JSON.stringify(str) } } return (last === -1 && ('"' + str + '"')) || ('"' + result + str.slice(last) + '"') - } else if (len < 5000 && ASCII_ESCAPE.test(str) === false) { + } else if (len < 5000 && str.isWellFormed() && ASCII_ESCAPE.test(str) === false) { // Only use the regular expression for shorter input. The overhead is otherwise too much. return '"' + str + '"' } else { From 2403bdd0ef2be055bb1d6cefae8373cc996c037e Mon Sep 17 00:00:00 2001 From: Nigro Simone Date: Sun, 28 Sep 2025 09:25:37 +0200 Subject: [PATCH 4/4] feat: add empty string into benchmark --- benchmark/bench.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/benchmark/bench.js b/benchmark/bench.js index acb87b94..cd4bda87 100644 --- a/benchmark/bench.js +++ b/benchmark/bench.js @@ -40,6 +40,13 @@ for (let i = 0; i < SHORT_ARRAY_SIZE; i++) { } const benchmarks = [ + { + name: 'empty string', + schema: { + type: 'string' + }, + input: '' + }, { name: 'short string', schema: {