diff --git a/CHANGELOG.md b/CHANGELOG.md index 07dc87f..ac09264 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ - Test with karma. ### Changed +- **BREAKING**: Use latest [rdf-canon][] N-Quads canonical form. This can + change the canonical output! There is an expanded set of control characters + that are escaped as an `ECHAR` or `UCHAR` instead of using a native + representation. - **BREAKING**: Use `globalThis` to access `crypto` in browsers. Use a polyfill if your environment doesn't support `globalThis`. - Update tooling. diff --git a/lib/NQuads.js b/lib/NQuads.js index 021e39e..e34a215 100644 --- a/lib/NQuads.js +++ b/lib/NQuads.js @@ -381,7 +381,7 @@ function _compareTriples(t1, t2) { ); } -const _stringLiteralEscapeRegex = /["\\\n\r]/g; +const _stringLiteralEscapeRegex = /[\u0000-\u001F\u007F"\\]/g; /** * Escape string to N-Quads literal */ @@ -391,16 +391,25 @@ function _stringLiteralEscape(s) { } return s.replace(_stringLiteralEscapeRegex, function(match) { switch(match) { - case '"': return '\\"'; - case '\\': return '\\\\'; + case '\b': return '\\b'; + case '\t': return '\\t'; case '\n': return '\\n'; + case '\f': return '\\f'; case '\r': return '\\r'; + case '"': return '\\"'; + case '\\': return '\\\\'; + case '\u007F': return '\\u007F'; } + return '\\u' + match + .codePointAt(0) + .toString(16) + .toUpperCase() + .padStart(4, '0'); }); } const _stringLiteralUnescapeRegex = - /(?:\\([tbnrf"'\\]))|(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g; + /(?:\\([btnfr"'\\]))|(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g; /** * Unescape N-Quads literal to string */ @@ -411,11 +420,11 @@ function _stringLiteralUnescape(s) { return s.replace(_stringLiteralUnescapeRegex, function(match, code, u, U) { if(code) { switch(code) { - case 't': return '\t'; case 'b': return '\b'; + case 't': return '\t'; case 'n': return '\n'; - case 'r': return '\r'; case 'f': return '\f'; + case 'r': return '\r'; case '"': return '"'; case '\'': return '\''; case '\\': return '\\';