Skip to content

Commit

Permalink
Optimize N-Quads escape replacement.
Browse files Browse the repository at this point in the history
- Use a pre-computed map of replacement values.
- Performance difference depends on the number of replacements. The
  rdf-canon escaping test showed up to 15% improvement.
  • Loading branch information
davidlehn committed May 3, 2023
1 parent 38cc7eb commit 7bd276a
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 17 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,14 @@
- Node.js using the improved browser algorithm can be ~4-9% faster overall.
- Node.js native `Buffer` conversion can be ~5-12% faster overall.
- Optimize an N-Quads serialization call.
- Optimize N-Quads escape/unescape:
- Optimize N-Quads escape/unescape calling replace:
- Run regex test before doing a replace call.
- Performance difference depends on the data and how often escape/unescape
needs to be called. Benchmark test data showed ~3-5% overall improvement.
- Optimize N-Quads escape replacement:
- Use a pre-computed map of replacement values.
- Performance difference depends on the number of replacements. The
[rdf-canon][] escaping test showed up to 15% improvement.

### Fixed
- Disable native lib tests in a browser.
Expand Down
50 changes: 34 additions & 16 deletions lib/NQuads.js
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,25 @@ function _compareTriples(t1, t2) {
}

const _stringLiteralEscapeRegex = /[\u0000-\u001F\u007F"\\]/g;
// Lookup table indexed by code point: the N-Quads escape sequence for every
// character matched by the regex above. Non-matching code points stay unset.
const _stringLiteralEscapeMap = [];
for(let code = 0x00; code < 0x80; code++) {
  // the regex is global and therefore stateful; always match from the start
  _stringLiteralEscapeRegex.lastIndex = 0;
  if(!_stringLiteralEscapeRegex.test(String.fromCharCode(code))) {
    continue;
  }
  // default: 4 digit upper case hex UCHAR form
  const hex = code.toString(16).toUpperCase();
  _stringLiteralEscapeMap[code] = `\\u${hex.padStart(4, '0')}`;
}
// leave the shared regex in its initial state for later users
_stringLiteralEscapeRegex.lastIndex = 0;

// characters with a short ECHAR form override the default UCHAR entry
for(const [char, escaped] of [
  ['\b', '\\b'],
  ['\t', '\\t'],
  ['\n', '\\n'],
  ['\f', '\\f'],
  ['\r', '\\r'],
  ['"', '\\"'],
  ['\\', '\\\\']
]) {
  _stringLiteralEscapeMap[char.codePointAt(0)] = escaped;
}

/**
* Escape string to N-Quads literal
*/
Expand All @@ -390,26 +409,13 @@ function _stringLiteralEscape(s) {
return s;
}
return s.replace(_stringLiteralEscapeRegex, function(match) {
switch(match) {
case '\b': return '\\b';
case '\t': return '\\t';
case '\n': return '\\n';
case '\f': return '\\f';
case '\r': return '\\r';
case '"': return '\\"';
case '\\': return '\\\\';
case '\u007F': return '\\u007F';
}
return '\\u' + match
.codePointAt(0)
.toString(16)
.toUpperCase()
.padStart(4, '0');
return _stringLiteralEscapeMap[match.codePointAt(0)];
});
}

// Matches one escape sequence inside an N-Quads literal. Exactly one capture
// group is set per match:
//   1: the character after a backslash, one of b t n f r " ' \
//   2: the 4 hex digits of a \uXXXX sequence
//   3: the 8 hex digits of a \UXXXXXXXX sequence
// The regex is global, so a replace call processes every occurrence.
const _stringLiteralUnescapeRegex =
/(?:\\([btnfr"'\\]))|(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g;

/**
* Unescape N-Quads literal to string
*/
Expand Down Expand Up @@ -440,6 +446,17 @@ function _stringLiteralUnescape(s) {
}

const _iriEscapeRegex = /[\u0000-\u0020<>"{}|^`\\]/g;
// Lookup table indexed by code point: the UCHAR escape sequence for every
// character matched by the regex above. Non-matching code points stay unset.
const _iriEscapeRegexMap = [];
for(let code = 0x00; code < 0x80; code++) {
  // the regex is global and therefore stateful; always match from the start
  _iriEscapeRegex.lastIndex = 0;
  if(!_iriEscapeRegex.test(String.fromCharCode(code))) {
    continue;
  }
  // 4 digit upper case hex UCHAR form
  const hex = code.toString(16).toUpperCase();
  _iriEscapeRegexMap[code] = `\\u${hex.padStart(4, '0')}`;
}
// leave the shared regex in its initial state for later users
_iriEscapeRegex.lastIndex = 0;

/**
* Escape IRI to N-Quads IRI
*/
Expand All @@ -448,12 +465,13 @@ function _iriEscape(s) {
return s;
}
return s.replace(_iriEscapeRegex, function(match) {
return '\\u' + match.codePointAt(0).toString(16).padStart(4, '0');
return _iriEscapeRegexMap[match.codePointAt(0)];
});
}

// Matches one numeric escape sequence inside an N-Quads IRI. Exactly one
// capture group is set per match:
//   1: the 4 hex digits of a \uXXXX sequence
//   2: the 8 hex digits of a \UXXXXXXXX sequence
// The regex is global, so a replace call processes every occurrence.
const _iriUnescapeRegex =
/(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g;

/**
* Unescape N-Quads IRI to IRI
*/
Expand Down

0 comments on commit 7bd276a

Please sign in to comment.