Skip to content

Commit

Permalink
Implement ident decode/encode
Browse files Browse the repository at this point in the history
  • Loading branch information
lahmatiy committed Jan 21, 2020
1 parent b9840cf commit 3355b15
Show file tree
Hide file tree
Showing 5 changed files with 258 additions and 19 deletions.
2 changes: 2 additions & 0 deletions lib/syntax/create.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const createWalker = require('../walker/create');
const clone = require('../utils/clone');
const names = require('../utils/names');
const mix = require('./config/mix');
const ident = require('../utils/ident');
const string = require('../utils/string');
const url = require('../utils/url');

Expand All @@ -30,6 +31,7 @@ function createSyntax(config) {
keyword: names.keyword,
property: names.property,
isCustomProperty: names.isCustomProperty,
ident,
string,
url,

Expand Down
105 changes: 105 additions & 0 deletions lib/utils/ident.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
const { consumeEscaped, decodeEscaped } = require('../tokenizer/utils');
const {
isName,
isValidEscape
} = require('../tokenizer/char-code-definitions');

const REVERSE_SOLIDUS = 0x005c; // U+005C REVERSE SOLIDUS (\)

/**
 * Decode an identifier: resolve backslash escapes and copy every other
 * character through unchanged.
 *
 * - A backslash that is the very last character ends decoding; it produces
 *   no output.
 * - A backslash followed by something `isValidEscape()` accepts is consumed
 *   with `consumeEscaped()` and translated with `decodeEscaped()`.
 * - A backslash that does not start a valid escape is dropped together with
 *   the character after it (a following "\r\n" pair is dropped as a whole).
 *
 * @param {string} str - raw identifier text
 * @returns {string} decoded identifier
 */
function decode(str) {
    const lastIndex = str.length - 1;
    let result = '';
    let pos = 0;

    while (pos < str.length) {
        // ordinary character: copy as is
        if (str.charCodeAt(pos) !== REVERSE_SOLIDUS) {
            result += str[pos];
            pos++;
            continue;
        }

        // backslash right before EOF: nothing left to decode
        if (pos === lastIndex) {
            break;
        }

        const next = str.charCodeAt(pos + 1);

        if (isValidEscape(REVERSE_SOLIDUS, next)) {
            // `pos` points at the backslash; the escape body starts right after it
            const escapeEnd = consumeEscaped(str, pos);

            result += decodeEscaped(str.substring(pos + 1, escapeEnd));
            pos = escapeEnd;
            continue;
        }

        // not a valid escape: skip the backslash and the character after it
        pos += 2;

        // ... and treat \r\n as a single unit
        if (next === 0x000d && str.charCodeAt(pos) === 0x000a) {
            pos++;
        }
    }

    return result;
}

// Serialize a string as a CSS identifier.
// https://drafts.csswg.org/cssom/#serialize-an-identifier
// § 2.1. Common Serializing Idioms
/**
 * @param {string} str - identifier value to serialize
 * @returns {string} the identifier with all required escapes applied
 */
function encode(str) {
    // A string consisting of a single "-" (U+002D) is serialized as an
    // escaped dash. Since this can only happen for a one-character string,
    // the case is handled up front rather than inside the loop.
    if (str.length === 1 && str.charCodeAt(0) === 0x002D) {
        return '\\-';
    }

    let result = '';

    for (let pos = 0; pos < str.length; pos++) {
        const code = str.charCodeAt(pos);

        if (code === 0x0000) {
            // NULL (U+0000) becomes the REPLACEMENT CHARACTER (U+FFFD)
            result += '\uFFFD';
        } else if (
            // Control characters U+0001..U+001F (U+0000 is already handled
            // by the branch above, so no lower bound check is needed) and U+007F
            code <= 0x001F ||
            code === 0x007F ||
            // A digit [0-9] that is the first character, or the second
            // character right after a leading "-" (U+002D)
            (code >= 0x0030 && code <= 0x0039 &&
                (pos === 0 || (pos === 1 && str.charCodeAt(0) === 0x002D)))
        ) {
            // Escaped as code point: backslash, hex value, trailing space
            const hex = code.toString(16);

            result += '\\' + hex + ' ';
        } else {
            // Name characters (U+0080 and above, "-", "_", [0-9], [A-Z], [a-z])
            // are emitted as is; everything else is emitted as an escaped
            // character, i.e. prefixed with a backslash
            const prefix = isName(code) ? '' : '\\';

            result += prefix + str.charAt(pos);
        }
    }

    return result;
}

// Public interface of this module: the identifier decode/encode pair defined above
module.exports = {
decode,
encode
};
21 changes: 17 additions & 4 deletions lib/utils/string.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
const { consumeEscaped, decodeEscaped } = require('../tokenizer/utils');
const {
isHexDigit,
isNewline,
isValidEscape
} = require('../tokenizer/char-code-definitions');

const REVERSE_SOLIDUS = 0x005c; // \
const REVERSE_SOLIDUS = 0x005c; // U+005C REVERSE SOLIDUS (\)
const QUOTATION_MARK = 0x0022; // "
const APOSTROPHE = 0x0027; // '

Expand Down Expand Up @@ -62,17 +61,31 @@ function encode(str, apostrophe) {
for (let i = 0; i < str.length; i++) {
let code = str.charCodeAt(i);

if (isNewline(code)) {
// If the character is NULL (U+0000), then the REPLACEMENT CHARACTER (U+FFFD).
if (code === 0x0000) {
encoded += '\uFFFD';
continue;
}

// If the character is in the range [\1-\1f] (U+0001 to U+001F) or is U+007F,
// the character escaped as code point.
// Note: Do not compare with 0x0001 since 0x0000 is processed before
if (code <= 0x001f || code === 0x007F) {
encoded += '\\' + code.toString(16);
wsBeforeHexIsNeeded = true;
} else if (code === REVERSE_SOLIDUS || code === quoteCode) {
continue;
}

// If the character is '"' (U+0022) or "\" (U+005C), the escaped character.
if (code === quoteCode || code === REVERSE_SOLIDUS) {
encoded += '\\' + str.charAt(i);
wsBeforeHexIsNeeded = false;
} else {
if (wsBeforeHexIsNeeded && isHexDigit(code)) {
encoded += ' ';
}

// Otherwise, the character itself.
encoded += str.charAt(i);
wsBeforeHexIsNeeded = false;
}
Expand Down
31 changes: 21 additions & 10 deletions lib/utils/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@ const { consumeEscaped, decodeEscaped } = require('../tokenizer/utils');
const {
isHexDigit,
isWhiteSpace,
isNonPrintable,
isValidEscape
} = require('../tokenizer/char-code-definitions');

const SPACE = 0x0020; // U+0020 SPACE
const REVERSE_SOLIDUS = 0x005c; // \
const REVERSE_SOLIDUS = 0x005c; // U+005C REVERSE SOLIDUS (\)
const QUOTATION_MARK = 0x0022; // "
const APOSTROPHE = 0x0027; // '
const LEFTPARENTHESIS = 0x0028; // U+0028 LEFT PARENTHESIS (()
Expand All @@ -34,7 +33,7 @@ function decode(str) {
// special case at the ending
if (i === end) {
// if the next input code point is EOF, do nothing
// otherwise include last quote as escaped
// otherwise include last left parenthesis as escaped
if (i !== len - 1) {
decoded = str.substr(i + 1);
}
Expand Down Expand Up @@ -71,15 +70,27 @@ function encode(str) {
for (let i = 0; i < str.length; i++) {
let code = str.charCodeAt(i);

if (isNonPrintable(code) || (isWhiteSpace(code) && code !== SPACE)) {
// If the character is NULL (U+0000), then the REPLACEMENT CHARACTER (U+FFFD).
if (code === 0x0000) {
encoded += '\uFFFD';
continue;
}

// If the character is in the range [\1-\1f] (U+0001 to U+001F) or is U+007F,
// the character escaped as code point.
// Note: Do not compare with 0x0001 since 0x0000 is processed before
if (code <= 0x001f || code === 0x007F) {
encoded += '\\' + code.toString(16);
wsBeforeHexIsNeeded = true;
} else if (code === SPACE ||
code === REVERSE_SOLIDUS ||
code === QUOTATION_MARK ||
code === APOSTROPHE ||
code === LEFTPARENTHESIS ||
code === RIGHTPARENTHESIS) {
continue;
}

if (code === SPACE ||
code === REVERSE_SOLIDUS ||
code === QUOTATION_MARK ||
code === APOSTROPHE ||
code === LEFTPARENTHESIS ||
code === RIGHTPARENTHESIS) {
encoded += '\\' + str.charAt(i);
wsBeforeHexIsNeeded = false;
} else {
Expand Down
118 changes: 113 additions & 5 deletions test/decode-encode.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
const assert = require('assert');
const { string, url } = require('../lib');
const { ident, string, url } = require('../lib');

function forEachTest(tests, func) {
Object.keys(tests).forEach((from, idx) => {
Expand Down Expand Up @@ -51,7 +51,9 @@ describe('decode/encode', () => {
// (30)
'"\\a\\d\\c\\9"': '\n\r\f\t',
'"\\(\\)\\\\"': '()\\',
'"\\\r\\\n\\\r\n"': ''
'"\\\r\\\n\\\r\n"': '',
'"\\"': '"',
'"\\': ''
};

forEachTest(tests, string.decode);
Expand All @@ -67,8 +69,8 @@ describe('decode/encode', () => {
// (5)
'a\rb': '"a\\d b"',
'a\fb': '"a\\c b"',
'a\tb': '"a\tb"',
'a\nbc\n"b\tx': '"a\\a bc\\a\\"b\tx"',
'a\tb': '"a\\9 b"',
'a\nbc\n"b\tx': '"a\\a bc\\a\\"b\\9x"',
'a\\26b': '"a\\\\26b"',
// (10)
'a&b': '"a&b"',
Expand Down Expand Up @@ -105,7 +107,10 @@ describe('decode/encode', () => {
'url(\\abcdefa)': '\ufffda', // is greater than the maximum allowed code point
'url(\\def0)': '\ufffd', // is for a surrogate
'url(\\00abcdef)': '\uabcdef',
'url(\\abcdef1)': '\ufffd1'
// (20)
'url(\\abcdef1)': '\ufffd1',
'url(\\)': ')',
'url(\\': ''
};

forEachTest(tests, url.decode);
Expand Down Expand Up @@ -133,4 +138,107 @@ describe('decode/encode', () => {
forEachTest(tests, url.encode);
});
});

describe('ident', () => {
describe('decode', () => {
const tests = {
'': '',
'foo': 'foo',
'a\\\r\\\n\\\r\nb': 'ab',
'\\21': '!',
'\\021': '!',
// (5)
'\\0021': '!',
'\\00021': '!',
'\\000021': '!',
'\\0000211': '!1',
'\\000021 1': '!1',
// (10)
'\\000021\t1': '!1',
'\\0': '\ufffd',
'\\0x': '\ufffdx',
'\\abcdefa': '\ufffda', // is greater than the maximum allowed code point
'\\def0': '\ufffd', // is for a surrogate
// (15)
'\\00abcdef': '\uabcdef',
'\\abcdef1': '\ufffd1',
'\\': ''
};

forEachTest(tests, ident.decode);
});

describe('encode', () => {
// Adopted tests: https://github.com/mathiasbynens/CSS.escape/blob/master/tests/tests.js
const tests = {
'': '',
'\0': '\uFFFD',
'a\0': 'a\uFFFD',
'\0b': '\uFFFDb',
'a\0b': 'a\uFFFDb',

'\uFFFD': '\uFFFD',
'a\uFFFD': 'a\uFFFD',
'\uFFFDb': '\uFFFDb',
'a\uFFFDb': 'a\uFFFDb',

'\x01\x02\x1E\x1F': '\\1 \\2 \\1e \\1f ',

'0a': '\\30 a',
'1a': '\\31 a',
'2a': '\\32 a',
'3a': '\\33 a',
'4a': '\\34 a',
'5a': '\\35 a',
'6a': '\\36 a',
'7a': '\\37 a',
'8a': '\\38 a',
'9a': '\\39 a',

'a0b': 'a0b',
'a1b': 'a1b',
'a2b': 'a2b',
'a3b': 'a3b',
'a4b': 'a4b',
'a5b': 'a5b',
'a6b': 'a6b',
'a7b': 'a7b',
'a8b': 'a8b',
'a9b': 'a9b',

'-0a': '-\\30 a',
'-1a': '-\\31 a',
'-2a': '-\\32 a',
'-3a': '-\\33 a',
'-4a': '-\\34 a',
'-5a': '-\\35 a',
'-6a': '-\\36 a',
'-7a': '-\\37 a',
'-8a': '-\\38 a',
'-9a': '-\\39 a',

'-': '\\-',
'-a': '-a',
'--': '--',
'--a': '--a',

'\x80\x2D\x5F\xA9': '\x80\x2D\x5F\xA9',
'\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F': '\\7f \x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F',
'\xA0\xA1\xA2': '\xA0\xA1\xA2',
'a0123456789b': 'a0123456789b',
'abcdefghijklmnopqrstuvwxyz': 'abcdefghijklmnopqrstuvwxyz',
'ABCDEFGHIJKLMNOPQRSTUVWXYZ': 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',

'\x20\x21\x78\x79': '\\ \\!xy',

// astral symbol (U+1D306 TETRAGRAM FOR CENTRE)
'\uD834\uDF06': '\uD834\uDF06',
// lone surrogates
'\uDF06': '\uDF06',
'\uD834': '\uD834'
};

forEachTest(tests, ident.encode);
});
});
});

0 comments on commit 3355b15

Please sign in to comment.