Skip to content

Commit

Permalink
Refactoring of tokenizer, utils & walker
Browse files Browse the repository at this point in the history
  • Loading branch information
lahmatiy committed Jan 10, 2020
1 parent d679228 commit 3fdaf19
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 172 deletions.
7 changes: 2 additions & 5 deletions lib/lexer/match.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
const hasOwnProperty = Object.prototype.hasOwnProperty;
const matchGraph = require('./match-graph');
const MATCH = matchGraph.MATCH;
const MISMATCH = matchGraph.MISMATCH;
const DISALLOW_EMPTY = matchGraph.DISALLOW_EMPTY;
const TYPE = require('../tokenizer/const').TYPE;
const { MATCH, MISMATCH, DISALLOW_EMPTY } = require('./match-graph');
const { TYPE } = require('../tokenizer/const');

const STUB = 0;
const TOKEN = 1;
Expand Down
40 changes: 20 additions & 20 deletions lib/tokenizer/char-code-definitions.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
var EOF = 0;
const EOF = 0;

// https://drafts.csswg.org/css-syntax-3/
// § 4.2. Definitions
Expand Down Expand Up @@ -194,14 +194,14 @@ function isBOM(code) {
// > name code point
// > A name-start code point, a digit, or U+002D HYPHEN-MINUS (-)
// That means only ASCII code points have a special meaning, so we define a map for codes 0..127 only
var CATEGORY = new Array(0x80);
const CATEGORY = new Array(0x80);
charCodeCategory.Eof = 0x80;
charCodeCategory.WhiteSpace = 0x82;
charCodeCategory.Digit = 0x83;
charCodeCategory.NameStart = 0x84;
charCodeCategory.NonPrintable = 0x85;

for (var i = 0; i < CATEGORY.length; i++) {
for (let i = 0; i < CATEGORY.length; i++) {
switch (true) {
case isWhiteSpace(i):
CATEGORY[i] = charCodeCategory.WhiteSpace;
Expand Down Expand Up @@ -229,21 +229,21 @@ function charCodeCategory(code) {
};

module.exports = {
isDigit: isDigit,
isHexDigit: isHexDigit,
isUppercaseLetter: isUppercaseLetter,
isLowercaseLetter: isLowercaseLetter,
isLetter: isLetter,
isNonAscii: isNonAscii,
isNameStart: isNameStart,
isName: isName,
isNonPrintable: isNonPrintable,
isNewline: isNewline,
isWhiteSpace: isWhiteSpace,
isValidEscape: isValidEscape,
isIdentifierStart: isIdentifierStart,
isNumberStart: isNumberStart,

isBOM: isBOM,
charCodeCategory: charCodeCategory
isDigit,
isHexDigit,
isUppercaseLetter,
isLowercaseLetter,
isLetter,
isNonAscii,
isNameStart,
isName,
isNonPrintable,
isNewline,
isWhiteSpace,
isValidEscape,
isIdentifierStart,
isNumberStart,

isBOM,
charCodeCategory
};
4 changes: 2 additions & 2 deletions lib/tokenizer/const.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// CSS Syntax Module Level 3
// https://www.w3.org/TR/css-syntax-3/
var TYPE = {
const TYPE = {
EOF: 0, // <EOF-token>
Ident: 1, // <ident-token>
Function: 2, // <function-token>
Expand Down Expand Up @@ -30,6 +30,6 @@ var TYPE = {
};

module.exports = {
TYPE: TYPE,
TYPE,
NAME: Object.keys(TYPE)
};
94 changes: 43 additions & 51 deletions lib/tokenizer/index.js
Original file line number Diff line number Diff line change
@@ -1,29 +1,31 @@
var TokenStream = require('../common/TokenStream');
var adoptBuffer = require('../common/adopt-buffer');

var constants = require('./const');
var TYPE = constants.TYPE;

var charCodeDefinitions = require('./char-code-definitions');
var isNewline = charCodeDefinitions.isNewline;
var isName = charCodeDefinitions.isName;
var isValidEscape = charCodeDefinitions.isValidEscape;
var isNumberStart = charCodeDefinitions.isNumberStart;
var isIdentifierStart = charCodeDefinitions.isIdentifierStart;
var charCodeCategory = charCodeDefinitions.charCodeCategory;
var isBOM = charCodeDefinitions.isBOM;

var utils = require('./utils');
var cmpStr = utils.cmpStr;
var getNewlineLength = utils.getNewlineLength;
var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
var consumeEscaped = utils.consumeEscaped;
var consumeName = utils.consumeName;
var consumeNumber = utils.consumeNumber;
var consumeBadUrlRemnants = utils.consumeBadUrlRemnants;

var OFFSET_MASK = 0x00FFFFFF;
var TYPE_SHIFT = 24;
const TokenStream = require('../common/TokenStream');
const adoptBuffer = require('../common/adopt-buffer');

const constants = require('./const');
const charCodeDefinitions = require('./char-code-definitions');
const utils = require('./utils');
const { TYPE } = constants;
const {
isNewline,
isName,
isValidEscape,
isNumberStart,
isIdentifierStart,
charCodeCategory,
isBOM
} = charCodeDefinitions;
const {
cmpStr,
getNewlineLength,
findWhiteSpaceEnd,
consumeEscaped,
consumeName,
consumeNumber,
consumeBadUrlRemnants
} = utils;

const OFFSET_MASK = 0x00FFFFFF;
const TYPE_SHIFT = 24;

function tokenize(source, stream) {
function getCharCode(offset) {
Expand Down Expand Up @@ -111,7 +113,7 @@ function tokenize(source, stream) {

// Repeatedly consume the next input code point from the stream:
for (; offset < source.length; offset++) {
var code = source.charCodeAt(offset);
const code = source.charCodeAt(offset);

switch (charCodeCategory(code)) {
// ending code point
Expand Down Expand Up @@ -143,7 +145,7 @@ function tokenize(source, stream) {
break;
}

var nextCode = getCharCode(offset + 1);
const nextCode = getCharCode(offset + 1);

// Otherwise, if the next input code point is a newline, consume it.
if (isNewline(nextCode)) {
Expand Down Expand Up @@ -176,7 +178,7 @@ function tokenize(source, stream) {

// Repeatedly consume the next input code point from the stream:
for (; offset < source.length; offset++) {
var code = source.charCodeAt(offset);
const code = source.charCodeAt(offset);

switch (charCodeCategory(code)) {
// U+0029 RIGHT PARENTHESIS ())
Expand Down Expand Up @@ -253,21 +255,21 @@ function tokenize(source, stream) {
// ensure source is a string
source = String(source || '');

var sourceLength = source.length;
var offsetAndType = adoptBuffer(stream.offsetAndType, sourceLength + 1); // +1 because of eof-token
var balance = adoptBuffer(stream.balance, sourceLength + 1);
var tokenCount = 0;
var start = isBOM(getCharCode(0));
var offset = start;
var balanceCloseType = 0;
var balanceStart = 0;
var balancePrev = 0;
const sourceLength = source.length;
const offsetAndType = adoptBuffer(stream.offsetAndType, sourceLength + 1); // +1 because of eof-token
const balance = adoptBuffer(stream.balance, sourceLength + 1);
const start = isBOM(getCharCode(0));
let offset = start;
let tokenCount = 0;
let balanceCloseType = 0;
let balanceStart = 0;
let balancePrev = 0;
let type;

// https://drafts.csswg.org/css-syntax-3/#consume-token
// § 4.3.1. Consume a token
while (offset < sourceLength) {
var code = source.charCodeAt(offset);
var type = 0;
const code = source.charCodeAt(offset);

balance[tokenCount] = sourceLength;

Expand Down Expand Up @@ -575,17 +577,7 @@ function tokenize(source, stream) {
return stream;
}

// extend tokenizer with constants
Object.keys(constants).forEach(function(key) {
tokenize[key] = constants[key];
});

// extend tokenizer with static methods from utils
Object.keys(charCodeDefinitions).forEach(function(key) {
tokenize[key] = charCodeDefinitions[key];
});
Object.keys(utils).forEach(function(key) {
tokenize[key] = utils[key];
});
Object.assign(tokenize, constants, charCodeDefinitions, utils);

module.exports = tokenize;
53 changes: 27 additions & 26 deletions lib/tokenizer/utils.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
var charCodeDef = require('./char-code-definitions');
var isDigit = charCodeDef.isDigit;
var isHexDigit = charCodeDef.isHexDigit;
var isUppercaseLetter = charCodeDef.isUppercaseLetter;
var isName = charCodeDef.isName;
var isWhiteSpace = charCodeDef.isWhiteSpace;
var isValidEscape = charCodeDef.isValidEscape;
const {
isDigit,
isHexDigit,
isUppercaseLetter,
isName,
isWhiteSpace,
isValidEscape
} = require('./char-code-definitions');

function getCharCode(source, offset) {
return offset < source.length ? source.charCodeAt(offset) : 0;
Expand All @@ -19,7 +20,7 @@ function getNewlineLength(source, offset, code) {
}

function cmpChar(testStr, offset, referenceCode) {
var code = testStr.charCodeAt(offset);
let code = testStr.charCodeAt(offset);

// code.toLowerCase() for A..Z
if (isUppercaseLetter(code)) {
Expand All @@ -38,9 +39,9 @@ function cmpStr(testStr, start, end, referenceStr) {
return false;
}

for (var i = start; i < end; i++) {
var testCode = testStr.charCodeAt(i);
var referenceCode = referenceStr.charCodeAt(i - start);
for (let i = start; i < end; i++) {
const referenceCode = referenceStr.charCodeAt(i - start);
let testCode = testStr.charCodeAt(i);

// testCode.toLowerCase() for A..Z
if (isUppercaseLetter(testCode)) {
Expand Down Expand Up @@ -95,14 +96,14 @@ function consumeEscaped(source, offset) {
if (isHexDigit(getCharCode(source, offset - 1))) {
// Consume as many hex digits as possible, but no more than 5.
// Note that this means 1-6 hex digits have been consumed in total.
for (var maxOffset = Math.min(source.length, offset + 5); offset < maxOffset; offset++) {
for (const maxOffset = Math.min(source.length, offset + 5); offset < maxOffset; offset++) {
if (!isHexDigit(getCharCode(source, offset))) {
break;
}
}

// If the next input code point is whitespace, consume it as well.
var code = getCharCode(source, offset);
const code = getCharCode(source, offset);
if (isWhiteSpace(code)) {
offset += getNewlineLength(source, offset, code);
}
Expand All @@ -119,7 +120,7 @@ function consumeName(source, offset) {
// Let result initially be an empty string.
// Repeatedly consume the next input code point from the stream:
for (; offset < source.length; offset++) {
var code = source.charCodeAt(offset);
const code = source.charCodeAt(offset);

// name code point
if (isName(code)) {
Expand All @@ -144,7 +145,7 @@ function consumeName(source, offset) {

// §4.3.12. Consume a number
function consumeNumber(source, offset) {
var code = source.charCodeAt(offset);
let code = source.charCodeAt(offset);

// 2. If the next input code point is U+002B PLUS SIGN (+) or U+002D HYPHEN-MINUS (-),
// consume it and append it to repr.
Expand Down Expand Up @@ -175,7 +176,7 @@ function consumeNumber(source, offset) {
// 5. If the next 2 or 3 input code points are U+0045 LATIN CAPITAL LETTER E (E)
// or U+0065 LATIN SMALL LETTER E (e), ... , followed by a digit, then:
if (cmpChar(source, offset, 101 /* e */)) {
var sign = 0;
let sign = 0;
code = source.charCodeAt(offset + 1);

// ... optionally followed by U+002D HYPHEN-MINUS (-) or U+002B PLUS SIGN (+) ...
Expand Down Expand Up @@ -206,7 +207,7 @@ function consumeNumber(source, offset) {
function consumeBadUrlRemnants(source, offset) {
// Repeatedly consume the next input code point from the stream:
for (; offset < source.length; offset++) {
var code = source.charCodeAt(offset);
const code = source.charCodeAt(offset);

// U+0029 RIGHT PARENTHESIS ())
// EOF
Expand All @@ -229,15 +230,15 @@ function consumeBadUrlRemnants(source, offset) {
}

module.exports = {
consumeEscaped: consumeEscaped,
consumeName: consumeName,
consumeNumber: consumeNumber,
consumeBadUrlRemnants: consumeBadUrlRemnants,
consumeEscaped,
consumeName,
consumeNumber,
consumeBadUrlRemnants,

cmpChar: cmpChar,
cmpStr: cmpStr,
cmpChar,
cmpStr,

getNewlineLength: getNewlineLength,
findWhiteSpaceStart: findWhiteSpaceStart,
findWhiteSpaceEnd: findWhiteSpaceEnd
getNewlineLength,
findWhiteSpaceStart,
findWhiteSpaceEnd
};
8 changes: 4 additions & 4 deletions lib/utils/clone.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
var List = require('../common/List');
const List = require('../common/List');

module.exports = function clone(node) {
var result = {};
const result = {};

for (var key in node) {
var value = node[key];
for (const key in node) {
let value = node[key];

if (value) {
if (Array.isArray(value) || value instanceof List) {
Expand Down
4 changes: 2 additions & 2 deletions lib/utils/createCustomError.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
module.exports = function createCustomError(name, message) {
// use Object.create(), because some VMs prevent setting line/column otherwise
// (iOS Safari 10 even throws an exception)
var error = Object.create(SyntaxError.prototype);
var errorStack = new Error();
const error = Object.create(SyntaxError.prototype);
const errorStack = new Error();

error.name = name;
error.message = message;
Expand Down

0 comments on commit 3fdaf19

Please sign in to comment.