Skip to content

Commit

Permalink
tweak scanner consts and fix exception in node 0.10/0.12
Browse files Browse the repository at this point in the history
  • Loading branch information
lahmatiy committed Jun 21, 2016
1 parent 2bd6a5f commit 8be6f9d
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 83 deletions.
103 changes: 93 additions & 10 deletions lib/parser/const.js
@@ -1,10 +1,25 @@
// Generic token types. These values must not collide with the character
// codes that TokenType also uses as token types for single symbols (33 and up).
var WHITESPACE = 1;
var IDENTIFIER = 2;
var NUMBER = 3;
var STRING = 4;
var COMMENT = 5;
var PUNCTUATOR = 6;

// Character codes of the symbols that are categorized as whitespace below.
var TAB = 9;     // '\t'
var N = 10;      // '\n' (line feed)
var F = 12;      // '\f' (form feed)
var R = 13;      // '\r' (carriage return)
var SPACE = 32;  // ' '

var TokenType = {
Whitespace: 1,
Identifier: 2,
DecimalNumber: 3,
String: 4,
Comment: 5,
Unknown: 6,
Whitespace: WHITESPACE,
Identifier: IDENTIFIER,
DecimalNumber: NUMBER,
String: STRING,
Comment: COMMENT,
Punctuator: PUNCTUATOR,

ExclamationMark: 33, // !
QuotationMark: 34, // "
NumberSign: 35, // #
Expand Down Expand Up @@ -37,13 +52,81 @@ var TokenType = {
Tilde: 126 // ~
};

var TokenName = {};
// Reverse lookup table: maps each TokenType value (a number) back to the
// name of the TokenType property that holds it.
var TokenName = Object.keys(TokenType).reduce(function(result, key) {
result[TokenType[key]] = key;
return result;
}, {});

for (var key in TokenType) {
TokenName[TokenType[key]] = key;
var punctuation = [
TokenType.ExclamationMark, // '!'
TokenType.QuotationMark, // '"'
TokenType.NumberSign, // '#'
TokenType.DollarSign, // '$'
TokenType.PercentSign, // '%'
TokenType.Ampersand, // '&'
TokenType.Apostrophe, // '\''
TokenType.LeftParenthesis, // '('
TokenType.RightParenthesis, // ')'
TokenType.Asterisk, // '*'
TokenType.PlusSign, // '+'
TokenType.Comma, // ','
TokenType.HyphenMinus, // '-'
TokenType.FullStop, // '.'
TokenType.Solidus, // '/'
TokenType.Colon, // ':'
TokenType.Semicolon, // ';'
TokenType.LessThanSign, // '<'
TokenType.EqualsSign, // '='
TokenType.GreaterThanSign, // '>'
TokenType.QuestionMark, // '?'
TokenType.CommercialAt, // '@'
TokenType.LeftSquareBracket, // '['
TokenType.RightSquareBracket, // ']'
TokenType.CircumflexAccent, // '^'
TokenType.LeftCurlyBracket, // '{'
TokenType.VerticalLine, // '|'
TokenType.RightCurlyBracket, // '}'
TokenType.Tilde // '~'
];
// Lookup tables indexed by character code. Their length is one more than the
// largest punctuation char code, so every listed punctuation code is a valid index.
var SYMBOL_CATEGORY_LENGTH = Math.max.apply(null, punctuation) + 1;
// SYMBOL_CATEGORY[code] holds the generic token type a character belongs to.
var SYMBOL_CATEGORY = new Uint32Array(SYMBOL_CATEGORY_LENGTH);
// IS_PUNCTUATOR[code] is non-zero (PUNCTUATOR) for flagged characters; typed
// arrays start zero-filled, so every other entry stays 0.
var IS_PUNCTUATOR = new Uint32Array(SYMBOL_CATEGORY_LENGTH);

// Default category for every code is IDENTIFIER; the statements below
// override specific codes, so their order matters.
for (var i = 0; i < SYMBOL_CATEGORY.length; i++) {
SYMBOL_CATEGORY[i] = IDENTIFIER;
}

// fill categories
// Each entry of `punctuation` is a char code; mark it in both tables.
// The second forEach argument (thisArg) is not used by the callback.
punctuation.forEach(function(key) {
SYMBOL_CATEGORY[Number(key)] = PUNCTUATOR;
IS_PUNCTUATOR[Number(key)] = PUNCTUATOR;
}, SYMBOL_CATEGORY);

// Hyphen-minus is cleared in IS_PUNCTUATOR only; it remains PUNCTUATOR in
// SYMBOL_CATEGORY. NOTE(review): presumably because '-' may be part of an
// identifier -- confirm against the scanner.
IS_PUNCTUATOR[TokenType.HyphenMinus] = 0;
// whitespace is punctuator
IS_PUNCTUATOR[SPACE] = PUNCTUATOR;
IS_PUNCTUATOR[TAB] = PUNCTUATOR;
IS_PUNCTUATOR[N] = PUNCTUATOR;
IS_PUNCTUATOR[R] = PUNCTUATOR;
IS_PUNCTUATOR[F] = PUNCTUATOR;

// Char codes 48..57 are the digits '0'..'9'.
for (var i = 48; i <= 57; i++) {
SYMBOL_CATEGORY[i] = NUMBER;
}

// Whitespace characters replace the IDENTIFIER default.
SYMBOL_CATEGORY[SPACE] = WHITESPACE;
SYMBOL_CATEGORY[TAB] = WHITESPACE;
SYMBOL_CATEGORY[N] = WHITESPACE;
SYMBOL_CATEGORY[R] = WHITESPACE;
SYMBOL_CATEGORY[F] = WHITESPACE;

// Both quote characters are in `punctuation`, so these assignments replace
// the PUNCTUATOR category set above with STRING (IS_PUNCTUATOR is untouched).
SYMBOL_CATEGORY[TokenType.Apostrophe] = STRING;
SYMBOL_CATEGORY[TokenType.QuotationMark] = STRING;

module.exports = {
TokenType: TokenType,
TokenName: TokenName
TokenName: TokenName,

SYMBOL_CATEGORY: SYMBOL_CATEGORY,
IS_PUNCTUATOR: IS_PUNCTUATOR
};
84 changes: 11 additions & 73 deletions lib/parser/scanner.js
Expand Up @@ -2,100 +2,38 @@

var TokenType = require('./const').TokenType;
var TokenName = require('./const').TokenName;
var IS_PUNCTUATOR = require('./const').IS_PUNCTUATOR;
var SYMBOL_CATEGORY = require('./const').SYMBOL_CATEGORY;
var SYMBOL_CATEGORY_LENGTH = SYMBOL_CATEGORY.length;
var cmpStr = require('./utils').cmpStr;
var isHex = require('./utils').isHex;

// token types (note: values must not collide with the char codes that are also used as token types)
var NULL = 0;
var WHITESPACE = TokenType.Whitespace;
var IDENTIFIER = TokenType.Identifier;
var NUMBER = TokenType.DecimalNumber;
var STRING = TokenType.String;
var COMMENT = TokenType.Comment;
var PUNCTUATOR = 7;
var PUNCTUATOR = TokenType.Punctuator;

var TAB = 9;
var N = 10;
var F = 12;
var R = 13;
var SPACE = 32;
var DOUBLE_QUOTE = 34;
var QUOTE = 39;
var STAR = 42;
var SLASH = 47;
var BACK_SLASH = 92;
var HYPHEN_MINUS = 45;

var PUNCTUATION = {
33: TokenType.ExclamationMark, // '!'
34: TokenType.QuotationMark, // '"'
35: TokenType.NumberSign, // '#'
36: TokenType.DollarSign, // '$'
37: TokenType.PercentSign, // '%'
38: TokenType.Ampersand, // '&'
39: TokenType.Apostrophe, // '\''
40: TokenType.LeftParenthesis, // '('
41: TokenType.RightParenthesis, // ')'
42: TokenType.Asterisk, // '*'
43: TokenType.PlusSign, // '+'
44: TokenType.Comma, // ','
45: TokenType.HyphenMinus, // '-'
46: TokenType.FullStop, // '.'
47: TokenType.Solidus, // '/'
58: TokenType.Colon, // ':'
59: TokenType.Semicolon, // ';'
60: TokenType.LessThanSign, // '<'
61: TokenType.EqualsSign, // '='
62: TokenType.GreaterThanSign, // '>'
63: TokenType.QuestionMark, // '?'
64: TokenType.CommercialAt, // '@'
91: TokenType.LeftSquareBracket, // '['
93: TokenType.RightSquareBracket, // ']'
94: TokenType.CircumflexAccent, // '^'
123: TokenType.LeftCurlyBracket, // '{'
124: TokenType.VerticalLine, // '|'
125: TokenType.RightCurlyBracket, // '}'
126: TokenType.Tilde // '~'
};
var SYMBOL_CATEGORY_LENGTH = Math.max.apply(null, Object.keys(PUNCTUATION)) + 1;
var SYMBOL_CATEGORY = new Uint32Array(SYMBOL_CATEGORY_LENGTH);
var IS_PUNCTUATOR = new Uint32Array(SYMBOL_CATEGORY_LENGTH);

for (var i = 0; i < SYMBOL_CATEGORY.length; i++) {
SYMBOL_CATEGORY[i] = IDENTIFIER;
}

// fill categories
Object.keys(PUNCTUATION).forEach(function(key) {
SYMBOL_CATEGORY[Number(key)] = PUNCTUATOR;
IS_PUNCTUATOR[Number(key)] = PUNCTUATOR;
}, SYMBOL_CATEGORY);

IS_PUNCTUATOR[HYPHEN_MINUS] = 0;
// whitespace is punctuator
IS_PUNCTUATOR[SPACE] = PUNCTUATOR;
IS_PUNCTUATOR[TAB] = PUNCTUATOR;
IS_PUNCTUATOR[N] = PUNCTUATOR;
IS_PUNCTUATOR[R] = PUNCTUATOR;
IS_PUNCTUATOR[F] = PUNCTUATOR;

for (var i = 48; i <= 57; i++) {
SYMBOL_CATEGORY[i] = NUMBER;
}

SYMBOL_CATEGORY[SPACE] = WHITESPACE;
SYMBOL_CATEGORY[TAB] = WHITESPACE;
SYMBOL_CATEGORY[N] = WHITESPACE;
SYMBOL_CATEGORY[R] = WHITESPACE;
SYMBOL_CATEGORY[F] = WHITESPACE;

SYMBOL_CATEGORY[QUOTE] = STRING;
SYMBOL_CATEGORY[DOUBLE_QUOTE] = STRING;
// Array constructors used for the scanner's buffers: typed arrays when the
// runtime provides them, plain Array otherwise.
var ShortArray = typeof Uint8Array !== 'undefined' ? Uint8Array : Array;
// Uint32Array is only used when it also implements lastIndexOf.
// NOTE(review): presumably the missing method is the node 0.10/0.12 exception
// named in the commit title -- confirm.
var LongArray = typeof Uint32Array !== 'undefined' && typeof Uint32Array.prototype.lastIndexOf === 'function'
? Uint32Array
: Array;

function linesLayout(scanner, source, start) {
var sourceLength = source.length;
var line = scanner.initLine;
var lines = new Uint32Array(source.length + 1);
var lines = new LongArray(source.length + 1);

for (var i = start; i < sourceLength; i++) {
var code = source.charCodeAt(i);
Expand Down Expand Up @@ -221,8 +159,8 @@ function findIdentifierEnd(source, offset) {

function tokenLayout(scanner, source, startPos) {
var sourceLength = source.length;
var offsets = new Uint32Array(sourceLength + 1);
var types = new Uint8Array(sourceLength);
var offsets = new LongArray(sourceLength + 1);
var types = new ShortArray(sourceLength);
var tokenCount = 0;
var start = startPos;
var prev = 0;
Expand Down

0 comments on commit 8be6f9d

Please sign in to comment.