From 8be6f9d4702bcc32de9973c7c1b61baf0135fd02 Mon Sep 17 00:00:00 2001 From: Roman Dvornov Date: Tue, 21 Jun 2016 23:59:17 +0300 Subject: [PATCH] tweak scanner consts and fix exception in node 0.10/0.12 --- lib/parser/const.js | 103 ++++++++++++++++++++++++++++++++++++++---- lib/parser/scanner.js | 84 +++++----------------------------- 2 files changed, 104 insertions(+), 83 deletions(-) diff --git a/lib/parser/const.js b/lib/parser/const.js index 06135ba3..35badaf4 100644 --- a/lib/parser/const.js +++ b/lib/parser/const.js @@ -1,10 +1,25 @@ +// token types (note: value shouldn't intersect with using char codes) +var WHITESPACE = 1; +var IDENTIFIER = 2; +var NUMBER = 3; +var STRING = 4; +var COMMENT = 5; +var PUNCTUATOR = 6; + +var TAB = 9; +var N = 10; +var F = 12; +var R = 13; +var SPACE = 32; + var TokenType = { - Whitespace: 1, - Identifier: 2, - DecimalNumber: 3, - String: 4, - Comment: 5, - Unknown: 6, + Whitespace: WHITESPACE, + Identifier: IDENTIFIER, + DecimalNumber: NUMBER, + String: STRING, + Comment: COMMENT, + Punctuator: PUNCTUATOR, + ExclamationMark: 33, // ! QuotationMark: 34, // " NumberSign: 35, // # @@ -37,13 +52,81 @@ var TokenType = { Tilde: 126 // ~ }; -var TokenName = {}; +var TokenName = Object.keys(TokenType).reduce(function(result, key) { + result[TokenType[key]] = key; + return result; +}, {}); -for (var key in TokenType) { - TokenName[TokenType[key]] = key; +var punctuation = [ + TokenType.ExclamationMark, // '!' + TokenType.QuotationMark, // '"' + TokenType.NumberSign, // '#' + TokenType.DollarSign, // '$' + TokenType.PercentSign, // '%' + TokenType.Ampersand, // '&' + TokenType.Apostrophe, // '\'' + TokenType.LeftParenthesis, // '(' + TokenType.RightParenthesis, // ')' + TokenType.Asterisk, // '*' + TokenType.PlusSign, // '+' + TokenType.Comma, // ',' + TokenType.HyphenMinus, // '-' + TokenType.FullStop, // '.' + TokenType.Solidus, // '/' + TokenType.Colon, // ':' + TokenType.Semicolon, // ';' + TokenType.LessThanSign, // '<' + TokenType.EqualsSign, // '=' + TokenType.GreaterThanSign, // '>' + TokenType.QuestionMark, // '?' + TokenType.CommercialAt, // '@' + TokenType.LeftSquareBracket, // '[' + TokenType.RightSquareBracket, // ']' + TokenType.CircumflexAccent, // '^' + TokenType.LeftCurlyBracket, // '{' + TokenType.VerticalLine, // '|' + TokenType.RightCurlyBracket, // '}' + TokenType.Tilde // '~' +]; +var SYMBOL_CATEGORY_LENGTH = Math.max.apply(null, punctuation) + 1; +var SYMBOL_CATEGORY = new Uint32Array(SYMBOL_CATEGORY_LENGTH); +var IS_PUNCTUATOR = new Uint32Array(SYMBOL_CATEGORY_LENGTH); + +for (var i = 0; i < SYMBOL_CATEGORY.length; i++) { + SYMBOL_CATEGORY[i] = IDENTIFIER; } +// fill categories +punctuation.forEach(function(key) { + SYMBOL_CATEGORY[Number(key)] = PUNCTUATOR; + IS_PUNCTUATOR[Number(key)] = PUNCTUATOR; +}, SYMBOL_CATEGORY); + +IS_PUNCTUATOR[TokenType.HyphenMinus] = 0; +// whitespace is punctuator +IS_PUNCTUATOR[SPACE] = PUNCTUATOR; +IS_PUNCTUATOR[TAB] = PUNCTUATOR; +IS_PUNCTUATOR[N] = PUNCTUATOR; +IS_PUNCTUATOR[R] = PUNCTUATOR; +IS_PUNCTUATOR[F] = PUNCTUATOR; + +for (var i = 48; i <= 57; i++) { + SYMBOL_CATEGORY[i] = NUMBER; +} + +SYMBOL_CATEGORY[SPACE] = WHITESPACE; +SYMBOL_CATEGORY[TAB] = WHITESPACE; +SYMBOL_CATEGORY[N] = WHITESPACE; +SYMBOL_CATEGORY[R] = WHITESPACE; +SYMBOL_CATEGORY[F] = WHITESPACE; + +SYMBOL_CATEGORY[TokenType.Apostrophe] = STRING; +SYMBOL_CATEGORY[TokenType.QuotationMark] = STRING; + module.exports = { TokenType: TokenType, - TokenName: TokenName + TokenName: TokenName, + + SYMBOL_CATEGORY: SYMBOL_CATEGORY, + IS_PUNCTUATOR: IS_PUNCTUATOR }; diff --git a/lib/parser/scanner.js b/lib/parser/scanner.js index 3276ef63..5fff3ef2 100644 --- a/lib/parser/scanner.js +++ b/lib/parser/scanner.js @@ -2,100 +2,38 @@ var TokenType = require('./const').TokenType; var TokenName = require('./const').TokenName; +var IS_PUNCTUATOR = require('./const').IS_PUNCTUATOR; +var SYMBOL_CATEGORY = require('./const').SYMBOL_CATEGORY; +var SYMBOL_CATEGORY_LENGTH = SYMBOL_CATEGORY.length; var cmpStr = require('./utils').cmpStr; var isHex = require('./utils').isHex; -// token types (note: value shouldn't intersect with using char codes) var NULL = 0; var WHITESPACE = TokenType.Whitespace; var IDENTIFIER = TokenType.Identifier; var NUMBER = TokenType.DecimalNumber; var STRING = TokenType.String; var COMMENT = TokenType.Comment; -var PUNCTUATOR = 7; +var PUNCTUATOR = TokenType.Punctuator; var TAB = 9; var N = 10; var F = 12; var R = 13; var SPACE = 32; -var DOUBLE_QUOTE = 34; -var QUOTE = 39; var STAR = 42; var SLASH = 47; var BACK_SLASH = 92; -var HYPHEN_MINUS = 45; - -var PUNCTUATION = { - 33: TokenType.ExclamationMark, // '!' - 34: TokenType.QuotationMark, // '"' - 35: TokenType.NumberSign, // '#' - 36: TokenType.DollarSign, // '$' - 37: TokenType.PercentSign, // '%' - 38: TokenType.Ampersand, // '&' - 39: TokenType.Apostrophe, // '\'' - 40: TokenType.LeftParenthesis, // '(' - 41: TokenType.RightParenthesis, // ')' - 42: TokenType.Asterisk, // '*' - 43: TokenType.PlusSign, // '+' - 44: TokenType.Comma, // ',' - 45: TokenType.HyphenMinus, // '-' - 46: TokenType.FullStop, // '.' - 47: TokenType.Solidus, // '/' - 58: TokenType.Colon, // ':' - 59: TokenType.Semicolon, // ';' - 60: TokenType.LessThanSign, // '<' - 61: TokenType.EqualsSign, // '=' - 62: TokenType.GreaterThanSign, // '>' - 63: TokenType.QuestionMark, // '?' - 64: TokenType.CommercialAt, // '@' - 91: TokenType.LeftSquareBracket, // '[' - 93: TokenType.RightSquareBracket, // ']' - 94: TokenType.CircumflexAccent, // '^' - 123: TokenType.LeftCurlyBracket, // '{' - 124: TokenType.VerticalLine, // '|' - 125: TokenType.RightCurlyBracket, // '}' - 126: TokenType.Tilde // '~' -}; -var SYMBOL_CATEGORY_LENGTH = Math.max.apply(null, Object.keys(PUNCTUATION)) + 1; -var SYMBOL_CATEGORY = new Uint32Array(SYMBOL_CATEGORY_LENGTH); -var IS_PUNCTUATOR = new Uint32Array(SYMBOL_CATEGORY_LENGTH); - -for (var i = 0; i < SYMBOL_CATEGORY.length; i++) { - SYMBOL_CATEGORY[i] = IDENTIFIER; -} - -// fill categories -Object.keys(PUNCTUATION).forEach(function(key) { - SYMBOL_CATEGORY[Number(key)] = PUNCTUATOR; - IS_PUNCTUATOR[Number(key)] = PUNCTUATOR; -}, SYMBOL_CATEGORY); - -IS_PUNCTUATOR[HYPHEN_MINUS] = 0; -// whitespace is punctuator -IS_PUNCTUATOR[SPACE] = PUNCTUATOR; -IS_PUNCTUATOR[TAB] = PUNCTUATOR; -IS_PUNCTUATOR[N] = PUNCTUATOR; -IS_PUNCTUATOR[R] = PUNCTUATOR; -IS_PUNCTUATOR[F] = PUNCTUATOR; - -for (var i = 48; i <= 57; i++) { - SYMBOL_CATEGORY[i] = NUMBER; -} - -SYMBOL_CATEGORY[SPACE] = WHITESPACE; -SYMBOL_CATEGORY[TAB] = WHITESPACE; -SYMBOL_CATEGORY[N] = WHITESPACE; -SYMBOL_CATEGORY[R] = WHITESPACE; -SYMBOL_CATEGORY[F] = WHITESPACE; -SYMBOL_CATEGORY[QUOTE] = STRING; -SYMBOL_CATEGORY[DOUBLE_QUOTE] = STRING; +var ShortArray = typeof Uint8Array !== 'undefined' ? Uint8Array : Array; +var LongArray = typeof Uint32Array !== 'undefined' && typeof Uint32Array.prototype.lastIndexOf === 'function' + ? Uint32Array + : Array; function linesLayout(scanner, source, start) { var sourceLength = source.length; var line = scanner.initLine; - var lines = new Uint32Array(source.length + 1); + var lines = new LongArray(source.length + 1); for (var i = start; i < sourceLength; i++) { var code = source.charCodeAt(i); @@ -221,8 +159,8 @@ function findIdentifierEnd(source, offset) { function tokenLayout(scanner, source, startPos) { var sourceLength = source.length; - var offsets = new Uint32Array(sourceLength + 1); - var types = new Uint8Array(sourceLength); + var offsets = new LongArray(sourceLength + 1); + var types = new ShortArray(sourceLength); var tokenCount = 0; var start = startPos; var prev = 0;