diff --git a/lib/index.js b/lib/index.js index ef845265..b78bfa8c 100644 --- a/lib/index.js +++ b/lib/index.js @@ -3,6 +3,8 @@ var names = require('./utils/names'); module.exports = { List: require('./utils/list'), + Scanner: require('./parser/scanner'), + syntax: require('./syntax'), property: names.property, keyword: names.keyword, diff --git a/lib/parser/const.js b/lib/parser/const.js index 28c1c0c1..775657b9 100644 --- a/lib/parser/const.js +++ b/lib/parser/const.js @@ -1,10 +1,10 @@ -// token types (note: value shouldn't intersect with using char codes) +// token types (note: value shouldn't intersect with used char codes) var WHITESPACE = 1; var IDENTIFIER = 2; var NUMBER = 3; var STRING = 4; var COMMENT = 5; -var PUNCTUATION = 6; +var PUNCTUATOR = 6; var TAB = 9; var N = 10; @@ -12,121 +12,121 @@ var F = 12; var R = 13; var SPACE = 32; -var TokenType = { +var TYPE = { Whitespace: WHITESPACE, Identifier: IDENTIFIER, Number: NUMBER, String: STRING, Comment: COMMENT, - Punctuation: PUNCTUATION, + Punctuator: PUNCTUATOR, - ExclamationMark: 33, // ! - QuotationMark: 34, // " - NumberSign: 35, // # - DollarSign: 36, // $ - PercentSign: 37, // % - Ampersand: 38, // & - Apostrophe: 39, // ' - LeftParenthesis: 40, // ( - RightParenthesis: 41, // ) - Asterisk: 42, // * - PlusSign: 43, // + - Comma: 44, // , - HyphenMinus: 45, // - - FullStop: 46, // . - Solidus: 47, // / - Colon: 58, // : - Semicolon: 59, // ; - LessThanSign: 60, // < - EqualsSign: 61, // = - GreaterThanSign: 62, // > - QuestionMark: 63, // ? - CommercialAt: 64, // @ - LeftSquareBracket: 91, // [ - RightSquareBracket: 93, // ] - CircumflexAccent: 94, // ^ - LowLine: 95, // _ - LeftCurlyBracket: 123, // { - VerticalLine: 124, // | - RightCurlyBracket: 125, // } - Tilde: 126 // ~ + ExclamationMark: 33, // ! 
+ QuotationMark: 34, // " + NumberSign: 35, // # + DollarSign: 36, // $ + PercentSign: 37, // % + Ampersand: 38, // & + Apostrophe: 39, // ' + LeftParenthesis: 40, // ( + RightParenthesis: 41, // ) + Asterisk: 42, // * + PlusSign: 43, // + + Comma: 44, // , + HyphenMinus: 45, // - + FullStop: 46, // . + Solidus: 47, // / + Colon: 58, // : + Semicolon: 59, // ; + LessThanSign: 60, // < + EqualsSign: 61, // = + GreaterThanSign: 62, // > + QuestionMark: 63, // ? + CommercialAt: 64, // @ + LeftSquareBracket: 91, // [ + RightSquareBracket: 93, // ] + CircumflexAccent: 94, // ^ + LowLine: 95, // _ + LeftCurlyBracket: 123, // { + VerticalLine: 124, // | + RightCurlyBracket: 125, // } + Tilde: 126 // ~ }; -var TokenName = Object.keys(TokenType).reduce(function(result, key) { - result[TokenType[key]] = key; +var NAME = Object.keys(TYPE).reduce(function(result, key) { + result[TYPE[key]] = key; return result; }, {}); -var punctuation = [ - TokenType.ExclamationMark, // '!' - TokenType.QuotationMark, // '"' - TokenType.NumberSign, // '#' - TokenType.DollarSign, // '$' - TokenType.PercentSign, // '%' - TokenType.Ampersand, // '&' - TokenType.Apostrophe, // '\'' - TokenType.LeftParenthesis, // '(' - TokenType.RightParenthesis, // ')' - TokenType.Asterisk, // '*' - TokenType.PlusSign, // '+' - TokenType.Comma, // ',' - TokenType.HyphenMinus, // '-' - TokenType.FullStop, // '.' - TokenType.Solidus, // '/' - TokenType.Colon, // ':' - TokenType.Semicolon, // ';' - TokenType.LessThanSign, // '<' - TokenType.EqualsSign, // '=' - TokenType.GreaterThanSign, // '>' - TokenType.QuestionMark, // '?' 
- TokenType.CommercialAt, // '@' - TokenType.LeftSquareBracket, // '[' - TokenType.RightSquareBracket, // ']' - TokenType.CircumflexAccent, // '^' - TokenType.LeftCurlyBracket, // '{' - TokenType.VerticalLine, // '|' - TokenType.RightCurlyBracket, // '}' - TokenType.Tilde // '~' -]; -var SYMBOL_CATEGORY_LENGTH = Math.max.apply(null, punctuation) + 1; -var SYMBOL_CATEGORY = new Uint32Array(SYMBOL_CATEGORY_LENGTH); -var IS_PUNCTUATION = new Uint32Array(SYMBOL_CATEGORY_LENGTH); +var SafeUint32Array = typeof Uint32Array !== 'undefined' ? Uint32Array : Array; // fallback on Array when TypedArray is not supported +var SYMBOL_TYPE = new SafeUint32Array(Math.max.apply(null, Object.keys(NAME).map(Number)) + 1); +var PUNCTUATION = new SafeUint32Array(SYMBOL_TYPE.length); -for (var i = 0; i < SYMBOL_CATEGORY.length; i++) { - SYMBOL_CATEGORY[i] = IDENTIFIER; +for (var i = 0; i < SYMBOL_TYPE.length; i++) { + SYMBOL_TYPE[i] = IDENTIFIER; } // fill categories -punctuation.forEach(function(key) { - SYMBOL_CATEGORY[Number(key)] = PUNCTUATION; - IS_PUNCTUATION[Number(key)] = PUNCTUATION; -}, SYMBOL_CATEGORY); - -IS_PUNCTUATION[TokenType.HyphenMinus] = 0; -// whitespace is punctuator -IS_PUNCTUATION[SPACE] = PUNCTUATION; -IS_PUNCTUATION[TAB] = PUNCTUATION; -IS_PUNCTUATION[N] = PUNCTUATION; -IS_PUNCTUATION[R] = PUNCTUATION; -IS_PUNCTUATION[F] = PUNCTUATION; +[ + TYPE.ExclamationMark, // '!' + TYPE.QuotationMark, // '"' + TYPE.NumberSign, // '#' + TYPE.DollarSign, // '$' + TYPE.PercentSign, // '%' + TYPE.Ampersand, // '&' + TYPE.Apostrophe, // '\'' + TYPE.LeftParenthesis, // '(' + TYPE.RightParenthesis, // ')' + TYPE.Asterisk, // '*' + TYPE.PlusSign, // '+' + TYPE.Comma, // ',' + TYPE.HyphenMinus, // '-' + TYPE.FullStop, // '.' + TYPE.Solidus, // '/' + TYPE.Colon, // ':' + TYPE.Semicolon, // ';' + TYPE.LessThanSign, // '<' + TYPE.EqualsSign, // '=' + TYPE.GreaterThanSign, // '>' + TYPE.QuestionMark, // '?' 
+ TYPE.CommercialAt, // '@' + TYPE.LeftSquareBracket, // '[' + TYPE.RightSquareBracket, // ']' + TYPE.CircumflexAccent, // '^' + TYPE.LeftCurlyBracket, // '{' + TYPE.VerticalLine, // '|' + TYPE.RightCurlyBracket, // '}' + TYPE.Tilde // '~' +].forEach(function(key) { + SYMBOL_TYPE[Number(key)] = PUNCTUATOR; + PUNCTUATION[Number(key)] = PUNCTUATOR; +}, SYMBOL_TYPE); for (var i = 48; i <= 57; i++) { - SYMBOL_CATEGORY[i] = NUMBER; + SYMBOL_TYPE[i] = NUMBER; } -SYMBOL_CATEGORY[SPACE] = WHITESPACE; -SYMBOL_CATEGORY[TAB] = WHITESPACE; -SYMBOL_CATEGORY[N] = WHITESPACE; -SYMBOL_CATEGORY[R] = WHITESPACE; -SYMBOL_CATEGORY[F] = WHITESPACE; +SYMBOL_TYPE[SPACE] = WHITESPACE; +SYMBOL_TYPE[TAB] = WHITESPACE; +SYMBOL_TYPE[N] = WHITESPACE; +SYMBOL_TYPE[R] = WHITESPACE; +SYMBOL_TYPE[F] = WHITESPACE; + +SYMBOL_TYPE[TYPE.Apostrophe] = STRING; +SYMBOL_TYPE[TYPE.QuotationMark] = STRING; -SYMBOL_CATEGORY[TokenType.Apostrophe] = STRING; -SYMBOL_CATEGORY[TokenType.QuotationMark] = STRING; +// whitespace is punctuation ... +PUNCTUATION[SPACE] = PUNCTUATOR; +PUNCTUATION[TAB] = PUNCTUATOR; +PUNCTUATION[N] = PUNCTUATOR; +PUNCTUATION[R] = PUNCTUATOR; +PUNCTUATION[F] = PUNCTUATOR; +// ... 
hyphen minus is not +PUNCTUATION[TYPE.HyphenMinus] = 0; module.exports = { - TokenType: TokenType, - TokenName: TokenName, + TYPE: TYPE, + NAME: NAME, - SYMBOL_CATEGORY: SYMBOL_CATEGORY, - IS_PUNCTUATION: IS_PUNCTUATION + SYMBOL_TYPE: SYMBOL_TYPE, + PUNCTUATION: PUNCTUATION }; diff --git a/lib/parser/index.js b/lib/parser/index.js index 55e8fcfd..90dede54 100644 --- a/lib/parser/index.js +++ b/lib/parser/index.js @@ -1,49 +1,49 @@ 'use strict'; -var TokenType = require('./const').TokenType; -var Scanner = require('./scanner'); var List = require('../utils/list'); -var cmpChar = require('./utils').cmpChar; -var cmpStr = require('./utils').cmpStr; -var endsWith = require('./utils').endsWith; -var isHex = require('./utils').isHex; +var Scanner = require('./scanner'); + +var scanner = new Scanner(); +var cmpChar = Scanner.cmpChar; +var cmpStr = Scanner.cmpStr; +var endsWith = Scanner.endsWith; +var isHex = Scanner.isHex; var needPositions; var filename; -var scanner; var DESCENDANT_COMBINATOR = {}; var SPACE_NODE = { type: 'Space' }; -var WHITESPACE = TokenType.Whitespace; -var IDENTIFIER = TokenType.Identifier; -var NUMBER = TokenType.Number; -var STRING = TokenType.String; -var COMMENT = TokenType.Comment; -var EXCLAMATIONMARK = TokenType.ExclamationMark; -var NUMBERSIGN = TokenType.NumberSign; -var DOLLARSIGN = TokenType.DollarSign; -var PERCENTSIGN = TokenType.PercentSign; -var LEFTPARENTHESIS = TokenType.LeftParenthesis; -var RIGHTPARENTHESIS = TokenType.RightParenthesis; -var ASTERISK = TokenType.Asterisk; -var PLUSSIGN = TokenType.PlusSign; -var COMMA = TokenType.Comma; -var HYPHENMINUS = TokenType.HyphenMinus; -var FULLSTOP = TokenType.FullStop; -var SOLIDUS = TokenType.Solidus; -var COLON = TokenType.Colon; -var SEMICOLON = TokenType.Semicolon; -var EQUALSSIGN = TokenType.EqualsSign; -var GREATERTHANSIGN = TokenType.GreaterThanSign; -var QUESTIONMARK = TokenType.QuestionMark; -var COMMERCIALAT = TokenType.CommercialAt; -var LEFTSQUAREBRACKET = 
TokenType.LeftSquareBracket; -var RIGHTSQUAREBRACKET = TokenType.RightSquareBracket; -var CIRCUMFLEXACCENT = TokenType.CircumflexAccent; -var LEFTCURLYBRACKET = TokenType.LeftCurlyBracket; -var VERTICALLINE = TokenType.VerticalLine; -var RIGHTCURLYBRACKET = TokenType.RightCurlyBracket; -var TILDE = TokenType.Tilde; +var WHITESPACE = Scanner.TYPE.Whitespace; +var IDENTIFIER = Scanner.TYPE.Identifier; +var NUMBER = Scanner.TYPE.Number; +var STRING = Scanner.TYPE.String; +var COMMENT = Scanner.TYPE.Comment; +var EXCLAMATIONMARK = Scanner.TYPE.ExclamationMark; +var NUMBERSIGN = Scanner.TYPE.NumberSign; +var DOLLARSIGN = Scanner.TYPE.DollarSign; +var PERCENTSIGN = Scanner.TYPE.PercentSign; +var LEFTPARENTHESIS = Scanner.TYPE.LeftParenthesis; +var RIGHTPARENTHESIS = Scanner.TYPE.RightParenthesis; +var ASTERISK = Scanner.TYPE.Asterisk; +var PLUSSIGN = Scanner.TYPE.PlusSign; +var COMMA = Scanner.TYPE.Comma; +var HYPHENMINUS = Scanner.TYPE.HyphenMinus; +var FULLSTOP = Scanner.TYPE.FullStop; +var SOLIDUS = Scanner.TYPE.Solidus; +var COLON = Scanner.TYPE.Colon; +var SEMICOLON = Scanner.TYPE.Semicolon; +var EQUALSSIGN = Scanner.TYPE.EqualsSign; +var GREATERTHANSIGN = Scanner.TYPE.GreaterThanSign; +var QUESTIONMARK = Scanner.TYPE.QuestionMark; +var COMMERCIALAT = Scanner.TYPE.CommercialAt; +var LEFTSQUAREBRACKET = Scanner.TYPE.LeftSquareBracket; +var RIGHTSQUAREBRACKET = Scanner.TYPE.RightSquareBracket; +var CIRCUMFLEXACCENT = Scanner.TYPE.CircumflexAccent; +var LEFTCURLYBRACKET = Scanner.TYPE.LeftCurlyBracket; +var VERTICALLINE = Scanner.TYPE.VerticalLine; +var RIGHTCURLYBRACKET = Scanner.TYPE.RightCurlyBracket; +var TILDE = Scanner.TYPE.Tilde; var N = 110; // 'n'.charCodeAt(0) var SCOPE_ATRULE_EXPRESSION = { @@ -59,7 +59,7 @@ var SCOPE_VALUE = { var: getVarFunction }; -var initialContext = { +var CONTEXT = { stylesheet: getStylesheet, atrule: getAtrule, atruleExpression: getAtruleExpression, @@ -1729,20 +1729,18 @@ function parse(source, options) { needPositions = 
Boolean(options.positions); filename = options.filename || ''; - if (!initialContext.hasOwnProperty(context)) { + if (!CONTEXT.hasOwnProperty(context)) { throw new Error('Unknown context `' + context + '`'); } - scanner = new Scanner(source, options.line, options.column); + scanner.setSource(source, options.line, options.column); if (context === 'value') { ast = getValue(false, options.property ? String(options.property) : null); } else { - ast = initialContext[context](); + ast = CONTEXT[context](); } - scanner = null; - // console.log(JSON.stringify(ast, null, 4)); return ast; }; diff --git a/lib/parser/scanner.js b/lib/parser/scanner.js index 2cd5f259..c193982f 100644 --- a/lib/parser/scanner.js +++ b/lib/parser/scanner.js @@ -1,34 +1,40 @@ 'use strict'; var CssSyntaxError = require('./error'); -var TokenType = require('./const').TokenType; -var TokenName = require('./const').TokenName; -var IS_PUNCTUATION = require('./const').IS_PUNCTUATION; -var SYMBOL_CATEGORY = require('./const').SYMBOL_CATEGORY; -var SYMBOL_CATEGORY_LENGTH = SYMBOL_CATEGORY.length; -var cmpStr = require('./utils').cmpStr; -var isHex = require('./utils').isHex; + +var constants = require('./const'); +var TYPE = constants.TYPE; +var NAME = constants.NAME; +var SYMBOL_TYPE = constants.SYMBOL_TYPE; +var SYMBOL_TYPE_LENGTH = SYMBOL_TYPE.length; + +var utils = require('./utils'); +var firstCharOffset = utils.firstCharOffset; +var cmpStr = utils.cmpStr; +var isNumber = utils.isNumber; +var findLastNonSpaceLocation = utils.findLastNonSpaceLocation; +var findWhitespaceEnd = utils.findWhitespaceEnd; +var findCommentEnd = utils.findCommentEnd; +var findStringEnd = utils.findStringEnd; +var findNumberEnd = utils.findNumberEnd; +var findIdentifierEnd = utils.findIdentifierEnd; var NULL = 0; -var WHITESPACE = TokenType.Whitespace; -var IDENTIFIER = TokenType.Identifier; -var NUMBER = TokenType.Number; -var STRING = TokenType.String; -var COMMENT = TokenType.Comment; -var PUNCTUATION = 
TokenType.Punctuation; - -var TAB = 9; +var WHITESPACE = TYPE.Whitespace; +var IDENTIFIER = TYPE.Identifier; +var NUMBER = TYPE.Number; +var STRING = TYPE.String; +var COMMENT = TYPE.Comment; +var PUNCTUATOR = TYPE.Punctuator; + var N = 10; var F = 12; var R = 13; -var SPACE = 32; var STAR = 42; var SLASH = 47; -var BACK_SLASH = 92; -var FULLSTOP = TokenType.FullStop; -var PLUSSIGN = TokenType.PlusSign; -var HYPHENMINUS = TokenType.HyphenMinus; -var E = 101; // 'e'.charCodeAt(0) +var FULLSTOP = TYPE.FullStop; +var PLUSSIGN = TYPE.PlusSign; +var HYPHENMINUS = TYPE.HyphenMinus; var MIN_BUFFER_SIZE = 16 * 1024; var OFFSET_MASK = 0x00FFFFFF; @@ -36,20 +42,10 @@ var TYPE_OFFSET = 24; var SafeUint32Array = typeof Uint32Array !== 'undefined' ? Uint32Array : Array; // fallback on Array when TypedArray is not supported var lastIndexOf = Array.prototype.lastIndexOf; // some browser implementations have no TypedArray#lastIndexOf -var offsetAndType = new SafeUint32Array(MIN_BUFFER_SIZE); -var lines = null; - -function firstCharOffset(source) { - return source.charCodeAt(0) === 0xFEFF ? 
1 : 0; -} - -function isNumber(code) { - return code >= 48 && code <= 57; -} - function computeLines(scanner, source) { var sourceLength = source.length; var start = firstCharOffset(source); + var lines = scanner.lines; var line = scanner.initLine; if (lines === null || lines.length < sourceLength + 1) { @@ -72,174 +68,31 @@ function computeLines(scanner, source) { lines[i] = line; - return lines; -} - -function findLastNonSpaceLocation(scanner) { - for (var i = scanner.source.length - 1; i >= 0; i--) { - var code = scanner.source.charCodeAt(i); - - if (code !== SPACE && code !== TAB && code !== R && code !== N && code !== F) { - break; - } - } - - return scanner.getLocation(i + 1); -}; - -function isNewline(source, offset, code) { - if (code === N || code === F || code === R) { - if (code === R && offset + 1 < source.length && source.charCodeAt(offset + 1) === N) { - return 2; - } - - return 1; - } - - return 0; -} - -function findWhitespaceEnd(source, offset) { - for (; offset < source.length; offset++) { - var code = source.charCodeAt(offset); - - if (code !== SPACE && code !== TAB && code !== R && code !== N && code !== F) { - break; - } - } - - return offset; -} - -function findCommentEnd(source, offset) { - var commentEnd = source.indexOf('*/', offset); - - if (commentEnd === -1) { - return source.length; - } - - return commentEnd + 2; -} - -function findStringEnd(source, offset, quote) { - for (; offset < source.length; offset++) { - var code = source.charCodeAt(offset); - - // TODO: bad string - if (code === BACK_SLASH) { - offset++; - } else if (code === quote) { - offset++; - break; - } - } - - return offset; -} - -function findDecimalNumberEnd(source, offset) { - for (; offset < source.length; offset++) { - var code = source.charCodeAt(offset); - - if (code < 48 || code > 57) { // not a 0 .. 
9 - break; - } - } - - return offset; -} - -function findNumberEnd(source, offset, allowFraction) { - var code; - - offset = findDecimalNumberEnd(source, offset); - - // fraction: .\d+ - if (allowFraction && offset + 1 < source.length && source.charCodeAt(offset) === FULLSTOP) { - code = source.charCodeAt(offset + 1); - - if (isNumber(code)) { - offset = findDecimalNumberEnd(source, offset + 1); - } - } - - // exponent: e[+-]\d+ - if (offset + 1 < source.length) { - if ((source.charCodeAt(offset) | 32) === E) { // case insensitive check for `e` - code = source.charCodeAt(offset + 1); - - if (code === PLUSSIGN || code === HYPHENMINUS) { - if (offset + 2 < source.length) { - code = source.charCodeAt(offset + 2); - } - } - - if (isNumber(code)) { - offset = findDecimalNumberEnd(source, offset + 2); - } - } - } - - return offset; -} - -// skip escaped unicode sequence that can ends with space -// [0-9a-f]{1,6}(\r\n|[ \n\r\t\f])? -function findEscaseEnd(source, offset) { - for (var i = 0; i < 7 && offset + i < source.length; i++) { - var code = source.charCodeAt(offset + i); - - if (i !== 6 && isHex(code)) { - continue; - } - - if (i > 0) { - offset += i - 1 + isNewline(source, offset + i, code); - if (code === SPACE || code === TAB) { - offset++; - } - } - - break; - } - - return offset; -} - -function findIdentifierEnd(source, offset) { - for (; offset < source.length; offset++) { - var code = source.charCodeAt(offset); - - if (code === BACK_SLASH) { - offset = findEscaseEnd(source, offset + 1); - } else if (code < SYMBOL_CATEGORY_LENGTH && IS_PUNCTUATION[code] === PUNCTUATION) { - break; - } - } - - return offset; + scanner.lineComputed = true; + scanner.lines = lines; } function tokenLayout(scanner, source, startPos) { var sourceLength = source.length; + var offsetAndType = scanner.offsetAndType; var tokenCount = 0; var prevType = 0; var offset = startPos; - if (offsetAndType.length < sourceLength + 1) { + if (offsetAndType === null || offsetAndType.length < 
sourceLength + 1) { offsetAndType = new SafeUint32Array(sourceLength + 1024); } while (offset < sourceLength) { var code = source.charCodeAt(offset); - var type = code < SYMBOL_CATEGORY_LENGTH ? SYMBOL_CATEGORY[code] : IDENTIFIER; + var type = code < SYMBOL_TYPE_LENGTH ? SYMBOL_TYPE[code] : IDENTIFIER; switch (type) { case WHITESPACE: offset = findWhitespaceEnd(source, offset + 1); break; - case PUNCTUATION: + case PUNCTUATOR: if (code === STAR && prevType === SLASH) { // /* type = COMMENT; offset = findCommentEnd(source, offset + 1); @@ -293,26 +146,33 @@ function tokenLayout(scanner, source, startPos) { // var Scanner = function(source, initLine, initColumn) { - var start = firstCharOffset(source); - - this.source = source; - this.initLine = typeof initLine === 'undefined' ? 1 : initLine; - this.initColumn = (typeof initColumn === 'undefined' ? 1 : initColumn) - start; - this.lastLocationLine = this.initLine; - this.lastLocationLineOffset = 1 - this.initColumn; + this.offsetAndType = null; this.lines = null; - this.eof = false; - this.currentToken = -1; - this.tokenType = 0; - this.tokenStart = start; - this.tokenEnd = start; - - tokenLayout(this, source, start); - this.next(); + this.setSource(source || '', initLine, initColumn); }; Scanner.prototype = { + setSource: function(source, initLine, initColumn) { + var start = firstCharOffset(source); + + this.source = source; + this.initLine = typeof initLine === 'undefined' ? 1 : initLine; + this.initColumn = (typeof initColumn === 'undefined' ? 
1 : initColumn) - start; + this.lastLocationLine = this.initLine; + this.lastLocationLineOffset = 1 - this.initColumn; + this.lineComputed = false; + + this.eof = false; + this.currentToken = -1; + this.tokenType = 0; + this.tokenStart = start; + this.tokenEnd = start; + + tokenLayout(this, source, start); + this.next(); + }, + lookupType: function(offset) { offset += this.currentToken; @@ -377,7 +237,7 @@ Scanner.prototype = { eat: function(tokenType) { if (this.tokenType !== tokenType) { - this.error(TokenName[tokenType] + ' is expected'); + this.error(NAME[tokenType] + ' is expected'); } this.next(); @@ -391,8 +251,8 @@ Scanner.prototype = { }, getLocation: function(offset, source) { - if (this.lines === null) { - this.lines = computeLines(this, this.source); + if (!this.lineComputed) { + computeLines(this, this.source); } var line = this.lines[offset]; @@ -441,11 +301,21 @@ Scanner.prototype = { getTypes: function() { return Array.prototype.slice.call(this.offsetAndType, 0, this.tokenCount).map(function(item) { - return TokenName[item >> TYPE_OFFSET]; + return NAME[item >> TYPE_OFFSET]; }); } }; +// extend scanner with constants +Object.keys(constants).forEach(function(key) { + Scanner[key] = constants[key]; +}); + +// extend scanner with static methods from utils +Object.keys(utils).forEach(function(key) { + Scanner[key] = utils[key]; +}); + // warm up tokenizer to elimitate code branches that never execute // fix soft deoptimizations (insufficient type feedback) new Scanner('\n\r\r\n\f//""\'\'/*\r\n\f*/1a;.\\31\t\+2{url(a);+1.2e3 -.4e-5 .6e+7}'); diff --git a/lib/parser/utils.js b/lib/parser/utils.js index dde2c7a5..9e8bf296 100644 --- a/lib/parser/utils.js +++ b/lib/parser/utils.js @@ -1,9 +1,46 @@ +var constants = require('./const'); +var PUNCTUATION = constants.PUNCTUATION; +var SYMBOL_TYPE = constants.SYMBOL_TYPE; +var SYMBOL_TYPE_LENGTH = SYMBOL_TYPE.length; +var TYPE = constants.TYPE; +var FULLSTOP = TYPE.FullStop; +var PLUSSIGN = TYPE.PlusSign; +var 
HYPHENMINUS = TYPE.HyphenMinus; +var PUNCTUATOR = TYPE.Punctuator; +var TAB = 9; +var N = 10; +var F = 12; +var R = 13; +var SPACE = 32; +var BACK_SLASH = 92; +var E = 101; // 'e'.charCodeAt(0) + +function firstCharOffset(source) { + return source.charCodeAt(0) === 0xFEFF ? 1 : 0; +} + function isHex(code) { return (code >= 48 && code <= 57) || // 0 .. 9 (code >= 65 && code <= 70) || // A .. F (code >= 97 && code <= 102); // a .. f } +function isNumber(code) { + return code >= 48 && code <= 57; +} + +function isNewline(source, offset, code) { + if (code === N || code === F || code === R) { + if (code === R && offset + 1 < source.length && source.charCodeAt(offset + 1) === N) { + return 2; + } + + return 1; + } + + return 0; +} + function cmpChar(testStr, offset, referenceCode) { var code = testStr.charCodeAt(offset); @@ -45,9 +82,156 @@ function endsWith(testStr, referenceStr) { return cmpStr(testStr, testStr.length - referenceStr.length, testStr.length, referenceStr); } +function findLastNonSpaceLocation(scanner) { + for (var i = scanner.source.length - 1; i >= 0; i--) { + var code = scanner.source.charCodeAt(i); + + if (code !== SPACE && code !== TAB && code !== R && code !== N && code !== F) { + break; + } + } + + return scanner.getLocation(i + 1); +} + +function findWhitespaceEnd(source, offset) { + for (; offset < source.length; offset++) { + var code = source.charCodeAt(offset); + + if (code !== SPACE && code !== TAB && code !== R && code !== N && code !== F) { + break; + } + } + + return offset; +} + +function findCommentEnd(source, offset) { + var commentEnd = source.indexOf('*/', offset); + + if (commentEnd === -1) { + return source.length; + } + + return commentEnd + 2; +} + +function findStringEnd(source, offset, quote) { + for (; offset < source.length; offset++) { + var code = source.charCodeAt(offset); + + // TODO: bad string + if (code === BACK_SLASH) { + offset++; + } else if (code === quote) { + offset++; + break; + } + } + + return offset; +} + 
+function findDecimalNumberEnd(source, offset) { + for (; offset < source.length; offset++) { + var code = source.charCodeAt(offset); + + if (code < 48 || code > 57) { // not a 0 .. 9 + break; + } + } + + return offset; +} + +function findNumberEnd(source, offset, allowFraction) { + var code; + + offset = findDecimalNumberEnd(source, offset); + + // fraction: .\d+ + if (allowFraction && offset + 1 < source.length && source.charCodeAt(offset) === FULLSTOP) { + code = source.charCodeAt(offset + 1); + + if (isNumber(code)) { + offset = findDecimalNumberEnd(source, offset + 1); + } + } + + // exponent: e[+-]\d+ + if (offset + 1 < source.length) { + if ((source.charCodeAt(offset) | 32) === E) { // case insensitive check for `e` + code = source.charCodeAt(offset + 1); + + if (code === PLUSSIGN || code === HYPHENMINUS) { + if (offset + 2 < source.length) { + code = source.charCodeAt(offset + 2); + } + } + + if (isNumber(code)) { + offset = findDecimalNumberEnd(source, offset + 2); + } + } + } + + return offset; +} + +// skip an escaped unicode sequence that can end with a whitespace +// [0-9a-f]{1,6}(\r\n|[ \n\r\t\f])? 
+function findEscaseEnd(source, offset) { + for (var i = 0; i < 7 && offset + i < source.length; i++) { + var code = source.charCodeAt(offset + i); + + if (i !== 6 && isHex(code)) { + continue; + } + + if (i > 0) { + offset += i - 1 + isNewline(source, offset + i, code); + if (code === SPACE || code === TAB) { + offset++; + } + } + + break; + } + + return offset; +} + +function findIdentifierEnd(source, offset) { + for (; offset < source.length; offset++) { + var code = source.charCodeAt(offset); + + if (code === BACK_SLASH) { + offset = findEscaseEnd(source, offset + 1); + } else if (code < SYMBOL_TYPE_LENGTH && PUNCTUATION[code] === PUNCTUATOR) { + break; + } + } + + return offset; +} + module.exports = { + firstCharOffset: firstCharOffset, + isHex: isHex, + isNumber: isNumber, + isNewline: isNewline, + cmpChar: cmpChar, cmpStr: cmpStr, - endsWith: endsWith + endsWith: endsWith, + + findLastNonSpaceLocation: findLastNonSpaceLocation, + findWhitespaceEnd: findWhitespaceEnd, + findCommentEnd: findCommentEnd, + findStringEnd: findStringEnd, + findDecimalNumberEnd: findDecimalNumberEnd, + findNumberEnd: findNumberEnd, + findEscaseEnd: findEscaseEnd, + findIdentifierEnd: findIdentifierEnd }; diff --git a/test/scanner.js b/test/scanner.js index 3aa0c752..9e26f977 100644 --- a/test/scanner.js +++ b/test/scanner.js @@ -1,6 +1,6 @@ var assert = require('assert'); var Scanner = require('../lib/parser/scanner'); -var TokenName = require('../lib/parser/const').TokenName; +var TokenName = require('../lib/parser/const').NAME; describe('parser/scanner', function() { var css = '.test\n{\n prop: url(foo/bar.jpg);\n}';