Skip to content
Browse files

Revisit the parser choice in the speed comparison page.

Drop ZeParser and Narcissus, add Acorn.

http://code.google.com/p/esprima/issues/detail?id=351
  • Loading branch information...
1 parent 31515e2 commit 397e85bd700cbb8cb88f025590519d7b1a5fd871 @ariya committed Oct 4, 2012
Showing with 1,564 additions and 6,308 deletions.
  1. +0 −855 test/3rdparty/Tokenizer.js
  2. +0 −2,187 test/3rdparty/ZeParser.js
  3. +1,555 −0 test/3rdparty/acorn.js
  4. +0 −731 test/3rdparty/jsdefs.js
  5. +0 −588 test/3rdparty/jslex.js
  6. +0 −1,921 test/3rdparty/jsparse.js
  7. +5 −8 test/compare.html
  8. +4 −18 test/compare.js
View
855 test/3rdparty/Tokenizer.js
@@ -1,855 +0,0 @@
-if (typeof exports !== 'undefined') { // an `exports` object exists (CommonJS-style loader)
- var window = {Unicode: require('./unicodecategories').Unicode}; // stand-in `window` so the `window.Unicode` reads further down resolve
- exports.Tokenizer = Tokenizer; // the function declaration below is hoisted, so it can be exported here
-}
-
-/*!
- * Tokenizer for JavaScript / ECMAScript 5
- * (c) Peter van der Zee, qfox.nl
- */
-
-/**
- * @param {string} inp Source text to tokenize (a falsy value is treated as the empty string)
- * @param {Object} [options] Optional settings; only `tagLiterals` is read here
- * @property {boolean} [options.tagLiterals] Instructs the tokenizer to also parse tag literals
- */
-function Tokenizer(inp, options){ // constructor: initialises the scan state and copies the shared Tokenizer.* tables onto the instance
- this.inp = inp||'';
- // replace all other line terminators with \n (leave \r\n intact though). we should probably remove the shadowInp when finished...
- // only replace \r if it is not followed by a \n else \r\n would become \n\n causing a double newline where it is just a single
- this.shadowInp = (inp||'').replace(Tokenizer.regexNormalizeNewlines, '\n'); // NOTE(review): that regex has no `g` flag and its first alternative also consumes the char after \r — confirm shadowInp stays index-aligned with inp
- this.pos = 0; // offset of the next unread character in inp
- this.line = 0;
- this.column = 0;
- this.cache = {};
-
- this.errorStack = []; // error tokens are pushed here as they are produced
-
- this.wtree = []; // every token, whitespace and comments included
- this.btree = []; // only tokens that are not flagged isWhite
-
-// this.regexWhiteSpace = Tokenizer.regexWhiteSpace;
- this.regexLineTerminator = Tokenizer.regexLineTerminator; // used in fallback
- this.regexAsciiIdentifier = Tokenizer.regexAsciiIdentifier;
- this.hashAsciiIdentifier = Tokenizer.hashAsciiIdentifier;
-// this.regexHex = Tokenizer.regexHex;
- this.hashHex = Tokenizer.hashHex
- this.regexUnicodeEscape = Tokenizer.regexUnicodeEscape;
- this.regexIdentifierStop = Tokenizer.regexIdentifierStop;
- this.hashIdentifierStop = Tokenizer.hashIdentifierStop;
-// this.regexPunctuators = Tokenizer.regexPunctuators;
- this.regexNumber = Tokenizer.regexNumber;
- this.regexNewline = Tokenizer.regexNewline;
-
- this.regexBig = Tokenizer.regexBig; // the classifier regex applied to a 4-char slice for every token
- this.regexBigAlt = Tokenizer.regexBigAlt; // only referenced from commented-out code in the scanner
-
- // stuff for parsing tag literals
- this.regexTagName = Tokenizer.regexTagName;
- this.regexTagAttributes = Tokenizer.regexTagAttributes;
- this.regexTagUnarySuffix = Tokenizer.regexTagUnarySuffix;
- this.regexTagBinarySuffix = Tokenizer.regexTagBinarySuffix;
- this.regexTagBody = Tokenizer.regexTagBody;
- this.regexTagOpenOrClose = Tokenizer.regexTagOpenOrClose;
- this.regexTagClose = Tokenizer.regexTagClose;
- this.regexRemoveEscape = Tokenizer.regexRemoveEscape;
-
- this.tokenCount = 0;
- this.tokenCountNoWhite = 0;
-
- this.Unicode = window.Unicode; // optional unicode category tables; the identifier scanner tests for their presence before using them
-
- // if the Parser throws an error. it will set this property to the next match
- // at the time of the error (which was not what it was expecting at that point)
- // and pass on an "error" match. the error should be scooped on the stack and
- // this property should be returned, without looking at the input...
- this.errorEscape = null;
-
- // support tag literals
- this.tagLiterals = false || (options && options.tagLiterals); // NOTE(review): evaluates to undefined (not false) when options is omitted
-};
-
-Tokenizer.prototype = {
- inp:null, // prototype defaults; the constructor assigns the per-instance values
- shadowInp:null,
- pos:null,
- line:null,
- column:null,
- cache:null,
- errorStack:null,
-
- wtree: null, // contains whitespace (spaces, comments, newlines)
- btree: null, // does not contain any whitespace tokens.
-
- regexLineTerminator:null,
- regexAsciiIdentifier:null,
- hashAsciiIdentifier:null,
- hashHex:null,
- regexUnicodeEscape:null,
- regexIdentifierStop:null,
- hashIdentifierStop:null,
- regexNumber:null,
- regexNewline:null,
- regexBig:null,
- regexBigAlt:null,
- tokenCount:null,
- tokenCountNoWhite:null,
- // NOTE(review): the constructor also sets regexTag*, regexRemoveEscape, errorEscape and tagLiterals, which have no default listed here
- Unicode:null,
-
- // storeCurrentAndFetchNextToken(bool, false, false, true) to get just one token (no stack, nothing stored); returns the next token object
- storeCurrentAndFetchNextToken: function(noRegex, returnValue, stack, _dontStore){ // noRegex: a leading / is not scanned as a regex (or < as a tag); returnValue: previous token, pushed onto stack unless _dontStore
- var regex = !noRegex; // TOFIX :)
- var pos = this.pos;
- var inp = this.inp;
- var shadowInp = this.shadowInp;
- var matchedNewline = false;
- do { // repeats while a stack was given and the token just read is flagged isWhite (see the loop condition at the bottom)
- if (!_dontStore) {
- ++this.tokenCount;
- stack.push(returnValue);
- // did the parent Parser throw up?
- if (this.errorEscape) {
- returnValue = this.errorEscape;
- this.errorEscape = null;
- return returnValue;
- }
- }
- _dontStore = false; // only the first iteration can skip the store step
-
- if (pos >= inp.length) {
- returnValue = {start:inp.length,stop:inp.length,name:12/*EOF*/};
- break;
- }
- var returnValue = null; // same binding as the parameter (var redeclaration); cleared before scanning the next token
-
- var start = pos;
- var chr = inp[pos];
-
- // regexBig groups: 1 ws 2 lt 3 scmt 4 mcmt 5/6 str 7 nr 8 rx 9 dom (<) 10 punc
- //if (true) {
- // substring method (I think this is faster..)
- var part2 = inp.substring(pos,pos+4);
- var part = this.regexBig.exec(part2);
- //} else {
- // // non-substring method (lastIndex)
- // // this method does not need a substring to apply it
- // this.regexBigAlt.lastIndex = pos;
- // var part = this.regexBigAlt.exec(inp);
- //}
-
- if (part[1]) { //this.regexWhiteSpace.test(chr)) { // SP, TAB, VT, FF, NBSP, BOM (, TOFIX: USP)
- ++pos;
- returnValue = {start:start,stop:pos,name:9/*WHITE_SPACE*/,line:this.line,col:this.column,isWhite:true};
- ++this.column;
- } else if (part[2]) { //this.regexLineTerminator.test(chr)) { // LF, CR, LS, PS
- var end = pos+1;
- if (chr=='\r' && inp[pos+1] == '\n') ++end; // support crlf=>lf
- returnValue = {start:pos,stop:end,name:10/*LINETERMINATOR*/,line:this.line,col:this.column,isWhite:true};
- pos = end;
- // mark newlines for ASI
- matchedNewline = true;
- ++this.line;
- this.column = 0;
- returnValue.hasNewline = 1;
- } else if (part[3]) { //chr == '/' && inp[pos+1] == '/') {
- pos = shadowInp.indexOf('\n',pos); // comment runs up to (not including) the next normalised newline
- if (pos == -1) pos = inp.length;
- returnValue = {start:start,stop:pos,name:7/*COMMENT_SINGLE*/,line:this.line,col:this.column,isComment:true,isWhite:true};
- this.column = returnValue.stop; // NOTE(review): stop is an absolute input offset, not a line-relative column — confirm intended
- } else if (part[4]) { //chr == '/' && inp[pos+1] == '*') {
- var newpos = inp.indexOf('*/',pos);
- if (newpos == -1) { // no closing */ anywhere: emit an error token ending at the next newline (or just past the /*)
- newpos = shadowInp.indexOf('\n', pos);
- if (newpos < 0) pos += 2;
- else pos = newpos;
- returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),line:this.line,col:this.column,isComment:true,isWhite:true,tokenError:true,error:Tokenizer.Error.UnterminatedMultiLineComment};
- this.errorStack.push(returnValue);
- } else {
- pos = newpos+2;
- returnValue = {start:start,stop:pos,name:8/*COMMENT_MULTI*/,value:inp.substring(start, pos),line:this.line,col:this.column,isComment:true,isWhite:true};
-
- // multi line comments are also reason for asi, but only if they contain at least one newline (use shadow input, because all line terminators would be valid...)
- var shadowValue = shadowInp.substring(start, pos);
- var i = 0, hasNewline = 0;
- while (i < (i = shadowValue.indexOf('\n', i+1))) { // counts newlines; stops once indexOf returns -1 (no longer greater than the previous i)
- ++hasNewline;
- }
- if (hasNewline) {
- matchedNewline = true;
- returnValue.hasNewline = hasNewline;
- this.line += hasNewline;
- this.column = 0;
- } else {
- this.column = returnValue.stop; // absolute offset again, as in the single-line comment branch
- }
- }
- } else if (part[5]) { //chr == "'") {
- // old method. NOTE(review): the errors in this single-quote branch use the UnterminatedDoubleString* constants, while the double-quote branch below uses UnterminatedSingleString* — confirm
- //console.log("old method");
-
- var hasNewline = 0;
- do {
- // process escaped characters
- while (pos < inp.length && inp[++pos] == '\\') { // step one char; when it is a backslash, also skip the char it escapes
- if (shadowInp[pos+1] == '\n') ++hasNewline; // backslash-newline (line continuation) is counted
- ++pos;
- }
- if (this.regexLineTerminator.test(inp[pos])) {
- returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),isString:true,tokenError:true,error:Tokenizer.Error.UnterminatedDoubleStringNewline};
- this.errorStack.push(returnValue);
- break;
- }
- } while (pos < inp.length && inp[pos] != "'");
- if (returnValue) {} // error
- else if (inp[pos] != "'") {
- returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),isString:true,tokenError:true,error:Tokenizer.Error.UnterminatedDoubleStringOther};
- this.errorStack.push(returnValue);
- } else {
- ++pos;
- returnValue = {start:start,stop:pos,name:5/*STRING_SINGLE*/,isPrimitive:true,isString:true};
- if (hasNewline) {
- returnValue.hasNewline = hasNewline;
- this.line += hasNewline;
- this.column = 0;
- } else {
- this.column += (pos-start);
- }
- }
- } else if (part[6]) { //chr == '"') {
- var hasNewline = 0;
- // TODO: something like this: var regexmatch = /([^\']|$)+/.match();
- do {
- // process escaped chars
- while (pos < inp.length && inp[++pos] == '\\') {
- if (shadowInp[pos+1] == '\n') ++hasNewline;
- ++pos;
- }
- if (this.regexLineTerminator.test(inp[pos])) {
- returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),isString:true,tokenError:true,error:Tokenizer.Error.UnterminatedSingleStringNewline};
- this.errorStack.push(returnValue);
- break;
- }
- } while (pos < inp.length && inp[pos] != '"');
- if (returnValue) {}
- else if (inp[pos] != '"') {
- returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),isString:true,tokenError:true,error:Tokenizer.Error.UnterminatedSingleStringOther};
- this.errorStack.push(returnValue);
- } else {
- ++pos;
- returnValue = {start:start,stop:pos,name:6/*STRING_DOUBLE*/,isPrimitive:true,isString:true};
- if (hasNewline) {
- returnValue.hasNewline = hasNewline;
- this.line += hasNewline;
- this.column = 0;
- } else {
- this.column += (pos-start);
- }
- }
- } else if (part[7]) { //(chr >= '0' && chr <= '9') || (chr == '.' && inp[pos+1] >= '0' && inp[pos+1] <= '9')) {
- var nextPart = inp.substring(pos, pos+30); // the number regex only ever sees this 30-character slice
- var match = nextPart.match(this.regexNumber);
- if (match[2]) { // decimal
- var value = match[2];
- var parsingOctal = value[0] == '0' && value[1] && value[1] != 'e' && value[1] != 'E' && value[1] != '.'; // leading 0 followed by anything other than e, E or .
- if (parsingOctal) {
- returnValue = {start:start,stop:pos,name:14/*error*/,isNumber:true,isOctal:true,tokenError:true,error:Tokenizer.Error.IllegalOctalEscape,value:value}; // NOTE(review): pos has not advanced yet, so stop == start on this error token — confirm
- this.errorStack.push(returnValue);
- } else {
- returnValue = {start:start,stop:start+value.length,name:4/*NUMERIC_DEC*/,isPrimitive:true,isNumber:true,value:value};
- }
- } else if (match[1]) { // hex
- var value = match[1];
- returnValue = {start:start,stop:start+value.length,name:3/*NUMERIC_HEX*/,isPrimitive:true,isNumber:true,value:value};
- } else {
- throw 'unexpected parser errror... regex fail :(';
- }
-
- if (value.length < 300) { // NOTE(review): value was matched inside a 30-character slice, so this looks always true and the fallback below unreachable — confirm
- pos += value.length;
- } else {
- // old method of parsing numbers. only used for extremely long number literals (300+ chars).
- // this method does not require substringing... just memory :)
- var tmpReturnValue = this.oldNumberParser(pos, chr, inp, returnValue, start, Tokenizer);
- pos = tmpReturnValue[0];
- returnValue = tmpReturnValue[1];
- }
- } else if (regex && part[8]) { //chr == '/') { // regex cannot start with /* (would be multiline comment, and not make sense anyways). but if it was /* then an earlier if would have eaten it. so we only check for /
- var twinfo = []; // matching {[( info
- var found = false;
- var parens = [];
- var nonLethalError = null; // remembered while scanning continues; reported only after the closing / and flags are consumed
- while (++pos < inp.length) {
- chr = shadowInp[pos]; // read from the newline-normalised copy so the check below only needs to test for \n
- // parse RegularExpressionChar
- if (chr == '\n') {
- returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,errorHasContent:true,error:Tokenizer.Error.UnterminatedRegularExpressionNewline};
- this.errorStack.push(returnValue);
- break; // fail
- } else if (chr == '/') {
- found = true;
- break;
- } else if (chr == '?' || chr == '*' || chr == '+') {
- nonLethalError = Tokenizer.Error.NothingToRepeat;
- } else if (chr == '^') {
- if (
- inp[pos-1] != '/' &&
- inp[pos-1] != '|' &&
- inp[pos-1] != '(' &&
- !(inp[pos-3] == '(' && inp[pos-2] == '?' && (inp[pos-1] == ':' || inp[pos-1] == '!' || inp[pos-1] == '='))
- ) {
- nonLethalError = Tokenizer.Error.StartOfMatchShouldBeAtStart;
- }
- } else if (chr == '$') {
- if (inp[pos+1] != '/' && inp[pos+1] != '|' && inp[pos+1] != ')') nonLethalError = Tokenizer.Error.DollarShouldBeEnd;
- } else if (chr == '}') {
- nonLethalError = Tokenizer.Error.MissingOpeningCurly;
- } else { // it's a "character" (can be group or class), something to match
- // match parenthesis
- if (chr == '(') {
- parens.push(pos-start);
- } else if (chr == ')') {
- if (parens.length == 0) {
- nonLethalError = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.RegexNoOpenGroups}; // NOTE(review): a whole token object is stored here, whereas every other assignment stores a Tokenizer.Error entry — confirm
- } else {
- var twin = parens.pop();
- var now = pos-start;
- twinfo[twin] = now; // twinfo maps each bracket's token-relative offset to the offset of its partner
- twinfo[now] = twin;
- }
- }
- // first process character class
- if (chr == '[') {
- var before = pos-start;
- while (++pos < inp.length && shadowInp[pos] != '\n' && inp[pos] != ']') {
- // only newline is not allowed in class range
- // anything else can be escaped, most of it does not have to be escaped...
- if (inp[pos] == '\\') {
- if (shadowInp[pos+1] == '\n') break;
- else ++pos; // skip next char. (mainly prohibits ] to be picked up as closing the group...)
- }
- }
- if (inp[pos] != ']') {
- returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.ClosingClassRangeNotFound};
- this.errorStack.push(returnValue);
- break;
- } else {
- var after = pos-start;
- twinfo[before] = after;
- twinfo[after] = before;
- }
- } else if (chr == '\\' && shadowInp[pos+1] != '\n') {
- // is ok anywhere in the regex (match next char literally, regardless of its otherwise special meaning)
- ++pos;
- }
-
- // now process repeaters (+, ? and *)
-
- // non-collecting group (?:...) and positive (?=...) or negative (?!...) lookahead
- if (chr == '(') {
- if (inp[pos+1] == '?' && (inp[pos+2] == ':' || inp[pos+2] == '=' || inp[pos+2] == '!')) {
- pos += 2;
- }
- }
- // matching "char"
- else if (inp[pos+1] == '?') ++pos;
- else if (inp[pos+1] == '*' || inp[pos+1] == '+') {
- ++pos;
- if (inp[pos+1] == '?') ++pos; // non-greedy match
- } else if (inp[pos+1] == '{') {
- pos += 1;
- var before = pos-start;
- // quantifier:
- // - {n}
- // - {n,}
- // - {n,m}
- if (!/[0-9]/.test(inp[pos+1])) {
- nonLethalError = Tokenizer.Error.QuantifierRequiresNumber;
- }
- while (++pos < inp.length && /[0-9]/.test(inp[pos+1]));
- if (inp[pos+1] == ',') {
- ++pos;
- while (pos < inp.length && /[0-9]/.test(inp[pos+1])) ++pos;
- }
- if (inp[pos+1] != '}') {
- nonLethalError = Tokenizer.Error.QuantifierRequiresClosingCurly;
- } else {
- ++pos;
- var after = pos-start;
- twinfo[before] = after;
- twinfo[after] = before;
- if (inp[pos+1] == '?') ++pos; // non-greedy match
- }
- }
- }
- }
- // if found=false, fail right now. otherwise try to parse an identifiername (that's all RegularExpressionFlags is..., but it's constructed in a stupid fashion)
- if (!found || returnValue) {
- if (!returnValue) {
- returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.UnterminatedRegularExpressionOther};
- this.errorStack.push(returnValue);
- }
- } else {
- // this is the identifier scanner, for now
- do ++pos;
- while (pos < inp.length && this.hashAsciiIdentifier[inp[pos]]); /*this.regexAsciiIdentifier.test(inp[pos])*/
-
- if (parens.length) {
- // nope, this is still an error, there was at least one paren that did not have a matching twin
- if (parens.length > 0) returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.RegexOpenGroup};
- this.errorStack.push(returnValue);
- } else if (nonLethalError) {
- returnValue = {start:start,stop:pos,name:14/*error*/,errorHasContent:true,tokenError:true,error:nonLethalError};
- this.errorStack.push(returnValue);
- } else {
- returnValue = {start:start,stop:pos,name:1/*REG_EX*/,isPrimitive:true};
- }
- }
- returnValue.twinfo = twinfo;
- } else if (regex && part[9]) { // this.tagLiterals (NOTE(review): the flag itself is not tested in this condition — confirm)
- // allows you to use this literally (in places where an expression is allowed) in js:
-
- // simple tag:
- // <div></div>
-
- // tree, unary, content, multiline:
- // <foo> <bar>hello </bar> <baz/>
- // </foo>
-
- // attributes, default true attributes, single and double quotes:
- // <gah this="an" attribute single='quote'/>
-
- // dynamic content (content normally parsed as js in a sub-parser):
- // <div>{["hello","world"].join(' ')}</div>
-
- // escaping content with single backslash
- // <div>hah\&lt;\<a{"foo\u0500t\t"+"bar"}</div>
-
- // note: tag content is escaped (one slash removed), js content is not
- // currently not really possible to use } or > in js code unless you
- // can somehow prefix them with a backslash (strings, regex)
- // if you must have these otherwise the fallback is eval
-
- var processValue = function(val){ // not bound: `this` depends on how each caller invokes it
- // post process dynamic parts of this value
- // anything wrapped in (unescaped) { and } is considered to be
- // a literal js expression. so we should parse an expression here
- // and that's where the voodoo inception starts. we must now
- // invoke a new instance of ZeParser, make it read an
- // expression and ensure the next char is the closing curly.
- // only then is it deemed valid.
-
- // ...
- // too difficult for now. let's just go with "escape all teh curlies!"
-
- var arrtxtjs = []; // uneven array. uneven elements are text, even elements are js
-
- var last = 0;
- for (var i=0; i<val.length; ++i) {
- if (val[i] == '\\') ++i;
- else if (val[i] == '{') {
- for (var j=i; j<val.length; ++j) {
- if (val[j] == '\\') ++j;
- else if (val[j] == '}') {
- var js = val.slice(i+1, j);
- arrtxtjs.push(
- val.slice(last, i),
- js
- );
- break;
- }
- }
- i = j;
- last = j + 1;
- }
- }
- // remainder (can be empty string)
- arrtxtjs.push(val.slice(last, i));
-
- if (arrtxtjs.length > 1) { // if we did find any dynamic js block...
- console.log(["has",arrtxtjs.length,"items",arrtxtjs])
- for (var i=1; i<arrtxtjs.length; i+=2) {
- arrtxtjs[i] = arrtxtjs[i].replace(this.regexRemoveEscape, '$1'); // remove a single backslash from the content (it was used as an escape character)
- }
- console.log([arrtxtjs])
- return arrtxtjs; // return array with [string,js,string,js,...]
- } else { // no dynamic js found, return a string
- val = arrtxtjs[0].replace(this.regexRemoveEscape, '$1'); // remove a single backslash from the content (it was used as an escape character)
- return val;
- }
- };
-
- var tagOpen = function(node){ // parses `<name attr=... >` or `<name ... />` at pos, filling node.name / attributes / unary / children
- var regexTagName = this.regexTagName;
- regexTagName.lastIndex = pos+1; // global regexes are positioned by hand via lastIndex before each exec
- var tag = regexTagName.exec(inp);
- if (tag) {
- pos = regexTagName.lastIndex;
- node.name = tag[1];
- node.attributes = {};
-
- // now fetch all attribute=value pairs
- var regexTagAttributes = this.regexTagAttributes;
- var attr = '';
- var lastIndex = pos = regexTagAttributes.lastIndex = regexTagName.lastIndex;
- attr = regexTagAttributes.exec(inp);
- while (attr && attr.index == pos) { // only accept a match that starts exactly where the previous one ended
- if (typeof attr[2] == 'undefined') {
- // attribute without value assignment (implicit "true")
- node.attributes[attr[1]] = attr[3];
- } else {
- node.attributes[attr[1]] = processValue.call(this, attr[2]);
- }
- pos = lastIndex = regexTagAttributes.lastIndex;
- attr = regexTagAttributes.exec(inp);
- }
-
- // it was a unary tag
- var regexTagUnarySuffix = this.regexTagUnarySuffix;
- regexTagUnarySuffix.lastIndex = lastIndex;
- var x = regexTagUnarySuffix.exec(inp);
- node.unary = !!x && x.index == pos;
- if (node.unary) {
- pos = regexTagUnarySuffix.lastIndex;
- return true;
- }
- // it was a binary tag
- var regexTagBinarySuffix = this.regexTagBinarySuffix;
- regexTagBinarySuffix.lastIndex = lastIndex;
- var x = regexTagBinarySuffix.exec(inp);
- if (x && x.index == pos) {
- node.children = [];
- // now parse strings and other tags until you find a closing tag on the same level...
- pos = regexTagBinarySuffix.lastIndex;
- return true;
- }
- // i dont know what that was
- throw console.warn("Error parsing tag"); // NOTE(review): throws whatever console.warn returns; the return below is unreachable
- return false;
- }
- }.bind(this);
-
- var tagBody = function(node){ // collects text runs and nested tags into node.children until a closing-tag opener is seen or no progress is made
- do {
- var start = pos;
-
- var regexTagBody = this.regexTagBody;
- regexTagBody.lastIndex = pos;
- var text = regexTagBody.exec(inp);
- if (text && text[1]) {
- var txt = processValue(text[1]); // NOTE(review): called without .call(this), unlike in tagOpen, yet processValue reads this.regexRemoveEscape — confirm
-// var txt = text[1].replace(this.regexRemoveEscape, '$1'); // remove a single backslash from the content (it was used as an escape character)
- node.children.push(txt);
- pos = regexTagBody.lastIndex;
- }
- if (inp[pos] == '<') {
- var regexTagOpenOrClose = this.regexTagOpenOrClose;
- regexTagOpenOrClose.lastIndex = pos;
- var x = regexTagOpenOrClose.exec(inp);
- if (x && x.index == pos) {
- return node; // end of body
- }
- node.children.push(tag({}));
- }
- } while (start != pos);
- }.bind(this);
-
- var tagClose = function(node){ // consumes a closing tag at pos; true only when its name equals node.name
- var regexTagClose = this.regexTagClose;
- regexTagClose.lastIndex = pos;
- var ctag = regexTagClose.exec(inp);
- if (ctag) {
- pos = regexTagClose.lastIndex;
- if (node.name == ctag[1]) return true;
- return false; // was not expecting to close this tag
- }
-
- // tagClose should only be called if the next chars are starting a closing tag...
- return false;
- }.bind(this);
-
- var tag = function(node){ // open, then (for non-unary tags) body and close; the results of tagBody/tagClose are not inspected
- if (!tagOpen(node)) {
- return node;
- }
- if (!node.unary) {
- tagBody(node);
- tagClose(node);
- }
- return node;
- }.bind(this);
-
- var root = tag({});
-
- returnValue = {start:start,stop:pos,name:15/*TAG*/,isPrimitive:true,root:root};
- } else {
- // note: operators need to be ordered from longest to smallest. regex will take care of the rest.
- // no need to worry about div vs regex. if looking for regex, earlier if will have eaten it
- //var result = this.regexPunctuators.exec(inp.substring(pos,pos+4));
-
- // note: due to the regex, the single / or < might be caught by an earlier part of the regex. so check for that.
- var result = part[8] || part[9] || part[10];
- if (result) {
- //result = result[1];
- returnValue = {start:pos,stop:pos+=result.length,name:11/*PUNCTUATOR*/,value:result};
- } else {
- var found = false;
- // identifiers cannot start with a number. but if the leading string would be a number, another if would have eaten it already for numeric literal :)
- while (pos < inp.length) {
- var c = inp[pos];
-
- if (this.hashAsciiIdentifier[c]) ++pos; //if (this.regexAsciiIdentifier.test(c)) ++pos;
- else if (c == '\\' && this.regexUnicodeEscape.test(inp.substring(pos,pos+6))) pos += 6; // this is like a \uxxxx
- // ok, now test unicode ranges...
- // basically this hardly ever happens so there's little risk of this hitting performance
- // however, if you do happen to have used them, it's not a problem. the parser will support it :)
- else if (this.Unicode) { // the unicode is optional.
- // these chars may not be part of identifier. i want to try to prevent running the unicode regexes here...
- if (this.hashIdentifierStop[c] /*this.regexIdentifierStop.test(c)*/) break;
- // for most scripts, the code wont reach here. which is good, because this is going to be relatively slow :)
- var Unicode = this.Unicode; // cache
- if (!(
- // these may all occur in an identifier... (pure a specification compliance thing :)
- Unicode.Lu.test(c) || Unicode.Ll.test(c) || Unicode.Lt.test(c) || Unicode.Lm.test(c) ||
- Unicode.Lo.test(c) || Unicode.Nl.test(c) || Unicode.Mn.test(c) || Unicode.Mc.test(c) ||
- Unicode.Nd.test(c) || Unicode.Pc.test(c) || Unicode.sp.test(c)
- )) break; // end of match.
- // passed, next char
- ++pos;
- } else break; // end of match.
-
- found = true;
- }
-
- if (found) {
- returnValue = {start:start,stop:pos,name:2/*IDENTIFIER*/,value:inp.substring(start,pos)};
- if (returnValue.value == 'undefined' || returnValue.value == 'null' || returnValue.value == 'true' || returnValue.value == 'false') returnValue.isPrimitive = true;
- } else {
- if (inp[pos] == '`') {
- returnValue = {start:start,stop:pos+1,name:14/*error*/,tokenError:true,error:Tokenizer.Error.BacktickNotSupported};
- this.errorStack.push(returnValue);
- } else if (inp[pos] == '\\') {
- if (inp[pos+1] == 'u') {
- returnValue = {start:start,stop:pos+1,name:14/*error*/,tokenError:true,error:Tokenizer.Error.InvalidUnicodeEscape};
- this.errorStack.push(returnValue);
- } else {
- returnValue = {start:start,stop:pos+1,name:14/*error*/,tokenError:true,error:Tokenizer.Error.InvalidBackslash};
- this.errorStack.push(returnValue);
- }
- } else {
- returnValue = {start:start,stop:pos+1,name:14/*error*/,tokenError:true,error:Tokenizer.Error.Unknown,value:c};
- this.errorStack.push(returnValue);
- // try to skip this char. it's not going anywhere.
- }
- ++pos; // every error case above consumes exactly one character
- }
- }
- }
-
- if (returnValue) {
- // note that ASI's are slipstreamed in here from the parser since the tokenizer cant determine that
- // if this part ever changes, make sure you change that too :)
- returnValue.tokposw = this.wtree.length; // each token records its own index in wtree (and in btree when not white)
- this.wtree.push(returnValue);
- if (!returnValue.isWhite) {
- returnValue.tokposb = this.btree.length;
- this.btree.push(returnValue);
- }
- }
-
-
- } while (stack && returnValue && returnValue.isWhite); // WHITE_SPACE LINETERMINATOR COMMENT_SINGLE COMMENT_MULTI
- ++this.tokenCountNoWhite;
-
- this.pos = pos; // the instance position is only written back here, after the loop
-
- if (matchedNewline) returnValue.newline = true; // set when a line terminator or newline-containing comment was crossed during this call
- return returnValue;
- },
- addTokenToStreamBefore: function(token, match){ // places token at match's slot in wtree/btree and moves match one slot up; appends instead when match.name is 12
- var wtree = this.wtree;
- var btree = this.btree;
- if (match.name == 12/*asi*/) { // NOTE(review): 12 is labelled EOF where the scanner creates it — confirm which label is right
- token.tokposw = wtree.length;
- wtree.push(token);
- token.tokposb = btree.length;
- btree.push(token);
- } else {
- token.tokposw = match.tokposw;
- wtree[token.tokposw] = token;
- match.tokposw += 1;
- wtree[match.tokposw] = match; // overwrites whatever was stored after match; presumably match is the last entry — verify against callers
-
- if (match.tokposb) { // NOTE(review): a tokposb of 0 is falsy, so a match at btree index 0 skips this branch — confirm intended
- token.tokposb = match.tokposb;
- btree[token.tokposb] = token;
- match.tokposb += 1;
- btree[match.tokposb] = match;
- }
- }
- },
- oldNumberParser: function(pos, chr, inp, returnValue, start, Tokenizer){ // character-by-character number scanner; chr is inp[pos] on entry; returns [new pos, token]
- ++pos; // step past chr
- // either: 0x 0X 0 .3
- if (chr == '0' && (inp[pos] == 'x' || inp[pos] == 'X')) {
- // parsing hex
- while (++pos < inp.length && this.hashHex[inp[pos]]); // this.regexHex.test(inp[pos]));
- returnValue = {start:start,stop:pos,name:3/*NUMERIC_HEX*/,isPrimitive:true,isNumber:true};
- } else {
- var parsingOctal = chr == '0' && inp[pos] >= '0' && inp[pos] <= '9'; // leading 0 directly followed by another digit
- // parsing dec
- if (chr != '.') { // integer part
- while (pos < inp.length && inp[pos] >= '0' && inp[pos] <= '9') ++pos;
- if (inp[pos] == '.') ++pos;
- }
- // decimal part
- while (pos < inp.length && inp[pos] >= '0' && inp[pos] <= '9') ++pos;
- // exponent part
- if (inp[pos] == 'e' || inp[pos] == 'E') {
- if (inp[++pos] == '+' || inp[pos] == '-') ++pos;
- var expPosBak = pos; // used to detect an exponent with no digits
- while (pos < inp.length && inp[pos] >= '0' && inp[pos] <= '9') ++pos;
- if (expPosBak == pos) {
- returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.NumberExponentRequiresDigits};
- this.errorStack.push(returnValue);
- }
- }
- if (returnValue.name != 14/*error*/) { // reads either the exponent error set just above or the token passed in by the caller
- if (parsingOctal) {
- returnValue = {start:start,stop:pos,name:14/*error*/,isNumber:true,isOctal:true,tokenError:true,error:Tokenizer.Error.IllegalOctalEscape};
- this.errorStack.push(returnValue);
- console.log("foo") // NOTE(review): looks like leftover debug output
- } else {
- returnValue = {start:start,stop:pos,name:4/*NUMERIC_DEC*/,isPrimitive:true,isNumber:true};
- }
- }
- }
- return [pos, returnValue];
- },
- tokens: function(arrx){ // tokenizes the whole input and returns the tokens (EOF excluded) as an array
- arrx = arrx || []; // optional per-token hints: a truthy arrx[n] lets token n be scanned as a regex literal
- var n = 0;
- var last;
- var stack = [];
- while ((last = this.storeCurrentAndFetchNextToken(!arrx[n++], false, false, true)) && last.name != 12/*EOF*/) stack.push(last); // stack=false and _dontStore=true: one token per call, whitespace tokens included
- return stack;
- },
- fixValues: function(){ // fills in .value on every token in wtree that does not already have a truthy one
- this.wtree.forEach(function(t){
- if (!t.value) t.value = this.inp.substring(t.start, t.stop); // value is the raw input slice the token covers
- },this); // second argument makes `this` inside the callback the tokenizer
- }
-};
-
-//#ifdef TEST_SUITE
-Tokenizer.escape = function(s){ // returns s made printable as HTML: \n and \t spelled out, & < > entity-encoded, \uFFFF and remaining whitespace shown as \uXXXX
- return s.replace(/\n/g,'\\n').replace(/\t/g,'\\t').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\uFFFF/g, '\\uFFFF').replace(/\s/g, function(s){
- // replace whitespace as is... (\n and \t were already rewritten above, so they do not reach this callback)
- var ord = s.charCodeAt(0).toString(16);
- switch (ord.length) { // left-pad the hex code to four digits
- case 1: ord = '000'+ord; break;
- case 2: ord = '00'+ord; break;
- case 3: ord = '0'+ord; break;
- }
- return '\\u'+ord;
- });
-};
-Tokenizer.testSuite = function(arr){ // browser-only runner: tokenizes each test input and reports whether the token count matches, writing results into a <pre> appended to document.body
- var out = document.createElement('pre');
- document.body.appendChild(out);
- var debug = function(){ // appends its arguments, space-joined, as one HTML line; returns the first argument
- var f = document.createElement('div');
- f.innerHTML = Array.prototype.slice.call(arguments).join(' ');
- out.appendChild(f);
- return arguments[0];
- };
-
- debug("Running test suite...",arr.length,"tests");
- debug(' ');
- var start = +new Date;
- var ok = 0;
- var fail = 0;
- for (var i=0; i<arr.length; ++i) {
- var test = arr[i], result; // test is [input, expectedCount, description] or [input, expectedCount, regexHints, description]
- var input = test[0];
- var outputLen = test[1].length ? test[1][0] : test[1]; // expected count may be a number or an array whose first element is the count
- var regexHints = test[3] ? test[2] : null; // if flags, then len=4
- var desc = test[3] || test[2];
-
- var result = new Tokenizer(input).tokens(regexHints); // regexHints can be null, that's ok
- if (result.length == outputLen) {
- debug('<span class="green">Test '+i+' ok:</span>',desc);
- ++ok;
- } else {
- debug('<b class="red">Test failed:</span>',desc,'(found',result.length,'expected',outputLen+')'),console.log(desc, result); // NOTE(review): the markup opens <b> but closes </span>
- ++fail;
- }
- debug('<b>'+Tokenizer.escape(input)+'</b>');
- debug('<br/>');
- }
- debug("Tokenizer test suite finished ("+(+new Date - start)+' ms). ok:'+ok+', fail:'+fail);
-};
-//#endif
-
-Tokenizer.regexWhiteSpace = /[ \t\u000B\u000C\u00A0\uFFFF]/; // only referenced from commented-out code in the visible tokenizer
-Tokenizer.regexLineTerminator = /[\u000A\u000D\u2028\u2029]/;
-Tokenizer.regexAsciiIdentifier = /[a-zA-Z0-9\$_]/;
-Tokenizer.hashAsciiIdentifier = {_:1,$:1,a:1,b:1,c:1,d:1,e:1,f:1,g:1,h:1,i:1,j:1,k:1,l:1,m:1,n:1,o:1,p:1,q:1,r:1,s:1,t:1,u:1,v:1,w:1,x:1,y:1,z:1,A:1,B:1,C:1,D:1,E:1,F:1,G:1,H:1,I:1,J:1,K:1,L:1,M:1,N:1,O:1,P:1,Q:1,R:1,S:1,T:1,U:1,V:1,W:1,X:1,Y:1,Z:1,0:1,1:1,2:1,3:1,4:1,5:1,6:1,7:1,8:1,9:1}; // lookup-table form of regexAsciiIdentifier
-Tokenizer.regexHex = /[0-9A-Fa-f]/;
-Tokenizer.hashHex = {0:1,1:1,2:1,3:1,4:1,5:1,6:1,7:1,8:1,9:1,a:1,b:1,c:1,d:1,e:1,f:1,A:1,B:1,C:1,D:1,E:1,F:1}; // lookup-table form of regexHex
-Tokenizer.regexUnicodeEscape = /u[0-9A-Fa-f]{4}/; // the \ is already checked at usage...
-Tokenizer.regexIdentifierStop = /[\>\=\!\|\<\+\-\&\*\%\^\/\{\}\(\)\[\]\.\;\,\~\?\:\ \t\n\\\'\"]/;
-Tokenizer.hashIdentifierStop = {'>':1,'=':1,'!':1,'|':1,'<':1,'+':1,'-':1,'&':1,'*':1,'%':1,'^':1,'/':1,'{':1,'}':1,'(':1,')':1,'[':1,']':1,'.':1,';':1,',':1,'~':1,'?':1,':':1,'\\':1,'\'':1,'"':1,' ':1,'\t':1,'\n':1}; // lookup-table form of regexIdentifierStop
-Tokenizer.regexNewline = /\n/g;
-//Tokenizer.regexPunctuators = /^(>>>=|===|!==|>>>|<<=|>>=|<=|>=|==|!=|\+\+|--|<<|>>|\&\&|\|\||\+=|-=|\*=|%=|\&=|\|=|\^=|\/=|\{|\}|\(|\)|\[|\]|\.|;|,|<|>|\+|-|\*|%|\||\&|\||\^|!|~|\?|:|=|\/)/;
-Tokenizer.Unidocde = window.Unicode; // NOTE(review): property name looks like a typo of "Unicode"; nothing in the visible code reads it
-Tokenizer.regexNumber = /^(?:(0[xX][0-9A-Fa-f]+)|((?:(?:(?:(?:[0-9]+)(?:\.[0-9]*)?))|(?:\.[0-9]+))(?:[eE][-+]?[0-9]{1,})?))/; // group 1: hex literal, group 2: decimal literal
-Tokenizer.regexNormalizeNewlines = /(\u000D[^\u000A])|[\u2028\u2029]/; // NOTE(review): no `g` flag, and the first alternative also matches the character after \r — confirm against its use in the constructor
-// tag parsing regex
- // ws name (must start with non-number-or-dash)
-Tokenizer.regexTagName = /[^\S]*([a-zA-Z][a-zA-Z0-9-]*)/g;
- // ws attrname "..[\"].." '..[\']..'
-Tokenizer.regexTagAttributes = /[^\S]+([a-zA-Z0-9-]+)(?:=(?:(?:"((?:(?:\\.)|(?:[^"]))*?)")|(?:'((?:(?:\\')|(?:[^']))*?)')))?/g;
- // ws />
-Tokenizer.regexTagUnarySuffix = /[^\S]*\/[^\S]*>/g;
- // ws >
-Tokenizer.regexTagBinarySuffix = /[^\S]*?>/g;
- // anything as long as its not a <, unless preceded by \
-Tokenizer.regexTagBody = /((?:(?:\\.)|(?:[^<]))*)/g;
- // < ws /> / (?? TOFIX not sure whether this is correct or intentional...)
-Tokenizer.regexTagOpenOrClose = /<[^\S]*[\/>]*\//g;
- // < ws / ws name ws >
-Tokenizer.regexTagClose = /<[^\S]*\/[^\S]*([a-zA-Z][a-zA-Z0-9-]*)[^\S]*>/g;
- // backslash with either a non-backslash following or the EOL following
-Tokenizer.regexRemoveEscape = /\\(?:([^\\])|$)/g;
-
-
-// 1 ws 2 lt 3 scmt 4 mcmt 5/6 str 7 nr 8 rx 9 dom 10 punc
-Tokenizer.regexBig = /^([ \t\u000B\u000C\u00A0\uFFFF])?([\u000A\u000D\u2028\u2029])?(\/\/)?(\/\*)?(')?(")?(\.?[0-9])?(?:(\/)[^=])?(?:(<)[^<=])?(>>>=|===|!==|>>>|<<=|>>=|<=|>=|==|!=|\+\+|--|<<|>>|\&\&|\|\||\+=|-=|\*=|%=|\&=|\|=|\^=|\/=|\{|\}|\(|\)|\[|\]|\.|;|,|<|>|\+|-|\*|%|\||\&|\||\^|!|~|\?|:|=|\/)?/;
-Tokenizer.regexBigAlt = /([ \t\u000B\u000C\u00A0\uFFFF])?([\u000A\u000D\u2028\u2029])?(\/\/)?(\/\*)?(')?(")?(\.?[0-9])?(?:(\/)[^=])?(>>>=|===|!==|>>>|<<=|>>=|<=|>=|==|!=|\+\+|--|<<|>>|\&\&|\|\||\+=|-=|\*=|%=|\&=|\|=|\^=|\/=|\{|\}|\(|\)|\[|\]|\.|;|,|<|>|\+|-|\*|%|\||\&|\||\^|!|~|\?|:|=|\/)?/g; // unanchored, global, and without the (<) group, so here punctuators are group 9 rather than 10
-
-Tokenizer.Error = { // error descriptors; error tokens carry one of these objects in their `error` property
- UnterminatedSingleStringNewline: {msg:'Newlines are not allowed in string literals'}, // NOTE(review): the scanner uses the *Single* entries for double-quoted strings and the *Double* entries for single-quoted ones — confirm
- UnterminatedSingleStringOther: {msg:'Unterminated single string'},
- UnterminatedDoubleStringNewline: {msg:'Newlines are not allowed in string literals'},
- UnterminatedDoubleStringOther: {msg:'Unterminated double string'},
- UnterminatedRegularExpressionNewline: {msg:'Newlines are not allowed in regular expressions'},
- NothingToRepeat: {msg:'Used a repeat character (*?+) in a regex without something prior to it to match'},
- ClosingClassRangeNotFound: {msg: 'Unable to find ] for class range'},
- RegexOpenGroup: {msg: 'Open group did not find closing parenthesis'},
- RegexNoOpenGroups: {msg: 'Closing parenthesis found but no group open'},
- UnterminatedRegularExpressionOther: {msg:'Unterminated regular expression'},
- UnterminatedMultiLineComment: {msg:'Unterminated multi line comment'},
- UnexpectedIdentifier: {msg:'Unexpected identifier'}, // not referenced by the visible tokenizer code
- IllegalOctalEscape: {msg:'Octal escapes are not valid'},
- Unknown: {msg:'Unknown input'}, // if this happens, my parser is bad :(
- NumberExponentRequiresDigits: {msg:'Numbers with exponents require at least one digit after the `e`'},
- BacktickNotSupported: {msg:'The backtick is not used in js, maybe you copy/pasted from a fancy site/doc?'},
- InvalidUnicodeEscape: {msg:'Encountered an invalid unicode escape, must be followed by exactly four hex numbers'},
- InvalidBackslash: {msg:'Encountered a backslash where it not allowed'},
- StartOfMatchShouldBeAtStart: {msg: 'The ^ signifies the start of match but was not found at a start'},
- DollarShouldBeEnd: {msg: 'The $ signifies the stop of match but was not found at a stop'},
- QuantifierRequiresNumber: {msg:'Quantifier curly requires at least one digit before the comma'},
- QuantifierRequiresClosingCurly: {msg:'Quantifier curly requires to be closed'},
- MissingOpeningCurly: {msg:'Encountered closing quantifier curly without seeing an opening curly'}
-};
View
2,187 test/3rdparty/ZeParser.js
@@ -1,2187 +0,0 @@
-if (typeof exports !== 'undefined') { // only runs where an `exports` binding exists (CommonJS-style module loading)
- var Tokenizer = require('./Tokenizer').Tokenizer; // pull the Tokenizer constructor from the sibling module
- exports.ZeParser = ZeParser; // ZeParser is a function declaration further down; it is hoisted, so it can be exported here
-}
-
-/**
- * This is my js Parser: Ze. It's actually the post-dev pre-cleanup version. Clearly.
- * Some optimizations have been applied :)
- * (c) Peter van der Zee, qfox.nl
- *
- * The constructor only sets up state; it does not parse anything itself (call parse() for that).
- * It stores its arguments, creates the root scope and the statement label list (both are also
- * attached to `stack`), creates an empty error list, and copies the shared static regexes and
- * hashes of ZeParser onto the instance.
- *
- * @param {String} inp Input
- * @param {Tokenizer} tok
- * @param {Array} stack The tokens will be put in this array. If you're looking for the AST, this would be it :)
- * @param {boolean} [simple] When truthy, `this.ast` is set to false, which disables every block guarded by `if (this.ast)`.
- */
-function ZeParser(inp, tok, stack, simple){
- this.input = inp;
- this.tokenizer = tok;
- this.stack = stack;
- this.stack.root = true; // flag the caller's array as the root of the tree
- this.scope = stack.scope = [{value:'this', isDeclared:true, isEcma:true, thisIsGlobal:true}]; // names of variables
- this.scope.global = true; // mark this scope array as the global one
- this.statementLabels = [];
-
- this.errorStack = [];
-
- stack.scope = this.scope; // hook root (redundant: the chained assignment above already stored the same array on stack.scope)
- stack.labels = this.statementLabels;
-
- this.regexLhsStart = ZeParser.regexLhsStart;
-/*
- this.regexStartKeyword = ZeParser.regexStartKeyword;
- this.regexKeyword = ZeParser.regexKeyword;
- this.regexStartReserved = ZeParser.regexStartReserved;
- this.regexReserved = ZeParser.regexReserved;
-*/
- this.regexStartKeyOrReserved = ZeParser.regexStartKeyOrReserved;
- this.hashStartKeyOrReserved = ZeParser.hashStartKeyOrReserved;
- this.regexIsKeywordOrReserved = ZeParser.regexIsKeywordOrReserved;
- this.regexAssignments = ZeParser.regexAssignments;
- this.regexNonAssignmentBinaryExpressionOperators = ZeParser.regexNonAssignmentBinaryExpressionOperators;
- this.regexUnaryKeywords = ZeParser.regexUnaryKeywords;
- this.hashUnaryKeywordStart = ZeParser.hashUnaryKeywordStart;
- this.regexUnaryOperators = ZeParser.regexUnaryOperators;
- this.regexLiteralKeywords = ZeParser.regexLiteralKeywords;
- this.testing = {'this':1,'null':1,'true':1,'false':1}; // NOTE(review): no reader of `testing` is visible in this part of the file - confirm it is still needed
-
- this.ast = !simple; ///#define FULL_AST
-};
-/**
- * Returns just a stacked parse tree (regular array)
- *
- * Convenience wrapper: creates a Tokenizer and a ZeParser for `input`, runs the instance
- * parse() method and hands back the array the parser filled.
- * Exceptions thrown by the ZeParser constructor or by parser.parse() are caught, logged with
- * console.log, and turned into a `null` return value. The Tokenizer is constructed outside the
- * try block, so an exception from its constructor is not caught here.
- *
- * @param {string} input
- * @param {boolean} simple=false When truthy, parser.ast is set to false before parsing
- * @return {Array} The filled stack, or null when an exception was caught
- */
-ZeParser.parse = function(input, simple){
- var tok = new Tokenizer(input);
- var stack = [];
- try {
- var parser = new ZeParser(input, tok, stack); // `simple` is not forwarded here; it is applied on the next line instead
- if (simple) parser.ast = false;
- parser.parse();
- return stack;
- } catch (e) {
- console.log("Parser has a bug for this input, please report it :)", e);
- return null;
- }
-};
-/**
- * Returns a new parser instance with parse details for input
- *
- * Like ZeParser.parse, but returns the parser object itself instead of only its stack.
- * Exceptions thrown by the ZeParser constructor or by parser.parse() are caught, logged with
- * console.log, and turned into a `null` return value.
- *
- * NOTE(review): `options` is passed to the Tokenizer and is also forwarded as the fourth
- * constructor argument (`simple`). The constructor computes `this.ast = !simple`, so any truthy
- * options value leaves the returned parser with `ast === false` - confirm this is intended.
- *
- * @param {string} input
- * @param {Object} options
- * @property {boolean} [options.tagLiterals] Instructs the tokenizer to also parse tag literals
- * @returns {ZeParser} The parser after parsing, or null when an exception was caught
- */
-ZeParser.createParser = function(input, options){
- var tok = new Tokenizer(input, options);
- var stack = [];
- try {
- var parser = new ZeParser(input, tok, stack, options);
- parser.parse();
- return parser;
- } catch (e) {
- console.log("Parser has a bug for this input, please report it :)", e);
- return null;
- }
-};
-ZeParser.prototype = {
- input: null,
- tokenizer: null,
- stack: null,
- scope: null,
- statementLabels: null,
- errorStack: null,
-
- ast: null,
-
- parse: function(match){ /*
- * Top-level parse routine.
- * 1. Fetches a token from the tokenizer (first call: no previous token to store).
- * 2. Hands it to eatSourceElements (defined outside this view; presumably consumes as many
- *    source elements as it can).
- * 3. If a token other than EOF (name 12) is left over, records an UnexpectedToken error for it
- *    (unless it already is an error token, name 14), skips it, keeps skipping error tokens, and
- *    then calls itself again with the token it ended on.
- * `match` is only supplied by that recursive call; outside callers in this file call parse()
- * without arguments. Returns the last token fetched.
- */
- if (match) match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, this.stack); // meh
- else match = this.tokenizer.storeCurrentAndFetchNextToken(false, null, this.stack, true); // initialization step, dont store the match (there isnt any!)
-
- match = this.eatSourceElements(match, this.stack);
-
- var cycled = false; // becomes true once at least one leftover token has been skipped
- do {
- if (match && match.name != 12/*eof*/) {
- // if not already an error, insert an error before it
- if (match.name != 14/*error*/) this.failignore('UnexpectedToken', match, this.stack);
- // just parse the token as is and continue.
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, this.stack);
- cycled = true;
- }
-
- // keep gobbling any errors...
- } while (match && match.name == 14/*error*/);
-
- // now try again (but only if we gobbled at least one token)...
- if (cycled && match && match.name != 12/*eof*/) match = this.parse(match);
-
- // if the tokenizer set something aside in errorEscape, append it to the stack and clear the field
- // (presumably a token that caused an error at eof - confirm against the Tokenizer)
- if (this.tokenizer.errorEscape) {
- this.stack.push(this.tokenizer.errorEscape);
- this.tokenizer.errorEscape = null;
- }
-
- return match;
- },
-
- eatSemiColon: function(match, stack){ /*
- * Ends a statement: consumes a literal `;` if `match` is one, otherwise either reports a
- * NoASI error or inserts an automatic semicolon (ASI) token in front of `match`.
- * `match` is the current token, `stack` the array tokens are stored into.
- * Returns the token to continue with; that token always gets `semi = true` set on it.
- */
- //this.stats.eatSemiColon = (+//this.stats.eatSemiColon||0)+1;
- if (match.value == ';') match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- else {
- // try asi
- // only if:
- // - this token was preceded by at least one newline (match.newline) or next token is }
- // - this is EOF
- // - prev token was one of return,continue,break,throw (restricted production), not checked here.
-
- // the exceptions to this rule are
- // - if the next line is a regex
- // - the semi is part of the for-header.
- // these exceptions are automatically caught by the way the parser is built
-
- // not eof and just parsed semi or no newline preceding and next isnt }
- // (the last clause exempts ++ and -- that follow a newline, so those get an ASI instead of an error)
- if (match.name != 12/*EOF*/ && (match.semi || (!match.newline && match.value != '}')) && !(match.newline && (match.value == '++' || match.value == '--'))) {
- this.failignore('NoASI', match, stack);
- } else {
- // ASI
- // (match is actually the match _after_ this asi, so the position of asi is match.start, not stop (!)
- var asi = {start:match.start,stop:match.start,name:13/*ASI*/}; // zero-width token located at the start of `match`
- stack.push(asi);
-
- // slip it in the stream, before the current match.
- // for the other tokens see the tokenizer near the end of the main parsing function
- this.tokenizer.addTokenToStreamBefore(asi, match);
- }
- }
- match.semi = true; // read back by the `match.semi` test above when this token is seen here again
- return match;
- },
- /**
- * Eat one or more "AssignmentExpression"s. May also eat a labeled statement if
- * the parameters are set that way. This is the only way to linearly distinct between
- * an expression-statement and a labeled-statement without double lookahead. (ok, maybe not "only")
- * @param {boolean} mayParseLabeledStatementInstead=false If the first token is an identifier and the second a colon, accept this match as a labeled statement instead... Only true if the match in the parameter is an (unreserved) identifier (so no need to validate that further)
- * @param {Object} match
- * @param {Array} stack
- * @param {boolean} onlyOne=false Only parse a AssignmentExpression
- * @param {boolean} forHeader=false Do not allow the `in` operator
- * @param {boolean} isBreakOrContinueArg=false The argument for break or continue is always a single identifier
- * @return {Object}
- */
- eatExpressions: function(mayParseLabeledStatementInstead, match, stack, onlyOne, forHeader, isBreakOrContinueArg){
- if (this.ast) { //#ifdef FULL_AST
- var pstack = stack;
- stack = [];
- stack.desc = 'expressions';
- stack.nextBlack = match.tokposb;
- pstack.push(stack);
-
- var parsedExpressions = 0;
- } //#endif
-
- var first = true;
- do {
- var parsedNonAssignmentOperator = false; // once we parse a non-assignment, this expression can no longer parse an assignment
- // TOFIX: can probably get the regex out somehow...
- if (!first) {
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value))) match = this.failsafe('ExpectedAnotherExpressionComma', match);
- }
-
- if (this.ast) { //#ifdef FULL_AST
- ++parsedExpressions;
-
- var astack = stack;
- stack = [];
- stack.desc = 'expression';
- stack.nextBlack = match.tokposb;
- astack.push(stack);
- } //#endif
-
- // start of expression is given: match
- // it should indeed be a properly allowed lhs
- // first eat all unary operators
- // they can be added to the stack, but we need to ensure they have indeed a valid operator
-
- var parseAnotherExpression = true;
- while (parseAnotherExpression) { // keep parsing lhs+operator as long as there is an operator after the lhs.
- if (this.ast) { //#ifdef FULL_AST
- var estack = stack;
- stack = [];
- stack.desc = 'sub-expression';
- stack.nextBlack = match.tokposb;
- estack.push(stack);
-
- var news = 0; // encountered new operators waiting for parenthesis
- } //#endif
-
- // start checking lhs
- // if lhs is identifier (new/call expression), allow to parse an assignment operator next
- // otherwise keep eating unary expressions and then any "value"
- // after that search for a binary operator. if we only ate a new/call expression then
- // also allow to eat assignments. repeat for the rhs.
- var parsedUnaryOperator = false;
- var isUnary = null;
- while (
- !isBreakOrContinueArg && // no unary for break/continue
- (isUnary =
- (match.value && this.hashUnaryKeywordStart[match.value[0]] && this.regexUnaryKeywords.test(match.value)) || // (match.value == 'delete' || match.value == 'void' || match.value == 'typeof' || match.value == 'new') ||
- (match.name == 11/*PUNCTUATOR*/ && this.regexUnaryOperators.test(match.value))
- )
- ) {
- if (isUnary) match.isUnaryOp = true;
- if (this.ast) { //#ifdef FULL_AST
- // find parenthesis
- if (match.value == 'new') ++news;
- } //#endif
-
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- // ensure that it is in fact a valid lhs-start. TAG is a custom extension for optional tag literal syntax support.
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value))) match = this.failsafe('ExpectedAnotherExpressionRhs', match);
- // not allowed to parse assignment
- parsedUnaryOperator = true;
- };
-
- // if we parsed any kind of unary operator, we cannot be parsing a labeled statement
- if (parsedUnaryOperator) mayParseLabeledStatementInstead = false;
-
- // so now we know match is a valid lhs-start and not a unary operator
- // it must be a string, number, regex, identifier
- // or the start of an object literal ({), array literal ([) or group operator (().
-
- var acceptAssignment = false;
-
- // take care of the "open" cases first (group, array, object)
- if (match.value == '(') {
- if (this.ast) { //#ifdef FULL_AST
- var groupStack = stack;
- stack = [];
- stack.desc = 'grouped';
- stack.nextBlack = match.tokposb;
- groupStack.push(stack);
-
- var lhp = match;
-
- match.isGroupStart = true;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value))) match = this.failsafe('GroupingShouldStartWithExpression', match);
- // keep parsing expressions as long as they are followed by a comma
- match = this.eatExpressions(false, match, stack);
-
- if (match.value != ')') match = this.failsafe('UnclosedGroupingOperator', match);
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhp;
- lhp.twin = match;
-
- match.isGroupStop = true;
-
- if (stack[stack.length-1].desc == 'expressions') {
- // create ref to this expression group to the opening paren
- lhp.expressionArg = stack[stack.length-1];
- }
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(true, match, stack); // might be div
-
- if (this.ast) { //#ifdef FULL_AST
- stack = groupStack;
- } //#endif
- // you can assign to group results. and as long as the group does not contain a comma (and valid ref), it will work too :)
- acceptAssignment = true;
- // there's an extra rule for [ namely that, it must start with an expression but after that, expressions are optional
- } else if (match.value == '[') {
- if (this.ast) { //#ifdef FULL_AST
- stack.sub = 'array literal';
- stack.hasArrayLiteral = true;
- var lhsb = match;
-
- match.isArrayLiteralStart = true;
-
- if (!this.scope.arrays) this.scope.arrays = [];
- match.arrayId = this.scope.arrays.length;
- this.scope.arrays.push(match);
-
- match.targetScope = this.scope;
- } //#endif
- // keep parsing expressions as long as they are followed by a comma
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
-
- // arrays may start with "elided" commas
- while (match.value == ',') match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
-
- var foundAtLeastOneComma = true; // for entry in while
- while (foundAtLeastOneComma && match.value != ']') {
- foundAtLeastOneComma = false;
-
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value)) && match.name != 14/*error*/) match = this.failsafe('ArrayShouldStartWithExpression', match);
- match = this.eatExpressions(false, match, stack, true);
-
- while (match.value == ',') {
- foundAtLeastOneComma = true;
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- }
- }
- if (match.value != ']') {
- match = this.failsafe('UnclosedPropertyBracket', match);
- }
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhsb;
- lhsb.twin = match;
-
- match.isArrayLiteralStop = true;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(true, match, stack); // might be div
- while (match.value == '++' || match.value == '--') {
- // gobble and ignore?
- this.failignore('InvalidPostfixOperandArray', match, stack);
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- }
- // object literals need seperate handling...
- } else if (match.value == '{') {
- if (this.ast) { //#ifdef FULL_AST
- stack.sub = 'object literal';
- stack.hasObjectLiteral = true;
-
- match.isObjectLiteralStart = true;
-
- if (!this.scope.objects) this.scope.objects = [];
- match.objectId = this.scope.objects.length;
- this.scope.objects.push(match);
-
- var targetObject = match;
- match.targetScope = this.scope;
-
- var lhc = match;
- } //#endif
-
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.name == 12/*eof*/) {
- match = this.failsafe('ObjectLiteralExpectsColonAfterName', match);
- }
- // ObjectLiteral
- // PropertyNameAndValueList
-
- while (match.value != '}' && match.name != 14/*error*/) { // will stop if next token is } or throw if not and no comma is found
- // expecting a string, number, or identifier
- //if (match.name != 5/*STRING_SINGLE*/ && match.name != 6/*STRING_DOUBLE*/ && match.name != 3/*NUMERIC_HEX*/ && match.name != 4/*NUMERIC_DEC*/ && match.name != 2/*IDENTIFIER*/) {
- // TOFIX: more specific errors depending on type...
- if (!match.isNumber && !match.isString && match.name != 2/*IDENTIFIER*/) {
- match = this.failsafe('IllegalPropertyNameToken', match);
- }
-
- if (this.ast) { //#ifdef FULL_AST
- var objLitStack = stack;
- stack = [];
- stack.desc = 'objlit pair';
- stack.isObjectLiteralPair = true;
- stack.nextBlack = match.tokposb;
- objLitStack.push(stack);
-
- var propNameStack = stack;
- stack = [];
- stack.desc = 'objlit pair name';
- stack.nextBlack = match.tokposb;
- propNameStack.push(stack);
-
- propNameStack.sub = 'data';
-
- var propName = match;
- propName.isPropertyName = true;
- } //#endif
-
- var getset = match.value;
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (this.ast) { //#ifdef FULL_AST
- stack = propNameStack;
- } //#endif
-
- // for get/set we parse a function-like definition. but only if it's immediately followed by an identifier (otherwise it'll just be the property 'get' or 'set')
- if (getset == 'get') {
- // "get" PropertyName "(" ")" "{" FunctionBody "}"
- if (match.value == ':') {
- if (this.ast) { //#ifdef FULL_AST
- propName.isPropertyOf = targetObject;
- } //#endif
- match = this.eatObjectLiteralColonAndBody(match, stack);
- } else {
- if (this.ast) { //#ifdef FULL_AST
- match.isPropertyOf = targetObject;
- propNameStack.sub = 'getter';
- propNameStack.isAccessor = true;
- } //#endif
- // if (match.name != 2/*IDENTIFIER*/ && match.name != 5/*STRING_SINGLE*/ && match.name != 6/*STRING_DOUBLE*/ && match.name != 3/*NUMERIC_HEX*/ && match.name != 4/*NUMERIC_DEC*/) {
- if (!match.isNumber && !match.isString && match.name != 2/*IDENTIFIER*/) match = this.failsafe('IllegalGetterSetterNameToken', match, true);
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.value != '(') match = this.failsafe('GetterSetterNameFollowedByOpenParen', match);
- if (this.ast) { //#ifdef FULL_AST
- var lhp = match;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.value != ')') match = this.failsafe('GetterHasNoArguments', match);
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhp;
- lhp.twin = match;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatFunctionBody(match, stack);
- }
- } else if (getset == 'set') {
- // "set" PropertyName "(" PropertySetParameterList ")" "{" FunctionBody "}"
- if (match.value == ':') {
- if (this.ast) { //#ifdef FULL_AST
- propName.isPropertyOf = targetObject;
- } //#endif
- match = this.eatObjectLiteralColonAndBody(match, stack);
- } else {
- if (this.ast) { //#ifdef FULL_AST
- match.isPropertyOf = targetObject;
- propNameStack.sub = 'setter';
- propNameStack.isAccessor = true;
- } //#endif
- if (!match.isNumber && !match.isString && match.name != 2/*IDENTIFIER*/) match = this.failsafe('IllegalGetterSetterNameToken', match);
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.value != '(') match = this.failsafe('GetterSetterNameFollowedByOpenParen', match);
- if (this.ast) { //#ifdef FULL_AST
- var lhp = match;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.name != 2/*IDENTIFIER*/) {
- if (match.value == ')') match = this.failsafe('SettersMustHaveArgument', match);
- else match = this.failsafe('IllegalSetterArgumentNameToken', match);
- }
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.value != ')') {
- if (match.value == ',') match = this.failsafe('SettersOnlyGetOneArgument', match);
- else match = this.failsafe('SetterHeaderShouldHaveClosingParen', match);
- }
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhp;
- lhp.twin = match;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatFunctionBody(match, stack);
- }
- } else {
- // PropertyName ":" AssignmentExpression
- if (this.ast) { //#ifdef FULL_AST
- propName.isPropertyOf = targetObject;
- } //#endif
- match = this.eatObjectLiteralColonAndBody(match, stack);
- }
-
- if (this.ast) { //#ifdef FULL_AST
- stack = objLitStack;
- } //#endif
-
- // one trailing comma allowed
- if (match.value == ',') {
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.value == ',') match = this.failsafe('IllegalDoubleCommaInObjectLiteral', match);
- } else if (match.value != '}') match = this.failsafe('UnclosedObjectLiteral', match);
-
- // either the next token is } and the loop breaks or
- // the next token is the start of the next PropertyAssignment...
- }
- // closing curly
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhc;
- lhc.twin = match;
-
- match.isObjectLiteralStop = true;
- } //#endif
-
- match = this.tokenizer.storeCurrentAndFetchNextToken(true, match, stack); // next may be div
- while (match.value == '++' || match.value == '--') {
- this.failignore('InvalidPostfixOperandObject', match, stack);
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- }
- } else if (match.value == 'function') { // function expression
- if (this.ast) { //#ifdef FULL_AST
- var oldstack = stack;
- stack = [];
- stack.desc = 'func expr';
- stack.isFunction = true;
- stack.nextBlack = match.tokposb;
- if (!this.scope.functions) this.scope.functions = [];
- match.functionId = this.scope.functions.length;
- this.scope.functions.push(match);
- oldstack.push(stack);
- var oldscope = this.scope;
- // add new scope
- match.scope = stack.scope = this.scope = [
- this.scope,
- {value:'this', isDeclared:true, isEcma:true, functionStack: stack},
- {value:'arguments', isDeclared:true, isEcma:true, varType:['Object']}
- ]; // add the current scope (to build chain up-down)
- this.scope.upper = oldscope;
- // ref to back to function that's the cause for this scope
- this.scope.scopeFor = match;
- match.targetScope = oldscope; // consistency
- match.isFuncExprKeyword = true;
- match.functionStack = stack;
- } //#endif
- var funcExprToken = match;
-
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (mayParseLabeledStatementInstead && match.value == ':') match = this.failsafe('LabelsMayNotBeReserved', match);
- if (match.name == 2/*IDENTIFIER*/) {
- funcExprToken.funcName = match;
- match.meta = "func expr name";
- match.varType = ['Function'];
- match.functionStack = stack; // ref to the stack, in case we detect the var being a constructor
- if (this.ast) { //#ifdef FULL_AST
- // name is only available to inner scope
- this.scope.push({value:match.value});
- } //#endif
- if (this.hashStartKeyOrReserved[match.value[0]] /*this.regexStartKeyOrReserved.test(match.value[0])*/ && this.regexIsKeywordOrReserved.test(match.value)) match = this.failsafe('FunctionNameMustNotBeReserved', match);
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- }
- match = this.eatFunctionParametersAndBody(match, stack, true, funcExprToken); // first token after func-expr is div
-
- while (match.value == '++' || match.value == '--') {
- this.failignore('InvalidPostfixOperandFunction', match, stack);
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- }
-
- if (this.ast) { //#ifdef FULL_AST
- // restore stack and scope
- stack = oldstack;
- this.scope = oldscope;
- } //#endif
- } else if (match.name <= 6 || match.name == 15/*TAG*/) { // IDENTIFIER STRING_SINGLE STRING_DOUBLE NUMERIC_HEX NUMERIC_DEC REG_EX or TAG
- // save it in case it turns out to be a label.
- var possibleLabel = match;
-
- // validate the identifier, if any
- if (match.name == 2/*IDENTIFIER*/) {
- if (
- // this, null, true, false are actually allowed here
- !this.regexLiteralKeywords.test(match.value) &&
- // other reserved words are not
- this.hashStartKeyOrReserved[match.value[0]] /*this.regexStartKeyOrReserved.test(match.value[0])*/ && this.regexIsKeywordOrReserved.test(match.value)
- ) {
- // if break/continue, we skipped the unary operator check so throw the proper error here
- if (isBreakOrContinueArg) {
- this.failignore('BreakOrContinueArgMustBeJustIdentifier', match, stack);
- } else if (match.value == 'else') {
- this.failignore('DidNotExpectElseHere', match, stack);
- } else {
- //if (mayParseLabeledStatementInstead) {new ZeParser.Error('LabelsMayNotBeReserved', match);
- // TOFIX: lookahead to see if colon is following. throw label error instead if that's the case
- // any forbidden keyword at this point is likely to be a statement start.
- // its likely that the parser will take a while to recover from this point...
- this.failignore('UnexpectedToken', match, stack);
- // TOFIX: maybe i should just return at this point. cut my losses and hope for the best.
- }
- }
-
- // only accept assignments after a member expression (identifier or ending with a [] suffix)
- acceptAssignment = true;
- } else if (isBreakOrContinueArg) match = this.failsafe('BreakOrContinueArgMustBeJustIdentifier', match);
-
- // the current match is the lead value being queried. tag it that way
- if (this.ast) { //#ifdef FULL_AST
- // dont mark labels
- if (!isBreakOrContinueArg) {
- match.meta = 'lead value';
- match.leadValue = true;
- }
- } //#endif
-
-
- // ok. gobble it.
- match = this.tokenizer.storeCurrentAndFetchNextToken(true, match, stack); // division allowed
-
- // now check for labeled statement (if mayParseLabeledStatementInstead then the first token for this expression must be an (unreserved) identifier)
- if (mayParseLabeledStatementInstead && match.value == ':') {
- if (possibleLabel.name != 2/*IDENTIFIER*/) {
- // label was not an identifier
- // TOFIX: this colon might be a different type of error... more analysis required
- this.failignore('LabelsMayOnlyBeIdentifiers', match, stack);
- }
-
- mayParseLabeledStatementInstead = true; // mark label parsed (TOFIX:speed?)
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
-
- possibleLabel.isLabel = true;
- if (this.ast) { //#ifdef FULL_AST
- delete possibleLabel.meta; // oh oops, it's not a lead value.
-
- possibleLabel.isLabelDeclaration = true;
- this.statementLabels.push(possibleLabel.value);
-
- stack.desc = 'labeled statement';
- } //#endif
-
- var errorIdToReplace = this.errorStack.length;
- // eat another statement now, its the body of the labeled statement (like if and while)
- match = this.eatStatement(false, match, stack);
-
- // if no statement was found, check here now and correct error
- if (match.error && match.error.msg == ZeParser.Errors.UnableToParseStatement.msg) {
- // replace with better error...
- match.error = new ZeParser.Error('LabelRequiresStatement');
- // also replace on stack
- this.errorStack[errorIdToReplace] = match.error;
- }
-
- match.wasLabel = true;
-
- return match;
- }
-
- mayParseLabeledStatementInstead = false;
- } else if (match.value == '}') {
- // ignore... its certainly the end of this expression, but maybe asi can be applied...
- // it might also be an object literal expecting more, but that case has been covered else where.
- // if it turns out the } is bad after all, .parse() will try to recover
- } else if (match.name == 14/*error*/) {
- do {
- if (match.tokenError) {
- var pe = new ZeParser.Error('TokenizerError', match);
- pe.msg += ': '+match.error.msg;
- this.errorStack.push(pe);
-
- this.failSpecial({start:match.start,stop:match.start,name:14/*error*/,error:pe}, match, stack)
- }
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- } while (match.name == 14/*error*/);
- } else if (match.name == 12/*eof*/) {
- // cant parse any further. you're probably just typing...
- return match;
- } else {
- //if (!this.errorStack.length && match.name != 12/*eof*/) console.log(["unknown token", match, stack, Gui.escape(this.input)]);
- this.failignore('UnknownToken', match, stack);
- // we cant really ignore this. eat the token and try again. possibly you're just typing?
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- }
-
- // search for "value" suffix. property access and call parens.
- while (match.value == '.' || match.value == '[' || match.value == '(') {
- if (isBreakOrContinueArg) match = this.failsafe('BreakOrContinueArgMustBeJustIdentifier', match);
-
- if (match.value == '.') {
- // property access. read in an IdentifierName (no keyword checks). allow assignments
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.name != 2/*IDENTIFIER*/) this.failignore('PropertyNamesMayOnlyBeIdentifiers', match, stack);
- if (this.ast) { //#ifdef FULL_AST
- match.isPropertyName = true;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(true, match, stack); // may parse div
- acceptAssignment = true;
- } else if (match.value == '[') {
- if (this.ast) { //#ifdef FULL_AST
- var lhsb = match;
- match.propertyAccessStart = true;
- } //#endif
- // property access, read expression list. allow assignments
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value))) {
- if (match.value == ']') match = this.failsafe('SquareBracketsMayNotBeEmpty', match);
- else match = this.failsafe('SquareBracketExpectsExpression', match);
- }
- match = this.eatExpressions(false, match, stack);
- if (match.value != ']') match = this.failsafe('UnclosedSquareBrackets', match);
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhsb;
- match.propertyAccessStop = true;
- lhsb.twin = match;
-
- if (stack[stack.length-1].desc == 'expressions') {
- // create ref to this expression group to the opening bracket
- lhsb.expressionArg = stack[stack.length-1];
- }
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(true, match, stack); // might be div
- acceptAssignment = true;
- } else if (match.value == '(') {
- if (this.ast) { //#ifdef FULL_AST
- var lhp = match;
- match.isCallExpressionStart = true;
- if (news) {
- match.parensBelongToNew = true;
- --news;
- }
- } //#endif
- // call expression, eat optional expression list, disallow assignments
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value)) match = this.eatExpressions(false, match, stack); // arguments are optional
- if (match.value != ')') match = this.failsafe('UnclosedCallParens', match);
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhp;
- lhp.twin = match;
- match.isCallExpressionStop = true;
-
- if (stack[stack.length-1].desc == 'expressions') {
- // create ref to this expression group to the opening bracket
- lhp.expressionArg = stack[stack.length-1];
- }
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(true, match, stack); // might be div
- acceptAssignment = false;
- }
- }
-
- // check for postfix operators ++ and --
- // they are stronger than the + or - binary operators
- // they can be applied to any lhs (even when it wouldnt make sense)
- // if there was a newline, it should get an ASI
- if ((match.value == '++' || match.value == '--') && !match.newline) {
- if (isBreakOrContinueArg) match = this.failsafe('BreakOrContinueArgMustBeJustIdentifier', match);
- match = this.tokenizer.storeCurrentAndFetchNextToken(true, match, stack); // may parse div
- }
-
- if (this.ast) { //#ifdef FULL_AST
- // restore "expression" stack
- stack = estack;
- } //#endif
- // now see if there is an operator following...
-
- do { // this do allows us to parse multiple ternary expressions in succession without screwing up.
- var ternary = false;
- if (
- (!forHeader && match.value == 'in') || // one of two named binary operators, may not be first expression in for-header (when semi's occur in the for-header)
- (match.value == 'instanceof') || // only other named binary operator
- ((match.name == 11/*PUNCTUATOR*/) && // we can only expect a punctuator now
- (match.isAssignment = this.regexAssignments.test(match.value)) || // assignments are only okay with proper lhs
- this.regexNonAssignmentBinaryExpressionOperators.test(match.value) // test all other binary operators
- )
- ) {
- if (match.isAssignment) {
- if (!acceptAssignment) this.failignore('IllegalLhsForAssignment', match, stack);
- else if (parsedNonAssignmentOperator) this.failignore('AssignmentNotAllowedAfterNonAssignmentInExpression', match, stack);
- }
- if (isBreakOrContinueArg) match = this.failsafe('BreakOrContinueArgMustBeJustIdentifier', match);
-
- if (!match.isAssignment) parsedNonAssignmentOperator = true; // last allowed assignment
- if (this.ast) { //#ifdef FULL_AST
- match.isBinaryOperator = true;
- // we build a stack to ensure any whitespace doesnt break the 1+(n*2) children rule for expressions
- var ostack = stack;
- stack = [];
- stack.desc = 'operator-expression';
- stack.isBinaryOperator = true;
- stack.sub = match.value;
- stack.nextBlack = match.tokposb;
- ostack.sub = match.value;
- stack.isAssignment = match.isAssignment;
- ostack.push(stack);
- } //#endif
- ternary = match.value == '?';
- // math, logic, assignment or in or instanceof
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
-
- if (this.ast) { //#ifdef FULL_AST
- // restore "expression" stack
- stack = ostack;
- } //#endif
-
- // minor exception to ternary operator, we need to parse two expressions nao. leave the trailing expression to the loop.
- if (ternary) {
- // LogicalORExpression "?" AssignmentExpression ":" AssignmentExpression
- // so that means just one expression center and right.
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value))) this.failignore('InvalidCenterTernaryExpression', match, stack);
- match = this.eatExpressions(false, match, stack, true, forHeader); // only one expression allowed inside ternary center/right
-
- if (match.value != ':') {
- if (match.value == ',') match = this.failsafe('TernarySecondExpressionCanNotContainComma', match);
- else match = this.failsafe('UnfinishedTernaryOperator', match);
- }
- if (this.ast) { //#ifdef FULL_AST
- // we build a stack to ensure any whitespace doesnt break the 1+(n*2) children rule for expressions
- var ostack = stack;
- stack = [];
- stack.desc = 'operator-expression';
- stack.sub = match.value;
- stack.nextBlack = match.tokposb;
- ostack.sub = match.value;
- stack.isAssignment = match.isAssignment;
- ostack.push(stack);
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (this.ast) { //#ifdef FULL_AST
- stack = ostack;
- } //#endif
- // rhs of the ternary can not contain a comma either
- match = this.eatExpressions(false, match, stack, true, forHeader); // only one expression allowed inside ternary center/right
- }
- } else {
- parseAnotherExpression = false;
- }
- } while (ternary); // if we just parsed a ternary expression, we need to check _again_ whether the next token is a binary operator.
-
- // start over. match is the rhs for the lhs we just parsed, but lhs for the next expression
- if (parseAnotherExpression && !(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value))) {
- // no idea what to do now. lets just ignore and see where it ends. TOFIX: maybe just break the loop or return?
- this.failignore('InvalidRhsExpression', match, stack);
- }
- }
-
- if (this.ast) { //#ifdef FULL_AST
- // restore "expressions" stack
- stack = astack;
- } //#endif
-
- // at this point we should have parsed one AssignmentExpression
- // lets see if we can parse another one...
- mayParseLabeledStatementInstead = first = false;
- } while (!onlyOne && match.value == ',');
-
- if (this.ast) { //#ifdef FULL_AST
- // remove empty array
- if (!stack.length) pstack.length = pstack.length-1;
- pstack.numberOfExpressions = parsedExpressions;
- if (pstack[0]) pstack[0].numberOfExpressions = parsedExpressions;
- stack.expressionCount = parsedExpressions;
- } //#endif
- return match;
- },
- eatFunctionDeclaration: function(match, stack){ /*
-  * Parses a function declaration: the `function` keyword, the function name,
-  * and then (through eatFunctionParametersAndBody) the parameter list and body.
-  *
-  * match - current token; the caller is expected to have verified that it is
-  *         the `function` keyword (see the note further down).
-  * stack - token stack to store into; in AST mode a nested 'func decl' stack
-  *         is appended to it and used for everything parsed here.
-  * Returns the token handed back by eatFunctionParametersAndBody.
-  *
-  * In AST mode (this.ast) this additionally registers the keyword token in
-  * the enclosing scope's `functions` list, opens a new scope for the function,
-  * adds the function name to the enclosing scope, and restores the enclosing
-  * scope before returning.
-  */
- if (this.ast) { //#ifdef FULL_AST
- stack.push(stack = []); // append a fresh child stack to the parent, then continue with the child
- var prevscope = this.scope;
- stack.desc = 'func decl';
- stack.isFunction = true;
- stack.nextBlack = match.tokposb;
- if (!this.scope.functions) this.scope.functions = [];
- match.functionId = this.scope.functions.length; // index this keyword gets in the enclosing scope's function list
- this.scope.functions.push(match);
- // add new scope
- match.scope = stack.scope = this.scope = [
- this.scope, // add current scope (build scope chain up-down)
- // Object.create(null,
- {value:'this', isDeclared:true, isEcma:true, functionStack:stack},
- // Object.create(null,
- {value:'arguments', isDeclared:true, isEcma:true, varType:['Object']}
- ];
- // reference back to the function keyword token that caused this scope
- this.scope.scopeFor = match;
- match.targetScope = prevscope; // consistency
-
- match.functionStack = stack;
-
- match.isFuncDeclKeyword = true;
- } //#endif
- // only place that this function is used already checks whether next token is function
- var functionKeyword = match;
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.name != 2/*IDENTIFIER*/) match = this.failsafe('FunctionDeclarationsMustHaveName', match);
- if (this.hashStartKeyOrReserved[match.value[0]] /*this.regexStartKeyOrReserved.test(match.value[0])*/ && this.regexIsKeywordOrReserved.test(match.value)) this.failignore('FunctionNameMayNotBeReserved', match, stack);
- if (this.ast) { //#ifdef FULL_AST
- functionKeyword.funcName = match;
- prevscope.push({value:match.value}); // the declared name goes into the enclosing scope, not the function's own scope
- match.meta = 'func decl name'; // that's what it is, really
- match.varType = ['Function'];
- match.functionStack = stack;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatFunctionParametersAndBody(match, stack, false, functionKeyword); // first token after func-decl is regex
- if (this.ast) { //#ifdef FULL_AST
- // restore previous scope
- this.scope = prevscope;
- } //#endif
- return match;
- },
- eatObjectLiteralColonAndBody: function(match, stack){
- if (this.ast) { //#ifdef FULL_AST
- var propValueStack = stack;
- stack = [];
- stack.desc = 'objlit pair colon';
- stack.nextBlack = match.tokposb;
- propValueStack.push(stack);
- } //#endif
- if (match.value != ':') match = this.failsafe('ObjectLiteralExpectsColonAfterName', match);
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (this.ast) { //#ifdef FULL_AST
- stack = propValueStack;
- } //#endif
-
- // this might actually fail due to ASI optimization.
- // if the property name does not exist and it is the last item
- // of the objlit, the expression parser will see an unexpected
- // } and ignore it, giving some leeway to apply ASI. of course,
- // that doesnt work for objlits. but we dont want to break the
- // existing mechanisms. so we check this differently... :)
- var prevMatch = match;
- match = this.eatExpressions(false, match, stack, true); // only one expression
- if (match == prevMatch) match = this.failsafe('ObjectLiteralMissingPropertyValue', match);
-
- return match;
- },
- eatFunctionParametersAndBody: function(match, stack, div, funcToken){
- // div: the first token _after_ a function expression may be a division...
- if (match.value != '(') match = this.failsafe('ExpectingFunctionHeaderStart', match);
- else if (this.ast) { //#ifdef FULL_AST
- var lhp = match;
- funcToken.lhp = match;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.name == 2/*IDENTIFIER*/) { // params
- if (this.hashStartKeyOrReserved[match.value[0]] /*this.regexStartKeyOrReserved.test(match.value[0])*/ && this.regexIsKeywordOrReserved.test(match.value)) this.failignore('FunctionArgumentsCanNotBeReserved', match, stack);
- if (this.ast) { //#ifdef FULL_AST
- if (!funcToken.paramNames) funcToken.paramNames = [];
- stack.paramNames = funcToken.paramNames;
- funcToken.paramNames.push(match);
- this.scope.push({value:match.value}); // add param name to scope
- match.meta = 'parameter';
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- while (match.value == ',') {
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.name != 2/*IDENTIFIER*/) {
- // example: if name is 12, the source is incomplete...
- this.failignore('FunctionParametersMustBeIdentifiers', match, stack);
- } else if (this.hashStartKeyOrReserved[match.value[0]] /*this.regexStartKeyOrReserved.test(match.value[0])*/ && this.regexIsKeywordOrReserved.test(match.value)) {
- this.failignore('FunctionArgumentsCanNotBeReserved', match, stack);
- }
- if (this.ast) { //#ifdef FULL_AST
- // Object.create(null,
- this.scope.push({value:match.value}); // add param name to scope
- match.meta = 'parameter';
- if (match.name == 2/*IDENTIFIER*/) funcToken.paramNames.push(match);
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- }
- }
- if (this.ast) { //#ifdef FULL_AST
- if (lhp) {
- match.twin = lhp;
- lhp.twin = match;
- funcToken.rhp = match;
- }
- } //#endif
- if (match.value != ')') match = this.failsafe('ExpectedFunctionHeaderClose', match); // TOFIX: can be various things here...
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatFunctionBody(match, stack, div, funcToken);
- return match;
- },
- eatFunctionBody: function(match, stack, div, funcToken){
- if (this.ast) { //#ifdef FULL_AST
- stack.push(stack = []);
- stack.desc = 'func body';
- stack.nextBlack = match.tokposb;
-
- // create EMPTY list of functions. labels cannot cross function boundaries
- var labelBackup = this.statementLabels;
- this.statementLabels = [];
- stack.labels = this.statementLabels;
- } //#endif
-
- // if div, a division can occur _after_ this function expression
- //this.stats.eatFunctionBody = (+//this.stats.eatFunctionBody||0)+1;
- if (match.value != '{') match = this.failsafe('ExpectedFunctionBodyCurlyOpen', match);
- if (this.ast) { //#ifdef FULL_AST
- var lhc = match;
- if (funcToken) funcToken.lhc = lhc;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatSourceElements(match, stack);
- if (match.value != '}') match = this.failsafe('ExpectedFunctionBodyCurlyClose', match);
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhc;
- lhc.twin = match;
- if (funcToken) funcToken.rhc = match;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(div, match, stack);
-
- if (this.ast) { //#ifdef FULL_AST
- // restore label set
- this.statementLabels = labelBackup;
- } //#endif
-
- return match;
- },
- eatVar: function(match, stack){
- if (this.ast) { //#ifdef FULL_AST
- stack.push(stack = []);
- stack.desc = 'statement';
- stack.sub = 'var';
- stack.nextBlack = match.tokposb;
- match.stack = stack;
- match.isVarKeyword = true;
- } //#endif
- match = this.eatVarDecl(match, stack);
- match = this.eatSemiColon(match, stack);
-
- return match;
- },
- eatVarDecl: function(match, stack, forHeader){ /*
-  * Parses the declaration list of a `var`: one or more `name` or
-  * `name = expression` entries separated by commas. The terminating
-  * semicolon is not consumed here.
-  *
-  * match     - current token, the `var` keyword.
-  * stack     - token stack; in AST mode a nested 'var decl' stack is appended
-  *             to it, holding one 'single var decl' stack per declared name.
-  * forHeader - passed on to eatExpressions when an initialiser is parsed.
-  * Returns the first token after the last declaration that is not a comma.
-  *
-  * In AST mode each declared name is added to the target scope and the
-  * number of loop iterations is stored as `varsDeclared` on the 'var decl'
-  * stack.
-  */
- // assumes match is indeed the identifier 'var'
- if (this.ast) { //#ifdef FULL_AST
- stack.push(stack = []);
- stack.desc = 'var decl';
- stack.nextBlack = match.tokposb;
-
- var targetScope = this.scope;
- while (targetScope.catchScope) targetScope = targetScope[0]; // skip scopes flagged catchScope; element 0 is presumably the parent scope (as in the scopes built by eatFunctionDeclaration) -- confirm for catch scopes
- } //#endif
- var first = true;
- var varsDeclared = 0;
- do {
- ++varsDeclared;
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack); // start: var, iteration: comma
- if (this.ast) { //#ifdef FULL_AST
- var declStack = stack;
- var stack = []; // re-declares the `stack` parameter; this is the same binding, so it is a plain reassignment
- stack.desc = 'single var decl';
- stack.varStack = declStack; // reference to the var statement stack, it might hook to jsdoc needed for these vars
- stack.nextBlack = match.tokposb;
- declStack.push(stack);
-
- var singleDecStack = stack;
- stack = [];
- stack.desc = 'sub-expression';
- stack.nextBlack = match.tokposb;
- singleDecStack.push(stack);
- } //#endif
-
- // next token should be a valid identifier
- if (match.name == 12/*eof*/) {
- if (first) match = this.failsafe('VarKeywordMissingName', match);
- // else, ignore. TOFIX: return?
- else match = this.failsafe('IllegalTrailingComma', match);
- } else if (match.name != 2/*IDENTIFIER*/) {
- match = this.failsafe('VarNamesMayOnlyBeIdentifiers', match);
- } else if (this.hashStartKeyOrReserved[match.value[0]] /*this.regexStartKeyOrReserved.test(match.value[0])*/ && this.regexIsKeywordOrReserved.test(match.value)) {
- match = this.failsafe('VarNamesCanNotBeReserved', match);
- }
- // mark the match as being a variable name. we need it for lookup later :)
- if (this.ast) { //#ifdef FULL_AST
- match.meta = 'var name';
- targetScope.push({value:match.value});
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
-
- if (this.ast) { //#ifdef FULL_AST
- stack = singleDecStack; // leave the 'sub-expression' stack that holds the name
- } //#endif
-
- // next token should either be a = , or ;
- // if = parse an expression and optionally a comma
- if (match.value == '=') {
- if (this.ast) { //#ifdef FULL_AST
- singleDecStack = stack;
- stack = [];
- stack.desc = 'operator-expression';
- stack.sub = '=';
- stack.nextBlack = match.tokposb;
- singleDecStack.push(stack);
-
- stack.isAssignment = true;
- } //#endif
- match.isInitialiser = true; // set regardless of AST mode
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (this.ast) { //#ifdef FULL_AST
- stack = singleDecStack; // the initialiser expression itself goes into the 'single var decl' stack
- } //#endif
-
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || match.name == 14/*error*/ || this.regexLhsStart.test(match.value))) match = this.failsafe('VarInitialiserExpressionExpected', match);
- match = this.eatExpressions(false, match, stack, true, forHeader); // only one expression
- // var statement: comma or semi now
- // for statement: semi, comma or 'in'
- }
- if (this.ast) { //#ifdef FULL_AST
- stack = declStack; // back to the 'var decl' stack for the next comma / the return
- } //#endif
-
- // determines proper error message in one case
- first = false;
- // keep parsing name(=expression) sequences as long as you see a comma here
- } while (match.value == ',');
-
- if (this.ast) { //#ifdef FULL_AST
- stack.varsDeclared = varsDeclared;
- } //#endif
-
- return match;
- },
-
- eatIf: function(match, stack){ /*
-  * Parses an if statement: `if ( expression ) statement [else statement]`.
-  *
-  * match - current token, the `if` keyword.
-  * stack - token stack; in AST mode a nested 'statement' stack with sub 'if'
-  *         is appended to it. An else branch is parsed by eatElse, which
-  *         receives this same (in AST mode: nested) stack.
-  * Returns the token handed back by the last statement parser that ran
-  * (eatStatement, or eatElse when an else branch is present).
-  */
- if (this.ast) { //#ifdef FULL_AST
- stack.push(stack = []);
- stack.desc = 'statement';
- stack.sub = 'if';
- stack.hasElse = false;
- stack.nextBlack = match.tokposb;
- } //#endif
- // (
- // expression
- // )
- // statement
- // [else statement]
- var ifKeyword = match;
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.value != '(') match = this.failsafe('ExpectedStatementHeaderOpen', match);
- if (this.ast) { //#ifdef FULL_AST
- var lhp = match; // opening paren, linked to the closing one below
- match.statementHeaderStart = true;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value))) match = this.failsafe('StatementHeaderIsNotOptional', match);
- match = this.eatExpressions(false, match, stack);
- if (match.value != ')') match = this.failsafe('ExpectedStatementHeaderClose', match);
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhp;
- match.statementHeaderStop = true;
- lhp.twin = match;
-
- if (stack[stack.length-1].desc == 'expressions') {
- // create ref to this expression group to the opening bracket
- lhp.expressionArg = stack[stack.length-1];
- }
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatStatement(false, match, stack);
-
- // match might be null here... (if the if-statement was end part of the source)
- if (match && match.value == 'else') {
- if (this.ast) { //#ifdef FULL_AST
- ifKeyword.hasElse = match; // the `if` keyword token points at its `else` keyword token
- } //#endif
- match = this.eatElse(match, stack);
- }
-
- return match;
- },
- eatElse: function(match, stack){
- if (this.ast) { //#ifdef FULL_AST
- stack.hasElse = true;
- stack.push(stack = []);
- stack.desc = 'statement';
- stack.sub = 'else';
- stack.nextBlack = match.tokposb;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatStatement(false, match, stack);
-
- return match;
- },
- eatDo: function(match, stack){ /*
-  * Parses a do-while statement: `do statement while ( expression )` plus the
-  * statement terminator handled by eatSemiColon.
-  *
-  * match - current token, the `do` keyword.
-  * stack - token stack; in AST mode a nested 'statement' stack with sub 'do'
-  *         (flagged isIteration) is appended to it.
-  * Returns the token handed back by eatSemiColon.
-  */
- if (this.ast) { //#ifdef FULL_AST
- stack.push(stack = []);
- stack.desc = 'statement';
- stack.sub = 'do';
- stack.isIteration = true;
- stack.nextBlack = match.tokposb;
- this.statementLabels.push(''); // add "empty"
- var doToken = match; // kept so the `while` keyword token can point back at its `do`
- } //#endif
- // statement
- // while
- // (
- // expression
- // )
- // semi-colon
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatStatement(false, match, stack);
- if (match.value != 'while') match = this.failsafe('DoShouldBeFollowedByWhile', match);
- if (this.ast) { //#ifdef FULL_AST
- match.hasDo = doToken;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.value != '(') match = this.failsafe('ExpectedStatementHeaderOpen', match);
- if (this.ast) { //#ifdef FULL_AST
- var lhp = match; // opening paren, linked to the closing one below
- match.statementHeaderStart = true;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value))) match = this.failsafe('StatementHeaderIsNotOptional', match);
- match = this.eatExpressions(false, match, stack);
- if (match.value != ')') match = this.failsafe('ExpectedStatementHeaderClose', match);
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhp;
- match.statementHeaderStop = true;
- match.isForDoWhile = true; // prevents missing block warnings
- lhp.twin = match;
-
- if (stack[stack.length-1].desc == 'expressions') {
- // create ref to this expression group to the opening bracket
- lhp.expressionArg = stack[stack.length-1];
- }
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatSemiColon(match, stack); // TOFIX: this is not optional according to the spec, but browsers apply ASI anyways
-
- return match;
- },
- eatWhile: function(match, stack){
- if (this.ast) { //#ifdef FULL_AST
- stack.push(stack = []);
- stack.desc = 'statement';
- stack.sub = 'while';
- stack.isIteration = true;
- stack.nextBlack = match.tokposb;
- this.statementLabels.push(''); // add "empty"
- } //#endif
-
- // (
- // expression
- // )
- // statement
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.value != '(') match = this.failsafe('ExpectedStatementHeaderOpen', match);
- if (this.ast) { //#ifdef FULL_AST
- var lhp = match;
- match.statementHeaderStart = true;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (!(/*is left hand side start?*/ match.name <= 6 || match.name == 15/*TAG*/ || this.regexLhsStart.test(match.value))) match = this.failsafe('StatementHeaderIsNotOptional', match);
- match = this.eatExpressions(false, match, stack);
- if (match.value != ')') match = this.failsafe('ExpectedStatementHeaderClose', match);
- if (this.ast) { //#ifdef FULL_AST
- match.twin = lhp;
- match.statementHeaderStop = true;
- lhp.twin = match;
-
- if (stack[stack.length-1].desc == 'expressions') {
- // create ref to this expression group to the opening bracket
- lhp.expressionArg = stack[stack.length-1];
- }
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- match = this.eatStatement(false, match, stack);
-
- return match;
- },
-
- eatFor: function(match, stack){
- if (this.ast) { //#ifdef FULL_AST
- stack.push(stack = []);
- stack.desc = 'statement';
- stack.sub = 'for';
- stack.isIteration = true;
- stack.nextBlack = match.tokposb;
- this.statementLabels.push(''); // add "empty"
- } //#endif
- // either a for(..in..) or for(..;..;..)
- // start eating an expression but refuse to parse
- // 'in' on the top-level of that expression. they are fine
- // in sub-levels (group, array, etc). Now the expression
- // must be followed by either ';' or 'in'. Else throw.
- // Branch on that case, ; requires two.
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
- if (match.value != '(') match = this.failsafe('ExpectedStatementHeaderOpen', match);
- if (this.ast) { //#ifdef FULL_AST
- var lhp = match;
- match.statementHeaderStart = true;
- match.forHeaderStart = true;
- } //#endif
- match = this.tokenizer.storeCurrentAndFetchNextToken(false, match, stack);
-
- // for (either case) may start with var, in which case you'll parse a var declaration before encountering the 'in' or first semi.
- if (match.value == 'var') {
- match = this.eatVarDecl(match, stack, true);
- } else if (match.value != ';') { // expressions are optional in for-each