Skip to content

Commit

Permalink
Support for unicode identifiers, range tested by a metacircular test.
Browse files Browse the repository at this point in the history
  • Loading branch information
Gregor Richards authored and Gregor Richards committed Apr 20, 2011
1 parent b0cc62c commit 18391f6
Showing 1 changed file with 24 additions and 3 deletions.
27 changes: 24 additions & 3 deletions lib/jslex.js
Expand Up @@ -361,14 +361,16 @@ Narcissus.lexer = (function() {
},

// FIXME: Unicode escape sequences
// FIXME: Unicode identifiers
lexIdent: function (ch) {
var token = this.token, input = this.source;

/* check for ASCII sequences, and only use expensive
* isValidIdentifierChar for non-ASCII */
do {
ch = input[this.cursor++];
} while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
(ch >= '0' && ch <= '9') || ch === '$' || ch === '_');
(ch >= '0' && ch <= '9') || ch === '$' || ch === '_' ||
(ch >= '\u007F' && isValidIdentifierChar(ch, false)));

this.cursor--; // Put the non-word character back.

Expand Down Expand Up @@ -408,7 +410,8 @@ Narcissus.lexer = (function() {
token.lineno = this.lineno;

var ch = input[this.cursor++];
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '$' || ch === '_') {
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '$' || ch === '_' ||
(ch >= '\u007F' && isValidIdentifierChar(ch, true))) {
this.lexIdent(ch);
} else if (scanOperand && ch === '/') {
this.lexRegExp(ch);
Expand Down Expand Up @@ -454,6 +457,24 @@ Narcissus.lexer = (function() {
},
};

/* Is this a valid identifier character? Since JS doesn't expose a
 * convenient way to determine if a character is in a particular Unicode
 * category, we use metacircularity to accomplish this: we build a tiny
 * property-access expression containing the character and ask the host
 * engine's own parser whether it accepts it.
 *
 * ch:    the candidate character; must be a string of exactly one UTF-16
 *        code unit. Anything else (undefined, "", longer strings) is
 *        rejected up front, because it would otherwise be spliced into
 *        eval'd source and could run arbitrary code or yield bogus results.
 * first: true to test ch as the first character of an identifier,
 *        false to test it as a subsequent character.
 *
 * Returns true if the host engine parses ch as an identifier character in
 * the requested position, false otherwise. Never throws. */
function isValidIdentifierChar(ch, first) {
    // SECURITY: ch is interpolated into eval'd code below, so refuse
    // anything that is not a single character before going any further.
    if (typeof ch !== "string" || ch.length !== 1) {
        return false;
    }

    // create an object to test this in; both spellings of the property are
    // defined so that a successful parse reads back `true`
    var x = {};
    x["x" + ch] = true;
    x[ch] = true;

    // then try: `x.<ch>` tests ch as an identifier start, `x.x<ch>` tests it
    // as an identifier part. Characters the parser treats as whitespace or
    // line terminators parse as plain `x.x` and read back undefined.
    try {
        return Boolean(eval("(x." + (first ? "" : "x") + ch + ")"));
    } catch (ex) {
        // A syntax error means the engine does not accept ch here.
        return false;
    }
}

return { Tokenizer: Tokenizer };

}());

0 comments on commit 18391f6

Please sign in to comment.