Permalink
Browse files

better token

  • Loading branch information...
1 parent 7d99cb9 commit abd6938e5d64fa802f1c04e0e8b60ea6bdd39317 @elmerbulthuis committed Nov 2, 2011
Showing with 37 additions and 55 deletions.
  1. +27 −27 index.js
  2. +1 −1 package.json
  3. +0 −18 test/html.html
  4. +2 −2 test/js.js
  5. +1 −1 test/switching.js
  6. +6 −6 test/tag.js
View
@@ -61,7 +61,7 @@ module.exports = function(tokenCallback, expressionSet, options) {
/**
matches in the current buffer. Serves as a cache.
*/
- var matchSet = {};
+ var tokenSet = {};
/**
What categories should we look for?
@@ -95,7 +95,7 @@ module.exports = function(tokenCallback, expressionSet, options) {
flush(0);
tokenCallback.call(tokenizer, null, buffer);
buffer = '';
- matchSet = {};
+ tokenSet = {};
}
/**
@@ -108,7 +108,7 @@ module.exports = function(tokenCallback, expressionSet, options) {
/*
the offset to which we are going to flush.
*/
- var offset = token.match.index + token.match[0].length;
+ var offset = token.index + token[0].length;
/*
trim the buffer.
@@ -118,20 +118,20 @@ module.exports = function(tokenCallback, expressionSet, options) {
/*
sync the cache with the buffer.
*/
- for(var category in matchSet) {
- var match = matchSet[category];
+ for(var category in tokenSet) {
+ var token = tokenSet[category];
/*
when the index of the cached match is before the
offset, it is trimmed off! so remove it from the
cache
*/
- if(match.index < offset) delete matchSet[category];
+ if(token.index < offset) delete tokenSet[category];
/*
if the cached match is not before the offset, it is
still in the buffer but is moved a little toward the
beginning. So adjust the index.
*/
- else match.index -= offset;
+ else token.index -= offset;
}
}
@@ -141,17 +141,17 @@ module.exports = function(tokenCallback, expressionSet, options) {
finds the next token in the buffer.
*/
function nextToken() {
- var token = null;
+ var foundToken = null;
categoryList.forEach(function(category) {
var expression = expressionSet[category];
/*
- look for a cached match
+ look for a cached token
*/
- var match = matchSet[category];
+ var token = tokenSet[category];
/*
- if there is no cached match
+ if there is no cached token
*/
- if(!match) {
+ if(!token) {
/*
if expression is a string we are just going
to look for the string.
@@ -160,47 +160,47 @@ module.exports = function(tokenCallback, expressionSet, options) {
var index = buffer.indexOf(expression);
/*
if we found the string (remember, ~-1 == 0) then
- mimic the match object returned by RegExp
+ create a token object
*/
if(~index) {
- match = extend([
+ token = extend([
expression
], {
index: index
+ , category: category
});
}
}
/*
- if it's not a string, it should be a RegExp. Just execute
- it.
+ if it's not a string, it must be a RegExp. Execute it
+ and make it a token
*/
else {
- match = expression.exec(buffer);
+ var match = expression.exec(buffer);
+ if(match) {
+ token = extend(match, {
+ category: category
+ });
+ }
}
/*
if there is a match, cache it!
*/
- if(match) matchSet[category] = match;
+ if(token) tokenSet[category] = token;
}
/*
if a token was found for this category and either no token has been
found yet, or this one occurs earlier in the buffer, make it the current token.
*/
- if (match && (!token || match.index < token.match.index)) {
- /*
- define a new token.
- */
- token = {
- category: category
- , match: match
- };
+ if (token && (!foundToken || token.index < foundToken.index)) {
+ foundToken = token;
}
});
/*
if there is no token found, this will return null
*/
- return token;
+ return foundToken;
}
/**
View
@@ -1,7 +1,7 @@
{
"name": "2kenizer"
, "description": "efficient tokenizer"
- , "version": "0.0.1"
+ , "version": "0.0.2"
, "author": "Elmer Bulthuis <elmerbulthuis@gmail.com>"
, "repositories": [{
"type": "git",
View
@@ -1,18 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-<title></title>
-</head>
-<body>
-
-<p>
-Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed nec sapien varius urna aliquet interdum. Nulla ultricies magna ac purus tincidunt vitae accumsan risus feugiat. Pellentesque lobortis viverra volutpat. Nullam mattis mollis lorem, tincidunt tempor massa rhoncus non. Integer luctus felis at ante scelerisque sed fermentum dolor volutpat. In ut ante at arcu rutrum viverra at non lectus. Aliquam erat volutpat. Curabitur nec risus vel dolor eleifend semper quis ut nisl. Proin neque nibh, malesuada et adipiscing eget, imperdiet sed risus. Curabitur malesuada ullamcorper tortor, quis pulvinar tellus pellentesque eget. Curabitur non sem neque, ut adipiscing odio. Donec sollicitudin feugiat lectus, eu porttitor urna faucibus id. In interdum pharetra semper. Cras in nisi accumsan dui auctor rhoncus.
-</p>
-
-<p>
-Morbi auctor elit nec libero pretium eleifend non non diam. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Fusce accumsan vehicula neque et bibendum. Maecenas laoreet arcu sit amet elit convallis a dapibus turpis tincidunt. Curabitur quis ligula vel orci varius pretium. Sed imperdiet scelerisque arcu vitae placerat. Aenean facilisis nisl sed risus consectetur ac congue diam hendrerit. Fusce imperdiet turpis sapien. Sed erat diam, accumsan sed congue ac, varius eu massa. Sed consequat mauris nec neque fringilla rhoncus. Nam id imperdiet justo. Ut in est non enim ornare varius quis vel sem. Nunc commodo elit dignissim felis elementum mollis.
-</p>
-
-</body>
-</html>
-
View
@@ -6,7 +6,7 @@ function parse(data) {
var tokenizer = new Tokenizer(function(token, buffer) {
if(!token) return;
- //console.log(token.category, token.match[0]);
+ //console.log(token.category, token[0]);
if(token.category in tokenActions) tokenActions[token.category](token, buffer);
else throw "invalid category '" + token.category + "' in this context";
@@ -114,7 +114,7 @@ function testFile(filePath, options) {
return;
}
- //console.log('[' + match[3] + ']');
+ console.log('[' + match[3] + ']');
try {
parse(fs.readFileSync(match[0], 'utf-8'));
}
View
@@ -7,7 +7,7 @@ var tokenizer = new Tokenizer(function(token, buffer) {
switch(token.category) {
case 'A':
- if(token.match[0] == 'yz') {
+ if(token[0] == 'yz') {
tokenizer.filter('0');
}
break;
View
@@ -11,11 +11,11 @@ function parse(data) {
var tokenizer = new Tokenizer(function(token, buffer) {
if(!token) return;
- for(var index = buffer.indexOf(newline); ~index && index < token.match.index; index = buffer.indexOf(newline, index + newline.length)) {
+ for(var index = buffer.indexOf(newline); ~index && index < token.index; index = buffer.indexOf(newline, index + newline.length)) {
line++;
}
- //console.log(token.match[0], token.category);
+ //console.log(token[0], token.category);
if(token.category in tokenActions) tokenActions[token.category](token, buffer);
else throw "invalid category '" + token.category + "' in this context";
@@ -50,14 +50,14 @@ function parse(data) {
, "tag": function(token, buffer) {
enterContext({
category: token.category
- , tag: token.match[1]
+ , tag: token[1]
, filter: ["tag1"]
});
}
, "tag1": function(token, buffer) {
var tag = currentContext.tag;
exitContext();
- if(token.match[1] != "/" && !~voidTags.indexOf(tag.toLowerCase())) {
+ if(token[1] != "/" && !~voidTags.indexOf(tag.toLowerCase())) {
enterContext({
category: token.category
, tag: tag
@@ -68,7 +68,7 @@ function parse(data) {
, "tag2": function(token, buffer) {
var tag = currentContext.tag;
- assert.equal(tag, token.match[1], "<" + tag + "> at line " + line + " should be closed before closing <" + token.match[1] + ">");
+ assert.equal(tag, token[1], "<" + tag + "> at line " + line + " should be closed before closing <" + token[1] + ">");
exitContext();
}
@@ -104,7 +104,7 @@ function testFile(filePath, options) {
return;
}
- //console.log('[' + match[3] + ']');
+ console.log('[' + match[3] + ']');
try {
parse(fs.readFileSync(match[0], 'utf-8'));
}

0 comments on commit abd6938

Please sign in to comment.