Permalink
Browse files

first commit

  • Loading branch information...
0 parents commit 7d99cb90eb0c568b421ecc2e13c9bd3788e0a53d @elmerbulthuis committed Oct 31, 2011
Showing with 589 additions and 0 deletions.
  1. +237 −0 index.js
  2. +17 −0 package.json
  3. +19 −0 readme.markdown
  4. +18 −0 test/html.html
  5. +130 −0 test/js.js
  6. +19 −0 test/simple.js
  7. +29 −0 test/switching.js
  8. +120 −0 test/tag.js
237 index.js
@@ -0,0 +1,237 @@
+/*!
+ * 2kenizer
+ * Copyright(c) 2011 Elmer Bulthuis <elmerbulthuis@gmail.com>
+ * MIT Licensed
+ */
+
+
+/**
+Copies the enumerable members of the second and following arguments onto
+the first argument. Sources are applied in argument order, so a key that
+appears in several sources ends up with the value of the last one.
+Falsy sources are skipped.
+
+@param {Object} o target object; it is mutated in place
+@returns {Object} the same object that was passed in as o
+*/
+function extend(o) {
+ var argumentCount = arguments.length;
+ for (var argumentIndex = 1; argumentIndex < argumentCount; argumentIndex++) { // index 0 is the target itself
+ var argument = arguments[argumentIndex];
+ if(!argument) continue; // tolerate a missing source, e.g. an omitted options object
+ for (var argumentKey in argument) { // no hasOwnProperty guard: inherited enumerable keys are copied as well
+ o[argumentKey] = argument[argumentKey];
+ }
+ }
+ return o;
+}
+
+/**
+Transforms an array-like object (like arguments) into a real array.
+
+@param {Object} list anything with a numeric length and indexed elements
+@returns {Array} a new array holding list[0] up to list[length - 1]
+*/
+function toArray(list) {
+ var result = [];
+ var count = list.length; // read once, before copying
+ for(var index = 0; index < count; index++) {
+ result.push(list[index]);
+ }
+ return result;
+}
+
+/**
+Takes all elements from list and appends them, in order, to another list
+(result). When an element is itself an array, its elements are appended
+instead, recursively, so arrays nested at any depth are flattened.
+
+@param {Array} list the (possibly nested) array to flatten
+@param {Array} [result] accumulator to append to; a new array is created
+when it is omitted
+@returns {Array} the accumulator, which is result when one was passed in
+*/
+function flatten(list, result) {
+ if(!result) result = [];
+ list.forEach(function(item) {
+ if(Array.isArray(item)) flatten(item, result); // recurse into the same accumulator
+ else result.push(item);
+ });
+ return result;
+}
+
+
+/**
+Efficient tokenizer. Buffers the text that is written to it, looks in the
+buffer for the categories defined in expressionSet and calls
+tokenCallback for every token found, earliest match first.
+
+The function attaches the methods write, end and filter to this.
+NOTE(review): because it relies on this, it is presumably meant to be
+invoked with new; confirm against the callers.
+
+@param {Function} tokenCallback called as tokenCallback(token, buffer)
+with this set to the tokenizer. token is {category, match}, where match
+has the shape of a RegExp exec result (match[0] and match.index), and
+buffer is the buffer as it was before the token was trimmed off. When
+end is called, the callback is invoked one last time with null as token
+and the unconsumed remainder of the buffer.
+@param {Object} expressionSet maps a category name to either a string,
+which is searched for literally, or an object with an exec method
+(a RegExp).
+@param {Object} [options] bufferLimit (default 4096): once the buffer is
+longer than this, write starts emitting tokens. bufferSize (default
+1024): the length the buffer is reduced to by such a flush, as long as
+tokens keep being found.
+*/
+module.exports = function(tokenCallback, expressionSet, options) {
+ var options = extend({bufferSize: 1024, bufferLimit: 4096}, options); // caller-supplied options override the defaults
+ var tokenizer = this;
+
+ /**
+ text that has been written but not yet consumed by a token.
+ */
+ var buffer = '';
+
+ /**
+ matches in the current buffer, keyed by category. Serves as a cache,
+ so a category is only searched again after its match is consumed.
+ NOTE(review): a cached match is not re-evaluated when more data is
+ appended to the buffer; verify that this is acceptable for
+ expressions that could match more text once it arrives.
+ */
+ var matchSet = {};
+
+ /**
+ What categories should we look for? Set by filter().
+ */
+ var categoryList;
+
+
+ /**
+ write data to the buffer and process it. Accepts any number of
+ arguments, which may be nested arrays. Each piece is added to the
+ buffer in chunks of at most bufferLimit characters, and whenever the
+ buffer grows beyond bufferLimit, tokens are emitted until it is back
+ at bufferSize or no more tokens are found.
+ */
+ function write() {
+ flatten(toArray(arguments)).forEach(function(argument) {
+ while(argument.length > 0) {
+ var chunk = argument.substring(0, options.bufferLimit); // assumes every piece is a string
+ argument = argument.substring(chunk.length); // what is left for the next round
+ buffer += chunk;
+ if(buffer.length > options.bufferLimit) {
+ flush(options.bufferSize);
+ }
+ }
+
+ });
+ }
+
+ /**
+ write the (optional) last pieces of data, emit every token that can
+ still be found, and then call tokenCallback without a token (null),
+ passing whatever is left in the buffer. Finally reset the buffer and
+ the match cache.
+ */
+ function end() {
+ write.apply(tokenizer, arguments);
+ flush(0); // 0: keep going until no token is found or the buffer is empty
+ tokenCallback.call(tokenizer, null, buffer);
+ buffer = '';
+ matchSet = {};
+ }
+
+ /**
+ process data in the buffer: keep emitting the next token while the
+ buffer is longer than bufferSize and a token can be found.
+ NOTE(review): a zero-length match leaves the buffer unchanged once
+ it sits at index 0 and is never removed from the cache, so this loop
+ would not terminate; expressions presumably must not match the empty
+ string.
+ */
+ function flush(bufferSize) {
+ var token;
+ while(buffer.length > bufferSize && (token = nextToken())) {
+ tokenCallback.call(tokenizer, token, buffer); // the callback sees the buffer before it is trimmed
+ /*
+ the offset to which we are going to flush: the position
+ directly after the matched text.
+ */
+ var offset = token.match.index + token.match[0].length;
+
+ /*
+ trim the buffer: drop everything up to and including the token.
+ */
+ buffer = buffer.substr(offset);
+
+ /*
+ sync the cache with the buffer.
+ */
+ for(var category in matchSet) {
+ var match = matchSet[category];
+ /*
+ when the index of the cached match is before the
+ offset, it is trimmed off! So remove it from the
+ cache. This also removes the match of the token that
+ was just emitted.
+ */
+ if(match.index < offset) delete matchSet[category];
+ /*
+ if the cached match is not before the offset, it is
+ still in the buffer, but it has moved a little toward
+ the beginning. So adjust the index.
+ */
+ else match.index -= offset;
+ }
+
+ }
+ }
+
+ /**
+ finds the next token in the buffer: of all categories in
+ categoryList, the one whose match starts earliest. Returns
+ {category, match}, or null when no category matches.
+ */
+ function nextToken() {
+ var token = null;
+ categoryList.forEach(function(category) {
+ var expression = expressionSet[category];
+ /*
+ look for a cached match
+ */
+ var match = matchSet[category];
+ /*
+ if there is no cached match, search the buffer
+ */
+ if(!match) {
+ /*
+ if expression is a string we are just going
+ to look for the string.
+ */
+ if(typeof expression == 'string') {
+ var index = buffer.indexOf(expression);
+ /*
+ if we found the string (remember, ~-1 == 0) then
+ mimic the match object returned by RegExp: an array
+ holding the matched text, with an index property.
+ */
+ if(~index) {
+ match = extend([
+ expression
+ ], {
+ index: index
+ });
+ }
+ }
+ /*
+ if it's not a string, it should be a RegExp. Just execute
+ it.
+ */
+ else {
+ match = expression.exec(buffer);
+ }
+ /*
+ if there is a match, cache it! A failed search is not
+ cached, so it is repeated on the next call.
+ */
+ if(match) matchSet[category] = match;
+ }
+
+ /*
+ use this match when no token has been chosen yet, or when it
+ starts before the token chosen so far. On equal positions the
+ category that comes first in categoryList wins.
+ */
+ if (match && (!token || match.index < token.match.index)) {
+ /*
+ define a new token.
+ */
+ token = {
+ category: category
+ , match: match
+ };
+ }
+ });
+ /*
+ if there is no token found, this will return null
+ */
+ return token;
+ }
+
+ /**
+ specify what categories to look for. Accepts any number of category
+ names, which may be nested arrays.
+ */
+ function filter() {
+ categoryList = flatten(toArray(arguments));
+ /*
+ when there are no arguments, match any category there is.
+ */
+ if(categoryList.length == 0) {
+ for(var category in expressionSet) {
+ categoryList.push(category);
+ }
+ }
+ }
+
+ /*
+ initialize empty filter (find any category in expressionSet)
+ */
+ filter();
+
+ /*
+ exporting methods by attaching them to the tokenizer instance
+ */
+ extend(tokenizer, {
+ end: end
+ , write: write
+ , filter: filter
+ });
+
+}
+;
+
@@ -0,0 +1,17 @@
+{
+ "name": "2kenizer"
+ , "description": "efficient tokenizer"
+ , "version": "0.0.1"
+ , "author": "Elmer Bulthuis <elmerbulthuis@gmail.com>"
+ , "repositories": [{
+ "type": "git",
+ "url": "git@github.com:LuvDaSun/2kenizer.git"
+ }]
+ , "main": "index.js"
+ , "dependencies": {
+ }
+ , "devDependencies": {
+ }
+
+}
+
@@ -0,0 +1,19 @@
+# 2kenizer
+
+Efficient tokenizer, used by the JsHtml view engine.
+
+
+## Installation
+
+ npm install 2kenizer
+
+
+## License
+
+Copyright (c) 2011 Elmer Bulthuis <elmerbulthuis@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,18 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title></title>
+</head>
+<body>
+
+<p>
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed nec sapien varius urna aliquet interdum. Nulla ultricies magna ac purus tincidunt vitae accumsan risus feugiat. Pellentesque lobortis viverra volutpat. Nullam mattis mollis lorem, tincidunt tempor massa rhoncus non. Integer luctus felis at ante scelerisque sed fermentum dolor volutpat. In ut ante at arcu rutrum viverra at non lectus. Aliquam erat volutpat. Curabitur nec risus vel dolor eleifend semper quis ut nisl. Proin neque nibh, malesuada et adipiscing eget, imperdiet sed risus. Curabitur malesuada ullamcorper tortor, quis pulvinar tellus pellentesque eget. Curabitur non sem neque, ut adipiscing odio. Donec sollicitudin feugiat lectus, eu porttitor urna faucibus id. In interdum pharetra semper. Cras in nisi accumsan dui auctor rhoncus.
+</p>
+
+<p>
+Morbi auctor elit nec libero pretium eleifend non non diam. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Fusce accumsan vehicula neque et bibendum. Maecenas laoreet arcu sit amet elit convallis a dapibus turpis tincidunt. Curabitur quis ligula vel orci varius pretium. Sed imperdiet scelerisque arcu vitae placerat. Aenean facilisis nisl sed risus consectetur ac congue diam hendrerit. Fusce imperdiet turpis sapien. Sed erat diam, accumsan sed congue ac, varius eu massa. Sed consequat mauris nec neque fringilla rhoncus. Nam id imperdiet justo. Ut in est non enim ornare varius quis vel sem. Nunc commodo elit dignissim felis elementum mollis.
+</p>
+
+</body>
+</html>
+
Oops, something went wrong.

0 comments on commit 7d99cb9

Please sign in to comment.