Permalink
Browse files

Pulling in the initial work and tests.

  • Loading branch information...
0 parents commit e2a9c45b70dfe0c06ed039265591498eb3ecb48c @jeresig committed Mar 16, 2011
Showing with 178,725 additions and 0 deletions.
  1. +18 −0 README.txt
  2. +110 −0 build-trie.js
  3. +1 −0 dict/optimized.js
  4. +178,385 −0 dict/ospd4.txt
  5. +1 −0 dict/simple.js
  6. +1 −0 dict/string.txt
  7. +1 −0 dict/suffix.js
  8. +27 −0 dump-trie.js
  9. +6 −0 test-build-dummy.js
  10. +16 −0 test-build-hash.js
  11. +16 −0 test-build-string.js
  12. +16 −0 test-build-trie.js
  13. +26 −0 test-find-hash.js
  14. +26 −0 test-find-string.js
  15. +26 −0 test-find-trie.js
  16. +49 −0 util.js
@@ -0,0 +1,18 @@
+A Simple JavaScript Trie Generator
+ By John Resig
+
+Copyright 2011 John Resig
+MIT Licensed
+
+All code is designed to work in Node.js.
+
+To build an optimized Trie run:
+
+ node build-trie.js > dict/suffix.js
+
+To dump a full dictionary of words from the Trie do:
+
+ node dump-trie.js
+
+A sample function for finding a word in the Trie
+can be seen in util.js, named findTrieWord.
@@ -0,0 +1,110 @@
// Build script: reads the word list, builds a Trie from it, compresses the
// Trie (chain merging + shared suffixes) and prints the result to stdout as
// JSON with all double quotes stripped.
var txt = require("fs").readFileSync("dict/ospd4.txt", "utf8"),
    // Accept LF and CRLF line endings so a stray "\r" never becomes a letter.
    words = txt.split(/\r?\n/),
    trie = {},
    // Suffix key -> { obj, count }, filled in by suffixes().
    end = {},
    // Suffix key -> index into `endings`, for suffixes worth sharing.
    keepEnd = {},
    // Slot 0 is a placeholder so every stored suffix index is truthy;
    // finishSuffixes() relies on that in `keepEnd[ val ] || end[ val ].obj`.
    endings = [ 0 ];

// Build a simple Trie structure. A leaf is stored as 0; a node that is both
// the end of a word and the prefix of longer words carries a `$: 0` marker.
for ( var i = 0, l = words.length; i < l; i++ ) {
    var word = words[i], letters = word.split(""), cur = trie;

    for ( var j = 0; j < letters.length; j++ ) {
        var letter = letters[j],
            pos = cur[ letter ],
            isLast = j === letters.length - 1;

        if ( pos == null ) {
            // First time this letter is seen at this position.
            cur = cur[ letter ] = isLast ? 0 : {};

        } else if ( pos === 0 ) {
            // An existing word ends here. A duplicate of that word leaves the
            // leaf untouched; a longer word turns the leaf into a node that
            // remembers the shorter word via `$`.
            cur = cur[ letter ] = isLast ? 0 : { $: 0 };

        } else {
            cur = pos;

            // A word that is a prefix of a previously inserted longer word
            // must still be recorded, whatever the input order.
            if ( isLast ) {
                cur.$ = 0;
            }
        }
    }
}

// Optimize the structure
optimize( trie );

// Figure out common suffixes
suffixes( trie, end );

// Suffixes used more than 10 times are stored once in `endings` and
// referenced by index; the rest are inlined again by finishSuffixes().
for ( var key in end ) {
    if ( end[ key ].count > 10 ) {
        keepEnd[ key ] = endings.length;
        endings.push( end[ key ].obj );
    }
}

// And extract the suffixes
finishSuffixes( trie, keepEnd, end );

trie.$ = endings;

console.log( JSON.stringify( trie ).replace(/"/g, "") );
+
/**
 * Collapse chains of single-entry nodes in place, so that `{ a: { b: 0 } }`
 * becomes `{ ab: 0 }`.
 *
 * @param {Object} cur Trie node to optimize; it is mutated in place.
 * @returns {{name: string, value: *}|undefined} When `cur` has exactly one
 *   own entry after optimization, that entry's key and value (so the caller
 *   can merge it into its own key); otherwise undefined.
 */
function optimize( cur ) {
    // Snapshot the own keys first: the loop deletes and adds properties on
    // `cur`, and a live `for...in` gives no guarantee about whether
    // properties added mid-iteration are visited. This also keeps inherited
    // enumerable properties out of the count.
    var names = Object.keys( cur ),
        last;

    names.forEach(function( node ) {
        var child = cur[ node ],
            name = node,
            ret;

        if ( child !== null && typeof child === "object" ) {
            ret = optimize( child );

            if ( ret ) {
                // The child had a single entry: fold it into this key.
                delete cur[ node ];
                name = node + ret.name;
                cur[ name ] = ret.value;
            }
        }

        last = name;
    });

    if ( names.length === 1 ) {
        return { name: last, value: cur[ last ] };
    }
}
+
/**
 * Find leaf nodes (nodes with no object children) and replace each one, in
 * its parent, with a string key describing its entries. Every distinct key is
 * tallied in `end` so the caller can decide which suffixes to share.
 *
 * @param {Object} cur Trie node to scan; child leaf nodes are replaced in
 *   place by their suffix key.
 * @param {Object} end Registry mapping suffix key -> { obj, count }; updated
 *   in place.
 * @returns {string|undefined} The suffix key when `cur` itself is a leaf
 *   node, otherwise undefined.
 */
function suffixes( cur, end ) {
    var names = Object.keys( cur ),
        hasObject = false,
        key = "";

    names.forEach(function( node ) {
        var child = cur[ node ], ret;

        if ( child !== null && typeof child === "object" ) {
            hasObject = true;

            ret = suffixes( child, end );

            if ( ret ) {
                cur[ node ] = ret;
            }
        }
    });

    if ( hasObject ) {
        return;
    }

    // Build the key from sorted names so two leaf nodes with the same
    // entries map to the same suffix regardless of insertion order.
    names.slice().sort().forEach(function( node ) {
        key += "," + node;
    });

    if ( end[ key ] ) {
        end[ key ].count++;

    } else {
        end[ key ] = { obj: cur, count: 1 };
    }

    return key;
}
+
/**
 * Walk the Trie and resolve every suffix key (string value) left behind by
 * suffixes(): a key present in `keepEnd` becomes its numeric index, any other
 * key is replaced by the original leaf object recorded in `end`.
 *
 * @param {Object} cur Trie node to process; mutated in place.
 * @param {Object} keepEnd Map of suffix key -> index of the shared suffix.
 * @param {Object} end Map of suffix key -> { obj, count }.
 */
function finishSuffixes( cur, keepEnd, end ) {
    var name, child, kind;

    for ( name in cur ) {
        child = cur[ name ];
        kind = typeof child;

        if ( kind === "string" ) {
            // Shared suffixes are referenced by index, the rest are inlined.
            cur[ name ] = keepEnd[ child ] || end[ child ].obj;
            continue;
        }

        if ( kind === "object" ) {
            finishSuffixes( child, keepEnd, end );
        }
    }
}
Oops, something went wrong.
Oops, something went wrong.

0 comments on commit e2a9c45

Please sign in to comment.