Permalink
Browse files

some housecleaning

  • Loading branch information...
1 parent 6af24ff commit e909565c145287c1286030202afe71cf1d801ea0 @jedp committed Oct 12, 2011
Showing with 247 additions and 4 deletions.
  1. +18 −0 .gitignore
  2. +22 −0 LICENSE
  3. +4 −3 collector.js
  4. +0 −1 lib/porter-stemmer
  5. +25 −0 package.json
  6. +175 −0 stop_words.txt
  7. +3 −0 test/test-collector.js
View
@@ -0,0 +1,18 @@
+# vim ancillary files
+*.un~
+*.swp
+*.swo
+
+# compiled python
+*.pyc
+
+# other stuff
+*.zip
+*.gz
+*.tar
+*.log
+.DS_Store
+
+# locally installed node modules
+node_modules
+
View
22 LICENSE
@@ -0,0 +1,22 @@
+Copyright (c) 2011 Jed Parsons <jedp@jedparsons.com>
+
+(MIT License)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
View
@@ -24,16 +24,17 @@ var IDS = 'ids' // set -> { document Ids }
var _ = require('underscore');
var r = require('redis').createClient();
-var stemmer = require('./lib/porter-stemmer/porter').memoizingStemmer;
+var stemmer = require('porter-stemmer').memoizingStemmer;
+//var stopWords = require('fs').readFileSync('./stop_words.txt', 'utf8').split('\n');
+
+// @@@ to do - filter out stop words
/**
 * Tokenize a piece of text and return the stem of every token.
 *
 * The text is trimmed and split on runs of whitespace. Each token has its
 * non-word characters (anything matching \W) removed and is lower-cased
 * before being passed to the module-level `stemmer`.
 *
 * Tokens that are empty after normalization are dropped, so empty input
 * and punctuation-only tokens (e.g. "--") never produce an empty term.
 *
 * @param {string} text - Raw document or query text; must be a string.
 * @returns {Array} Stems in their original order, one per non-empty token.
 */
function stemText(text) {
  var tokens = text.trim().split(/\s+/);
  var stems = [];

  for (var i = 0; i < tokens.length; i++) {
    var word = tokens[i].replace(/\W+/g, '').toLowerCase();

    // "".split(/\s+/) yields [""], and tokens made only of punctuation
    // collapse to "" above; skip them rather than stemming an empty term.
    if (word.length > 0) {
      stems.push(stemmer(word));
    }
  }

  return stems;
}
-
-
calculateWeight = function (id, term, callback) {
// For a given term and document id, calculate the
// importance, or weight, of that term according to
Submodule porter-stemmer deleted from 07951d
View
@@ -0,0 +1,25 @@
+{"name": "redis-tfidf",
+ "description": "Basic search engine with redis",
+ "keywords": ["redis", "search", "tfidf", "IR"],
+
+ "version": "0.1.0",
+ "authors": ["Jed Parsons <jed@jedparsons.com> (http://jedparsons.com)"],
+
+ "repository":
+ {"type": "git",
+ "url": "https://github.com/jedp/redis-tfidf"
+ },
+
+ "licenses":
+ [ {"type": "MIT",
+ "url": "https://github.com/jedp/redis-tfidf/blob/master/LICENSE" }],
+
+ "dependencies": {
+ "porter-stemmer": "",
+ "hiredis": "",
+ "nodeunit": "",
+ "redis": "",
+ "underscore": ""
+ }
+}
+
View
@@ -0,0 +1,175 @@
+a
+about
+above
+after
+again
+against
+all
+am
+an
+and
+any
+are
+aren't
+as
+at
+be
+because
+been
+before
+being
+below
+between
+both
+but
+by
+can't
+cannot
+could
+couldn't
+did
+didn't
+do
+does
+doesn't
+doing
+don't
+down
+during
+each
+few
+for
+from
+further
+had
+hadn't
+has
+hasn't
+have
+haven't
+having
+he
+he'd
+he'll
+he's
+her
+here
+here's
+hers
+herself
+him
+himself
+his
+how
+how's
+i
+i'd
+i'll
+i'm
+i've
+if
+in
+into
+is
+isn't
+it
+it's
+its
+itself
+let's
+me
+more
+most
+mustn't
+my
+myself
+no
+nor
+not
+of
+off
+on
+once
+only
+or
+other
+ought
+our
+ours
+ourselves
+out
+over
+own
+same
+shan't
+she
+she'd
+she'll
+she's
+should
+shouldn't
+so
+some
+such
+than
+that
+that's
+the
+their
+theirs
+them
+themselves
+then
+there
+there's
+these
+they
+they'd
+they'll
+they're
+they've
+this
+those
+through
+to
+too
+under
+until
+up
+very
+was
+wasn't
+we
+we'd
+we'll
+we're
+we've
+were
+weren't
+what
+what's
+when
+when's
+where
+where's
+which
+while
+who
+who's
+whom
+why
+why's
+with
+won't
+would
+wouldn't
+you
+you'd
+you'll
+you're
+you've
+your
+yours
+yourself
+yourselves
+
View
@@ -1,10 +1,13 @@
+// Run me with nodeunit
+
var _ = require('underscore');
var r = require('redis').createClient();
var collector = require('../collector');
var doc1 = {id:1, text:"I like pie."};
var doc2 = {id:2, text:"I like potatoes."};
var doc3 = {id:3, text:"I have an irrational love, yes I do, of flan."};
+var doc4 = {id:4, text:"I like pie and potatoes."};
var testCase = require('nodeunit').testCase;
var test_db = '_test_tfidf';

0 comments on commit e909565

Please sign in to comment.