Skip to content

Commit

Permalink
some housecleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
jedp committed Oct 12, 2011
1 parent 6af24ff commit e909565
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 4 deletions.
18 changes: 18 additions & 0 deletions .gitignore
@@ -0,0 +1,18 @@
# vim ancillary files
*.un~
*.swp
*.swo

# compiled python
*.pyc

# other stuff
*.zip
*.gz
*.tar
*.log
.DS_Store
.DS_Store?

# locally installed node modules
node_modules

22 changes: 22 additions & 0 deletions LICENSE
@@ -0,0 +1,22 @@
Copyright (c) 2011 Jed Parsons <jedp@jedparsons.com>

(MIT License)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

7 changes: 4 additions & 3 deletions collector.js
Expand Up @@ -24,16 +24,17 @@ var IDS = 'ids' // set -> { document Ids }
var _ = require('underscore'); var _ = require('underscore');


var r = require('redis').createClient(); var r = require('redis').createClient();
var stemmer = require('./lib/porter-stemmer/porter').memoizingStemmer; var stemmer = require('porter-stemmer').memoizingStemmer;


//var stopWords = require('fs').readFileSync('./stop_words.txt', 'utf8').split('\n');

// @@@ to do - filter out stop words
/**
 * Split text on whitespace and reduce each token to its stem.
 *
 * Every token has runs of non-word characters (anything matched by \W)
 * removed and is lower-cased before being handed to `stemmer`.  Tokens that
 * are empty after that clean-up -- pure punctuation such as "--", or the
 * single '' produced by splitting empty/whitespace-only text -- are dropped
 * so that an empty string is never stemmed or returned as a term.
 *
 * @param {string} text - raw text to tokenize
 * @returns {Array} stems, in the order their tokens appear in `text`
 */
function stemText(text) {
  var tokens = text.trim().split(/\s+/);
  var stems = [];
  for (var i = 0; i < tokens.length; i++) {
    var word = tokens[i].replace(/\W+/g, '').toLowerCase();
    // Skip tokens that contained no word characters at all.
    if (word.length > 0) {
      stems.push(stemmer(word));
    }
  }
  return stems;
}




calculateWeight = function (id, term, callback) { calculateWeight = function (id, term, callback) {
// For a given term and document id, calculate the // For a given term and document id, calculate the
// importance, or weight, of that term according to // importance, or weight, of that term according to
Expand Down
1 change: 0 additions & 1 deletion lib/porter-stemmer
Submodule porter-stemmer deleted from 07951d
25 changes: 25 additions & 0 deletions package.json
@@ -0,0 +1,25 @@
{"name": "redis-tfidf",
"description": "Basic search engine with redis",
"keywords": ["redis", "search", "tfidf", "IR"],

"version": "0.1.0",
"authors": ["Jed Parsons <jed@jedparsons.com> (http://jedparsons.com)"],

"repository":
{"type": "git",
"url": "https://github.com/jedp/redis-tfidf"
},

"licenses":
[ {"type": "MIT",
"url": "https://github.com/jedp/redis-tfidf/blob/master/LICENSE" }],

"dependencies": {
"porter-stemmer": "",
"hiredis": "",
"nodeunit": "",
"redis": "",
"underscore": ""
}
}

175 changes: 175 additions & 0 deletions stop_words.txt
@@ -0,0 +1,175 @@
a
about
above
after
again
against
all
am
an
and
any
are
aren't
as
at
be
because
been
before
being
below
between
both
but
by
can't
cannot
could
couldn't
did
didn't
do
does
doesn't
doing
don't
down
during
each
few
for
from
further
had
hadn't
has
hasn't
have
haven't
having
he
he'd
he'll
he's
her
here
here's
hers
herself
him
himself
his
how
how's
i
i'd
i'll
i'm
i've
if
in
into
is
isn't
it
it's
its
itself
let's
me
more
most
mustn't
my
myself
no
nor
not
of
off
on
once
only
or
other
ought
our
ours
ourselves
out
over
own
same
shan't
she
she'd
she'll
she's
should
shouldn't
so
some
such
than
that
that's
the
their
theirs
them
themselves
then
there
there's
these
they
they'd
they'll
they're
they've
this
those
through
to
too
under
until
up
very
was
wasn't
we
we'd
we'll
we're
we've
were
weren't
what
what's
when
when's
where
where's
which
while
who
who's
whom
why
why's
with
won't
would
wouldn't
you
you'd
you'll
you're
you've
your
yours
yourself
yourselves

3 changes: 3 additions & 0 deletions test/test-collector.js
@@ -1,10 +1,13 @@
// Run me with nodeunit

var _ = require('underscore'); var _ = require('underscore');
var r = require('redis').createClient(); var r = require('redis').createClient();
var collector = require('../collector'); var collector = require('../collector');


var doc1 = {id:1, text:"I like pie."}; var doc1 = {id:1, text:"I like pie."};
var doc2 = {id:2, text:"I like potatoes."}; var doc2 = {id:2, text:"I like potatoes."};
var doc3 = {id:3, text:"I have an irrational love, yes I do, of flan."}; var doc3 = {id:3, text:"I have an irrational love, yes I do, of flan."};
var doc4 = {id:4, text:"I like pie and potatoes."};


var testCase = require('nodeunit').testCase; var testCase = require('nodeunit').testCase;
var test_db = '_test_tfidf'; var test_db = '_test_tfidf';
Expand Down

0 comments on commit e909565

Please sign in to comment.