Skip to content

Commit

Permalink
new tfidf script
Browse files Browse the repository at this point in the history
  • Loading branch information
elishowk committed Feb 3, 2011
1 parent 2252c07 commit d3725f9
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions tfidf.py
@@ -0,0 +1,13 @@
import pymongo
from pymongo import Connection
MONGODB_PORT = 27017
import nltk
from nltk.corpus import brown
from nltk.text import TextCollection
# Score every n-gram document in the "cablegate" database against a reference
# corpus (the Brown "news" and "government" categories) and store the score
# in that document's "tfidf" field.
mongodb = Connection("localhost", MONGODB_PORT)['cablegate']

# Build the reference word sequence once: it does not change between
# iterations, so there is no reason to re-create it for every n-gram.
reference_words = brown.words(categories=['news', 'government'])

# NOTE(review): TextCollection is given a flat word sequence here rather than
# a list of separate texts; confirm that the resulting idf term is what is
# intended before relying on the absolute score values.
browntext = TextCollection(reference_words)

count = 0
# timeout=False is passed so the cursor is not subject to the server's normal
# idle timeout while each n-gram is being scored (see the PyMongo find() docs).
for ng in mongodb.ngrams.find(timeout=False):
    score = browntext.tf_idf(ng['label'], reference_words)
    mongodb.ngrams.update({"_id": ng["_id"]}, {"$set": {"tfidf": score}})
    count += 1

# Single-argument parenthesized form prints identically on Python 2 and 3.
print("updated tfidf for %d topics" % count)

0 comments on commit d3725f9

Please sign in to comment.