Permalink
Browse files

- Bug fixes for the Naive Bayes (NB) implementation. There might be a loop issue.

  • Loading branch information...
1 parent f49b89d commit 4f47b21fdb3c1dcce2acdb74b5897fb549f2232a @jfinken committed Apr 27, 2012
Showing with 13 additions and 7 deletions.
  1. +13 −7 pa3-sentiment/python/NaiveBayes.py
View
20 pa3-sentiment/python/NaiveBayes.py 100644 → 100755
@@ -1,3 +1,5 @@
+#!/usr/bin/python
+
# NLP Programming Assignment #3
# NaiveBayes
# 2012
@@ -48,7 +50,7 @@ def __init__(self):
self.Nc = dict() # number of documents of klass
self.textC = dict() # concat of tokens (of all docs) of klass
self.prior = dict()
- self.condprob = [][]
+ self.condprob = dict() # dict of dicts, indexed as condprob[t][klass]
#############################################################################
# TODO TODO TODO TODO TODO
@@ -64,7 +66,7 @@ def classify(self, words):
# extract tokens of doc from V:
W = []
for t in words:
- if t in self.unigrams
+ if t in self.unigrams:
W += t
# apply
@@ -91,10 +93,10 @@ def addExample(self, klass, words):
"""
# update V
for token in words:
- if word in self.unigrams:
- self.unigrams[word] += 1.0
+ if token in self.unigrams:
+ self.unigrams[token] += 1.0
else:
- self.unigrams[word] = 1.0
+ self.unigrams[token] = 1.0
self.V = len(self.unigrams)
# update textC
@@ -110,15 +112,19 @@ def addExample(self, klass, words):
else:
self.Nc[klass] = 1.0
- self.prior[klass] = self.Nc / self.totalsDocs
+ self.prior[klass] = (self.Nc[klass] / self.totalDocs)
# now update the condition probabilities with add-one smoothing
Tc = 0.0
words = self.textC[klass]
for t in self.unigrams:
# num occurrences of t in all text of klass
Tc = words.count(t)
- self.condprob[t][klass] = (Tc + 1.0) / (len(words) + self.V)
+ if t in self.condprob:
+ self.condprob[t][klass] = (Tc + 1.0) / (len(words) + self.V)
+ else:
+ self.condprob[t] = dict()
+ self.condprob[t][klass] = (Tc + 1.0) / (len(words) + self.V)
pass

0 comments on commit 4f47b21

Please sign in to comment.