Permalink
Browse files

Started working on majority voting

  • Loading branch information...
1 parent a43792d commit eed36e40d89f9ae72ea1ec6ce6541d603fa7b7c6 @pranjalv123 pranjalv123 committed Jan 18, 2012
Showing with 67 additions and 16 deletions.
  1. +2 −2 PyML-0.7.9/PyML/containers/parsers.py
  2. +31 −2 classifier.py
  3. +34 −2 movie.py
  4. +0 −10 subjectivity/toFiles.sh
View
4 PyML-0.7.9/PyML/containers/parsers.py
@@ -304,7 +304,7 @@ def __init__(self, file, **args) :
self._first = 0
else :
self._first = max(self.idColumn, self.labelsColumn) + 1
- print 'label at ', self.labelsColumn
+# print 'label at ', self.labelsColumn
def check(self) :
"""very loose checking of the format of the file:
@@ -327,7 +327,7 @@ def check(self) :
else :
self.delim = None
#line,pos = self.skipHeader(line,pos)
- print 'delimiter', self.delim
+# print 'delimiter', self.delim
# a file that does not contain a ":" is assumed to be in
# CSV format
View
33 classifier.py
@@ -179,7 +179,7 @@ def classify(self, pt):
f.close()
data = SparseDataSet(fname)
os.remove(fname)
- return self.svm.test(data, verbose=0).getPredictedLabels()[0]
+ return int(self.svm.test(data, verbose=0).getPredictedLabels()[0])
class MaximumEntropyClassifier(Classifier):
def __init__(self, restrictFeatures=False):
@@ -213,7 +213,36 @@ def classify(self, point, label='1'):
if result >= 0.5:
return 1
return -1
-
+
class MajorityVotingClassifier(Classifier):
    """Ensemble classifier that labels a point by a weighted majority vote.

    Each member classifier votes with the label returned by its own
    ``classify`` method.  A vote counts for the ``reliability`` the member
    was registered with, so with the default reliability of 1 the result
    is a plain majority vote.

    Labels are used as dictionary keys, so every member must return
    hashable labels that compare equal when they mean the same class
    (e.g. all members returning the ints 1 / -1).
    """

    def __init__(self):
        # Parallel lists: reliabilities[i] is the vote weight of
        # classifiers[i].
        self.classifiers = []
        self.reliabilities = []

    def addClassifier(self, classifier, train_files=None, test_files=None,
                      reliability=1):
        """Register a member classifier.

        classifier  -- object exposing ``classify(vec)`` and
                       ``addFeatureVector(vec)``.
        train_files -- accepted for interface compatibility; not used here.
        test_files  -- accepted for interface compatibility; not used here.
        reliability -- weight given to this member's vote (default 1).
        """
        self.classifiers.append(classifier)
        self.reliabilities.append(reliability)

    def addFeatureVector(self, vec):
        """Forward a feature vector to every registered classifier."""
        for cls in self.classifiers:
            cls.addFeatureVector(vec)

    def classify(self, vec):
        """Return the label with the largest total vote weight for ``vec``.

        Ties go to the label that was voted for first, in the order the
        members were added.  Returns 0 when no classifier is registered or
        no label collects a positive total weight.
        """
        totals = {}
        # Labels in first-vote order, so tie-breaking does not depend on
        # dictionary iteration order.
        seen = []
        for member, weight in zip(self.classifiers, self.reliabilities):
            label = member.classify(vec)
            if label not in totals:
                totals[label] = 0
                seen.append(label)
            totals[label] += weight

        best_label = 0
        best_weight = 0
        for label in seen:
            if totals[label] > best_weight:
                best_label = label
                best_weight = totals[label]
        return best_label
def test_bayes():
trainingset = array([[2, 2, 2, 1],
View
36 movie.py
@@ -106,6 +106,11 @@ def test(self):
print "Negative: %s of %s, %s accuracy" % (neg_correct,len(neg_tests),
(float(neg_correct)/len(neg_tests)))
return (float(pos_correct)/len(pos_tests), float(neg_correct)/len(neg_tests))
+
class MajorityVotingTester():
    """Work-in-progress holder for classifiers to test under majority voting."""

    def __init__(self):
        # Classifiers registered with this tester; starts out empty.
        self.classifiers = []

    def addClassifer(self):
        """Placeholder for registering a classifier; not implemented yet.

        The method had no body, which is a syntax error that prevents the
        module from loading.  It now fails loudly if called.
        NOTE(review): the name is presumably a typo for ``addClassifier``;
        it is kept unchanged so any existing caller still resolves.
        """
        raise NotImplementedError(
            "MajorityVotingTester.addClassifer is not implemented yet")
def select_dataset(dataset):
return {'default':(POS_DIR, NEG_DIR), #untagged
'partofspeech':(POS_PARTOFSPEECH_DIR, NEG_PARTOFSPEECH_DIR), #part of speech tagged
@@ -130,13 +135,40 @@ def test(classif, n=1, train_size=500, mode='k', iterations=1, dataset='', limit
print "Total:", round((neg_correct + pos_correct)/(2*iterations)*100), "%"
if __name__ == "__main__":
- test(classifier.BayesClassifier,n=[1],train_size=800,mode='k',
- iterations=3,dataset='position',limit=[16165],binary=False, idf=True)
+ #test(classifier.BayesClassifier,n=[1],train_size=800,mode='k',
+ # iterations=3,dataset='position',limit=[16165],binary=False, idf=True)
#test(classifier.LinearSVMClassifier,n=[2],train_size=800,mode='k',
# iterations=3,dataset='default',limit=[16165],binary=False, idf=True)
#test(classifier.MaximumEntropyClassifier,n=[1],train_size=800,mode='k',
# iterations=3,dataset='default',limit=[16165],binary=True)
# Majority-voting experiment.  In movie.py these statements belong to the
# body of the ``if __name__ == "__main__":`` guard.
mvc = classifier.MajorityVotingClassifier()
ind = Indexes(mode='k', iterations=3, train_size=800)
ind.next()  # NOTE(review): presumably advances to the first split - confirm
print(ind)

# (classifier class, n argument) for each ensemble member, in the order
# they are trained and registered.
members = [
    (classifier.BayesClassifier, [1]),
    (classifier.LinearSVMClassifier, [1]),
    (classifier.LinearSVMClassifier, [2]),
]
for (member_class, ns) in members:
    (pos_dir, neg_dir) = select_dataset('default')
    m = TestConfiguration(member_class, ns, ind, pos_dir, neg_dir,
                          binary=False, limit=[16165], idf=False)
    m.train()
    mvc.addClassifier(m.classifier)

# Reuse the last configuration built above as the test harness, swapping
# its classifier for the voting ensemble before running the test.
m.classifier = mvc
m.test()
exit()
+
+
+
# with train_size = 800, no shuffling, bayes classifier
# [ns] dataset [limits] binary --> +results -results
# [2] position [114370] 0 --> 0.96 0.56
View
10 subjectivity/toFiles.sh
@@ -1,10 +0,0 @@
# NOTE(review): this whole script is deleted by the commit; the leading "-"
# on each line below is the diff's removal marker, not part of the script.
# Purpose: write line $i of plot.tok.gt9.5000 and of quote.tok.gt9.5000 to
# objective/default/file$i and subjective/default/file$i respectively.
-#!/bin/bash
-
# The counter starts at 0, and "head -n0" emits nothing, so file0 comes out
# empty in both directories.
-i="0"
-
# The loop stops after i=4999, so a 5000th input line is never written out.
-while [ $i -lt 5000 ]
-do
# Each iteration re-reads its input from the top (head | tail), so total
# work grows quadratically with the number of lines.
-head -n$i plot.tok.gt9.5000 | tail -n1 > objective/default/file$i
-head -n$i quote.tok.gt9.5000 | tail -n1 > subjective/default/file$i
# "$[ ... ]" is the old bash arithmetic form; "$(( ... ))" is the current one.
-i=$[$i+1]
-done

0 comments on commit eed36e4

Please sign in to comment.