diff --git a/classifier.py b/classifier.py old mode 100644 new mode 100755 index 5b3184220..3c4fefc16 --- a/classifier.py +++ b/classifier.py @@ -1,3 +1,5 @@ +#!/usr/bin/python + import random import data from numpy import * diff --git a/data.py b/data.py old mode 100644 new mode 100755 diff --git a/movie.py b/movie.py old mode 100644 new mode 100755 index 067ac4eed..a375a0d54 --- a/movie.py +++ b/movie.py @@ -1,3 +1,5 @@ +#!/usr/bin/python + import data import ngrams import validate diff --git a/ngrams.py b/ngrams.py old mode 100644 new mode 100755 index 26e433e12..34c99b190 --- a/ngrams.py +++ b/ngrams.py @@ -1,6 +1,9 @@ +#!/usr/bin/python + import collections import data from numpy import * + def words(s): words = [] current = "" @@ -50,17 +53,17 @@ def ngrams_range(b, e, s): return g def ngrams_to_matrix(grams, classes): - print "a" + print "Entering ngrams_to_matrix" keysets = [set(k) for k in grams] allgramset = set() - print "b" + print "> Sets created" for k in keysets: allgramset = allgramset.union(k) - print "c" + print "> Sets unioned" allgrams = list(allgramset) - print "d" + print "> Listed" vecs = [] - print "e" + print "> []" allgramsdict = {} for i in range(len(allgrams)): allgramsdict[allgrams[i]] = i @@ -80,9 +83,9 @@ def ngram_vector(n, s, dictionary): return array(vec) if __name__ == "__main__": - print ngrams(3, "Now is the time for all good men to not come to the aid of their party! Now is the time for all bad women to leave the aid of their country? This, being war, is bad") + print "Trigram example: %s" % ngrams(3, "Now is the time for all good men to not come to the aid of their party! Now is the time for all bad women to leave the aid of their country? This, being war, is bad") g1 = ngrams(1, "Hello how are you") g2 = ngrams(1, "Are you feeling well") g3 = ngrams(1, "Well hello there") - print g3 - print ngrams_to_matrix([g1, g2, g3], [1, 2, 1]).asMatrix() + print "Unigram example: %s" % g3 + print "Matrix example: %s" % ngrams_to_matrix([g1, g2, g3], [1, 2, 1]).asMatrix() diff --git a/validate.py b/validate.py old mode 100644 new mode 100755 index 75f677da5..9dd918925 --- a/validate.py +++ b/validate.py @@ -1,3 +1,5 @@ +#!/usr/bin/python + import classifier import data from numpy import * @@ -31,20 +33,19 @@ def errorrate(classif, testdata): error += 1 return (error, count) - if __name__ == "__main__": d = data.Data(data.DefDict((), { - (1,2,3) : (1,), - (3,3,1) : (0,), - (1,2,3) : (1,), - (1,4,3) : (1,), - (1,2,4) : (1,), - (1,2,1) : (1,), - (1,2,6) : (1,), - (1,4,5) : (0,), - (1,5,3) : (1,), - (1,6,3) : (0,) - })) + (1,2,3) : (1,), + (3,3,1) : (0,), + (1,2,3) : (1,), + (1,4,3) : (1,), + (1,2,4) : (1,), + (1,2,1) : (1,), + (1,2,6) : (1,), + (1,4,5) : (0,), + (1,5,3) : (1,), + (1,6,3) : (0,) + })) classif = classifier.OneClassifier print d.asMatrix()