In [1]:
import nltk
import random
from nltk.corpus import movie_reviews

# Importing the nltk wrapper for sklearn classifiers
from nltk.classify.scikitlearn import SklearnClassifier

# Importing sklearn classifiers
from sklearn.naive_bayes import BernoulliNB, MultinomialNB, GaussianNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC

In [16]:
random.seed(42)

In [27]:
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

In [28]:
random.shuffle(documents)

In [29]:
all_words = []
for w in movie_reviews.words():
    all_words.append(w.lower())
    
all_words = nltk.FreqDist(all_words)

word_features = list(all_words.keys())[:3000]

In [30]:
def featurize(document):
    words = set(document)
    features = {}
    for w in word_features:
        features[w] = (w in words)
    return features

featureset = [(featurize(doc), category) for (doc, category) in documents]

In [31]:
training_set = featureset[:1900]
testing_set = featureset[1900:]

In [33]:
classifier = SklearnClassifier(BernoulliNB())
classifier.train(training_set)
print('accuracy:',nltk.classify.accuracy(classifier,testing_set)*100)

accuracy: 84.0


In [41]:
algo =[BernoulliNB, MultinomialNB, LogisticRegression, SGDClassifier, SVC, LinearSVC, NuSVC]

In [42]:
for al in algo:
    classifier = SklearnClassifier(al())
    classifier.train(training_set)
    print('Accuracy for {} : {}'.format(str(al), nltk.classify.accuracy(classifier, testing_set)*100))

Accuracy for <class 'sklearn.naive_bayes.BernoulliNB'> : 84.0
Accuracy for <class 'sklearn.naive_bayes.MultinomialNB'> : 85.0
Accuracy for <class 'sklearn.linear_model.logistic.LogisticRegression'> : 90.0




Accuracy for <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> : 88.0
Accuracy for <class 'sklearn.svm.classes.SVC'> : 82.0
Accuracy for <class 'sklearn.svm.classes.LinearSVC'> : 91.0
Accuracy for <class 'sklearn.svm.classes.NuSVC'> : 92.0


**Note:** GaussianNB doesn't work here for some reason!