In [20]:
import nltk.classify.util
from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier

In [21]:
def extract_features(word_list):
    return dict([(word,True) for word in word_list])

In [22]:
#Load positive fields and negative reviews
positive_fileids = movie_reviews.fileids('pos')
negative_fileids = movie_reviews.fileids('neg')

In [23]:
features_positive = [(extract_features(movie_reviews.words(fileids=[f])),'Positive') for f in positive_fileids]
features_negative = [(extract_features(movie_reviews.words(fileids=[f])),'Negative') for f in negative_fileids]

In [24]:
# Split the data into train and test
threshold_factor = 0.8
threshold_positive = int(threshold_factor * len(features_positive))
threshold_negative = int(threshold_factor * len(features_negative))

In [25]:
features_train = features_positive[:threshold_positive] + features_negative[:threshold_negative]
features_test = features_positive[threshold_positive:] + features_negative[threshold_negative:]
print("Number of training datapoints :" ,len(features_train))
print("Number of test datapoints :" ,len(features_test))

Number of training datapoints : 1600
Number of test datapoints : 400


In [26]:
# Training a Naive Bayes Classifier
classifier = NaiveBayesClassifier.train(features_train)
print("Accuracy :",nltk.classify.accuracy(classifier,features_test))

Accuracy : 0.735


In [28]:
input_reviews=["The movie was worth the buck",
              "I really enjoyed watching it",
              "Successfully wasted 3 hours of my life",
              "The movie was very boring",
              "I slept in the movie hall while watching it"]

print("\nPredictions")
for review in input_reviews:
    print("\nReview :",review)
    
    problist = classifier.prob_classify(extract_features(review.split()))
    pred_sentiment = problist.max()
    
    print("Predicted Sentiment :",pred_sentiment)
    print("Probability :",round(problist.prob(pred_sentiment), 2))


Predictions

Review : The movie was worth the buck
Predicted Sentiment : Positive
Probability : 0.54

Review : I really enjoyed watching it
Predicted Sentiment : Positive
Probability : 0.63

Review : Successfully wasted 3 hours of my life
Predicted Sentiment : Negative
Probability : 0.85

Review : The movie was very boring
Predicted Sentiment : Negative
Probability : 0.76

Review : I slept in the movie hall while watching it
Predicted Sentiment : Positive
Probability : 0.68
