In [1]:
import nltk
from nltk.corpus import movie_reviews
 
pos_reviews = []
for fileid in movie_reviews.fileids('pos'):
    words = movie_reviews.words(fileid)
    pos_reviews.append(words)
 
neg_reviews = []
for fileid in movie_reviews.fileids('neg'):
    words = movie_reviews.words(fileid)
    neg_reviews.append(words)

In [2]:
from nltk.corpus import stopwords
import string

stopwords_english = stopwords.words('english')

# feature extractor function
def bag_of_words(words):
    words_clean = []
    
    for word in words:
        word = word.lower()
        if word not in stopwords_english and word not in string.punctuation:
            words_clean.append(word)
    
    words_dictionary = dict([word, True] for word in words_clean)
    
    return words_dictionary

In [3]:
# positive reviews feature set
pos_reviews_set = []
for words in pos_reviews:
    pos_reviews_set.append((bag_of_words(words), 'The review for this movie is POSITIVE'))
 
# negative reviews feature set
neg_reviews_set = []
for words in neg_reviews:
    neg_reviews_set.append((bag_of_words(words), 'The review for this movie is NEGATIVE'))
    

In [4]:
print ("Length of positive review set:",len(pos_reviews_set)) 
print ("Length of negitive review set:",len(neg_reviews_set)) 

# radomize pos_reviews_set and neg_reviews_set
# doing so will output different accuracy result everytime we run the program
from random import shuffle
shuffle(pos_reviews_set)
shuffle(neg_reviews_set)
 
test_set = pos_reviews_set[:125] + neg_reviews_set[:125]
train_set = pos_reviews_set[125:] + neg_reviews_set[125:]
 
print("Length of testing set:",len(test_set))
print("Length of training set:",len(train_set)) 

Length of positive review set: 1000
Length of negitive review set: 1000
Length of testing set: 250
Length of training set: 1750


In [5]:
from nltk import classify
from nltk import NaiveBayesClassifier
 
classifier = NaiveBayesClassifier.train(train_set)
 
accuracy = classify.accuracy(classifier, test_set)
print("Accuracy percent of NaiveBayesClassifier: ",accuracy) 


Accuracy percent of NaiveBayesClassifier:  0.728


In [6]:
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(train_set)
print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, test_set))*100)

MNB_classifier accuracy percent: 84.0


In [7]:
from nltk.tokenize import word_tokenize


# Negative review (using naive bias)
 
custom_review = "I hated the film. It was a disaster. Poor direction, bad acting."
custom_review_tokens = word_tokenize(custom_review)
custom_review_set = bag_of_words(custom_review_tokens)
print (classifier.classify(custom_review_set)) 

# probability result
prob_result = classifier.prob_classify(custom_review_set)
print (prob_result.max()) 
print (prob_result.prob("The review for this movie is NEGATIVE")) #tells probability of each tag 
print (prob_result.prob("The review for this movie is POSITIVE")) 

# Positive review (using bias)

custom_review = "It was a wonderful and amazing movie. I loved it. it was quite thrilling and interesting."
custom_review_tokens = word_tokenize(custom_review)
custom_review_set = bag_of_words(custom_review_tokens)
print (classifier.classify(custom_review_set)) 
 
# probability result
prob_result = classifier.prob_classify(custom_review_set)
print (prob_result.max()) 
print (prob_result.prob("The review for this movie is NEGATIVE")) 
print (prob_result.prob("The review for this movie is POSITIVE")) 

The review for this movie is NEGATIVE
The review for this movie is NEGATIVE
0.8384733871353828
0.16152661286461645
The review for this movie is POSITIVE
The review for this movie is POSITIVE
0.02820263340033779
0.9717973665996618


In [8]:
from nltk.tokenize import word_tokenize


def clicked():
    custom_review = txt.get()
    custom_review_tokens = word_tokenize(custom_review)
    custom_review_set = bag_of_words(custom_review_tokens)
    output1=MNB_classifier.classify(custom_review_set)
    l2.configure(text=output1)
    output2=classifier.classify(custom_review_set)
    l4.configure(text=output2)

import tkinter
from tkinter import *
window=tkinter.Tk()
window.title("TEXT BASED SENTIMENT ANALYSIS")
label=tkinter.Label(window, text=" Enter your movie review-", font=(40))
label.pack()

l3=tkinter.Label(window, text=" According to MNB:")
l3.place(relx = 0.5, rely = 0.5, anchor = 's')
l2=tkinter.Label(window)
l2.place(relx = 0.5, rely = 0.6, anchor = 's') 
l5=tkinter.Label(window, text=" According to Normal NB:")
l5.place(relx = 0.5, rely = 0.7, anchor = 's')
l4=tkinter.Label(window)
l4.place(relx = 0.5, rely = 0.8, anchor = 's') 

window.geometry('1000x600')
    
txt=Entry(window,width=70)
txt.place(relx=0.5,rely=0.2,anchor='center')


bt= Button(window, text = 'Enter',bg="black",fg="white", command=clicked)
bt.place(relx=0.5,rely=0.4,anchor='center')



window.mainloop()

