In [15]:
import random

class Sentiment:
    NEGATIVE = "NEGATIVE"
    NEUTRAL = "NEUTRAL"
    POSITIVE = "POSITIVE"

class Review:
    def __init__(self, text, score):
        self.text = text
        self.score = score
        self.sentiment = self.get_sentiment()

    def get_sentiment(self):
        if self.score <= 2:
            return Sentiment.NEGATIVE
        elif self.score == 3:
            return Sentiment.NEUTRAL
        else:
            return Sentiment.POSITIVE

class ReviewContainer:
    def __init__(self, reviews):
        self.reviews = reviews

    def get_text(self):
        return[x.text for x in self.reviews]

    def get_sentiment(self):
        return [x.sentiment for x in self.reviews]

    #Evenly distribute and randomize our data to minimize biasness of our model
    def evenly_distribute(self):
        negative = list(filter(lambda x: x.sentiment == Sentiment.NEGATIVE, self.reviews))
        positive = list(filter(lambda x: x.sentiment == Sentiment.POSITIVE, self.reviews))
        positive_shrunk = positive[:len(negative)]
        
        self.reviews = negative + positive_shrunk

        random.shuffle(self.reviews)


In [16]:
import json

file_name = "books_dataset.json"

reviews = []

# Reading our dataset
with open(file_name) as f:
    for line in f:
        review = json.loads(line)
        reviews.append(Review(review["reviewText"],review["overall"]))

In [17]:
from sklearn.model_selection import train_test_split

#Spliting our dataset into train and test
train_data, test_data = train_test_split(reviews, test_size = 0.33, random_state = 42)

train_container = ReviewContainer(train_data)

test_container = ReviewContainer(test_data)

In [18]:
train_container.evenly_distribute()
train_x = train_container.get_text()
train_y = train_container.get_sentiment()

test_container.evenly_distribute()
test_x = test_container.get_text()
test_y = test_container.get_sentiment()

# Bag of Words Vectorization

In [19]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

vectorizer = TfidfVectorizer()

train_x_vectors = vectorizer.fit_transform(train_x)

test_x_vectors = vectorizer.transform(test_x)

# LINEAR SVM

In [20]:
from sklearn import svm

clf_svm = svm.SVC(kernel="linear", probability=True)

clf_svm.fit(train_x_vectors,train_y)

clf_svm.predict(test_x_vectors[0])

prob = clf_svm.predict_proba(test_x_vectors[0])

# Decision Trees

In [21]:
from sklearn.tree import DecisionTreeClassifier

clf_dec = DecisionTreeClassifier()

clf_dec.fit(train_x_vectors, train_y)

clf_dec.predict(test_x_vectors[0])

array(['NEGATIVE'], dtype='<U8')

# NAIVE BAYES

In [22]:
from sklearn.naive_bayes import GaussianNB

clf_gnb = GaussianNB()

clf_gnb.fit(train_x_vectors.toarray(),train_y)

clf_gnb.predict(test_x_vectors[0].toarray())

array(['POSITIVE'], dtype='<U8')

# LOGISTIC REGRESSION

In [23]:
from sklearn.linear_model import LogisticRegression

clf_log = LogisticRegression()

clf_log.fit(train_x_vectors, train_y)

clf_log.predict(test_x_vectors[0])

array(['NEGATIVE'], dtype='<U8')

# Evaluation

In [24]:
svm_score = clf_svm.score(test_x_vectors, test_y)

dec_score = clf_dec.score(test_x_vectors, test_y)

gnb_score = clf_gnb.score(test_x_vectors.toarray(), test_y)

log_score = clf_log.score(test_x_vectors, test_y)

print("SVM SCORE: ", svm_score)
print("DECISION TREE SCORE: ", dec_score)
print("NAIVE BAYES SCORE: ", gnb_score)
print("LOGISTIC REGRESSION SCORE: ", log_score)

SVM SCORE:  0.8076923076923077
DECISION TREE SCORE:  0.6514423076923077
NAIVE BAYES SCORE:  0.6610576923076923
LOGISTIC REGRESSION SCORE:  0.8052884615384616


# F1 Score

In [25]:
from sklearn.metrics import f1_score

#Check F1 score to ensure that our model isn't bias
f1_score(test_y, clf_svm.predict(test_x_vectors), average=None, labels=[Sentiment.POSITIVE,Sentiment.NEGATIVE])


array([0.80582524, 0.80952381])

In [26]:
# Defined this as a funtion so that our GUI can call it every time a input is submitted
def model_run(feedback):

    test_set = [feedback]

    new_test = vectorizer.transform(test_set)

    return [clf_svm.predict(new_test)[0], clf_svm.predict_proba(new_test)]


# GUI

In [27]:
import tkinter as tk
from tkinter import font
from PIL import Image, ImageTk

In [1]:
import tkinter as tk
from tkinter import font
from PIL import Image, ImageTkscreen = tk.Tk()

screen.title("Feedback Classifier")

# Height and Width is used to define the size of our window
HEIGHT = 600
WIDTH = 700


def get_review(feedback):
    label["text"] = format_response(feedback)


def format_response(feedback):
    prediction, prob = model_run(feedback)[0], model_run(feedback)[1]
    if prediction=="POSITIVE":
        probablity = prob[0][1]
    else:
        probablity = prob[0][0]
    output = "Feedback is {0}.\n\n\n\n\nThe probablity of the feedback being {0}:\n {1}%\n\n\n\n\n\n\n\nOur algorithm has an accuracy of 80.77%\nSome predictions might be inaccurate.".format(prediction, round((probablity*100), 2))
    return output


#To remove the "Enter a user review..." text when the input area is clicked
def on_entry_click(event):
    if entry.get() == "Enter a user review...":
       entry.delete(0, "end")
       entry.insert(0, "")


# To bring back the "Enter a user review..." text when any other area is clicked
def on_focusout(event):
    if entry.get() == "":
        entry.insert(0, "Enter a user review...")


canvas = tk.Canvas(screen, height=HEIGHT, width=WIDTH)
canvas.pack()


# To set the background of the Tkinter window
bg_img = tk.PhotoImage(file="bg_img.png")
bg_label = tk.Label(screen, image=bg_img)
bg_label.place(relwidth=1, relheight=1)


# To set the app icon
icon_img = tk.PhotoImage(file="icon_img.png")
screen.iconphoto(False, icon_img)



frame = tk.Frame(screen, bg="#fca503", bd=5)
frame.place(relx= 0.5, rely=0.1, relwidth=0.75, relheight=0.1, anchor="n")


entry = tk.Entry(frame, font=("Courier", 12))
entry.insert(0, 'Enter a user review...')
entry.bind('<FocusIn>', on_entry_click)
entry.bind('<FocusOut>', on_focusout)
entry.place(relwidth=0.65, relheight=1)


button = tk.Button(frame, text="Classify", font=("Courier", 12), command=lambda: get_review(entry.get()))
button.place(relx=0.7, relheight=1, relwidth=0.3)


frame2 = tk.Frame(screen, bg="#fca503", bd=10)
frame2.place(relx=0.5, rely=0.25, relwidth=0.75, relheight=0.6, anchor="n")


label = tk.Label(frame2, font=("Courier", 14), anchor="nw", justify="left", bd=4)
label.place(relwidth=1, relheight=1)


screen.mainloop()

SyntaxError: invalid syntax (Temp/ipykernel_3900/3214719999.py, line 3)