STEP.1

Import necessary packages:

In [256]:
# Importing Pandas Package/Library.
import pandas as pd
# Importing Numpy Package/Library.
import numpy as np
# Importing String.
import string
# Importing Regular Expression.
import re
# Importing Counter Vectorizer.
from sklearn.feature_extraction.text import CountVectorizer
# Importing Matplot Library.
import matplotlib.pyplot as plt
# Importing Multinomial NB package (Algorithm Technique)
from sklearn.naive_bayes import MultinomialNB 
# Importing Logistic Regression package (Algorithm Technique)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score,accuracy_score,precision_score

STEP.2

Import the data set:

In [265]:
DataFrame=pd.read_csv('Restaurant_Reviews.txt',delimiter='\t')

In [266]:
DataFrame.shape

(1000, 2)

In [267]:
DataFrame.head(10)

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1
5,Now I am getting angry and I want my damn pho.,0
6,Honeslty it didn't taste THAT fresh.),0
7,The potatoes were like rubber and you could te...,0
8,The fries were great too.,1
9,A great touch.,1


STEP.3

Show like & dislike reviews count and graph:

In [268]:
DataFrame.Liked.value_counts()
# By using value count:
# we get positive review count and negative review count

1    500
0    500
Name: Liked, dtype: int64

STEP.4

Cleaning the data: Remove characters "0 9,!,@,#,$,%,^,&,*,(,),_,+,?,>,<,:,",},{,|," and remove punctuation:

In [269]:
# Defining a function tor clean complete dataset.
def cleaning(text):
    
    text = re.sub(r'<.*?>', ' ', text)
    text = re.sub(r"can't", 'can not', text)
    text = re.sub(r"don't", 'do not', text)
    text = re.sub(r"didn't", 'did not', text)
    text = re.sub(r"couldn't", 'could not', text)
    text = re.sub(r"shouldn't", 'should not', text)
    
    # Remove Mobile Number:
    text = re.sub(r'[\d]{10,12}', 'mobno', text)
    text = re.sub(r'[^A-Za-z]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    
    return text.lower()

In [270]:
# Test our function that it is working properly or not:
c1 = cleaning("Hello!!, My Name is Suryansh @ Gupta. ?, shouldn't")
print(c1)

hello my name is suryansh gupta should not


In [271]:
DataFrame.head(5)

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [272]:
DataFrame['Review'] = DataFrame.Review.apply(cleaning)

In [304]:
DataFrame.head(10)

Unnamed: 0,Review,Liked
0,wow loved this place,1
1,crust is not good,0
2,not tasty and the texture was just nasty,0
3,stopped by during the late may bank holiday of...,1
4,the selection on the menu was great and so wer...,1
5,now i am getting angry and i want my damn pho,0
6,honeslty it did not taste that fresh,0
7,the potatoes were like rubber and you could te...,0
8,the fries were great too,1
9,a great touch,1


In [274]:
Name = 'My @name i#s Suryansh,./<>?!@#$%^&*()_+{}[]"|:;" Gupta.'
New = re.sub(f'[{string.punctuation}]', '', Name)
print('Another type of cleaning the data is Fail..X')
print(New)

Another type of cleaning the data is Fail..X
My name is Suryansh Gupta


STEP.5:

Spliting DataFrame into { TEST,TRAIN } & Training the Model :

In [461]:
# testing data set is 20% of DataFrame.
# training data ste is 80% of DataFrame.

from sklearn.model_selection import train_test_split as TTS

x_train,x_test,y_train,y_test = TTS(DataFrame['Review'].values, DataFrame['Liked'].values, test_size = .20, random_state = 1)

In [462]:
len(x_train), len(x_test),len(y_train),len(y_test)

(800, 200, 800, 200)

In [463]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

CV = CountVectorizer()
LR = LogisticRegression(solver = "lbfgs")

from sklearn.pipeline import Pipeline

Model = Pipeline([('vectorizer',CV),('classifier',LR)])

Model.fit(x_train,y_train)

from sklearn.metrics import confusion_matrix
predictions = Model.predict(x_test)
confusion_matrix(predictions,y_test)

array([[92, 12],
       [16, 80]], dtype=int64)

STEP.6

Model Prediction:

In [464]:
from sklearn.metrics import recall_score,accuracy_score,precision_score

print("Accuracy Prediction = ", accuracy_score(predictions,y_test))
print("Precision           = ", precision_score(predictions, y_test, average = 'weighted'))
print("Recall              = ", recall_score(predictions, y_test, average = 'weighted'))

Accuracy Prediction =  0.86
Precision           =  0.860354267310789
Recall              =  0.86


Step.7

Finally, Custom review analysis through Model & Creating GUI:

In [389]:
Example = ["@food is #$# good"]
Result = Model.predict(Example)
print(Result)

[1]


In [415]:
from tkinter import*
from tkinter import messagebox
def Predict():
    ex = lb.get("1.0",END)
    result = Model.predict([ex])
    label = {0:"Dislike, Customer did not Satisfied.", 1:"Liked, Customer is Happy."}
    ab = label[result[0]]
    lbl1.config(text = ab, fg = "green")
    messagebox.showinfo("Result",ab)

In [468]:
root=Tk()

root.title("Sentiment Analysis - Reviews Prediction")

lbl = Label(text = "Sentiment Analysis", fg = "red", bg = "white", font = ("georgia",20))
lbl.pack()

lbl1 = Label(bg = "white", font = ("georgia", 20))
lbl1.pack()

lb = Text(root,)
lb.pack(pady=3)

btn = Button(root, bg="green", fg="white", text="Prediction", height=5, command = Predict)
btn.pack(fill = "x", pady = 3)

root.mainloop()