# 1. Read data


In [None]:
import nltk
import pandas as pd

# Load the movie-review dataset; the relative path is resolved against the
# current working directory.
df = pd.read_csv(filepath_or_buffer='movie_reviews.csv')

# Display the total number of rows and columns

# Display the first 5 records


![](https://i.imgur.com/pt5p6Hb.png)

# 2. Pre-process the Data

In [None]:
# Text preprocessing steps - remove numbers, capital letters and punctuation
import re
import string

# The regex patterns below are raw strings: in a normal string literal `\w`,
# `\s` and `\d` are invalid escape sequences, which Python warns about.
def alphanumeric(x):
    """Return x with every punctuation character replaced by a space.

    Anything that is neither a regex word character (letter, digit,
    underscore) nor whitespace counts as punctuation. The name is kept for
    compatibility; it describes what is left, not what is removed.
    """
    return re.sub(r'[^\w\s]', ' ', x)


def punc_lower(x):
    """Return x lower-cased with every digit replaced by a space.

    The name is kept for compatibility even though no punctuation is
    handled here.
    """
    return re.sub(r'\d', ' ', x.lower())


# Clean every review: strip punctuation first, then lower-case and strip digits.
rv = df['review']
df['review'] = rv.map(alphanumeric).map(punc_lower)

# Display the pre-processed "review"

In [None]:
from nltk.corpus import stopwords
# Load NLTK's English stop-word list. The stop-word corpus is downloaded
# separately from the nltk package, so fetch it on first use instead of
# failing with LookupError on a fresh environment.
try:
    stop = stopwords.words('english')
except LookupError:
    nltk.download('stopwords')
    stop = stopwords.words('english')

# Try another language - e.g. Indonesian
# Display all the stopwords in NLTK

In [None]:
# Extra tokens to filter out on top of the NLTK list.
# NOTE(review): 'br' is presumably what is left of HTML <br> tags once the
# punctuation has been stripped - confirm against the raw reviews.
custom_stop_words = ['br']
stop += custom_stop_words  # in-place append; `stop` stays the same list object

In [None]:
# Drop stop words from every review. Membership is tested against a set built
# once up front: `stop` is a list, so `word not in stop` would rescan the
# whole list for every single word of every review.
stop_set = set(stop)
df['review'] = df['review'].apply(
    lambda x: ' '.join(word for word in x.split() if word not in stop_set)
)
df['review']

In [None]:
# Identify the feature and label (X,y)


# 3. Data Splitting 

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as the test set; the fixed random_state makes
# the split reproducible. X and y must already be defined by the cell above.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Feature Extraction - Text Vectorization

In [None]:
from sklearn.feature_extraction.text import CountVectorizer#TfidfVectorizer
import joblib #for saving and loading Python objects

# Learn the vocabulary from the training reviews only, then encode both splits
# as count matrices using that same vocabulary.
cv = CountVectorizer(stop_words="english")
X_train_CV = cv.fit_transform(raw_documents=X_train)
X_test_CV = cv.transform(raw_documents=X_test)

# Persist the fitted CountVectorizer so the same vocabulary can be reloaded later.
joblib.dump(value=cv, filename='vectorizer.pkl')

![](https://i.ibb.co/HN2M07C/pickle.png)
<!-- <a href="https://imgbb.com/"><img src="https://i.ibb.co/HN2M07C/pickle.png" alt="pickle" border="0" /></a> -->
<!-- ![](https://i.imgur.com/pt5p6Hb.png) -->

# 5. Model Training - Prediction

In [None]:
from sklearn.svm import SVC
import joblib

# Create a support-vector classifier with scikit-learn's default hyperparameters.
svm = SVC()

# Start model training using SVM


In [None]:
# Write the SVM classifier to disk so it can be loaded again without retraining.
joblib.dump(value=svm, filename='svm_model.pkl')

In [None]:
# Start model prediction using SVM


In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test reviews whose predicted label matches the true label.
# `svm_re` is expected to hold the predictions produced in the cell above.
accuracy_score(y_true=y_test, y_pred=svm_re)

In [None]:
# Quick manual check: encode one hand-written sentence with the fitted
# vectorizer and let the classifier label it.
testcase = ["I dislike this movie."]
tc_cv = cv.transform(raw_documents=testcase)
output = svm.predict(X=tc_cv)
output

In [None]:
# Restore the persisted classifier and vectorizer from disk.
# joblib files are pickle-based: only load files you created or trust.
svm_model = joblib.load(filename='svm_model.pkl')
vectorizer = joblib.load(filename='vectorizer.pkl')



# 6. GUI

from tkinter import Tk, Label, Text, Button, Entry, W, messagebox

# Main application window: title, fixed initial size and background colour.
gui = Tk()
gui.title("Sentiment Detector")
gui.geometry("250x400")
gui.configure(background="light green")

def analyze_sentiment():
    """Classify the text in ``textArea`` and show the result in ``overallField``.

    Reads the whole text widget and strips surrounding whitespace. If nothing
    is left, a warning dialog is shown and no prediction is made. Otherwise
    the text is encoded with the loaded ``vectorizer``, classified with
    ``svm_model``, and the entry field is overwritten with "Positive" when the
    predicted label equals the string 'positive', or "Negative" for any other
    label.
    """
    user_text = textArea.get("1.0", "end").strip()
    if not user_text:
        messagebox.showwarning("Input Error", "Please enter text for analysis.")
        return

    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([user_text])
    predicted_label = svm_model.predict(features)[0]

    if predicted_label == 'positive':
        sentiment = "Positive"
    else:
        sentiment = "Negative"

    # Replace whatever the previous analysis left in the result field.
    overallField.delete(0, "end")
    overallField.insert(0, sentiment)

# Build the widgets in top-to-bottom display order.
enterText = Label(gui, text="Enter Your Sentence", bg="light green")
textArea = Text(gui, height=5, width=25, font="lucida 13")
check = Button(
    gui,
    text="Check Sentiment",
    fg="Black",
    bg="Red",
    command=analyze_sentiment,
)
overall = Label(gui, text="Sentence Overall Rated As: ", bg="light green")
overallField = Entry(gui)
clear = Button(
    gui,
    text="Clear",
    fg="Black",
    bg="Red",
    command=lambda: textArea.delete("1.0", "end"),
)

# Every widget sits in grid column 2; only the row and the spacing differ.
for _widget, _grid_options in (
    (enterText, {"row": 0}),
    (textArea, {"row": 1, "padx": 10, "sticky": W}),
    (check, {"row": 2, "pady": 10}),
    (overall, {"row": 9, "pady": (30, 0)}),
    (overallField, {"row": 10}),
    (clear, {"row": 11, "pady": 10}),
):
    _widget.grid(column=2, **_grid_options)

# Hand control to Tk's event loop; this call blocks until the window is closed.
gui.mainloop()


# Exercise

1. Try using a different model for the sentiment analysis (e.g. Logistic Regression (LR) instead of SVM). Compare the models' performance in terms of accuracy.

2. Modify the GUI to use the LR model.

![](https://i.ibb.co/P1zWX1c/LR.png)

# <a href="https://imgbb.com/"><img src="https://i.ibb.co/P1zWX1c/LR.png" alt="LR" border="0" /></a>