# Import libraries

In [1]:
import re,nltk
import pandas as pd
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Import dataset

In [2]:
df=pd.read_csv("review.csv")

# Download stopwords and assign it to "sw"

In [3]:
nltk.download("stopwords")
sw=stopwords.words("english")

[nltk_data] Downloading package stopwords to C:\Users\Tuhin
[nltk_data]     Roy\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Data preprocessing and cleaning

In [4]:
def preprocess_character(word):
    word=word.lower()
    word=word.replace("<br /><br />","")
    word=re.sub("[^a-z]+"," ",word)
    word=word.split(" ")
    word=[sample for sample in word if sample not in sw]
    word=" ".join(word)
    return word

# Apply data preprocessing into "review" column

In [5]:
df["review"]=df["review"].apply(preprocess_character)

# Assign the values of "review" in X and "label" in y

In [6]:
X=df["review"].values
y=df["label"].values

# Encode y

In [7]:
le=LabelEncoder()
y=le.fit_transform(y)

# Apply count vectorization in review 

In [8]:
cv=CountVectorizer(max_features=10000)
X=cv.fit_transform(X)

# Split data into train and test

In [9]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=20)

# Train our data

In [10]:
model=MultinomialNB()
model.fit(X_train,y_train)

MultinomialNB()

# Determine test accuracy

In [11]:
y_pred=model.predict(X_test)

print("Test accuracy : {:.1f} %".format(accuracy_score(y_test,y_pred)*100))

Test accuracy : 85.6 %


# Final prediction

In [12]:
review=str(input("Enter your review : "))
review=preprocess_character(review)
review=cv.transform([review])
result=model.predict(review)
if result==1:
    print("\nResult : Positive Sentiment","\U0001F603")
else:
    print("\nResult : Negative Sentiment","\U0001F613")

Enter your review : If I would I've seen this movie in 2016, it certainly would have been in my list of best movies. If you're going to this movie thinking Aamir will be flexing and showcasing his acting prowess then well and good but the girls are the highlight of the movie, hands down. SCRIPT - Major props to bringing out a story which is inspiring and one which India needs right now. The story is kept grounded in reality. No over-the-top gimmicks. The songs blend in very well with the premise.

Result : Positive Sentiment 😃
