# Restaurant Review Sentiment Analysis


In [2]:
#Loading the important libraries
import numpy as np
import pandas as pd

In [3]:
#Loading the dataset of Restaurant Reviews
# The file is tab-separated; quoting=3 is csv.QUOTE_NONE, so quote characters
# inside the review text are read as ordinary characters.
df = pd.read_csv('Restaurant_Reviews.tsv', delimiter='\t', quoting=3)

FileNotFoundError: [Errno 2] No such file or directory: 'Restaurant_Reviews.tsv'

In [None]:
#Shape of the dataset as a (number of rows, number of columns) tuple
df.shape


: 

In [None]:
#Columns of the dataset
df.columns


: 

In [None]:
#Previewing the first rows of the dataset (head() returns 5 rows by default)
df.head()

: 

In [None]:
#Showcasing the first 15 rows of the dataset
df.head(15)

: 

In [None]:
#Previewing the last rows of the dataset (tail() returns 5 rows by default)
df.tail()

: 

In [None]:
#Showcasing the last 15 rows of the dataset
df.tail(15)

: 

In [None]:
#Importing the natural language toolkit 
import nltk 


: 

In [None]:
#The re module provides regular-expression matching: a pattern describes a set of strings, and the module's functions let you check whether a particular string matches it.
import re

: 

In [None]:
# Download NLTK's stop-word lists so that nltk.corpus.stopwords can load them
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer


: 

In [None]:
# Now we need to clean the reviews: keep letters only, lowercase the words, tokenize, eliminate stop words, stem, and join the stems back into a single string.


: 

In [None]:
# Build the cleaned corpus: one normalized, stemmed string per review.
# The stop-word set and the stemmer do not change between reviews, so they are
# created once here instead of once per word / once per review.
stop_words = set(stopwords.words('english'))
ps = PorterStemmer()

corpus = []
# Iterate over every review actually present in the dataframe (rather than a
# hard-coded 0..999) so the corpus always has exactly one entry per row of df.
for raw_review in df['Review']:
    #Keeping letters only: every other character becomes a space
    review = re.sub(pattern='[^a-zA-Z]', repl=' ', string=raw_review)

    #Lowercasing the letters
    review = review.lower()

    #Tokenizing the review into words (split on whitespace)
    review_words = review.split()

    #Eliminating the stopwords
    review_words = [word for word in review_words if word not in stop_words]

    #Stemming the words in the review
    review = [ps.stem(word) for word in review_words]

    #Joining the stemmed words
    review = ' '.join(review)

    #Creating a corpus
    corpus.append(review)
    
    
    
    

: 

In [None]:
#Showcasing the first 20 cleaned reviews in the corpus
corpus[:20]

: 

In [None]:
import pickle

: 

In [None]:
#Creating the BAG OF WORDS MODEL i.e. a method to extract features from the text documents
from sklearn.feature_extraction.text import CountVectorizer
# Limit the vocabulary to the 1500 most frequent terms in the corpus
cv = CountVectorizer(max_features=1500)
# Learn the vocabulary and build a dense document-term count matrix
X = cv.fit_transform(corpus).toarray()
# Labels come from the second column of the dataframe
# NOTE(review): presumably 1 = positive review, 0 = negative — confirm against the dataset
y = df.iloc[:, 1].values

: 

In [None]:
# Creating a pickle file for the CountVectorizer
# A context manager guarantees the file is flushed and closed as soon as the
# dump finishes, instead of leaving the open handle to the garbage collector.
with open('cv-transform.pkl', 'wb') as cv_file:
    pickle.dump(cv, cv_file)

: 

In [None]:
#Doing the train/test split: 20% of the samples are held out for testing
from sklearn.model_selection import train_test_split
# random_state is fixed so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

: 

In [None]:
# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import MultinomialNB
# alpha is the additive (Laplace/Lidstone) smoothing parameter
classifier = MultinomialNB(alpha=0.2)
classifier.fit(X_train, y_train)


: 

In [None]:
# Creating a pickle file for the Multinomial Naive Bayes model
filename = 'restaurant-sentiment-mnb-model.pkl'
# A context manager guarantees the file is flushed and closed as soon as the
# dump finishes, instead of leaving the open handle to the garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

: 

In [None]:
#Predicting the test results: one predicted class label per row of X_test
y_pred = classifier.predict(X_test)



: 

In [None]:
# Accuracy: fraction of test reviews whose predicted label equals the true label
from sklearn.metrics import accuracy_score
score1 = accuracy_score(y_test,y_pred)
# Report the score rounded to two decimal places
print("The Accuracy score is: {}".format(round(score1,2)))



: 

In [None]:
#Precision: of the reviews predicted as class 1, the fraction that truly are class 1
from sklearn.metrics import precision_score
score2 = precision_score(y_test,y_pred)
# Report the score rounded to two decimal places
print("The Precision score is: {}".format(round(score2,2)))



: 

In [None]:
#Recall: of the reviews that truly are class 1, the fraction predicted as class 1
from sklearn.metrics import recall_score
score3= recall_score(y_test,y_pred)
# Report the score rounded to two decimal places
print("The Recall score is: {}".format(round(score3,2)))



: 

In [None]:
# Summary of the three metrics computed above, each rounded to two decimals
print("The Accuracy, Precision, and Recall Scores:")
for metric_name, metric_value in (("Accuracy", score1), ("Precision", score2), ("Recall", score3)):
    print("{} score = {}".format(metric_name, round(metric_value, 2)))

: 

In [None]:
#Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
# Rows are the actual classes and columns the predicted classes, both in
# sorted label order (label 0 first, then label 1).
cm = confusion_matrix(y_test, y_pred)

: 

In [None]:
#Displaying the raw confusion matrix counts
cm

: 

In [None]:
#Plotting the confusion matrix
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

: 

In [None]:
# Plot the confusion matrix as an annotated heatmap.
# confusion_matrix orders classes by sorted label value, so index 0 is label 0
# (negative review) and index 1 is label 1 (positive review); the tick labels
# must follow that same order or the plot is mislabeled.
class_names = ['Negative', 'Positive']
plt.figure(figsize = (9,6))
# fmt='d' prints the integer counts in full; the default '.2g' format can show
# three-digit counts in scientific notation.
sns.heatmap(cm, annot=True, fmt='d', cmap="magma", xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('Actual Label')

: 

In [None]:
# Hyperparameter tuning the Naive Bayes Classifier
# Try alpha = 0.1, 0.2, ..., 1.0 and remember the value with the highest accuracy.
# NOTE(review): candidates are scored on X_test/y_test, the same split used for
# the reported metrics, so the winning accuracy is an optimistic estimate.
best_accuracy, alpha_val = 0.0, 0.0
for candidate_alpha in np.arange(0.1, 1.1, 0.1):
    temp_classifier = MultinomialNB(alpha=candidate_alpha)
    temp_classifier.fit(X_train, y_train)
    score = accuracy_score(y_test, temp_classifier.predict(X_test))
    print("The accuracy score for alpha {} is {}%".format(round(candidate_alpha, 1), round(score * 100, 2)))
    # Strict '>' keeps the first (smallest) alpha when several tie for the best score
    if score > best_accuracy:
        best_accuracy, alpha_val = score, candidate_alpha
# Three separator lines before the final summary
separator = '--------------------------------------------'
print('\n'.join([separator] * 3))
print('The best accuracy is {}% with alpha value {}'.format(round(best_accuracy*100, 2), round(alpha_val,1)))

: 

In [None]:
#Since the best accuracy score is 78.5 % for alpha 0.2
# Refit the final classifier with that alpha; this `classifier` is the model
# used by the prediction cells that follow.
# NOTE(review): alpha is hard-coded here rather than taken from alpha_val found
# by the tuning loop — confirm they still agree if the data changes.
classifier = MultinomialNB(alpha=0.2)
classifier.fit(X_train, y_train)

: 

In [None]:
def predict_sentiment(sample_review):
    """Predict the sentiment class of a single raw review string.

    The text goes through the same cleaning steps used to build the training
    corpus (letters only, lowercase, stop-word removal, Porter stemming), is
    vectorized with the fitted ``cv`` and classified by ``classifier``.

    Args:
        sample_review: Raw review text.

    Returns:
        The array returned by ``classifier.predict`` for this one review.
        Callers in this notebook treat a truthy result as a positive review.
    """
    # Keep letters only, normalize case, then split on whitespace
    cleaned = re.sub(pattern='[^a-zA-Z]', repl=' ', string=sample_review).lower()

    # Build the stop-word set once per call instead of once per word
    stop_words = set(stopwords.words('english'))
    ps = PorterStemmer()
    stems = [ps.stem(word) for word in cleaned.split() if word not in stop_words]
    final_review = ' '.join(stems)

    # Vectorize with the already-fitted CountVectorizer (transform, not fit)
    temp = cv.transform([final_review]).toarray()
    return classifier.predict(temp)

: 

In [None]:
# Predicting values
sample_review = 'The food is really good here.'

# A truthy prediction is reported as a positive review
print('This is a POSITIVE review.' if predict_sentiment(sample_review) else 'This is a NEGATIVE review!')

: 

In [None]:
# Predicting values
sample_review = 'Food was pretty bad and the service was very slow.'

# A truthy prediction is reported as a positive review
print('This is a POSITIVE review.' if predict_sentiment(sample_review) else 'This is a NEGATIVE review!')

: 

In [None]:
# Predicting values
sample_review = 'Delicious'

# A truthy prediction is reported as a positive review
print('This is a POSITIVE review.' if predict_sentiment(sample_review) else 'This is a NEGATIVE review!')

: 

: 