# Sentiment Analysis of Restaurant Reviews

### Author :- Rajeev Laxman Rao Mahendrakar

## Import Dependencies

In [None]:
import numpy as np
import pandas as pd

import nltk
import re
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

## Load the Restaurant Reviews Dataset

In [None]:
# Load the tab-separated review file. quoting=3 is csv.QUOTE_NONE, so quote
# characters inside a review are kept as ordinary text.
df = pd.read_csv(
    'Restaurant_Reviews.tsv',
    sep='\t',
    quoting=3,
)

In [None]:
df.shape  # (rows, columns) of the freshly loaded data

In [None]:
df.head()  # preview the first rows

In [None]:
# Remove rows that are exact duplicates of an earlier row, modifying df in place.
df.drop_duplicates(inplace=True)

In [None]:
# Replace the index with a fresh 0..n-1 range; drop=True discards the old
# index instead of keeping it as a column.
df.reset_index(drop=True, inplace=True)

In [None]:
df.shape  # (rows, columns) after removing duplicates

In [None]:
df['Liked'].value_counts()  # how many reviews carry each 'Liked' value

## Cleaning the reviews

In [None]:
# Build `corpus`: one cleaned, stemmed string per review in df['Review'].
corpus = []
ps = PorterStemmer()

# Loop invariants are prepared once: the English stop-word set (previously
# rebuilt for every single token) and the compiled "anything that is not a
# letter" pattern.
stop_words = set(stopwords.words('english'))
non_letters = re.compile(r'[^a-zA-Z]')

# Iterate over the column itself, so the loop does not depend on the index
# labels being exactly 0..n-1.
for review in df['Review']:
    message = non_letters.sub(' ', review)  # replace non-letter characters with spaces
    message = message.lower()  # normalise case
    words = message.split()  # tokenise on whitespace
    words = [ps.stem(word) for word in words if word not in stop_words]  # drop stop words, then stem
    corpus.append(' '.join(words))  # store the cleaned review

In [None]:
corpus[0:10]  # inspect the first ten cleaned reviews

## Creating the Bag of Words model

In [None]:
# Bag-of-words features: the vocabulary is capped at 1500 terms and is
# learned from the whole corpus (before any train/test split).
cv = CountVectorizer(max_features=1500)
term_counts = cv.fit_transform(corpus)
X = term_counts.toarray()  # dense document-term count matrix

# Target values come from the second column of df
# (presumably 'Liked' -- confirm against the TSV header).
y = df.iloc[:, 1].values

## train_test_split

In [None]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Checking alpha for the Naive Bayes Classifier

In [None]:
# Try several smoothing strengths (alpha) for MultinomialNB and remember the
# one with the highest accuracy in `alpha_val` / `best_accuracy`.
#
# The grid is 0.1, 0.2, ..., 1.0. alpha=0 is deliberately excluded: it turns
# smoothing off, and depending on the scikit-learn version the estimator
# either warns and substitutes a tiny value or ends up taking log(0).
# np.linspace is used so the number of candidates is explicit rather than
# derived from a floating-point step.
#
# NOTE(review): candidates are scored on X_test/y_test, so the "best"
# accuracy printed here was selected on the test split itself.
best_accuracy = 0.0
alpha_val = 0.0
for alpha in np.linspace(0.1, 1.0, 10):
    temp_classifier = MultinomialNB(alpha=alpha)
    temp_classifier.fit(X_train, y_train)
    temp_y_pred = temp_classifier.predict(X_test)
    score = accuracy_score(y_test, temp_y_pred)
    print("Accuracy score for alpha={} is: {}%".format(round(alpha,1), round(score*100,2)))
    # Strict '>' keeps the smallest alpha when several candidates tie.
    if score>best_accuracy:
        best_accuracy = score
        alpha_val = alpha
print('--------------------------------------------')
print('The best accuracy is {}% with alpha value as {}'.format(round(best_accuracy*100, 2), round(alpha_val,1)))

## Fitting Naive Bayes to the Training set

In [None]:
# Train the final Multinomial Naive Bayes model on the training split.
# NOTE(review): alpha is hard-coded to 0.1 here rather than taken from the
# `alpha_val` found by the search above -- confirm 0.1 is the intended value.
classifier = MultinomialNB(alpha=0.1)
classifier.fit(X_train, y_train)

## Predicting the Test set results

In [None]:
# Predicted labels for every row of the held-out test features.
y_pred = classifier.predict(X_test)

In [None]:
y_pred  # display the predicted labels

## Accuracy Score

In [None]:
# Test-set accuracy expressed as a percentage.
acc_s = 100 * accuracy_score(y_test, y_pred)

In [None]:
# Report the accuracy rounded to two decimal places.
print(f"Accuracy Score {round(acc_s, 2)} %")

# Prediction

In [None]:
def predict_review(sample_message):
    """Predict the label of a single review text.

    The text is cleaned the same way the training reviews are: non-letter
    characters become spaces, the text is lower-cased and split on
    whitespace, English stop words are removed and the remaining words are
    Porter-stemmed. The cleaned text is then vectorised with the
    module-level ``cv`` and classified with the module-level ``classifier``.

    Args:
        sample_message: The raw review text (a ``str``).

    Returns:
        Whatever ``classifier.predict`` returns for the one-row feature
        matrix, i.e. an array-like holding a single predicted label.
    """
    letters_only = re.sub(pattern='[^a-zA-Z]', repl=' ', string=sample_message)
    tokens = letters_only.lower().split()

    # Build the stop-word set once per call; the original rebuilt it for
    # every token inside the comprehension.
    stop_words = set(stopwords.words('english'))
    ps = PorterStemmer()
    stems = [ps.stem(token) for token in tokens if token not in stop_words]

    final_message = ' '.join(stems)
    temp = cv.transform([final_message]).toarray()
    return classifier.predict(temp)

In [None]:
# Messages shown for a prediction: index 0 is printed when predict_review()
# is falsy (negative review), index 1 when it is truthy (positive review).
# The first message previously misspelled "Negative" as "Negetive".
result = ['Ohhh Shit! Negative Review','Good! Positive Review']

In [None]:
msg = 'The food is really good here.'

# A truthy prediction selects the positive message, a falsy one the negative.
verdict = result[1] if predict_review(msg) else result[0]
print(verdict)

In [None]:
msg = 'Food was pretty bad and the service was very slow.'

# A truthy prediction selects the positive message, a falsy one the negative.
verdict = result[1] if predict_review(msg) else result[0]
print(verdict)

In [None]:
msg = 'The food was absolutely wonderful, from preparation to presentation, very pleasing.'

# A truthy prediction selects the positive message, a falsy one the negative.
verdict = result[1] if predict_review(msg) else result[0]
print(verdict)