In [190]:
# Import necessary libraries
import re
import nltk
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [None]:
# Read the tab-separated restaurant reviews into a DataFrame
data = pd.read_csv('Restaurant_Reviews.tsv', sep='\t')

In [191]:
# Import NLTK libraries for text preprocessing
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [192]:
# Build the cleaned corpus: one normalised, stemmed string per review
corpus = []

# Loop-invariant helpers are created once up front; rebuilding the stopword
# set for every word (and a stemmer for every review) is needlessly slow.
english_stopwords = set(stopwords.words('english'))
ps = PorterStemmer()

# Iterate over every review that is actually present rather than a hard-coded
# range(0, 1000), so the corpus length always matches the label column.
for raw_review in data['Review']:
    # Replace every non-alphabetical character with a space
    review = re.sub(pattern='[^a-zA-Z]', repl=' ', string=raw_review)
    # Convert text to lowercase
    review = review.lower()
    # Tokenize the review into words (whitespace split)
    review_words = review.split()
    # Remove English stopwords
    review_words = [word for word in review_words if word not in english_stopwords]
    # Reduce each remaining word to its Porter stem
    review = [ps.stem(word) for word in review_words]
    # Join the stemmed words back into a single string
    review = ' '.join(review)
    # Append the preprocessed review to the corpus
    corpus.append(review)

In [None]:
# Bag of Words: fit a CountVectorizer (vocabulary capped at 1500 features)
# on the corpus and produce the document-term count matrix.
cv = CountVectorizer(max_features=1500)
x = cv.fit_transform(raw_documents=corpus)

In [235]:
# Labels come from the "Liked" column; hold out 20% of the rows for testing.
# random_state is fixed so the split is reproducible.
y = data["Liked"]
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=2,
)

In [241]:
# Fit a Multinomial Naive Bayes classifier (smoothing alpha=0.9) on the
# training split.
model = MultinomialNB(alpha=0.9)
model.fit(X=x_train, y=y_train)

In [242]:
# Predict labels for the held-out test split
y_pred = model.predict(X=x_test)

In [243]:
# Evaluate the test-set predictions:
# score1 = accuracy, score2 = precision, score3 = recall
score1, score2, score3 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score)
)

In [244]:
# Report each metric as a percentage rounded to two decimals
metric_report = [
    ("Accuracy score is: {} %", score1),
    ("Precision score is: {} %", score2),
    ("Recall score is: {} %", score3),
]
for template, value in metric_report:
    print(template.format(round(value * 100, 2)))

Accuracy score is: 82.5 %
Precision score is: 77.14 %
Recall score is: 88.04 %


In [240]:
# Sweep the smoothing parameter alpha of Multinomial Naive Bayes and keep the
# value with the highest accuracy (the first one wins on ties).
# NOTE(review): candidates are scored on x_test / y_test, i.e. the same split
# used for the final evaluation, so the reported best accuracy is optimistic.
# Consider selecting alpha via cross-validation on the training split instead.
from sklearn.metrics import accuracy_score

best_accuracy, alpha_val = 0.0, 0.0

# Candidate alphas: 0.1, 0.2, ..., 1.0
for alpha in np.arange(0.1, 1.1, 0.1):
    candidate = MultinomialNB(alpha=alpha)
    candidate.fit(x_train, y_train)
    candidate_pred = candidate.predict(x_test)
    candidate_accuracy = accuracy_score(y_test, candidate_pred)
    print("Accuracy score for alpha {} is: {:.2f}%".format(round(alpha, 1), round(candidate_accuracy * 100, 2)))

    # Strict comparison: only a strictly better score replaces the incumbent
    if candidate_accuracy > best_accuracy:
        best_accuracy, alpha_val = candidate_accuracy, alpha

# Summarise the winning configuration
print('------------------------')
print('The best accuracy is {:.2f}% with alpha value as {}'.format(round(best_accuracy * 100, 2), round(alpha_val, 1)))

Accuracy score for alpha 0.1 is: 80.00%
Accuracy score for alpha 0.2 is: 81.00%
Accuracy score for alpha 0.3 is: 81.00%
Accuracy score for alpha 0.4 is: 81.50%
Accuracy score for alpha 0.5 is: 80.50%
Accuracy score for alpha 0.6 is: 80.50%
Accuracy score for alpha 0.7 is: 80.50%
Accuracy score for alpha 0.8 is: 81.00%
Accuracy score for alpha 0.9 is: 82.50%
Accuracy score for alpha 1.0 is: 82.00%
------------------------
The best accuracy is 82.50% with alpha value as 0.9


In [245]:
# Function to predict sentiment of a sample review
def predict_sentiment(sample_review):
    """Classify a single review string as "POSITIVE" or "NEGATIVE".

    The text is cleaned with the same steps used to build the training
    corpus (strip non-letters, lowercase, drop English stopwords, Porter
    stemming), turned into a bag-of-words count vector with the already
    fitted ``cv`` and scored by the trained ``model``.

    Parameters
    ----------
    sample_review : str
        Raw review text.

    Returns
    -------
    str
        "POSITIVE" if the model predicts label 1, otherwise "NEGATIVE".
    """
    # Replace non-alphabetical characters with spaces, lowercase, and tokenize
    sample_review = re.sub(pattern='[^a-zA-Z]', repl=' ', string=sample_review)
    sample_review = sample_review.lower()
    sample_review_words = sample_review.split()

    # Build the stopword set once per call instead of once per word
    english_stopwords = set(stopwords.words('english'))

    # Remove stopwords, then stem what is left
    ps = PorterStemmer()
    sample_review_words = [ps.stem(word) for word in sample_review_words if word not in english_stopwords]

    # Join the stemmed words
    final_review = ' '.join(sample_review_words)

    # Transform the review into a bag-of-words count vector using the
    # pre-fitted 'cv' object (a CountVectorizer, not TF-IDF)
    sample_review_vector = cv.transform([final_review]).toarray()

    # predict() returns one label per input row; exactly one row was passed
    prediction = model.predict(sample_review_vector)

    # Map the numeric label to a readable sentiment
    if prediction[0] == 1:
        return "POSITIVE"
    return "NEGATIVE"

In [252]:
# Example: run one hand-written review through the trained pipeline
sample_review = 'The food was really bad'
predicted_sentiment = predict_sentiment(sample_review)

# Show the resulting label
print("This is a {} review.".format(predicted_sentiment))

This is a NEGATIVE review.
