In [1]:
# Importing the libraries
# NOTE(review): numpy and matplotlib are not referenced in the visible cells —
# confirm they are unused elsewhere before removing them.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import nltk
# Fetch the NLTK stopword corpus; the cleaning cells read it through
# stopwords.words('english').
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from pathlib import Path
import os

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [2]:
# Importing the dataset
# `cwd` actually points at the "datasets" folder under the current working directory.
cwd = Path.cwd() / "datasets"

# Tab-separated file; quoting=3 (csv.QUOTE_NONE) keeps quote characters inside
# the review text as literal characters.
dataset = pd.read_csv(
    cwd / 'Restaurant_Reviews.tsv',
    sep='\t',
    quoting=3,
)

In [3]:
# Cleaning the texts
# Each review is reduced to letters only, lower-cased, split on whitespace,
# stripped of English stopwords and Porter-stemmed, then re-joined into a
# single space-separated string.

# Build the stopword set and the stemmer once: stopwords.words() returns a
# fresh list on every call, so rebuilding the set per token is wasted work.
english_stopwords = set(stopwords.words('english'))
ps = PorterStemmer()

corpus = []
# Iterate over every row actually present instead of a hard-coded 1000, so the
# corpus always has exactly one entry per label in `dataset`.
for raw_review in dataset['Review']:
    words = re.sub('[^a-zA-Z]', ' ', raw_review).lower().split()
    review = ' '.join(ps.stem(word) for word in words if word not in english_stopwords)
    corpus.append(review)

# Features and labels are built from `corpus` and `dataset` respectively, so
# their lengths must agree.
assert len(corpus) == len(dataset)

In [4]:
# Creating the Bag of Words model
from sklearn.feature_extraction.text import CountVectorizer

# Vocabulary is capped at 1500 features.
cv = CountVectorizer(max_features=1500)

# Fit the vocabulary on the cleaned corpus, then densify the resulting
# document-term matrix.
term_counts = cv.fit_transform(corpus)
X = term_counts.toarray()

# Labels are taken from the second column of the dataset (positional index 1).
label_column = dataset.iloc[:, 1]
y = label_column.values

In [6]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split

# 80/20 split; random_state is fixed so the partition is reproducible.
split_parts = train_test_split(X, y, test_size=0.20, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [7]:
# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB

classifier = GaussianNB()
# Kept as a bare trailing expression so the notebook still displays the fitted estimator.
classifier.fit(X=X_train, y=y_train)

In [8]:
# Predicting the Test set results and making the Confusion Matrix
from sklearn.metrics import confusion_matrix

# Predicted labels for the held-out rows.
y_pred = classifier.predict(X_test)

# The true labels are passed as y_true and the predictions as y_pred.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [9]:
# New review
new_review_raw = "I enjoyed the food and the service was excellent!"

# Cleaning the new review
# The same steps as the training corpus are applied (letters only, lower-case,
# English stopwords removed, Porter stemming) so the tokens can match the
# vocabulary that `cv` was fitted on.
# The stopword set is built once rather than once per token.
english_stopwords = set(stopwords.words('english'))
ps = PorterStemmer()
new_review_words = re.sub('[^a-zA-Z]', ' ', new_review_raw).lower().split()
# `new_review` ends up holding the cleaned, space-joined text.
new_review = ' '.join(
    ps.stem(word) for word in new_review_words if word not in english_stopwords
)

# Transforming the new review using the same CountVectorizer
# (transform only — the vocabulary is not re-fitted)
new_review_transformed = cv.transform([new_review]).toarray()

# Predicting the sentiment of the new review
new_review_pred = classifier.predict(new_review_transformed)

# Displaying the result: a predicted label of 1 is reported as positive,
# anything else as negative.
if new_review_pred[0] == 1:
    print("Positive review")
else:
    print("Negative review")

Positive review
