In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Download necessary NLTK resources
nltk.download("stopwords")

# Load the dataset
dataset = pd.read_csv('Restaurant_Reviews.tsv', delimiter='\t', quoting=3)

# Text preprocessing
corpus = []
ps = PorterStemmer()
stop_words = set(stopwords.words('english'))
for review in dataset['Review']:
    review = re.sub('[^a-zA-Z]', ' ', review)
    review = review.lower()
    review = review.split()
    review = [ps.stem(word) for word in review if word not in stop_words]
    review = ' '.join(review)
    corpus.append(review)

# Feature extraction using TF-IDF
vectorizer = TfidfVectorizer(max_features=1500)
X = vectorizer.fit_transform(corpus).toarray()

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, dataset['Liked'], test_size=0.2, random_state=0)





[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\dell\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
"""Decision Tree Classifier"""
from sklearn.tree import DecisionTreeClassifier 
DT_Classifier = DecisionTreeClassifier(criterion = 'entropy' , random_state= 0)
DT_Classifier.fit(X_train , y_train)
DT_Y_Predection = DT_Classifier.predict(X_test)

# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, DT_Y_Predection))
print("\nClassification Report:")
print(classification_report(y_test, DT_Y_Predection))
print("\nAccuracy:", accuracy_score(y_test, DT_Y_Predection))

Confusion Matrix:
[[81 16]
 [40 63]]

Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.84      0.74        97
           1       0.80      0.61      0.69       103

    accuracy                           0.72       200
   macro avg       0.73      0.72      0.72       200
weighted avg       0.74      0.72      0.72       200


Accuracy: 0.72


In [3]:
"""Naive Bayes Classifier"""
from sklearn.naive_bayes import GaussianNB
NB_Classifier = GaussianNB()
NB_Classifier.fit(X_train , y_train)
NB_Y_Predection = NB_Classifier.predict(X_test)

# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, NB_Y_Predection))
print("\nClassification Report:")
print(classification_report(y_test, NB_Y_Predection))
print("\nAccuracy:", accuracy_score(y_test, NB_Y_Predection))

Confusion Matrix:
[[57 40]
 [16 87]]

Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.59      0.67        97
           1       0.69      0.84      0.76       103

    accuracy                           0.72       200
   macro avg       0.73      0.72      0.71       200
weighted avg       0.73      0.72      0.71       200


Accuracy: 0.72


In [4]:
"""SVM Classifier"""
from sklearn.svm import SVC
SVM_Classifier = SVC(kernel="rbf" , random_state=0)
SVM_Classifier.fit(X_train , y_train)
SVM_Y_Predection = SVM_Classifier.predict(X_test)



# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, SVM_Y_Predection))
print("\nClassification Report:")
print(classification_report(y_test, SVM_Y_Predection))
print("\nAccuracy:", accuracy_score(y_test , SVM_Y_Predection))

Confusion Matrix:
[[83 14]
 [35 68]]

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.86      0.77        97
           1       0.83      0.66      0.74       103

    accuracy                           0.76       200
   macro avg       0.77      0.76      0.75       200
weighted avg       0.77      0.76      0.75       200


Accuracy: 0.755


In [5]:
"""KNN Classifier"""
from sklearn.neighbors import KNeighborsClassifier
KNN_Classifier = KNeighborsClassifier(n_neighbors=5 , metric='minkowski' , p=2)
KNN_Classifier.fit(X_train,y_train)
KNN_Y_Predection = KNN_Classifier.predict(X_test)


# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, KNN_Y_Predection))
print("\nClassification Report:")
print(classification_report(y_test, KNN_Y_Predection))
print("\nAccuracy:", accuracy_score(y_test, KNN_Y_Predection))

Confusion Matrix:
[[79 18]
 [42 61]]

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.81      0.72        97
           1       0.77      0.59      0.67       103

    accuracy                           0.70       200
   macro avg       0.71      0.70      0.70       200
weighted avg       0.71      0.70      0.70       200


Accuracy: 0.7


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [6]:
"""Logistic Regression"""
from sklearn.linear_model import LogisticRegression
LR_Classifier = LogisticRegression(random_state=0)
LR_Classifier.fit(X_train , y_train)
LR_Y_Predection = LR_Classifier.predict(X_test)


# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, LR_Y_Predection))
print("\nClassification Report:")
print(classification_report(y_test, LR_Y_Predection))
print("\nAccuracy:", accuracy_score(y_test, LR_Y_Predection))

Confusion Matrix:
[[85 12]
 [37 66]]

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.88      0.78        97
           1       0.85      0.64      0.73       103

    accuracy                           0.76       200
   macro avg       0.77      0.76      0.75       200
weighted avg       0.77      0.76      0.75       200


Accuracy: 0.755
