In [1]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [2]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to C:\Users\win
[nltk_data]     11\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
df = pd.read_csv('Restaurant Reviews/Restaurant_Reviews.tsv', sep='\t')

In [4]:
df.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [5]:
df.dropna(inplace=True)

In [6]:
blanks = []

for i, lb, rv in df.itertuples():
    if type(rv) == str:
        if rv.isspace():
            blanks.append(i)

In [7]:
blanks

[]

In [8]:
def preprocess_text(text):
    tokens = word_tokenize(text.lower())
    filtered_tokens = [token for token in tokens if token not in stopwords.words('english')]
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in filtered_tokens]
    processed_text = ' '.join(lemmatized_tokens)

    return processed_text

df['Review'] = df['Review'].apply(preprocess_text)

In [9]:
display(df)

Unnamed: 0,Review,Liked
0,wow ... loved place .,1
1,crust good .,0
2,tasty texture nasty .,0
3,stopped late may bank holiday rick steve recom...,1
4,selection menu great price .,1
...,...,...
995,think food flavor texture lacking .,0
996,appetite instantly gone .,0
997,overall impressed would go back .,0
998,"whole experience underwhelming , think 'll go ...",0


In [10]:
analyzer = SentimentIntensityAnalyzer()

def get_sentiment(text):
    scores = analyzer.polarity_scores(text)
    sentiment = 1 if scores['pos'] > 0 else 0

    return sentiment

df['Prediction_Liked'] = df['Review'].apply(get_sentiment)

In [11]:
display(df)

Unnamed: 0,Review,Liked,Prediction_Liked
0,wow ... loved place .,1,1
1,crust good .,0,1
2,tasty texture nasty .,0,0
3,stopped late may bank holiday rick steve recom...,1,1
4,selection menu great price .,1,1
...,...,...,...
995,think food flavor texture lacking .,0,0
996,appetite instantly gone .,0,0
997,overall impressed would go back .,0,1
998,"whole experience underwhelming , think 'll go ...",0,0


In [12]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

accuracy_score(df['Liked'], df['Prediction_Liked'])

0.74

In [13]:
print(confusion_matrix(df['Liked'], df['Prediction_Liked']))

[[326 174]
 [ 86 414]]


In [14]:
print(classification_report(df['Liked'], df['Prediction_Liked']))

              precision    recall  f1-score   support

           0       0.79      0.65      0.71       500
           1       0.70      0.83      0.76       500

    accuracy                           0.74      1000
   macro avg       0.75      0.74      0.74      1000
weighted avg       0.75      0.74      0.74      1000

