In [1]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
# Download VADER lexicon (only required once)
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to C:\Users\Fabio_UofT
[nltk_data]     SCS\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
# Step 1: Prepare the labeled dataset
restaurants_df = pd.read_csv('restaurants_reviews.csv')

In [4]:
restaurants_df.head()

Unnamed: 0,Restaurant_Name,Category,Address,Province,Latitude,Longitude,Rating,Review
0,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,5,The food met my expectation. The seating area ...
1,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,2,"It's a Wendy's, their food is good. Better tha..."
2,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,3,I love the taste of this food as it tastes lik...
3,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,5,This is the best Wendy's I've ever been to. In...
4,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,5,I been here so many times. I never had a probl...


In [5]:
# Step 2: Text Preprocessing (if needed)
restaurants_df['Review'] = restaurants_df['Review'].astype(str)
restaurants_df['Review'] = restaurants_df['Review'].apply(lambda review: review.lower())
restaurants_df['Review'] = restaurants_df['Review'].str.replace('[^\w\s]', '')


  restaurants_df['Review'] = restaurants_df['Review'].str.replace('[^\w\s]', '')


In [6]:
# Step 3: Perform Sentiment Analysis using VADER
analyzer = SentimentIntensityAnalyzer()

def get_sentiment_polarity(review):
    return analyzer.polarity_scores(review)['compound']

restaurants_df['Sentiment_score'] = restaurants_df['Review'].apply(get_sentiment_polarity)


In [7]:
# Step 4: Categorize reviews as positive or negative based on sentiment score
threshold = 0.5  # You can adjust this threshold based on your preference
restaurants_df['Sentiment'] = restaurants_df['Sentiment_score'].apply(lambda score: 'Good' if score >= threshold else 'Bad')


In [8]:
restaurants_df.head()

Unnamed: 0,Restaurant_Name,Category,Address,Province,Latitude,Longitude,Rating,Review,Sentiment_score,Sentiment
0,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,5,the food met my expectation the seating area w...,0.9607,Good
1,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,2,its a wendys their food is good better than mc...,0.1966,Bad
2,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,3,i love the taste of this food as it tastes lik...,0.8198,Good
3,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,5,this is the best wendys ive ever been to incre...,0.9287,Good
4,Wendy's (10365 111th Street),Fast food,"10365 111th Street, Edmonton, AB T5K 2V3",AB,53.545878,-113.510914,5,i been here so many times i never had a proble...,0.5613,Good


In [9]:
# Step 5: Save the updated CSV
restaurants_df.to_csv('restaurants_with_sentiment.csv', index=False)