In [1]:
# Install vaderSentiment library
!pip install pandas vadersentiment

In [2]:
# Import packages
import pandas as pd
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


In [3]:
# Read the cleaned comment data from CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='cleaned_gp_comment.csv')

In [4]:
# Name of the DataFrame column that holds the comment text
text_col = "Comments"
print(f"Using text column: {text_col}")

Using text column: Comments


In [7]:
# Perform VADER sentiment analysis
analyzer = SentimentIntensityAnalyzer()

def vader_scores(text):
    """Return the VADER polarity scores for a single comment.

    Parameters
    ----------
    text : object
        The comment to score. It is converted with ``str()`` first, so
        non-string cells are scored by their string form.

    Returns
    -------
    dict
        The result of ``analyzer.polarity_scores``; this notebook reads its
        ``'neg'``, ``'neu'``, ``'pos'`` and ``'compound'`` entries.
    """
    return analyzer.polarity_scores(str(text))

# Maps each score key returned by the analyzer to the column that stores it.
vader_columns = {
    'neg': 'vader_neg',
    'neu': 'vader_neu',
    'pos': 'vader_pos',
    'compound': 'vader_compound',
}

# Score every comment exactly once, then fan the four values out into columns
# (instead of re-running the analyzer once per output column).
score_records = df[text_col].map(vader_scores).tolist()
# Passing `columns` keeps the four keys present even when there are no rows.
score_table = pd.DataFrame(score_records, columns=list(vader_columns))

for score_key, column_name in vader_columns.items():
    # Assign positionally so the result does not depend on df's index labels.
    df[column_name] = score_table[score_key].to_numpy()

In [9]:
# Show the first ten comments next to their VADER scores
df.head(10)[['Comments', 'vader_neg', 'vader_neu', 'vader_pos', 'vader_compound']]

Unnamed: 0,Comments,vader_neg,vader_neu,vader_pos,vader_compound
0,Taste super delicious too,0.0,0.208,0.792,0.8225
1,Wow,0.0,0.0,1.0,0.5859
2,The non-sticky texture of Golden Penny Macaron...,0.0,0.691,0.309,0.6166
3,Very delicious,0.0,0.2,0.8,0.6115
4,After eating Macaroni I sleep very well.,0.0,0.715,0.285,0.3384
5,Non sticky texture,0.0,1.0,0.0,0.0
6,This delicious,0.0,0.213,0.787,0.5719
7,Delicious!,0.0,0.0,1.0,0.6114
8,That's my favorites than spaghetti,0.0,0.588,0.412,0.4215
9,My favorite,0.0,0.25,0.75,0.4588


In [10]:
# Summary statistics of the compound score over the analyzed comments
df.loc[:, ['vader_compound']].describe()

Unnamed: 0,vader_compound
count,1294.0
mean,0.326174
std,0.36731
min,-0.8481
25%,0.0
50%,0.4151
75%,0.6369
max,0.9716


In [11]:
# Label comments as positive, negative, or neutral
# NOTE(review): the VADER documentation cites +/-0.05 as the typical compound
# cut-off; 0.5 is much stricter and leaves more comments 'neutral' -- confirm
# that this value is intended.
threshold = 0.5

def label_from_compound(c, cutoff=None):
    """Turn a VADER compound score into a sentiment label.

    Parameters
    ----------
    c : float
        Compound score to classify.
    cutoff : float, optional
        Symmetric cut-off to use for this call. When omitted, the
        module-level ``threshold`` is read at call time, which matches the
        behaviour of the one-argument form.

    Returns
    -------
    str
        ``'positive'`` if ``c >= cutoff``, ``'negative'`` if ``c <= -cutoff``,
        otherwise ``'neutral'``.
    """
    limit = threshold if cutoff is None else cutoff
    if c >= limit:
        return 'positive'
    if c <= -limit:
        return 'negative'
    return 'neutral'

# Derive one sentiment label per row from its compound score
df['vader_label'] = df['vader_compound'].map(label_from_compound)

In [13]:
# Write the labelled DataFrame to CSV, leaving out the index column
# NOTE(review): this path is the same file the dataset was loaded from, so the
# input file is replaced on disk -- confirm that overwriting it is intended.
gp_comments_sl = 'cleaned_gp_comment.csv'
df.to_csv(path_or_buf=gp_comments_sl, index=False)

In [15]:
# View sentiment distribution
# The heading reads the cut-off from `threshold` so it cannot drift from the
# value that was actually used for labelling.
print(f"\nSentiment distribution with threshold {threshold}:")
print(df['vader_label'].value_counts())


Sentiment distribution with threshold 0.5:
vader_label
neutral     721
positive    550
negative     23
Name: count, dtype: int64


In [17]:
# Preview the first ten comments with their VADER scores and sentiment label
df.head(10)[['Comments', 'vader_neg', 'vader_neu', 'vader_pos', 'vader_compound', 'vader_label']]

Unnamed: 0,Comments,vader_neg,vader_neu,vader_pos,vader_compound,vader_label
0,Taste super delicious too,0.0,0.208,0.792,0.8225,positive
1,Wow,0.0,0.0,1.0,0.5859,positive
2,The non-sticky texture of Golden Penny Macaron...,0.0,0.691,0.309,0.6166,positive
3,Very delicious,0.0,0.2,0.8,0.6115,positive
4,After eating Macaroni I sleep very well.,0.0,0.715,0.285,0.3384,neutral
5,Non sticky texture,0.0,1.0,0.0,0.0,neutral
6,This delicious,0.0,0.213,0.787,0.5719,positive
7,Delicious!,0.0,0.0,1.0,0.6114,positive
8,That's my favorites than spaghetti,0.0,0.588,0.412,0.4215,neutral
9,My favorite,0.0,0.25,0.75,0.4588,neutral
