## Sentiment Analysis

In [3]:
import pandas as pd
import pyodbc
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [5]:
# Make sure the VADER lexicon is available before the analyzer is built.
# nltk.data.find raises LookupError when the resource is not on any NLTK data
# path, so the download (and its network round-trip) only happens when the
# lexicon is actually missing.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Jaja\AppData\Roaming\nltk_data...


True

In [57]:
# Location of the exported customer-reviews fact table. The raw string keeps
# the Windows backslashes literal.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere without editing it.
reviews_csv_path = r'C:\Users\Jaja\Desktop\DA Portfolio\Ali Ahmad\Exported Fact_Customer Reviews.csv'

# Read the reviews into a DataFrame.
customer_reviews_df = pd.read_csv(reviews_csv_path)

# Show the first rows as the cell output to sanity-check the load.
customer_reviews_df.head()

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText
0,1,77,18,12/23/2023,3,"Average experience, nothing special."
1,2,80,19,12/25/2024,5,The quality is top-notch.
2,3,50,13,1/26/2025,4,Five stars for the quick delivery.
3,4,78,15,4/21/2025,3,"Good quality, but could be cheaper."
4,5,64,2,7/16/2023,3,"Average experience, nothing special."


In [59]:
# Create the VADER sentiment intensity analyzer once and keep it at module level:
# calculate_sentiment() below reads this `sia` object for every review it scores.

sia = SentimentIntensityAnalyzer()

In [61]:
# Define a function to calculate sentiment scores using VADER

def calculate_sentiment(review):
    """Return VADER's compound polarity score for a single review.

    Args:
        review: The review text. Values that are not strings (for example the
            NaN pandas uses for a missing cell, or None) and blank strings are
            treated as carrying no sentiment.

    Returns:
        float: The ``compound`` entry of ``sia.polarity_scores(review)``, a
        normalized score between -1 (most negative) and 1 (most positive),
        or 0.0 when there is no text to score.
    """
    # Missing text reaches this function as a float NaN when applied over a
    # DataFrame column; score it as neutral instead of handing a non-string
    # to the analyzer.
    if not isinstance(review, str) or not review.strip():
        return 0.0
    # `sia` is the module-level SentimentIntensityAnalyzer created earlier in
    # the notebook.
    return sia.polarity_scores(review)['compound']

In [63]:
# Define a function to categorize sentiment using both the sentiment score and the review rating

def categorize_sentiment(score, rating):
    """Combine a text sentiment score with a star rating into one label.

    The score is read as positive above 0.05, negative below -0.05 and neutral
    otherwise. The rating is read as high when >= 4, low when <= 2 and
    middling for anything else. "Anything else" includes 3, fractional values
    such as 3.5 and missing (NaN) ratings, so those are never mistaken for an
    extreme rating.

    Args:
        score: Compound sentiment score of the review text.
        rating: Numerical rating given by the customer.

    Returns:
        str: One of 'Positive', 'Mixed Positive', 'Neutral', 'Mixed Negative'
        or 'Negative'.
    """
    # Band the rating first so every score branch shares the same definition
    # of high / low / middling.
    if rating >= 4:
        rating_band = 'high'
    elif rating <= 2:
        rating_band = 'low'
    else:
        rating_band = 'mid'

    if score > 0.05:  # Positive sentiment score
        labels = {
            'high': 'Positive',        # High rating and positive sentiment
            'mid': 'Mixed Positive',   # Middling rating but positive sentiment
            'low': 'Mixed Negative',   # Low rating but positive sentiment
        }
    elif score < -0.05:  # Negative sentiment score
        labels = {
            'high': 'Mixed Positive',  # High rating but negative sentiment
            'mid': 'Mixed Negative',   # Middling rating but negative sentiment
            'low': 'Negative',         # Low rating and negative sentiment
        }
    else:  # Neutral sentiment score: the rating decides on its own
        labels = {
            'high': 'Positive',
            'mid': 'Neutral',
            'low': 'Negative',
        }
    return labels[rating_band]

In [65]:
# Define a function to bucket sentiment scores into text ranges

def sentiment_bucket(score):
    """Map a sentiment score to the text label of the range it falls in.

    Boundaries follow the labels: 0.5 belongs to '0.5 to 1.0' and -0.5 belongs
    to '-1.0 to -0.5'. A score of exactly 0.0 is placed in '0.0 to 0.49'.

    Args:
        score: Sentiment score, expected to lie between -1 and 1.

    Returns:
        str: One of '0.5 to 1.0', '0.0 to 0.49', '-0.49 to 0.0' or
        '-1.0 to -0.5'.
    """
    if score >= 0.5:
        return '0.5 to 1.0'  # Strongly positive sentiment
    if score >= 0.0:
        return '0.0 to 0.49'  # Mildly positive sentiment (includes exactly 0.0)
    if score > -0.5:
        return '-0.49 to 0.0'  # Mildly negative sentiment
    # Everything else, including exactly -0.5. A NaN score fails every
    # comparison above and therefore also lands here, so filter missing scores
    # before bucketing.
    return '-1.0 to -0.5'  # Strongly negative sentiment

In [67]:
# Score every review: run calculate_sentiment over the ReviewText column
# element by element and store the result as a new SentimentScore column.

review_texts = customer_reviews_df['ReviewText']
customer_reviews_df['SentimentScore'] = review_texts.map(calculate_sentiment)

In [69]:
# Label every review by combining its text sentiment score with its rating.
# The two columns are walked in lockstep instead of using
# DataFrame.apply(axis=1), which builds a Series object for every row before
# calling the function; the resulting labels are the same.

customer_reviews_df['SentimentCategory'] = [
    categorize_sentiment(score, rating)
    for score, rating in zip(customer_reviews_df['SentimentScore'],
                             customer_reviews_df['Rating'])
]

In [71]:
# Translate each numeric sentiment score into its text range label and store
# the labels as a new SentimentBucket column.

sentiment_scores = customer_reviews_df['SentimentScore']
customer_reviews_df['SentimentBucket'] = sentiment_scores.map(sentiment_bucket)

In [73]:
# Preview the first rows (head() returns five by default) to spot-check the
# SentimentScore, SentimentCategory and SentimentBucket columns added above.

customer_reviews_df.head()

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText,SentimentScore,SentimentCategory,SentimentBucket
0,1,77,18,12/23/2023,3,"Average experience, nothing special.",-0.3089,Mixed Negative,-0.49 to 0.0
1,2,80,19,12/25/2024,5,The quality is top-notch.,0.0,Positive,0.0 to 0.49
2,3,50,13,1/26/2025,4,Five stars for the quick delivery.,0.0,Positive,0.0 to 0.49
3,4,78,15,4/21/2025,3,"Good quality, but could be cheaper.",0.2382,Mixed Positive,0.0 to 0.49
4,5,64,2,7/16/2023,3,"Average experience, nothing special.",-0.3089,Mixed Negative,-0.49 to 0.0


In [75]:
# Save the DataFrame with sentiment scores, categories, and buckets to a new
# Excel workbook (.xlsx), not a CSV. index=False leaves the DataFrame index out
# of the sheet. The file name is relative, so it is written to the kernel's
# current working directory.
# NOTE(review): writing .xlsx needs an Excel writer engine (e.g. openpyxl)
# installed in the environment — confirm it is available.

customer_reviews_df.to_excel('fact_customer_reviews_with_sentiment.xlsx', index=False)