# VADER Sentiment Analysis Evaluation
This notebook runs VADER sentiment analysis over a labelled dataset and maps VADER's compound score (in [-1, 1]) to an integer score from 1 to 5, so that it can be compared with the dataset's actual scores.

In [None]:
import os
import pandas as pd
import logging
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Emit INFO-and-above records, each prefixed with a timestamp and its level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Create the VADER analyzer once at module level so later code can reuse it.
analyzer = SentimentIntensityAnalyzer()

# Function to convert VADER compound score (in [-1,1]) to a scale of 1 to 5
def convert_compound_to_score(compound):
    """Convert a VADER compound score to an integer rating from 1 to 5.

    The nominal input range [-1, 1] is mapped linearly onto [1, 5], so -1
    gives 1, 0 gives 3 and 1 gives 5, and the result is rounded to the
    nearest integer. Inputs outside [-1, 1] are clamped to the nearest bound
    first, so the rating can never leave the 1-5 scale.

    Ties follow the built-in ``round`` (round-half-to-even): a compound of
    0.25 maps to 3.5 and becomes 4, while -0.25 maps to 2.5 and becomes 2.

    Args:
        compound: Numeric compound sentiment score.

    Returns:
        An int between 1 and 5 inclusive.
    """
    # Clamp with explicit comparisons: a NaN fails both tests and is passed
    # through unchanged instead of being silently turned into a bound.
    if compound < -1:
        compound = -1
    elif compound > 1:
        compound = 1

    # Shift [-1, 1] to [0, 1], stretch to [0, 4], then offset to [1, 5].
    return round(((compound + 1) / 2) * 4 + 1)

# Leave a marker in the log once the analyzer and the score converter exist.
logging.info('VADER Sentiment Analyzer initialized.')

In [None]:
# Load the sentiment analysis dataset.
# NOTE: '__file__' is a quoted string literal, not the __file__ variable, so
# abspath() resolves it against the current working directory and
# project_root ends up being the parent of that directory.
project_root = os.path.dirname(os.path.dirname(os.path.abspath('__file__')))
dataset_path = os.path.join(project_root, 'data', 'Cleaned_data', 'sentiment_analysis.csv')
df = pd.read_csv(dataset_path)
logging.info(f'Loaded dataset with {len(df)} samples.')

# Score every text with VADER and convert its compound value to a 1-5 score.
# The two columns are walked in lockstep rather than with DataFrame.iterrows(),
# which constructs a new Series object for every row.
predictions = [
    {
        'text': text,
        'actual_score': actual_score,  # Expected to be between 1 and 5
        'predicted_score': convert_compound_to_score(
            analyzer.polarity_scores(text)['compound']
        ),
    }
    for text, actual_score in zip(df['Text'], df['Score'])
]

# Naming the columns explicitly keeps the CSV header in place even when the
# dataset contains no rows.
predictions_df = pd.DataFrame(
    predictions, columns=['text', 'actual_score', 'predicted_score']
)

# Save individual VADER predictions, creating the results directory if needed.
results_dir = os.path.join(project_root, 'evaluation', 'results')
os.makedirs(results_dir, exist_ok=True)
predictions_output_path = os.path.join(results_dir, 'vader_sentiment_predictions.csv')
predictions_df.to_csv(predictions_output_path, index=False)
logging.info(f'VADER predictions saved to {predictions_output_path}')