# In [None]:
# Purpose: Perform sentiment analysis on Twitter/Reddit data related to stocks and visualize results.

# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Tuple
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Apply seaborn's "darkgrid" style once at module level so that every plot
# drawn after this point shares the same look.
sns.set_style("darkgrid")


def load_sentiment_data(filepath: str) -> pd.DataFrame:
    """
    Read a CSV of social media posts and prepare it for sentiment analysis.

    The 'date' column is parsed as datetimes, rows with a missing 'text'
    value are discarded, and the remaining rows are renumbered from zero.

    Args:
        filepath (str): Location of the CSV file holding the tweets or posts.

    Returns:
        pd.DataFrame: Cleaned posts with a fresh 0..n-1 index.
    """
    return (
        pd.read_csv(filepath, parse_dates=["date"])
        .dropna(subset=["text"])  # a post without text cannot be scored
        .reset_index(drop=True)   # close the gaps left by the dropped rows
    )



def initialize_sentiment_analyzer() -> SentimentIntensityAnalyzer:
    """
    Build a new VADER sentiment analyzer.

    Returns:
        SentimentIntensityAnalyzer: A freshly constructed analyzer instance.
    """
    analyzer = SentimentIntensityAnalyzer()
    return analyzer



def analyze_sentiment(df: pd.DataFrame, analyzer: SentimentIntensityAnalyzer) -> pd.DataFrame:
    """
    Compute sentiment scores for each text entry and append them as columns.

    Each value of the 'text' column is passed to ``analyzer.polarity_scores``
    and the returned dictionaries are expanded into one column per key.

    Args:
        df (pd.DataFrame): DataFrame with a 'text' column. Its index may be
            arbitrary (e.g. left over from filtering); it is preserved.
        analyzer (SentimentIntensityAnalyzer): VADER sentiment analyzer, or
            any object exposing a compatible ``polarity_scores(text)`` method.

    Returns:
        pd.DataFrame: A new DataFrame holding the original columns followed
        by the sentiment score columns. The input frame is not modified.
    """
    records = [analyzer.polarity_scores(text) for text in df["text"]]

    if records:
        # Build the score frame on df's own index. pd.concat(axis=1) aligns on
        # index labels, so a fresh 0..n-1 index would misalign (and NaN-pad)
        # rows whenever df does not carry the default RangeIndex.
        sentiment_df = pd.DataFrame(records, index=df.index)
    else:
        # No rows to score: still emit the score columns so downstream code
        # that reads df["compound"] keeps working. These are the keys VADER's
        # polarity_scores is documented to return.
        sentiment_df = pd.DataFrame(
            columns=["neg", "neu", "pos", "compound"],
            index=df.index,
            dtype=float,
        )

    return pd.concat([df, sentiment_df], axis=1)


def plot_sentiment_distribution(df: pd.DataFrame, column: str = "compound") -> None:
    """
    Draw a histogram (with a KDE overlay) of one sentiment score column.

    Args:
        df (pd.DataFrame): DataFrame holding the sentiment scores.
        column (str): Name of the score column to plot.
    """
    _, ax = plt.subplots(figsize=(12, 6))
    sns.histplot(df[column], bins=30, kde=True, color="purple", ax=ax)

    # The capitalized column name is reused in both the title and x-axis label.
    label = column.capitalize()
    ax.set_title(f"Distribution of {label} Sentiment Scores")
    ax.set_xlabel(f"{label} Score")
    ax.set_ylabel("Frequency")

    plt.show()


def categorize_sentiment(compound_score: float, *, threshold: float = 0.05) -> str:
    """
    Categorize sentiment based on a compound score.

    Scores at or above ``threshold`` are 'Positive', scores at or below
    ``-threshold`` are 'Negative', and everything in between is 'Neutral'.
    A NaN score fails both comparisons and is therefore 'Neutral'.

    Args:
        compound_score (float): Compound sentiment score from VADER.
        threshold (float): Non-negative cutoff separating neutral from
            polarized scores. Defaults to 0.05. Keyword-only.

    Returns:
        str: Sentiment category: 'Positive', 'Neutral', or 'Negative'.

    Raises:
        ValueError: If ``threshold`` is negative.
    """
    if threshold < 0:
        # A negative cutoff would make the positive and negative bands overlap.
        raise ValueError("threshold must be non-negative")

    if compound_score >= threshold:
        return "Positive"
    if compound_score <= -threshold:
        return "Negative"
    return "Neutral"


def add_sentiment_category(df: pd.DataFrame) -> pd.DataFrame:
    """
    Label every row with a sentiment category derived from its compound score.

    The 'sentiment' column is written onto the DataFrame that was passed in,
    and that same object is returned.

    Args:
        df (pd.DataFrame): DataFrame containing a 'compound' score column.

    Returns:
        pd.DataFrame: The input DataFrame, now carrying a 'sentiment' column.
    """
    labels = df["compound"].apply(categorize_sentiment)
    df["sentiment"] = labels
    return df


