# In [None]:
# Purpose: Analyze correlation between stock price movements and sentiment scores,
# and generate simple trading signals for buy/sell decisions.

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Tuple

# Apply seaborn's "whitegrid" style once, up front, so every plot drawn below shares one look
sns.set_style("whitegrid")



def load_data(stock_filepath: str, sentiment_filepath: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Read the stock-price and sentiment CSV files and return them in date order.

    The stock file must contain a 'Date' column and the sentiment file a
    'date' column; both are parsed as dates while reading.

    Args:
        stock_filepath (str): Path to stock price CSV.
        sentiment_filepath (str): Path to sentiment data CSV.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: (stock prices, sentiment data), each
        sorted ascending by its date column and re-indexed from 0.
    """
    # Read both files first so a missing/invalid file fails before any sorting.
    raw_prices = pd.read_csv(stock_filepath, parse_dates=["Date"])
    raw_sentiment = pd.read_csv(sentiment_filepath, parse_dates=["date"])

    # Chronological order with a clean 0..n-1 index on each frame.
    prices = raw_prices.sort_values("Date").reset_index(drop=True)
    sentiment = raw_sentiment.sort_values("date").reset_index(drop=True)

    return prices, sentiment


def aggregate_sentiment_by_date(sentiment_df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse sentiment rows to one row per distinct 'date' value.

    Rows sharing the same 'date' are averaged on their 'compound' score.
    The input frame is not modified.

    Args:
        sentiment_df (pd.DataFrame): DataFrame with 'date' and 'compound' columns.

    Returns:
        pd.DataFrame: Two columns, 'Date' and 'avg_compound_sentiment', one row
        per distinct date.
    """
    mean_by_date = sentiment_df.groupby("date")["compound"].mean()
    # Name the values and the index up front so reset_index() yields the final columns.
    return (
        mean_by_date
        .rename("avg_compound_sentiment")
        .rename_axis("Date")
        .reset_index()
    )


def merge_data(stock_df: pd.DataFrame, daily_sentiment_df: pd.DataFrame) -> pd.DataFrame:
    """
    Merge stock price data with daily sentiment scores on the 'Date' column.

    A left join is used, so every row of ``stock_df`` is kept and sentiment
    rows whose date does not appear in ``stock_df`` are dropped. Stock rows
    without a matching sentiment score inherit the most recent earlier score
    (forward fill, in ``stock_df`` row order); rows before the first available
    score remain NaN.

    Args:
        stock_df (pd.DataFrame): Stock price data with 'Date' column.
        daily_sentiment_df (pd.DataFrame): Daily sentiment data with 'Date' and
            'avg_compound_sentiment' columns.

    Returns:
        pd.DataFrame: Combined DataFrame with stock prices and sentiment scores.
    """
    combined_df = pd.merge(stock_df, daily_sentiment_df, on="Date", how="left")
    # Assign the filled column back instead of calling fillna(..., inplace=True)
    # on a column selection: that chained in-place form may operate on a
    # temporary (a silent no-op under pandas Copy-on-Write), and
    # fillna(method=...) is deprecated in favour of ffill().
    combined_df["avg_compound_sentiment"] = combined_df["avg_compound_sentiment"].ffill()
    return combined_df



def calculate_correlation(combined_df: pd.DataFrame) -> float:
    """
    Compute the Pearson correlation of daily returns against average sentiment.

    Args:
        combined_df (pd.DataFrame): DataFrame with 'daily_return' and
            'avg_compound_sentiment' columns.

    Returns:
        float: Pearson correlation coefficient between the two columns.
    """
    returns = combined_df["daily_return"]
    sentiment = combined_df["avg_compound_sentiment"]
    return returns.corr(sentiment, method="pearson")


def generate_trading_signals(combined_df: pd.DataFrame, sentiment_threshold: float = 0.05) -> pd.DataFrame:
    """
    Generate simple buy/sell signals based on sentiment and daily returns.

    Strategy (evaluated per row, using the return of the row directly above):
    - Buy signal (1): sentiment above threshold and positive previous return.
    - Sell signal (-1): sentiment below negative threshold and negative previous return.
    - Hold signal (0): otherwise, including the first row (no previous return)
      and any row where the sentiment or the previous return is NaN.

    Rows are paired by position, so the frame may carry any index (dates,
    filtered labels, ...); it is expected to already be in chronological order.

    Args:
        combined_df (pd.DataFrame): DataFrame with 'daily_return' and 'avg_compound_sentiment'.
        sentiment_threshold (float): Threshold for sentiment to trigger signals.

    Returns:
        pd.DataFrame: The same DataFrame object, with a 'signal' column added
        (the input is modified in place).
    """
    sentiment = combined_df["avg_compound_sentiment"]
    # shift(1) moves each return down one row *by position*, which gives the
    # previous row's return without relying on integer index labels.
    prev_return = combined_df["daily_return"].shift(1)

    # Comparisons against NaN evaluate to False, so missing data falls through to hold.
    buy = ((sentiment > sentiment_threshold) & (prev_return > 0)).to_numpy()
    sell = ((sentiment < -sentiment_threshold) & (prev_return < 0)).to_numpy()

    # np.select picks the first matching condition, mirroring the buy-then-sell order.
    combined_df["signal"] = np.select([buy, sell], [1, -1], default=0)
    return combined_df


def plot_signals(combined_df: pd.DataFrame, ticker: str) -> None:
    """
    Draw the closing-price line and overlay markers where signals fired.

    Rows with signal == 1 are drawn as green up-triangles ("Buy Signal") and
    rows with signal == -1 as red down-triangles ("Sell Signal"), each placed
    at that row's closing price. The figure is displayed with plt.show().

    Args:
        combined_df (pd.DataFrame): DataFrame with 'Date', 'Close' and 'signal' columns.
        ticker (str): Stock ticker symbol, used in the legend and title.
    """
    plt.figure(figsize=(15, 8))
    plt.plot(
        combined_df["Date"],
        combined_df["Close"],
        label=f"{ticker} Close Price",
        color="blue",
    )

    # (signal value, marker shape, colour, legend label) for each marker series.
    marker_specs = (
        (1, "^", "green", "Buy Signal"),
        (-1, "v", "red", "Sell Signal"),
    )
    for signal_value, shape, colour, legend_label in marker_specs:
        hits = combined_df[combined_df["signal"] == signal_value]
        plt.scatter(
            hits["Date"],
            hits["Close"],
            marker=shape,
            color=colour,
            label=legend_label,
            s=100,
        )

    plt.title(f"{ticker} Closing Price with Trading Signals")
    plt.xlabel("Date")
    plt.ylabel("Price (USD)")
    plt.legend()
    plt.show()


