## Imports

In [None]:
import pandas as pd
from typing import List, Dict

## Class Declarations

In [None]:
class SentimentAggregator:
    """Collapse article-level sentiment scores into one mean score per (stock, date)."""

    # Column layout of the frame returned by aggregate_daily().
    _COLUMNS = ['stock', 'date', 'sentiment']

    def __init__(self, articles: List):
        """
        articles: objects exposing ``.date``, ``.stock`` and ``.sentiment_score``
            attributes (e.g. NewsArticle). An object may also provide
            ``compute_sentiment()``, which is called when its score is still None.
        """
        self.articles = articles

    def aggregate_daily(self) -> pd.DataFrame:
        """Return the mean sentiment per stock and date.

        Returns:
            DataFrame with columns ['stock', 'date', 'sentiment']; empty (but
            with those columns) when there are no articles.
        """
        records = []
        for art in self.articles:
            # Score lazily: only articles that have not been scored yet and
            # know how to score themselves are asked to do so.
            if getattr(art, 'sentiment_score', None) is None and hasattr(art, 'compute_sentiment'):
                art.compute_sentiment()
            records.append({
                'date': art.date,
                'stock': art.stock,
                'sentiment': art.sentiment_score,
            })

        # A frame built from an empty list has no columns, so grouping on
        # 'stock'/'date' would raise KeyError; hand back an empty, correctly
        # shaped frame instead.
        if not records:
            return pd.DataFrame(columns=self._COLUMNS)

        df = pd.DataFrame(records)
        grouped = df.groupby(['stock', 'date'])['sentiment'].mean().reset_index()
        return grouped  # columns: stock, date, sentiment


class CorrelationAnalyzer:
    """Relate daily news sentiment to per-stock price features.

    For each stock the sentiment rows are (optionally) shifted by ``lag`` days
    and inner-joined on 'Date' with that stock's quantitative frame.
    """

    # Column layout of the frame returned by compute_correlations().
    _RESULT_COLUMNS = ['stock', 'feature', 'pearson', 'spearman', 'n']
    # Quant-frame columns that are correlated against sentiment when present.
    _FEATURES = ('daily_return', 'RSI', 'MACD_hist')

    def __init__(self, sentiment_df: pd.DataFrame, stock_quant_map: Dict[str, object], lag: int = 0):
        """
        sentiment_df: columns ['stock','date','sentiment']
        stock_quant_map: e.g. {'AAPL': QuantitativeAnalysis instance, ...}; each
            value must expose a ``.df`` DataFrame that can be joined on 'Date'
        lag: integer number of days to shift sentiment forward (e.g., 1 means
            sentiment on T used to predict return on T+1). The shift is in
            calendar days, so a shifted date with no row in the quant frame is
            dropped by the inner join.
        """
        self.sentiment_df = sentiment_df.copy()
        self.stock_quant_map = stock_quant_map
        self.lag = lag

    def _merged_frame(self, stock: str, with_next_return: bool = False) -> pd.DataFrame:
        """Inner-join the quant frame of *stock* with its lagged sentiment.

        When ``with_next_return`` is set and the quant frame has a
        'daily_return' column, a 'next_return' column (the following row's
        return in date order) is added BEFORE the join, so it refers to the
        next row of the full price history rather than the next day that
        happens to have news.
        """
        quant = self.stock_quant_map[stock].df.copy()
        # Dates read from CSV are often plain strings; merging those against
        # datetime sentiment dates raises, so bring both sides to datetime.
        if 'Date' in quant.columns:
            quant['Date'] = pd.to_datetime(quant['Date'])
        if with_next_return and 'daily_return' in quant.columns:
            quant = quant.sort_values('Date')
            quant['next_return'] = quant['daily_return'].shift(-1)

        sent = self.sentiment_df[self.sentiment_df['stock'] == stock].copy()
        sent = sent.rename(columns={'date': 'Date'})
        sent['Date'] = pd.to_datetime(sent['Date'])
        sent = sent.sort_values('Date')
        if self.lag != 0:
            sent['Date'] = sent['Date'] + pd.Timedelta(days=self.lag)

        return pd.merge(quant, sent[['Date', 'sentiment']], on='Date', how='inner')

    def prepare_merged(self, stock: str) -> pd.DataFrame:
        """Return the quant frame of *stock* joined with its (lagged) sentiment.

        Raises:
            KeyError: if *stock* is not a key of ``stock_quant_map``.
        """
        return self._merged_frame(stock)  # quant columns plus 'sentiment'

    def compute_correlations(self) -> pd.DataFrame:
        """Correlate sentiment with each available feature, per stock.

        Stocks without a quant frame, or with no overlapping dates, are
        skipped. Features missing from a quant frame are skipped as well.

        Returns:
            DataFrame with columns ['stock', 'feature', 'pearson', 'spearman',
            'n']; empty (but with those columns) when nothing could be computed.
        """
        rows = []
        for stock in self.sentiment_df['stock'].unique():
            if stock not in self.stock_quant_map:
                continue
            merged = self.prepare_merged(stock)
            if merged.empty:
                continue
            for feature in self._FEATURES:
                if feature not in merged.columns:
                    continue
                pair = merged[['sentiment', feature]].dropna()
                # 'daily_return' is always reported (NaN correlations, n == 0)
                # when it has no complete observations; indicator rows are
                # only reported when there is data for them.
                if pair.empty and feature != 'daily_return':
                    continue
                rows.append({
                    'stock': stock,
                    'feature': feature,
                    'pearson': pair['sentiment'].corr(pair[feature]),
                    # Spearman is the Pearson correlation of the ranks.
                    'spearman': pair['sentiment'].rank().corr(pair[feature].rank()),
                    'n': len(pair),
                })
        return pd.DataFrame(rows, columns=self._RESULT_COLUMNS)

    def simple_signal_evaluation(self, stock: str):
        """Summarise the next-row return following a simple sentiment/RSI signal.

        Signal: +1 if sentiment > 0 and RSI < 70; -1 if sentiment < 0 and
        RSI > 30; 0 otherwise (always 0 when the frame has no 'RSI' column).

        Returns:
            DataFrame indexed by signal with columns ['mean', 'count'] of the
            following row's 'daily_return', or None when sentiment and prices
            share no dates.

        Raises:
            KeyError: if *stock* has no quant frame, or that frame has no
                'daily_return' column.
        """
        merged = self._merged_frame(stock, with_next_return=True)
        if merged.empty:
            return None
        if 'next_return' not in merged.columns:
            raise KeyError(f"quant frame for {stock!r} has no 'daily_return' column")

        sentiment = merged['sentiment']
        if 'RSI' in merged.columns:
            rsi = merged['RSI']
            bullish = (sentiment > 0) & (rsi < 70)
            bearish = (sentiment < 0) & (rsi > 30)
            # The two masks are mutually exclusive (sentiment cannot be both
            # positive and negative), so the difference is exactly -1/0/+1.
            merged['signal'] = bullish.astype(int) - bearish.astype(int)
        else:
            merged['signal'] = 0

        eval_df = merged.dropna(subset=['next_return'])
        summary = eval_df.groupby('signal')['next_return'].agg(['mean', 'count'])
        return summary


## News Article

In [None]:
# news.py equivalent
from textblob import TextBlob
import pandas as pd

class NewsArticle:
    """A single news headline about one stock, with a lazily computed sentiment score."""

    def __init__(self, headline: str, url: str, publisher: str, date: str, stock: str):
        """Store the article fields and parse *date* down to midnight of its day."""
        # Score stays None until compute_sentiment() is called.
        self.sentiment_score = None

        parsed = pd.to_datetime(date)
        self.date = parsed.normalize()

        self.stock = stock
        self.headline = headline
        self.publisher = publisher
        self.url = url

    def compute_sentiment(self):
        """Score the headline with TextBlob, cache the polarity on the instance and return it."""
        polarity = TextBlob(self.headline).sentiment.polarity
        self.sentiment_score = polarity
        return polarity


### Build the articles list and the stock-quant map

In [None]:
from notebooks.quantitative_analysis import QuantitativeAnalysis
import pandas as pd
from pathlib import Path

# 1. Read the news CSV and turn every dated row into a scored NewsArticle
news_df = pd.read_csv("../data/raw_analyst_settings.csv")  # swap in your actual filename if it differs
parsed_dates = pd.to_datetime(news_df['date'], errors='coerce')  # unparseable dates become NaT
news_df['date'] = parsed_dates.dt.normalize()

articles = []
dated_news = news_df.dropna(subset=['date'])  # rows whose date failed to parse are left out
for _, row in dated_news.iterrows():
    # 'url' and 'publisher' fall back to "" when the CSV has no such column.
    art = NewsArticle(
        headline=row['headline'],
        url=row.get('url', ""),
        publisher=row.get('publisher', ""),
        date=row['date'],
        stock=row['stock'],
    )
    art.compute_sentiment()
    articles.append(art)

# 2. Wrap each ticker's price history in a QuantitativeAnalysis with SMA(20), SMA(50) and RSI
tickers = ["AAPL", "AMZN", "GOOG", "META", "MSFT", "NVDA", "TSLA"]
stock_quant_map = {}
for t in tickers:
    path = Path(f"../data/{t}_historical_data.csv")
    if path.exists():  # tickers without a price file are simply left out of the map
        df = pd.read_csv(path)
        qa = QuantitativeAnalysis(df)
        for window in (20, 50):
            qa.add_sma(window)
        qa.add_rsi()
        stock_quant_map[t] = qa


## Class usage

In [None]:
# Collapse the article-level scores into one mean sentiment per (stock, date).
sent_agg = SentimentAggregator(articles=articles)
sentiment_df = sent_agg.aggregate_daily()

# lag=1: sentiment dated T is lined up with the price row dated T+1.
corr = CorrelationAnalyzer(
    sentiment_df=sentiment_df,
    stock_quant_map=stock_quant_map,
    lag=1,
)
corr_df = corr.compute_correlations()
display(corr_df)

# Mean / count of the following return per signal value for a single ticker.
eval_summary = corr.simple_signal_evaluation(stock="AAPL")
print(eval_summary)
