In [None]:
import pandas as pd
from data_prep import load_and_clean_data, normalize_dates_and_sentiment
from sentiment_analysis import analyze_sentiment

# Load news and stock data (replace placeholders with your file paths)
news_data = load_and_clean_data("your_news_data.csv")
stock_data = load_and_clean_data("your_stock_data.csv")

# Normalize dates and perform sentiment analysis
joined_data = normalize_dates_and_sentiment(news_data, stock_data)

# Daily percentage return from the adjusted close. The first row is NaN
# because it has no previous price to compare against; it is dropped below
# before the correlation is computed.
# NOTE(review): pct_change() works on row order, so this assumes stock_data
# is sorted chronologically -- confirm against load_and_clean_data.
stock_data['daily_return'] = stock_data['Adj Close'].pct_change() * 100

# One average sentiment score per day. A plain per-date mean covers both the
# single-headline and the multi-headline case, so no branching is needed.
# (Averaging value_counts() output would measure headline volume, not
# sentiment.)
if not pd.api.types.is_numeric_dtype(joined_data['sentiment']):
    raise TypeError(
        "joined_data['sentiment'] must be numeric to be averaged per day; "
        f"got dtype {joined_data['sentiment'].dtype}"
    )
daily_sentiment = (
    joined_data.groupby('date')['sentiment']
    .mean()
    .to_frame('average_sentiment')
)

# Key the returns by date so each one is matched to the sentiment of the
# same day rather than to whatever row happens to share its position.
# NOTE(review): assumes stock_data exposes its dates either as a 'date'
# column or as its index, using the same date type as joined_data['date']
# -- confirm against data_prep.
if 'date' in stock_data.columns:
    returns_by_date = stock_data.set_index('date')['daily_return']
else:
    returns_by_date = stock_data['daily_return']

# Correlation calculation (using scipy.stats if not already imported)
from scipy.stats import pearsonr

# Keep only days that have both a sentiment score and a return; pearsonr
# requires equal-length inputs and does not tolerate NaN.
paired = daily_sentiment.join(returns_by_date, how='inner').dropna(
    subset=['average_sentiment', 'daily_return']
)
if len(paired) < 2:
    raise ValueError(
        "Need at least two days with both a sentiment score and a daily "
        f"return to compute a correlation; found {len(paired)}. Check that "
        "the news and stock dates overlap and use the same date type."
    )

correlation, p_value = pearsonr(paired['average_sentiment'], paired['daily_return'])
print(f"Correlation Coefficient: {correlation:.4f}, p-value: {p_value:.4f}")
