In [2]:
import pandas as pd
from scipy.stats import pearsonr

# List of companies and their corresponding CSV file paths
companies = [
    {"path": "../datas/TSLA_merged_data_labeled.csv", "name": "Tesla"},
    {"path": "../datas/AAPL_merged_data_labeled.csv", "name": "Apple"},
    {"path": "../datas/MSFT_merged_data_labeled.csv", "name": "Microsoft"},
    {"path": "../datas/GOOG_merged_data_labeled.csv", "name": "Google"},
    {"path": "../datas/NVDA_merged_data_labeled.csv", "name": "NVIDIA"},
    {"path": "../datas/AMZN_merged_data_labeled.csv", "name": "Amazon"},
    {"path": "../datas/META_merged_data_labeled.csv", "name": "META"},
]

def analyze_sentiment_stock_correlation(file_path, company_name):
    """
    Load labeled stock-news data, compute average daily sentiment,
    calculate daily stock returns, and compute correlation between the two.
    """
    df = pd.read_csv(file_path)

    # Convert 'Date' column to datetime format and sort
    df['Date'] = pd.to_datetime(df['Date'])
    df.sort_values('Date', inplace=True)

    # 🟩 1. Aggregate Sentiments: Compute average sentiment per day
    daily_sentiment = df.groupby('Date')['Sentiment'].mean().reset_index()
    daily_sentiment.rename(columns={'Sentiment': 'Avg_Daily_Sentiment'}, inplace=True)

    # 🟩 2. Calculate Daily Returns: Percent change in closing price
    daily_prices = df[['Date', 'Close']].drop_duplicates().sort_values('Date')
    daily_prices['Daily_Return'] = daily_prices['Close'].pct_change()

    # 🟩 3. Merge average sentiment and daily return on Date
    merged = pd.merge(daily_sentiment, daily_prices, on='Date', how='inner')
    merged.dropna(inplace=True)  # Remove rows with missing values

    # 🟩 4. Correlation Analysis: Pearson correlation
    correlation, p_value = pearsonr(merged['Avg_Daily_Sentiment'], merged['Daily_Return'])

    # Print summary
    print(f"📊 {company_name} Sentiment vs Stock Return Correlation")
    print(f"➡️ Pearson Correlation: {correlation:.4f}")
    print(f"➡️ P-Value: {p_value:.4f}")
    print("--------------------------------------------------------")

    return {
        "company": company_name,
        "correlation": correlation,
        "p_value": p_value
    }

# Run analysis for each company
results = []
for company in companies:
    result = analyze_sentiment_stock_correlation(company['path'], company['name'])
    results.append(result)


  daily_prices['Daily_Return'] = daily_prices['Close'].pct_change()


📊 Tesla Sentiment vs Stock Return Correlation
➡️ Pearson Correlation: 0.0038
➡️ P-Value: 0.8592
--------------------------------------------------------


  daily_prices['Daily_Return'] = daily_prices['Close'].pct_change()


📊 Apple Sentiment vs Stock Return Correlation
➡️ Pearson Correlation: -0.0070
➡️ P-Value: 0.7402
--------------------------------------------------------


  daily_prices['Daily_Return'] = daily_prices['Close'].pct_change()


📊 Microsoft Sentiment vs Stock Return Correlation
➡️ Pearson Correlation: -0.0108
➡️ P-Value: 0.6126
--------------------------------------------------------


  daily_prices['Daily_Return'] = daily_prices['Close'].pct_change()


📊 Google Sentiment vs Stock Return Correlation
➡️ Pearson Correlation: 0.0082
➡️ P-Value: 0.6988
--------------------------------------------------------


  daily_prices['Daily_Return'] = daily_prices['Close'].pct_change()


📊 NVIDIA Sentiment vs Stock Return Correlation
➡️ Pearson Correlation: 0.0038
➡️ P-Value: 0.8592
--------------------------------------------------------


  daily_prices['Daily_Return'] = daily_prices['Close'].pct_change()


📊 Amazon Sentiment vs Stock Return Correlation
➡️ Pearson Correlation: -0.0249
➡️ P-Value: 0.2399
--------------------------------------------------------
📊 META Sentiment vs Stock Return Correlation
➡️ Pearson Correlation: -0.0132
➡️ P-Value: 0.5721
--------------------------------------------------------


  daily_prices['Daily_Return'] = daily_prices['Close'].pct_change()
