# Imports and Setup

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Import your custom modules
import sys
sys.path.append('../src') # Add src directory to Python path
from data_loader import load_all_historical_data
from news_processor import load_financial_news_data, add_sentiment_score
from financial_metrics import add_daily_returns # We will specifically use daily returns here

# Configure plot styles
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)
plt.rcParams['figure.dpi'] = 100

# Load and Prepare News Data (Sentiment Analysis)

In [19]:
print("--- Loading and Processing News Data ---")
df_news = load_financial_news_data(filepath='../data/cleaned_analyst_ratings.csv')

if df_news.empty:
    print("News data not loaded or is empty. Please check 'financial_news.csv' and data_loader.py.")
else:
    df_news_sentiment = add_sentiment_score(df_news.copy()) # Use a copy to avoid modifying original df_news
    print("\nNews data with sentiment scores:")
    print(df_news_sentiment[['date', 'stock', 'headline', 'sentiment_score']].head())

    # Aggregate sentiment by day for each stock
    # We will use the mean sentiment for simplicity, but median or sum could also be considered.
    daily_avg_sentiment = df_news_sentiment.groupby(['publication_day', 'stock'])['sentiment_score'].mean().reset_index()
    daily_avg_sentiment.rename(columns={'publication_day': 'Date', 'sentiment_score': 'daily_avg_sentiment'}, inplace=True)
    daily_avg_sentiment['Date'] = pd.to_datetime(daily_avg_sentiment['Date']) # Ensure datetime type for merging

    print("\nDaily aggregated sentiment per stock:")
    print(daily_avg_sentiment.head())

--- Loading and Processing News Data ---
Loaded financial news data from ../data/cleaned_analyst_ratings.csv

News data with sentiment scores:
                       date stock  \
0 2020-06-05 14:30:54+00:00     A   
1 2020-06-03 14:45:20+00:00     A   
2 2020-05-26 08:30:07+00:00     A   
3 2020-05-22 16:45:06+00:00     A   
4 2020-05-22 15:38:59+00:00     A   

                                            headline  sentiment_score  
0            Stocks That Hit 52-Week Highs On Friday             0.16  
1         Stocks That Hit 52-Week Highs On Wednesday             0.16  
2                      71 Biggest Movers From Friday             0.00  
3       46 Stocks Moving In Friday's Mid-Day Session             0.00  
4  B of A Securities Maintains Neutral on Agilent...             0.00  

Daily aggregated sentiment per stock:
                       Date stock  daily_avg_sentiment
0 2011-04-28 00:00:00+00:00   DEJ             0.000000
1 2011-04-28 00:00:00+00:00   DGP             0.00000

# Load and Prepare Stock Price Data

In [17]:
print("\n--- Loading and Processing Stock Historical Data ---")
all_stock_data = load_all_historical_data(data_dir='../data/yfinance_data')

processed_stock_returns = {}
for ticker, df_hist in all_stock_data.items():
    if df_hist.empty:
        print(f"Skipping {ticker}: Historical data is empty.")
        continue  
    # Calculate daily stock returns
    df_hist_with_returns = add_daily_returns(df_hist.copy())
    
    # Ensure 'Date' index is standardized to match news 'Date' for merging
    df_hist_with_returns.index = df_hist_with_returns.index.normalize() # Remove time component if present
    df_hist_with_returns.index.name = 'Date' # Ensure index name is 'Date' for merging

    processed_stock_returns[ticker] = df_hist_with_returns[['Close', 'Daily_Return']] # Keep relevant columns
    print(f"Processed daily returns for {ticker}.")

print("\nExample: AAPL Daily Returns Head:")
if 'AAPL' in processed_stock_returns:
    print(processed_stock_returns['AAPL'].head())


--- Loading and Processing Stock Historical Data ---

Example: AAPL Daily Returns Head:


# Merge Datasets (Data Alignment)

In [13]:
print("\n--- Merging News Sentiment and Stock Returns ---")
merged_data_for_correlation = {}

unique_tickers = daily_avg_sentiment['stock'].unique()

for ticker in unique_tickers:
    news_sentiment_for_ticker = daily_avg_sentiment[daily_avg_sentiment['stock'] == ticker]
    stock_returns_for_ticker = processed_stock_returns.get(ticker)

    if stock_returns_for_ticker is None or stock_returns_for_ticker.empty:
        print(f"No stock data found or empty for {ticker}. Skipping merge.")
        continue

    # Perform the merge. Using 'inner' join ensures only matching dates are kept.
    # This aligns the news sentiment for a given day with the stock's return on that same day.
    # Note: If news impacts *next day's* return, we'd shift the stock returns.
    df_merged = pd.merge(
        news_sentiment_for_ticker,
        stock_returns_for_ticker,
        on='Date',
        how='inner'
    )
    
    if not df_merged.empty:
        # Drop the first row of Daily_Return as it will be NaN due to pct_change()
        df_merged.dropna(subset=['Daily_Return'], inplace=True)
        if not df_merged.empty:
            merged_data_for_correlation[ticker] = df_merged
            print(f"Merged data for {ticker}. Rows: {len(df_merged)}")
        else:
            print(f"Merged data for {ticker} resulted in empty DataFrame after dropping NaNs.")
    else:
        print(f"No overlapping dates for {ticker} between news sentiment and stock returns.")

print("\nExample: Merged data for AAPL head (sentiment and returns):")
if 'AAPL' in merged_data_for_correlation:
    print(merged_data_for_correlation['AAPL'].head())


--- Merging News Sentiment and Stock Returns ---
No stock data found or empty for DEJ. Skipping merge.
No stock data found or empty for DGP. Skipping merge.
No stock data found or empty for ESR. Skipping merge.
No stock data found or empty for AIA. Skipping merge.
No stock data found or empty for GDL. Skipping merge.
No stock data found or empty for PTM. Skipping merge.
No stock data found or empty for DBP. Skipping merge.
No stock data found or empty for AGOL. Skipping merge.
No stock data found or empty for DGZ. Skipping merge.
No stock data found or empty for GLTR. Skipping merge.
No stock data found or empty for PSQ. Skipping merge.
No stock data found or empty for SCJ. Skipping merge.
No stock data found or empty for SZO. Skipping merge.
No stock data found or empty for TLH. Skipping merge.
No stock data found or empty for WITE. Skipping merge.
No stock data found or empty for CSM. Skipping merge.
No stock data found or empty for NIM. Skipping merge.
No stock data found or empty 

No stock data found or empty for BLV. Skipping merge.
No stock data found or empty for CMF. Skipping merge.
No stock data found or empty for CXA. Skipping merge.
No stock data found or empty for DLBL. Skipping merge.
No stock data found or empty for DTYL. Skipping merge.
No stock data found or empty for LTPZ. Skipping merge.
No stock data found or empty for LWC. Skipping merge.
No stock data found or empty for TIPZ. Skipping merge.
No stock data found or empty for UBT. Skipping merge.
No stock data found or empty for UST. Skipping merge.
No stock data found or empty for GSC. Skipping merge.
No stock data found or empty for LSC. Skipping merge.
No stock data found or empty for FVL. Skipping merge.
No stock data found or empty for PSP. Skipping merge.
No stock data found or empty for SRTY. Skipping merge.
No stock data found or empty for BOS. Skipping merge.
No stock data found or empty for EWV. Skipping merge.
No stock data found or empty for FFNM. Skipping merge.
No stock data found or

No stock data found or empty for PZI. Skipping merge.
No stock data found or empty for RCD. Skipping merge.
No stock data found or empty for WGA. Skipping merge.
No stock data found or empty for GTU. Skipping merge.
No stock data found or empty for UAE. Skipping merge.
No stock data found or empty for FBZ. Skipping merge.
No stock data found or empty for ADRU. Skipping merge.
No stock data found or empty for ECF. Skipping merge.
No stock data found or empty for ICLN. Skipping merge.
No stock data found or empty for PSLV. Skipping merge.
No stock data found or empty for XPL. Skipping merge.
No stock data found or empty for CHII. Skipping merge.
No stock data found or empty for DRV. Skipping merge.
No stock data found or empty for EAD. Skipping merge.
No stock data found or empty for PIM. Skipping merge.
No stock data found or empty for NXR. Skipping merge.
No stock data found or empty for VBFC. Skipping merge.
No stock data found or empty for EMXX. Skipping merge.
No stock data found or

No stock data found or empty for AFCB. Skipping merge.
No stock data found or empty for BBRC. Skipping merge.
No stock data found or empty for FRAK. Skipping merge.
No stock data found or empty for HTR. Skipping merge.
No stock data found or empty for PHO. Skipping merge.
No stock data found or empty for SDOW. Skipping merge.
No stock data found or empty for PHDG. Skipping merge.
No stock data found or empty for GRU. Skipping merge.
No stock data found or empty for IGU. Skipping merge.
No stock data found or empty for ONEF. Skipping merge.
No stock data found or empty for SJB. Skipping merge.
No stock data found or empty for UJB. Skipping merge.
No stock data found or empty for WEET. Skipping merge.
No stock data found or empty for FRI. Skipping merge.
No stock data found or empty for ACWX. Skipping merge.
No stock data found or empty for LMLP. Skipping merge.
No stock data found or empty for PBD. Skipping merge.
No stock data found or empty for IYK. Skipping merge.
No stock data found

No stock data found or empty for KNOW. Skipping merge.
No stock data found or empty for FXSG. Skipping merge.
No stock data found or empty for MNA. Skipping merge.
No stock data found or empty for VCV. Skipping merge.
No stock data found or empty for USBI. Skipping merge.
No stock data found or empty for GII. Skipping merge.
No stock data found or empty for SQQQ. Skipping merge.
No stock data found or empty for VIIX. Skipping merge.
No stock data found or empty for DBV. Skipping merge.
No stock data found or empty for EMCB. Skipping merge.
No stock data found or empty for NORW. Skipping merge.
No stock data found or empty for GURU. Skipping merge.
No stock data found or empty for VXUS. Skipping merge.
No stock data found or empty for HMG. Skipping merge.
No stock data found or empty for VB. Skipping merge.
No stock data found or empty for AADR. Skipping merge.
No stock data found or empty for ADRD. Skipping merge.
No stock data found or empty for PXSC. Skipping merge.
No stock data fou

No stock data found or empty for DGS. Skipping merge.
No stock data found or empty for EEMA. Skipping merge.
No stock data found or empty for PJP. Skipping merge.
No stock data found or empty for ELD. Skipping merge.
No stock data found or empty for PNQI. Skipping merge.
No stock data found or empty for IDX. Skipping merge.
No stock data found or empty for RWXL. Skipping merge.
No stock data found or empty for VHT. Skipping merge.
No stock data found or empty for IYF. Skipping merge.
No stock data found or empty for SIL. Skipping merge.
No stock data found or empty for WDIV. Skipping merge.
No stock data found or empty for EVAL. Skipping merge.
No stock data found or empty for OLEM. Skipping merge.
No stock data found or empty for EUO. Skipping merge.
No stock data found or empty for GRF. Skipping merge.
No stock data found or empty for VEGA. Skipping merge.
No stock data found or empty for CLY. Skipping merge.
No stock data found or empty for BKYF. Skipping merge.
No stock data found 

No stock data found or empty for EOPN. Skipping merge.
No stock data found or empty for IHDG. Skipping merge.
No stock data found or empty for FRS. Skipping merge.
No stock data found or empty for GRT. Skipping merge.
No stock data found or empty for BJK. Skipping merge.
No stock data found or empty for VIAS. Skipping merge.
No stock data found or empty for ACFN. Skipping merge.
No stock data found or empty for TECU. Skipping merge.
No stock data found or empty for ASPX. Skipping merge.
No stock data found or empty for CBDE. Skipping merge.
No stock data found or empty for LAS. Skipping merge.
No stock data found or empty for COCO. Skipping merge.
No stock data found or empty for AUMAU. Skipping merge.
No stock data found or empty for ASBI. Skipping merge.
No stock data found or empty for CWI. Skipping merge.
No stock data found or empty for CFN. Skipping merge.
No stock data found or empty for LSG. Skipping merge.
No stock data found or empty for HDRAU. Skipping merge.
No stock data f

No stock data found or empty for MVNR. Skipping merge.
No stock data found or empty for CNDO. Skipping merge.
No stock data found or empty for CYCCP. Skipping merge.
No stock data found or empty for PETM. Skipping merge.
No stock data found or empty for BONE. Skipping merge.
No stock data found or empty for CLAC. Skipping merge.
No stock data found or empty for DRC. Skipping merge.
No stock data found or empty for RTI. Skipping merge.
No stock data found or empty for SURG. Skipping merge.
No stock data found or empty for BWS. Skipping merge.
No stock data found or empty for GCA. Skipping merge.
No stock data found or empty for LEVY. Skipping merge.
No stock data found or empty for ICA. Skipping merge.
No stock data found or empty for SLXP. Skipping merge.
No stock data found or empty for VTSS. Skipping merge.
No stock data found or empty for HQL. Skipping merge.
No stock data found or empty for RKT. Skipping merge.
No stock data found or empty for KFX. Skipping merge.
No stock data fou

No stock data found or empty for CLTX. Skipping merge.
No stock data found or empty for EPAX. Skipping merge.
No stock data found or empty for EXLP. Skipping merge.
No stock data found or empty for REMY. Skipping merge.
No stock data found or empty for ACT. Skipping merge.
No stock data found or empty for NML. Skipping merge.
No stock data found or empty for MCOX. Skipping merge.
No stock data found or empty for RCPT. Skipping merge.
No stock data found or empty for EVBS. Skipping merge.
No stock data found or empty for IG. Skipping merge.
No stock data found or empty for LBMH. Skipping merge.
No stock data found or empty for SFG. Skipping merge.
No stock data found or empty for XUE. Skipping merge.
No stock data found or empty for DAEG. Skipping merge.
No stock data found or empty for EOX. Skipping merge.
No stock data found or empty for HEWJ. Skipping merge.
No stock data found or empty for XOOM. Skipping merge.
No stock data found or empty for DBMX. Skipping merge.
No stock data fou

No stock data found or empty for FDO. Skipping merge.
No stock data found or empty for GLRI. Skipping merge.
No stock data found or empty for YOKU. Skipping merge.
No stock data found or empty for BLT. Skipping merge.
No stock data found or empty for FULL. Skipping merge.
No stock data found or empty for JGW. Skipping merge.
No stock data found or empty for PCL. Skipping merge.
No stock data found or empty for POZN. Skipping merge.
No stock data found or empty for RLD. Skipping merge.
No stock data found or empty for UPIP. Skipping merge.
No stock data found or empty for ZINC. Skipping merge.
No stock data found or empty for ZSPH. Skipping merge.
No stock data found or empty for IPAC. Skipping merge.
No stock data found or empty for IRC. Skipping merge.
No stock data found or empty for ITOT. Skipping merge.
No stock data found or empty for IUSB. Skipping merge.
No stock data found or empty for IUSV. Skipping merge.
No stock data found or empty for VGGL. Skipping merge.
No stock data fo

No stock data found or empty for KHI. Skipping merge.
No stock data found or empty for STRZA. Skipping merge.
No stock data found or empty for ATML. Skipping merge.
No stock data found or empty for SSRG. Skipping merge.
No stock data found or empty for FUTY. Skipping merge.
No stock data found or empty for HYLS. Skipping merge.
No stock data found or empty for CKP. Skipping merge.
No stock data found or empty for MTU. Skipping merge.
No stock data found or empty for GMCR. Skipping merge.
No stock data found or empty for FNDX. Skipping merge.
No stock data found or empty for LINE. Skipping merge.
No stock data found or empty for DXJR. Skipping merge.
No stock data found or empty for SZYM. Skipping merge.
No stock data found or empty for TFM. Skipping merge.
No stock data found or empty for SLI. Skipping merge.
No stock data found or empty for STCK. Skipping merge.
No stock data found or empty for ARO. Skipping merge.
No stock data found or empty for RSTI. Skipping merge.
No stock data f

No stock data found or empty for PLCM. Skipping merge.
No stock data found or empty for XRA. Skipping merge.
No stock data found or empty for CACQ. Skipping merge.
No stock data found or empty for CADT. Skipping merge.
No stock data found or empty for JXSB. Skipping merge.
No stock data found or empty for ROVI. Skipping merge.
No stock data found or empty for SGNT. Skipping merge.
No stock data found or empty for IMPR. Skipping merge.
No stock data found or empty for IILG. Skipping merge.
No stock data found or empty for QIHU. Skipping merge.
No stock data found or empty for FYLD. Skipping merge.
No stock data found or empty for CIU. Skipping merge.
No stock data found or empty for EGT. Skipping merge.
No stock data found or empty for ARMH. Skipping merge.
No stock data found or empty for VMEM. Skipping merge.
No stock data found or empty for CSH. Skipping merge.
No stock data found or empty for CFA. Skipping merge.
No stock data found or empty for EQY. Skipping merge.
No stock data fo

No stock data found or empty for PVI. Skipping merge.
No stock data found or empty for UAM. Skipping merge.
No stock data found or empty for AMSG. Skipping merge.
No stock data found or empty for HNR. Skipping merge.
No stock data found or empty for ELSE. Skipping merge.
No stock data found or empty for RTH. Skipping merge.
No stock data found or empty for RSO. Skipping merge.
No stock data found or empty for FIDU. Skipping merge.
No stock data found or empty for HAR. Skipping merge.
No stock data found or empty for VLTC. Skipping merge.
No stock data found or empty for SPP. Skipping merge.
No stock data found or empty for WNR. Skipping merge.
No stock data found or empty for AIXG. Skipping merge.
No stock data found or empty for TRTL. Skipping merge.
No stock data found or empty for HW. Skipping merge.
No stock data found or empty for OPXA. Skipping merge.
No stock data found or empty for AGND. Skipping merge.
No stock data found or empty for HYND. Skipping merge.
No stock data found 

No stock data found or empty for CNNX. Skipping merge.
No stock data found or empty for SBRAP. Skipping merge.
No stock data found or empty for SGOC. Skipping merge.
No stock data found or empty for TICC. Skipping merge.
No stock data found or empty for VTTI. Skipping merge.
No stock data found or empty for ENOC. Skipping merge.
No stock data found or empty for TESO. Skipping merge.
No stock data found or empty for TRMR. Skipping merge.
No stock data found or empty for SNC. Skipping merge.
No stock data found or empty for SSNI. Skipping merge.
No stock data found or empty for CDI. Skipping merge.
No stock data found or empty for GUID. Skipping merge.
No stock data found or empty for VYM. Skipping merge.
No stock data found or empty for IEF. Skipping merge.
No stock data found or empty for TMF. Skipping merge.
No stock data found or empty for NUTR. Skipping merge.
No stock data found or empty for LMOS. Skipping merge.
No stock data found or empty for GAA. Skipping merge.
No stock data f

No stock data found or empty for HCN. Skipping merge.
No stock data found or empty for UGAZ. Skipping merge.
No stock data found or empty for INCR. Skipping merge.
No stock data found or empty for WG. Skipping merge.
No stock data found or empty for CALL. Skipping merge.
No stock data found or empty for STLY. Skipping merge.
No stock data found or empty for GIGA. Skipping merge.
No stock data found or empty for SOR. Skipping merge.
No stock data found or empty for ZX. Skipping merge.
No stock data found or empty for CBG. Skipping merge.
No stock data found or empty for AFAM. Skipping merge.
No stock data found or empty for EMB. Skipping merge.
No stock data found or empty for WLB. Skipping merge.
No stock data found or empty for BGI. Skipping merge.
No stock data found or empty for DSWL. Skipping merge.
No stock data found or empty for BND. Skipping merge.
No stock data found or empty for SGY. Skipping merge.
No stock data found or empty for SNI. Skipping merge.
No stock data found or 

No stock data found or empty for RFT. Skipping merge.
No stock data found or empty for STBZ. Skipping merge.
No stock data found or empty for VII. Skipping merge.
No stock data found or empty for VTI. Skipping merge.
No stock data found or empty for ABAX. Skipping merge.
No stock data found or empty for FYC. Skipping merge.
No stock data found or empty for SFB. Skipping merge.
No stock data found or empty for BOFI. Skipping merge.
No stock data found or empty for APB. Skipping merge.
No stock data found or empty for ZOES. Skipping merge.
No stock data found or empty for BLJ. Skipping merge.
No stock data found or empty for BNJ. Skipping merge.
No stock data found or empty for MON. Skipping merge.
No stock data found or empty for QSII. Skipping merge.
No stock data found or empty for AFSI. Skipping merge.
No stock data found or empty for ERX. Skipping merge.
No stock data found or empty for AGG. Skipping merge.
No stock data found or empty for SDT. Skipping merge.
No stock data found or

No stock data found or empty for JRS. Skipping merge.
No stock data found or empty for MQY. Skipping merge.
No stock data found or empty for NKG. Skipping merge.
No stock data found or empty for OXBR. Skipping merge.
No stock data found or empty for CA. Skipping merge.
No stock data found or empty for HMTV. Skipping merge.
No stock data found or empty for KFS. Skipping merge.
No stock data found or empty for MCN. Skipping merge.
No stock data found or empty for MIY. Skipping merge.
No stock data found or empty for MYJ. Skipping merge.
No stock data found or empty for MYN. Skipping merge.
No stock data found or empty for BNY. Skipping merge.
No stock data found or empty for BVX. Skipping merge.
No stock data found or empty for ECR. Skipping merge.
No stock data found or empty for EDI. Skipping merge.
No stock data found or empty for ESP. Skipping merge.
No stock data found or empty for GMOM. Skipping merge.
No stock data found or empty for IIF. Skipping merge.
No stock data found or emp

No stock data found or empty for ITIC. Skipping merge.
No stock data found or empty for MUH. Skipping merge.
No stock data found or empty for OFED. Skipping merge.
No stock data found or empty for BHB. Skipping merge.
No stock data found or empty for BWG. Skipping merge.
No stock data found or empty for CRESY. Skipping merge.
No stock data found or empty for FCT. Skipping merge.
No stock data found or empty for GF. Skipping merge.
No stock data found or empty for IMI. Skipping merge.
No stock data found or empty for IRR. Skipping merge.
No stock data found or empty for JDD. Skipping merge.
No stock data found or empty for LDP. Skipping merge.
No stock data found or empty for TMK. Skipping merge.
No stock data found or empty for UBNK. Skipping merge.
No stock data found or empty for CHSCP. Skipping merge.
No stock data found or empty for CVRR. Skipping merge.
No stock data found or empty for AXR. Skipping merge.
No stock data found or empty for BGB. Skipping merge.
No stock data found o

No stock data found or empty for PFBI. Skipping merge.
No stock data found or empty for PZG. Skipping merge.
No stock data found or empty for SPA. Skipping merge.
No stock data found or empty for TSBK. Skipping merge.
No stock data found or empty for MLP. Skipping merge.
No stock data found or empty for AIF. Skipping merge.
No stock data found or empty for BEL. Skipping merge.
No stock data found or empty for BGR. Skipping merge.
No stock data found or empty for JPI. Skipping merge.
No stock data found or empty for MYF. Skipping merge.
No stock data found or empty for OFS. Skipping merge.
No stock data found or empty for OPOF. Skipping merge.
No stock data found or empty for PIH. Skipping merge.
No stock data found or empty for SBR. Skipping merge.
No stock data found or empty for SCG. Skipping merge.
No stock data found or empty for VALU. Skipping merge.
No stock data found or empty for CHW. Skipping merge.
No stock data found or empty for IX. Skipping merge.
No stock data found or em

No stock data found or empty for ACNB. Skipping merge.
No stock data found or empty for BCBP. Skipping merge.
No stock data found or empty for SOXS. Skipping merge.
No stock data found or empty for UNTY. Skipping merge.
No stock data found or empty for CCIH. Skipping merge.
No stock data found or empty for KEYW. Skipping merge.
No stock data found or empty for CHKE. Skipping merge.
No stock data found or empty for EMES. Skipping merge.
No stock data found or empty for FORD. Skipping merge.
No stock data found or empty for THFF. Skipping merge.
No stock data found or empty for ENFC. Skipping merge.
No stock data found or empty for FCAP. Skipping merge.
No stock data found or empty for INBK. Skipping merge.
No stock data found or empty for WINA. Skipping merge.
No stock data found or empty for NCI. Skipping merge.
No stock data found or empty for SAL. Skipping merge.
No stock data found or empty for ALX. Skipping merge.
No stock data found or empty for HBP. Skipping merge.
No stock data 

No stock data found or empty for BDGE. Skipping merge.
No stock data found or empty for COWN. Skipping merge.
No stock data found or empty for HEB. Skipping merge.
No stock data found or empty for LRAD. Skipping merge.
No stock data found or empty for NBHC. Skipping merge.
No stock data found or empty for USLM. Skipping merge.
No stock data found or empty for CVBF. Skipping merge.
No stock data found or empty for EZA. Skipping merge.
No stock data found or empty for GTY. Skipping merge.
No stock data found or empty for JMP. Skipping merge.
No stock data found or empty for COLB. Skipping merge.
No stock data found or empty for HTBI. Skipping merge.
No stock data found or empty for ISTR. Skipping merge.
No stock data found or empty for IYE. Skipping merge.
No stock data found or empty for TOWN. Skipping merge.
No stock data found or empty for WEBK. Skipping merge.
No stock data found or empty for NAN. Skipping merge.
No stock data found or empty for NMY. Skipping merge.
No stock data fou

No stock data found or empty for WBC. Skipping merge.
No stock data found or empty for CTIB. Skipping merge.
No stock data found or empty for FCSC. Skipping merge.
No stock data found or empty for FUNC. Skipping merge.
No stock data found or empty for HTH. Skipping merge.
No stock data found or empty for LBY. Skipping merge.
No stock data found or empty for PCYG. Skipping merge.
No stock data found or empty for SYMC. Skipping merge.
No stock data found or empty for AREX. Skipping merge.
No stock data found or empty for BITA. Skipping merge.
No stock data found or empty for IBOC. Skipping merge.
No stock data found or empty for MTSL. Skipping merge.
No stock data found or empty for RDS.A. Skipping merge.
No stock data found or empty for GBR. Skipping merge.
No stock data found or empty for SMMF. Skipping merge.
No stock data found or empty for VIA. Skipping merge.
No stock data found or empty for DEST. Skipping merge.
No stock data found or empty for ATLC. Skipping merge.
No stock data 

No stock data found or empty for MOFG. Skipping merge.
No stock data found or empty for RST. Skipping merge.
No stock data found or empty for TDV. Skipping merge.
No stock data found or empty for ASUR. Skipping merge.
No stock data found or empty for CARB. Skipping merge.
No stock data found or empty for CRR. Skipping merge.
No stock data found or empty for GIFI. Skipping merge.
No stock data found or empty for GVP. Skipping merge.
No stock data found or empty for LEU. Skipping merge.
No stock data found or empty for MATW. Skipping merge.
No stock data found or empty for PESI. Skipping merge.
No stock data found or empty for PME. Skipping merge.
No stock data found or empty for SMTX. Skipping merge.
No stock data found or empty for SYBT. Skipping merge.
No stock data found or empty for TG. Skipping merge.
No stock data found or empty for APEI. Skipping merge.
No stock data found or empty for BREW. Skipping merge.
No stock data found or empty for CBMG. Skipping merge.
No stock data foun

No stock data found or empty for BANF. Skipping merge.
No stock data found or empty for BMA. Skipping merge.
No stock data found or empty for CMCO. Skipping merge.
No stock data found or empty for ELMD. Skipping merge.
No stock data found or empty for ISSC. Skipping merge.
No stock data found or empty for LKFN. Skipping merge.
No stock data found or empty for NFJ. Skipping merge.
No stock data found or empty for TBBK. Skipping merge.
No stock data found or empty for VLGEA. Skipping merge.
No stock data found or empty for WLFC. Skipping merge.
No stock data found or empty for WSBF. Skipping merge.
No stock data found or empty for AMPE. Skipping merge.
No stock data found or empty for FF. Skipping merge.
No stock data found or empty for FRA. Skipping merge.
No stock data found or empty for GBCI. Skipping merge.
No stock data found or empty for IMKTA. Skipping merge.
No stock data found or empty for JGH. Skipping merge.
No stock data found or empty for LBAI. Skipping merge.
No stock data 

No stock data found or empty for XTLB. Skipping merge.
No stock data found or empty for ZFGN. Skipping merge.
No stock data found or empty for AIT. Skipping merge.
No stock data found or empty for ATTO. Skipping merge.
No stock data found or empty for BANC. Skipping merge.
No stock data found or empty for FOR. Skipping merge.
No stock data found or empty for SFUN. Skipping merge.
No stock data found or empty for TTEC. Skipping merge.
No stock data found or empty for ARTW. Skipping merge.
No stock data found or empty for BHK. Skipping merge.
No stock data found or empty for CHY. Skipping merge.
No stock data found or empty for CMRE. Skipping merge.
No stock data found or empty for CNSL. Skipping merge.
No stock data found or empty for CYOU. Skipping merge.
No stock data found or empty for FFBC. Skipping merge.
No stock data found or empty for GLOP. Skipping merge.
No stock data found or empty for HSTM. Skipping merge.
No stock data found or empty for HTBK. Skipping merge.
No stock data 

No stock data found or empty for SMSI. Skipping merge.
No stock data found or empty for SWIR. Skipping merge.
No stock data found or empty for BMI. Skipping merge.
No stock data found or empty for KELYA. Skipping merge.
No stock data found or empty for RUSHA. Skipping merge.
No stock data found or empty for TU. Skipping merge.
No stock data found or empty for ABEV. Skipping merge.
No stock data found or empty for AEIS. Skipping merge.
No stock data found or empty for AR. Skipping merge.
No stock data found or empty for CEL. Skipping merge.
No stock data found or empty for CRY. Skipping merge.
No stock data found or empty for CXDC. Skipping merge.
No stock data found or empty for EXPO. Skipping merge.
No stock data found or empty for LLEX. Skipping merge.
No stock data found or empty for LM. Skipping merge.
No stock data found or empty for LTRX. Skipping merge.
No stock data found or empty for NGG. Skipping merge.
No stock data found or empty for ONE. Skipping merge.
No stock data found

No stock data found or empty for OSIS. Skipping merge.
No stock data found or empty for RFP. Skipping merge.
No stock data found or empty for ROCK. Skipping merge.
No stock data found or empty for RP. Skipping merge.
No stock data found or empty for SUP. Skipping merge.
No stock data found or empty for TCPC. Skipping merge.
No stock data found or empty for TDS. Skipping merge.
No stock data found or empty for THR. Skipping merge.
No stock data found or empty for TUR. Skipping merge.
No stock data found or empty for UFCS. Skipping merge.
No stock data found or empty for UVSP. Skipping merge.
No stock data found or empty for VGR. Skipping merge.
No stock data found or empty for VMI. Skipping merge.
No stock data found or empty for WSBC. Skipping merge.
No stock data found or empty for CDXS. Skipping merge.
No stock data found or empty for CORE. Skipping merge.
No stock data found or empty for EIS. Skipping merge.
No stock data found or empty for HMLP. Skipping merge.
No stock data found 

No stock data found or empty for BIP. Skipping merge.
No stock data found or empty for BLL. Skipping merge.
No stock data found or empty for BRKS. Skipping merge.
No stock data found or empty for CAMP. Skipping merge.
No stock data found or empty for CBSH. Skipping merge.
No stock data found or empty for CDR. Skipping merge.
No stock data found or empty for CEO. Skipping merge.
No stock data found or empty for CLNE. Skipping merge.
No stock data found or empty for CLS. Skipping merge.
No stock data found or empty for CNA. Skipping merge.
No stock data found or empty for COR. Skipping merge.
No stock data found or empty for CR. Skipping merge.
No stock data found or empty for CRS. Skipping merge.
No stock data found or empty for DAKT. Skipping merge.
No stock data found or empty for DEO. Skipping merge.
No stock data found or empty for E. Skipping merge.
No stock data found or empty for EEFT. Skipping merge.
No stock data found or empty for EIGI. Skipping merge.
No stock data found or e

No stock data found or empty for RAVE. Skipping merge.
No stock data found or empty for S. Skipping merge.
No stock data found or empty for SCVL. Skipping merge.
No stock data found or empty for SLRC. Skipping merge.
No stock data found or empty for SYNA. Skipping merge.
No stock data found or empty for TA. Skipping merge.
No stock data found or empty for WAT. Skipping merge.
No stock data found or empty for WNC. Skipping merge.
No stock data found or empty for CQP. Skipping merge.
No stock data found or empty for AB. Skipping merge.
No stock data found or empty for AHH. Skipping merge.
No stock data found or empty for ALLE. Skipping merge.
No stock data found or empty for ATRO. Skipping merge.
No stock data found or empty for AVNW. Skipping merge.
No stock data found or empty for BBDO. Skipping merge.
No stock data found or empty for CCJ. Skipping merge.
No stock data found or empty for CCK. Skipping merge.
No stock data found or empty for CEA. Skipping merge.
No stock data found or e

No stock data found or empty for GBX. Skipping merge.
No stock data found or empty for GIGM. Skipping merge.
No stock data found or empty for HSC. Skipping merge.
No stock data found or empty for KRC. Skipping merge.
No stock data found or empty for NYCB. Skipping merge.
No stock data found or empty for ORI. Skipping merge.
No stock data found or empty for PATK. Skipping merge.
No stock data found or empty for PRGO. Skipping merge.
No stock data found or empty for RCII. Skipping merge.
No stock data found or empty for SEAC. Skipping merge.
No stock data found or empty for SMFG. Skipping merge.
No stock data found or empty for STAR. Skipping merge.
No stock data found or empty for SUI. Skipping merge.
No stock data found or empty for UGI. Skipping merge.
No stock data found or empty for WNS. Skipping merge.
No stock data found or empty for ARLP. Skipping merge.
No stock data found or empty for BLDP. Skipping merge.
No stock data found or empty for FAF. Skipping merge.
No stock data foun

No stock data found or empty for DHT. Skipping merge.
No stock data found or empty for EROS. Skipping merge.
No stock data found or empty for FRPT. Skipping merge.
No stock data found or empty for GLBS. Skipping merge.
No stock data found or empty for GRPN. Skipping merge.
No stock data found or empty for LMNX. Skipping merge.
No stock data found or empty for LPTH. Skipping merge.
No stock data found or empty for MPW. Skipping merge.
No stock data found or empty for NBY. Skipping merge.
No stock data found or empty for POL. Skipping merge.
No stock data found or empty for QLYS. Skipping merge.
No stock data found or empty for RAD. Skipping merge.
No stock data found or empty for RNR. Skipping merge.
No stock data found or empty for SHLX. Skipping merge.
No stock data found or empty for TRP. Skipping merge.
No stock data found or empty for ZIXI. Skipping merge.
No stock data found or empty for ADT. Skipping merge.
No stock data found or empty for ALGN. Skipping merge.
No stock data foun

No stock data found or empty for AMT. Skipping merge.
No stock data found or empty for ASGN. Skipping merge.
No stock data found or empty for ASPS. Skipping merge.
No stock data found or empty for AUY. Skipping merge.
No stock data found or empty for BAX. Skipping merge.
No stock data found or empty for BBVA. Skipping merge.
No stock data found or empty for BEN. Skipping merge.
No stock data found or empty for BKU. Skipping merge.
No stock data found or empty for BMO. Skipping merge.
No stock data found or empty for CHRW. Skipping merge.
No stock data found or empty for CMI. Skipping merge.
No stock data found or empty for CODI. Skipping merge.
No stock data found or empty for CROX. Skipping merge.
No stock data found or empty for DDS. Skipping merge.
No stock data found or empty for DIN. Skipping merge.
No stock data found or empty for GGB. Skipping merge.
No stock data found or empty for GIL. Skipping merge.
No stock data found or empty for HBI. Skipping merge.
No stock data found or

No stock data found or empty for CINF. Skipping merge.
No stock data found or empty for CPA. Skipping merge.
No stock data found or empty for CTSO. Skipping merge.
No stock data found or empty for EFOI. Skipping merge.
No stock data found or empty for EWBC. Skipping merge.
No stock data found or empty for FMC. Skipping merge.
No stock data found or empty for FNV. Skipping merge.
No stock data found or empty for FSLR. Skipping merge.
No stock data found or empty for GEL. Skipping merge.
No stock data found or empty for GOLD. Skipping merge.
No stock data found or empty for INCY. Skipping merge.
No stock data found or empty for INGN. Skipping merge.
No stock data found or empty for IO. Skipping merge.
No stock data found or empty for ITW. Skipping merge.
No stock data found or empty for LBTYA. Skipping merge.
No stock data found or empty for LPSN. Skipping merge.
No stock data found or empty for MASI. Skipping merge.
No stock data found or empty for MO. Skipping merge.
No stock data foun

No stock data found or empty for AES. Skipping merge.
No stock data found or empty for ALXN. Skipping merge.
No stock data found or empty for AU. Skipping merge.
No stock data found or empty for CBAY. Skipping merge.
No stock data found or empty for CHH. Skipping merge.
No stock data found or empty for CLNY. Skipping merge.
No stock data found or empty for DUK. Skipping merge.
No stock data found or empty for EBS. Skipping merge.
No stock data found or empty for EWJ. Skipping merge.
No stock data found or empty for GDOT. Skipping merge.
No stock data found or empty for GIS. Skipping merge.
No stock data found or empty for HALO. Skipping merge.
No stock data found or empty for IFF. Skipping merge.
No stock data found or empty for IIVI. Skipping merge.
No stock data found or empty for LEN. Skipping merge.
No stock data found or empty for LH. Skipping merge.
No stock data found or empty for LSCC. Skipping merge.
No stock data found or empty for MNST. Skipping merge.
No stock data found or

No stock data found or empty for EW. Skipping merge.
No stock data found or empty for FLS. Skipping merge.
No stock data found or empty for GENE. Skipping merge.
No stock data found or empty for HA. Skipping merge.
No stock data found or empty for IBN. Skipping merge.
No stock data found or empty for IMMU. Skipping merge.
No stock data found or empty for KMX. Skipping merge.
No stock data found or empty for LINC. Skipping merge.
No stock data found or empty for LOGI. Skipping merge.
No stock data found or empty for MAS. Skipping merge.
No stock data found or empty for NBL. Skipping merge.
No stock data found or empty for NCLH. Skipping merge.
No stock data found or empty for NEM. Skipping merge.
No stock data found or empty for NGL. Skipping merge.
No stock data found or empty for NR. Skipping merge.
No stock data found or empty for PPL. Skipping merge.
No stock data found or empty for REG. Skipping merge.
No stock data found or empty for RGR. Skipping merge.
No stock data found or emp

No stock data found or empty for DGX. Skipping merge.
No stock data found or empty for ESEA. Skipping merge.
No stock data found or empty for GLNG. Skipping merge.
No stock data found or empty for H. Skipping merge.
No stock data found or empty for HEI. Skipping merge.
No stock data found or empty for KEY. Skipping merge.
No stock data found or empty for KEYS. Skipping merge.
No stock data found or empty for LOW. Skipping merge.
No stock data found or empty for MEIP. Skipping merge.
No stock data found or empty for MMM. Skipping merge.
No stock data found or empty for MT. Skipping merge.
No stock data found or empty for PENN. Skipping merge.
No stock data found or empty for PFG. Skipping merge.
No stock data found or empty for PLT. Skipping merge.
No stock data found or empty for QCOM. Skipping merge.
No stock data found or empty for RBS. Skipping merge.
No stock data found or empty for RL. Skipping merge.
No stock data found or empty for SPCB. Skipping merge.
No stock data found or em

No stock data found or empty for TTPH. Skipping merge.
No stock data found or empty for TUP. Skipping merge.
No stock data found or empty for TWTR. Skipping merge.
No stock data found or empty for WMT. Skipping merge.
No stock data found or empty for XLE. Skipping merge.
No stock data found or empty for XLF. Skipping merge.
No stock data found or empty for XLK. Skipping merge.
No stock data found or empty for AAL. Skipping merge.
No stock data found or empty for AZN. Skipping merge.
No stock data found or empty for CCL. Skipping merge.
No stock data found or empty for CPE. Skipping merge.
No stock data found or empty for CRC. Skipping merge.
No stock data found or empty for DAL. Skipping merge.
No stock data found or empty for DVP. Skipping merge.
No stock data found or empty for FB. Skipping merge.
No stock data found or empty for GILD. Skipping merge.
No stock data found or empty for JBLU. Skipping merge.
No stock data found or empty for LUV. Skipping merge.
No stock data found or em

In [None]:
#Save correlation data
output_dir = '../data/processed/'
os.makedirs(output_dir, exist_ok=True)
for ticker, df in merged_data_for_correlation.items():
    df.to_csv(os.path.join(output_dir, f'{ticker}_merged_correlation_data.csv'))

print(f"\nSaved merged correlation data to {output_dir}")

#Save the overall correlation _df
correlation _df.to_csv(os.path.join(output_dir, 'overall_correlation_summary.csv'), index = False)
print(f"\Saved overall correlation summary to {ticker}")

# Perform Correlation Analysis

In [None]:
print("\n--- Performing Correlation Analysis ---")
correlation_results = {}

for ticker, df_merged in merged_data_for_correlation.items():
    if not df_merged.empty:
        # Calculate Pearson correlation
        correlation = df_merged['daily_avg_sentiment'].corr(df_merged['Daily_Return'])
        correlation_results[ticker] = correlation
        print(f"Correlation for {ticker}: {correlation:.4f}")
    else:
        print(f"Skipping correlation for {ticker}: No merged data available.")

print("\n--- Summary of Correlation Results ---")
correlation_df = pd.DataFrame(list(correlation_results.items()), columns=['Ticker', 'Sentiment_vs_Daily_Return_Correlation'])
print(correlation_df.sort_values(by='Sentiment_vs_Daily_Return_Correlation', ascending=False))

# Visualize correlations using a bar chart
plt.figure(figsize=(10, 6))
sns.barplot(x='Ticker', y='Sentiment_vs_Daily_Return_Correlation', data=correlation_df, palette='viridis')
plt.title('Correlation between Daily News Sentiment and Daily Stock Returns')
plt.xlabel('Stock Ticker')
plt.ylabel('Pearson Correlation Coefficient')
plt.ylim(-1, 1) # Set y-axis limits for correlation
plt.axhline(0, color='gray', linestyle='--', linewidth=0.7)
plt.show()

# --- Optional: Visualize Scatter Plot for a specific stock ---
ticker_to_plot_corr = 'AAPL' # Choose a ticker to inspect
if ticker_to_plot_corr in merged_data_for_correlation:
    df_plot_corr = merged_data_for_correlation[ticker_to_plot_corr]
    plt.figure(figsize=(10, 7))
    sns.scatterplot(x='daily_avg_sentiment', y='Daily_Return', data=df_plot_corr, alpha=0.6)
    plt.title(f'Daily News Sentiment vs. Daily Returns for {ticker_to_plot_corr}\n(Correlation: {correlation_results.get(ticker_to_plot_corr):.4f})')
    plt.xlabel('Daily Average Sentiment Score')
    plt.ylabel('Daily Stock Return (%)')
    plt.axhline(0, color='grey', linestyle='--', linewidth=0.8)
    plt.axvline(0, color='grey', linestyle='--', linewidth=0.8)
    plt.show()
else:
    print(f"No merged data for {ticker_to_plot_corr} to plot correlation.")

# Interpretation and Next Steps

In [None]:
print("\n--- Interpretation of Correlation Results ---")
print("A correlation coefficient close to 1 indicates a strong positive linear relationship (higher sentiment associated with higher returns).")
print("A correlation coefficient close to -1 indicates a strong negative linear relationship (higher sentiment associated with lower returns).")
print("A correlation coefficient close to 0 indicates a weak or no linear relationship.")

print("\nKey Observations from Correlation Analysis:")
# Based on the output of the correlation_df and plots, you would write your observations here.
# For example:
# - "We observe a generally positive, but weak, correlation between daily news sentiment and daily stock returns across most tickers."
# - "AAPL shows a slightly stronger positive correlation than AMZN."
# - "The correlation values are relatively low, suggesting that daily average sentiment alone might not be a strong linear predictor of daily returns."
# - "This could imply that the market reacts to news in more complex ways, or that other factors (technical indicators, macroeconomic news) are more influential."

print("\nFurther Steps and Considerations:")
print("1. Lagging Effects: Investigate if news sentiment on day t correlates with stock returns on day t+1, t+2, etc. This involves shifting the stock return column.")
print("2. News Volume: Explore the correlation between the *volume* of news articles (not just sentiment) and stock movements.")
print("3. Sentiment Aggregation: Experiment with different sentiment aggregation methods (e.g., sum of positive/negative scores, weighted average, median).")
print("4. Advanced Sentiment: Consider more nuanced sentiment analysis (e.g., financial-specific lexicons, BERT-based models) beyond TextBlob/VADER.")
print("5. Non-linear Relationships: Correlation measures linear relationships. Non-linear patterns might exist, which could be explored with more advanced statistical models or machine learning.")
print("6. Feature Engineering: Combine news sentiment with the technical indicators calculated in Quantitative_Analysis.ipynb to create a richer feature set for predictive modeling.")
print("7. Hypothesis Testing: Use statistical tests (e.g., t-tests, p-values) to determine if the observed correlations are statistically significant.")
print("8. Strategy Development: Based on the observed correlations, formulate initial hypotheses for investment strategies that leverage news sentiment.")