In [14]:
## Setup and Data Preparation
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from statsmodels.tsa.stattools import coint
import datetime as dt

import warnings
warnings.filterwarnings('ignore')

Load Pairs to Plot CSV

In [15]:
# Load the table of candidate pairs; the following cell reads its 'stock1' and 'stock2' columns.
pairs_to_plot = pd.read_csv('pairs_to_plot.csv')

In [16]:
# Turn the pairs table into a list of (stock1, stock2) tuples for iteration.
# Note: this rebinds `pairs_to_plot` from a DataFrame to a list, so the cell
# can only be re-run after the CSV has been loaded again.
pairs_to_plot = [
    (first, second)
    for first, second in zip(pairs_to_plot['stock1'], pairs_to_plot['stock2'])
]

Load Closing Data - Keep the Most Recent 252 Rows (Trading Days)

In [17]:
# Load the stored closing prices (one column per ticker, indexed by Date as shown in the preview below).
fetch_data = pd.read_parquet('close_1year.parquet')

In [18]:
# Number of most-recent rows (trading days) kept for the correlation and spread analysis.
LOOKBACK_DAYS = 252
corr_data = fetch_data.tail(LOOKBACK_DAYS)

In [19]:
# Preview the last five rows to confirm the most recent date and the ticker columns.
corr_data.tail()

Ticker,AAPL,AMD,AXP,BAC,C,COP,COST,CVS,CVX,DE,...,PFE,PG,QCOM,SLB,TGT,TSLA,UNH,WFC,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-11-13,272.950012,247.960007,364.730011,52.869999,100.669998,88.597931,925.080017,79.239998,153.861603,473.410004,...,25.790001,147.960007,174.5,36.310001,90.620003,401.98999,332.519989,84.699997,102.540001,117.760002
2025-11-14,272.410004,246.809998,357.179993,52.610001,100.300003,90.530006,922.97998,77.809998,155.879074,476.230011,...,25.059999,147.669998,173.979996,36.939999,89.900002,404.350006,321.859985,85.050003,102.480003,119.290001
2025-11-17,267.459991,240.520004,341.25,51.48,98.190002,88.720001,912.590027,78.410004,153.110001,475.980011,...,25.08,145.820007,166.75,35.799999,88.480003,408.920013,320.519989,83.419998,102.949997,117.68
2025-11-18,267.440002,230.289993,340.660004,51.639999,98.32,89.68,895.080017,77.940002,153.619995,473.850006,...,25.450001,146.990005,165.059998,36.040001,88.529999,401.25,313.579987,83.860001,101.389999,119.029999
2025-11-19,268.559998,223.550003,344.640015,52.02,99.830002,87.980003,890.599976,76.550003,151.699997,474.769989,...,24.879999,146.990005,166.110001,35.860001,86.080002,403.98999,309.089996,84.160004,100.610001,117.349998


In [20]:
# Daily simple returns per ticker. The first row has no previous close, so it
# is NaN and gets dropped; any other row containing a NaN is dropped as well.
corr_data_returns = corr_data.pct_change().dropna(how='any')

# Pairwise Pearson correlation of the daily returns (ticker x ticker).
corr_matrix = corr_data_returns.corr(method='pearson')

Generate Spreads

In [21]:
def merge_df(stock1_norm_ls, stock2_norm_ls, spread_ls, zscore_ls):
    """Combine normalized prices, spread and z-score into one table keyed on 'Date'.

    Each argument is a list of Series (the caller passes one Series per list).
    Every list is turned into a DataFrame and transposed so the Series values
    run down a column. The spread and z-score columns are expected to arrive
    labelled ``0`` (as for an unnamed Series) and are renamed to 'Spread' and
    'Z_Score'.

    Parameters
    ----------
    stock1_norm_ls, stock2_norm_ls : list of pandas.Series
        Normalized price series of the two stocks.
    spread_ls : list of pandas.Series
        Spread series.
    zscore_ls : list of pandas.Series
        Z-score series of the spread.

    Returns
    -------
    pandas.DataFrame
        The four frames merged on 'Date' with pandas' default (inner) join,
        in the order stock1, stock2, Spread, Z_Score.
    """
    frames = [
        pd.DataFrame(stock1_norm_ls).T,
        pd.DataFrame(stock2_norm_ls).T,
        pd.DataFrame(spread_ls).T.rename(columns={0: 'Spread'}),
        pd.DataFrame(zscore_ls).T.rename(columns={0: 'Z_Score'}),
    ]

    # Fold the frames together left-to-right, always joining on 'Date'.
    merged = frames[0]
    for frame in frames[1:]:
        merged = pd.merge(merged, frame, left_on='Date', right_on='Date')
    return merged

In [22]:
def spread_zscore_data(i):
    """Build the normalized-price, spread and z-score table for one stock pair.

    Parameters
    ----------
    i : sequence
        The pair; ``i[0]`` and ``i[1]`` are column labels of the module-level
        ``corr_data`` price table.

    Returns
    -------
    pandas.DataFrame
        Result of ``merge_df``: both normalized price series, their spread
        and the z-score of the spread.
    """
    def _rebase(prices):
        # Scale the series so its first observation equals 100.
        return prices / prices.iloc[0] * 100

    rebased_first = _rebase(corr_data[i[0]])
    rebased_second = _rebase(corr_data[i[1]])

    # Spread is the difference between the two rebased price paths.
    gap = rebased_first - rebased_second

    # Standardize with the mean and standard deviation of the whole window.
    gap_z = (gap - gap.mean()) / gap.std()

    # merge_df expects a list of Series for each argument.
    return merge_df([rebased_first], [rebased_second], [gap], [gap_z])

In [23]:
# Thresholds used when classifying each pair.
Z_ENTRY_THRESHOLD = 2              # |z-score| beyond this marks the spread as diverged
PVALUE_HIGH_CONFIDENCE = 0.05      # cointegration p-value below this -> "high"
PVALUE_MODERATE_CONFIDENCE = 0.10  # cointegration p-value below this -> "moderate"

current_signals = []

for i in pairs_to_plot:
    # isolate stock pairs
    stock1 = i[0]
    stock2 = i[1]

    # Build the spread / z-score table for this pair and persist it for charting.
    pairs_data_df = spread_zscore_data(i)
    pairs_data_df.to_parquet(f'{stock1}_{stock2}_pairs_data.parquet')

    # Correlation of daily returns between the two stocks.
    current_corr = corr_matrix.loc[stock1, stock2]

    # Latest row of the pair table. The z-score is read by position (.iloc):
    # the table is keyed by date, so `[0]` would be an integer key on a
    # non-integer index, which pandas has deprecated.
    current_pair = pairs_data_df.tail(1)
    current_z = current_pair['Z_Score'].iloc[0]

    # Cointegration test on the price levels; `score` is the test statistic
    # and `pvalue` its p-value.
    score, pvalue, _ = coint(corr_data[stock1], corr_data[stock2])

    # Determine signal: a high z-score means stock1 is rich relative to stock2.
    if current_z > Z_ENTRY_THRESHOLD:
        signal = f"SELL {stock1} / BUY {stock2}"
        signal_status = "DIVERGED"
    elif current_z < -Z_ENTRY_THRESHOLD:
        signal = f"BUY {stock1} / SELL {stock2}"
        signal_status = "DIVERGED"
    else:
        signal = "NO SIGNAL"
        signal_status = "IN_RANGE"

    # Determine confidence tier from the p-value. The 0.05 / 0.10 cut-offs are
    # significance levels, so they must be compared against `pvalue`, not the
    # test statistic `score`.
    if pvalue < PVALUE_HIGH_CONFIDENCE:
        confidence = "high"
    elif pvalue < PVALUE_MODERATE_CONFIDENCE:
        confidence = "moderate"
    else:
        confidence = "low"

    # Build signal entry
    current_signals.append({
        'pair_id': f"{stock1}_{stock2}",
        'stock1': stock1,
        'stock2': stock2,
        'current_z_score': float(current_z),
        'signal': signal,
        'signal_status': signal_status,
        'correlation': float(current_corr),
        'coint_pvalue': float(pvalue),
        'coint_score': float(score),
        'confidence': confidence,
        'days_in_signal': 0,  # You can calculate this if tracking
        'last_updated': dt.datetime.now()
    })

    

Collect the Generated Signals into a DataFrame and Save Them to Parquet

In [24]:
# One row per pair: each dict appended to `current_signals` becomes a row, its keys the columns.
current_signals_df = pd.DataFrame(current_signals)

In [25]:
# Persist the signal table to 'current_signals.parquet' in the working directory.
current_signals_df.to_parquet('current_signals.parquet')