![QuantConnect Logo](https://cdn.quantconnect.com/web/i/icon.png)
<hr>

In [None]:
# Import required libraries
from datetime import datetime
from AlgorithmImports import *

from statsmodels.tsa.stattools import coint, adfuller
from scipy.stats import linregress
import pandas as pd
import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Date window passed to the history request.
START_DATE = datetime(year=2000, month=1, day=1)
END_DATE = datetime(year=2025, month=1, day=1)

# Candidate universe. Each row is one thematic group; the label above a row is
# the author's own grouping note, not something the code relies on.
# NOTE(review): every entry is later subscribed as an equity. Some entries
# (e.g. 'VIX', 'DAX', 'BMW', 'VOW3', 'ASXLS') do not look like US equity
# tickers - confirm they resolve to the intended instruments.
TICKERS = [
    # SPY
    'SPY', 'SPXS', 'SPXL', 'IVV', 'VOO', 'VIX',
    # Volatility Index
    'VXX', 'UVXY', 'TVIX',
    # NASDAQ
    'QQQ', 'TQQQ', 'SQQQ',
    # Russell 2000
    'IWM', 'URTY', 'SRTY',
    # Emerging Markets
    'EEM', 'EDZ', 'VWO',
    # Dow Jones ? (grouping was already marked as uncertain)
    'DIA', 'TLT', 'UPRO',
    # Gold
    'GLD', 'GDX',
    # Metals
    'XME', 'COPX', 'JJC', 'SIL', 'SLV',
    # Agriculture
    'CORN', 'DE', 'SOYB', 'WEAT',
    # Ethereum
    'ETC', 'ETH',
    # Financials
    'XLF', 'KBE',
    # Beverages
    'KO', 'PEP',
    # Fast Food
    'MCD', 'WEN', 'YUM',
    # Oil
    'XOM', 'USO', 'XLE', 'OIH',
    # Canada / Australia
    'EWA', 'EWC',
    # Euro / Pound
    'FXE', 'FXB',
    # China
    'FXI', 'ASHR', 'BABA',
    # Semiconductors
    'TSM', 'AMD', 'ASXLS',
    # Auto
    'TSLA', 'F', 'GM', 'RIVN',
    # Germany
    'EWG', 'DAX', 'BMW', 'VOW3',
    # USD / Euro
    'UUP', 'EUO',
]

# A pair counts as cointegrated when its test p-value is below this level.
COINTEGRATION_PVALUE_THRESHOLD = 0.05
# Minimum number of observations a series needs before a pair is tested.
MIN_HISTORY_DAYS = 360

# Shared research handle: used below to subscribe each ticker (add_equity) and
# to request historical bars (history).
# NOTE(review): QuantBook is presumably supplied by the AlgorithmImports star
# import - confirm if the import list changes.
qb = QuantBook()

def calculate_hedge_ratio(y, x):
    """Return the slope of the least-squares line fitted to ``y`` against ``x``.

    Args:
        y: Dependent series (the leg being hedged).
        x: Independent series (the hedging leg).

    Returns:
        The regression slope, used as the hedge ratio. The intercept and the
        other regression statistics are discarded.
    """
    return linregress(x, y).slope

def test_cointegration(series1, series2):
    """Run a cointegration test on two price series and build their spread.

    The hedge ratio is the slope from regressing ``series1`` on ``series2``
    (via ``calculate_hedge_ratio``). The spread is ``series1`` minus that
    ratio times ``series2``; no intercept term is subtracted. The p-value is
    taken from ``coint`` applied to the two raw series, independently of the
    hedge ratio computed here.

    Args:
        series1: First price series (dependent leg of the regression).
        series2: Second price series (independent leg of the regression).

    Returns:
        A ``(pvalue, hedge_ratio, spread)`` tuple.
    """
    beta = calculate_hedge_ratio(series1, series2)
    residual_spread = series1 - (beta * series2)
    # coint returns a 3-tuple; only the middle element (the p-value) is used.
    coint_result = coint(series1, series2)
    pvalue = coint_result[1]
    return pvalue, beta, residual_spread

# Subscribe every configured ticker as an equity, then collect the Symbol of
# each subscription; these symbols are what the history request is keyed on.
securities = [qb.add_equity(ticker) for ticker in TICKERS]
symbols = [security.Symbol for security in securities]


In [11]:
# Request daily bars for all subscribed symbols over the configured window.
history = qb.history(symbols, START_DATE, END_DATE, Resolution.DAILY)

# Move index level 0 into the columns so each field has one column per
# level-0 key (presumably the symbol - verify against the history frame).
history_unstacked = history.unstack(level=0)

# Keep the close columns only, dropping every column that has a missing value.
# NOTE(review): this keeps only symbols with a value on every row of the frame,
# so all surviving series share one length and the later MIN_HISTORY_DAYS
# check cannot tell them apart - confirm this strict filter is intended.
closing_prices = history_unstacked['close'].dropna(axis=1)

# Restrict to configured tickers that survived the filter, in TICKERS order.
valid_symbols = []
for ticker in TICKERS:
    if ticker in closing_prices.columns:
        valid_symbols.append(ticker)

# From here on `history` holds the filtered close prices, not the raw bars.
history = closing_prices[valid_symbols]
for preview in (history.columns, history.head(), history.tail()):
    print(preview)



In [12]:
# Test every unordered pair of valid symbols for cointegration and summarise
# the outcome in `results_df` (sorted by p-value, most significant first).
from itertools import combinations

# Column order of the results table. Passing it explicitly keeps the frame
# well-formed - and sortable by 'PValue' - even when no pair produced a row
# (fewer than two valid symbols, or every pair skipped or failed).
result_columns = [
    'Pair', 'PValue', 'HedgeRatio', 'ADF_Statistic', 'Spread_Mean', 'Spread_Std',
]

# Generate all possible pairs: (valid_symbols[i], valid_symbols[j]) for i < j,
# in the same order nested index loops would produce.
pairs = list(combinations(valid_symbols, 2))
cointegrated_pairs = []

# Test cointegration for each pair
results = []
for sym1, sym2 in pairs:
    # Best-effort scan: a failure on one pair is reported and the scan moves
    # on, so a single bad series cannot abort the whole run.
    try:
        series1 = history[sym1]
        series2 = history[sym2]

        print(f"Testing cointegration between {sym1} and {sym2}")
        # Ensure minimum data length
        if len(series1) < MIN_HISTORY_DAYS or len(series2) < MIN_HISTORY_DAYS:
            continue

        pvalue, hedge_ratio, spread = test_cointegration(series1, series2)

        print(f"p-value: {pvalue}, Hedge Ratio: {hedge_ratio}")
        results.append({
            'Pair': f"{sym1}-{sym2}",
            'PValue': pvalue,
            'HedgeRatio': hedge_ratio,
            # First element of the adfuller result on the spread.
            'ADF_Statistic': adfuller(spread)[0],
            'Spread_Mean': spread.mean(),
            'Spread_Std': spread.std(),
        })

        if pvalue < COINTEGRATION_PVALUE_THRESHOLD:
            cointegrated_pairs.append((sym1, sym2))

    except Exception as e:
        print(f"Error processing {sym1}-{sym2}: {e}")

# Create results DataFrame
results_df = pd.DataFrame(results, columns=result_columns).sort_values('PValue')

# Display significant pairs
print(f"Found {len(cointegrated_pairs)} cointegrated pairs (p < {COINTEGRATION_PVALUE_THRESHOLD}):")
display(results_df[results_df.PValue < COINTEGRATION_PVALUE_THRESHOLD])


In [None]:
# Plot the spread of every cointegrated pair with its mean and a one-standard-
# deviation band on either side.
for sym1, sym2 in cointegrated_pairs:
    pair_label = f"{sym1}-{sym2}"
    # First results row recorded for this pair; it supplies both the hedge
    # ratio used to rebuild the spread and the p-value shown in the title.
    pair_row = results_df.loc[results_df.Pair == pair_label].iloc[0]
    spread = history[sym1] - (pair_row['HedgeRatio'] * history[sym2])
    spread_mean = spread.mean()
    spread_std = spread.std()

    plt.figure(figsize=(15, 7))
    plt.title(f"{pair_label} Spread (Cointegrated) P: {pair_row['PValue']}")
    plt.plot(spread, label='Spread')
    plt.axhline(spread_mean, color='black', linestyle='--', label='Mean')
    # Only the upper band carries a legend entry, so the legend lists it once.
    plt.axhline(spread_mean + spread_std, color='red', linestyle='--', label='±1 Std')
    plt.axhline(spread_mean - spread_std, color='red', linestyle='--')
    plt.legend()
    plt.show()