In [2]:
import yfinance as yf
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
import warnings

# Silence every warning category so the cell output stays uncluttered.
warnings.filterwarnings(action="ignore")

# ------------------------
# Candidate ETF pairs to test for cointegration. These were entered by hand
# from the earlier correlation results; each entry is (first, second) ticker
# and the order inside a pair is kept as given.
pairs = [
    ("XLB", "XLI"),
    ("XLI", "XLF"),
    ("XLK", "XLY"),
    ("XLY", "XLI"),
    ("XLB", "XLF"),
    ("XLY", "XLF"),
    ("XLY", "XLB"),
    ("XLI", "XLK"),
    ("XLV", "XLK"),
]

# ------------------------
# Cointegration Test Loop
#
# Runs statsmodels' `coint` test on every pair in `pairs` and collects one
# dict per tested pair in `results` (keys: 'ETF', 'Pair',
# 'Cointegration p-value').

results = []

# Most tickers appear in several pairs, so fetch each unique symbol once in a
# single request instead of re-downloading both legs for every pair.
tickers = sorted({ticker for pair in pairs for ticker in pair})

if tickers:
    raw = yf.download(tickers, start="2015-07-22", end="2020-07-22", auto_adjust=False)
    # Use Adjusted Close if available, otherwise Close
    prices = raw['Adj Close'] if 'Adj Close' in raw.columns else raw['Close']
else:
    # Nothing to test; keep an empty frame so the loop below is a no-op.
    prices = pd.DataFrame()

for etf1, etf2 in pairs:
    # Proceed only if both ETF columns came back from the download.
    # Skips are reported with print() because warnings are globally ignored
    # in this file and would otherwise be invisible.
    if etf1 not in prices.columns or etf2 not in prices.columns:
        print(f"Skipping {etf1}/{etf2}: price data missing from download")
        continue

    # Keep only the dates on which both legs have a price so the two series
    # passed to the test are aligned.
    pair_prices = prices[[etf1, etf2]].dropna()
    if pair_prices.empty:
        print(f"Skipping {etf1}/{etf2}: no overlapping price data")
        continue

    # coint returns (test statistic, p-value, critical values); only the
    # p-value is reported.
    _, pvalue, _ = coint(pair_prices[etf1], pair_prices[etf2])
    results.append({
        'ETF': etf1,
        'Pair': etf2,
        'Cointegration p-value': round(pvalue, 4)
    })

# ------------------------
# Results DataFrame
#
# Builds a table of the collected results, ordered from the smallest p-value
# to the largest, and prints it.

# Name the columns explicitly: if no pair produced a result, a frame built
# from an empty list has no columns and sort_values() would raise KeyError.
result_columns = ['ETF', 'Pair', 'Cointegration p-value']
results_df = (
    pd.DataFrame(results, columns=result_columns)
    .sort_values('Cointegration p-value')
    .reset_index(drop=True)
)

print("Cointegration Test Results:")
print(results_df)

# Optional — Save Results
# results_df.to_csv('cointegration_test_results.csv', index=False)


[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed


Cointegration Test Results:
   ETF Pair  Cointegration p-value
0  XLV  XLK                 0.0069
1  XLI  XLF                 0.0160
2  XLB  XLI                 0.2573
3  XLK  XLY                 0.7830
4  XLI  XLK                 0.8027
5  XLB  XLF                 0.8136
6  XLY  XLB                 0.9248
7  XLY  XLI                 0.9932
8  XLY  XLF                 1.0000
