In [14]:
import yfinance as yf
import pandas as pd
from statsmodels.tsa.stattools import coint
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm

tickers = ['XLB', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLV', 'XLY', 'XLU']
prices = yf.download(tickers, start = '2015-07-22', end = '2020-07-22')['Close']

returns = prices.pct_change().dropna()

correlation_matrix = returns.corr()
correlation_matrix = correlation_matrix.rename_axis(None).rename_axis(None, axis = 1)
correlation_matrix = correlation_matrix.stack().reset_index()
correlation_matrix.columns = ['ETF', 'Pair', 'Correlation']

correlation_matrix =  correlation_matrix[correlation_matrix['ETF'] != correlation_matrix['Pair']]
correlation_matrix = correlation_matrix.sort_values(by = 'Correlation', ascending = False)
correlation_matrix = correlation_matrix.drop_duplicates('Correlation')

potential_pairs = correlation_matrix[correlation_matrix['Correlation'] > 0.8]

potential_pairs

  prices = yf.download(tickers, start = '2015-07-22', end = '2020-07-22')['Close']
[*********************100%***********************]  9 of 9 completed


Unnamed: 0,ETF,Pair,Correlation
3,XLB,XLI,0.897353
29,XLI,XLF,0.895759
44,XLK,XLY,0.892242
75,XLY,XLI,0.863158
2,XLB,XLF,0.845627
74,XLY,XLF,0.828372
72,XLY,XLB,0.820797
31,XLI,XLK,0.816501
67,XLV,XLK,0.805193


In [15]:
tickers = prices.columns
cointegrated_pairs = []

candidate_pairs = []

for i in range(len(tickers)):
    for j in range(i + 1, len(tickers)):
        candidate_pairs.append([tickers[i], tickers[j]])

for i, j in candidate_pairs:
        etf1 = i
        etf2 = j

        score, pvalue, _ = coint(prices[etf1], prices[etf2])

        if pvalue < 0.05:
                cointegrated_pairs.append((etf1, etf2, pvalue))
                
cointegrated_pairs_df = pd.DataFrame(cointegrated_pairs, columns = ['ETF', 'Pair', 'P-Value'])
cointegrated_pairs_df = cointegrated_pairs_df.sort_values('P-Value').reset_index(drop=True)

print("Cointegration Test Results:")
cointegrated_pairs_df

Cointegration Test Results:


Unnamed: 0,ETF,Pair,P-Value
0,XLP,XLU,0.006064
1,XLK,XLV,0.013148
2,XLF,XLI,0.019971


In [16]:
results = []

def zscore_calc(series):
    return (series - series.mean()) / series.std()

def adf_test(series):
    test_res = adfuller(series)
    return {'stat': test_res[0], 'p-value': test_res[1]}

def hedge_ratio_calc(series1, series2):
    x = sm.add_constant(series2)
    model = sm.OLS(series2, x).fit()

    return model.params[1]

for etf1, etf2 in candidate_pairs:
    series1 = prices[etf1]
    series2 = prices[etf2]

    hedge_ratio = hedge_ratio_calc(series1, series2)

    spread = series1 - (hedge_ratio * series2)

    zscore_spread = zscore_calc(spread)

    adf_res = adf_test(spread)

    results.append(
        {'ETF1': etf1,
         'ETF2': etf2,
         'adf_value': adf_res['stat'],
         'p-value': adf_res['p-value'],
         'mean': zscore_spread.mean(),
         'std': zscore_spread.std()}
    )

results = pd.DataFrame(results)
results = results.sort_values('p-value', ascending = True)
results


  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]
  return model.params[1]


Unnamed: 0,ETF1,ETF2,adf_value,p-value,mean,std
30,XLP,XLU,-2.706435,0.072931,1.805986e-16,1.0
5,XLB,XLU,-2.37032,0.150289,1.35449e-16,1.0
4,XLB,XLP,-2.24942,0.1888,-2.2574830000000002e-17,1.0
23,XLI,XLU,-2.2258,0.197015,0.0,1.0
15,XLF,XLI,-2.076223,0.254182,-5.869455e-16,1.0
2,XLB,XLI,-2.065635,0.258556,1.805986e-16,1.0
22,XLI,XLP,-1.935541,0.315531,-2.257483e-16,1.0
29,XLK,XLY,-1.876377,0.343254,1.670537e-15,1.0
35,XLV,XLY,-1.873351,0.344699,4.514966e-16,1.0
1,XLB,XLF,-1.463256,0.551604,1.173891e-15,1.0
