# Cointegration. Simple Strategies

In [1]:
import numpy as np
import pandas as pd

import scipy as sp
import scipy.stats as sps
import scipy.optimize as spop

import matplotlib.pyplot as plt

In [33]:
bk = pd.read_csv('../data/BK_2002-01-01_2022-12-31_1d.csv', index_col=0, parse_dates=True)
bac = pd.read_csv('../data/BAC_2002-01-01_2022-12-31_1d.csv', index_col=0, parse_dates=True)

In [34]:
bk = bk['Close'] # coint[0]
bac = bac['Close'] # coint[1]

In [35]:
stock = []
naive1 = []
naive2 = []
opt1 = []
opt2 = []

In [36]:
df = pd.DataFrame({'bk': bk, 'bac': bac})
df['bk_returns'] = np.log(df['bk'].div(df['bk'].shift(1))) # coint[2]
df['bac_returns'] = np.log(df['bac'].div(df['bac'].shift(1))) # coint[3]
df = df.iloc[1:]
df

Unnamed: 0_level_0,bk,bac,bk_returns,bac_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2002-01-02,26.646786,18.864384,0.004647,0.000159
2002-01-03,27.238337,18.843409,0.021957,-0.001113
2002-01-04,27.699894,19.137033,0.016803,0.015462
2002-01-07,28.571007,18.909319,0.030964,-0.011971
2002-01-08,28.167946,18.606695,-0.014208,-0.016133
...,...,...,...,...
2022-12-23,44.312298,32.260765,0.003815,0.002467
2022-12-27,44.481010,32.320374,0.003800,0.001846
2022-12-28,44.292450,32.558830,-0.004248,0.007351
2022-12-29,45.314655,32.926445,0.022816,0.011228


In [37]:
slope, intercept, rvalue, pvalue, stderr = sps.linregress(df['bac'], df['bk'])
alpha, beta = intercept, slope

In [38]:
df['for_naive'] = df['bk'] - (df['bac'] * beta + alpha) # coint[4]

In [39]:
df

Unnamed: 0_level_0,bk,bac,bk_returns,bac_returns,for_naive
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2002-01-02,26.646786,18.864384,0.004647,0.000159,-1.590183
2002-01-03,27.238337,18.843409,0.021957,-0.001113,-0.987491
2002-01-04,27.699894,19.137033,0.016803,0.015462,-0.681895
2002-01-07,28.571007,18.909319,0.030964,-0.011971,0.310170
2002-01-08,28.167946,18.606695,-0.014208,-0.016133,0.067850
...,...,...,...,...,...
2022-12-23,44.312298,32.260765,0.003815,0.002467,8.959743
2022-12-27,44.481010,32.320374,0.003800,0.001846,9.096795
2022-12-28,44.292450,32.558830,-0.004248,0.007351,8.781576
2022-12-29,45.314655,32.926445,0.022816,0.011228,9.608520


In [40]:
slope, intercept, rvalue, pvalue, stderr = sps.linregress(np.array(df['for_naive'][:-1]),
                                                          np.array(df['for_naive'][1:]) - np.array(df['for_naive'][:-1]))

In [41]:
df['without_shorts'] = df['bk_returns']*(df['for_naive']<0) + df['bac_returns']*(df['for_naive']>0) # coint[5]
df['with_shorts'] = -df['bk_returns']*np.sign(df['for_naive']) + df['bac_returns']*np.sign(df['for_naive']) # coint[6]

In [42]:
df

Unnamed: 0_level_0,bk,bac,bk_returns,bac_returns,for_naive,without_shorts,with_shorts
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2002-01-02,26.646786,18.864384,0.004647,0.000159,-1.590183,0.004647,0.004488
2002-01-03,27.238337,18.843409,0.021957,-0.001113,-0.987491,0.021957,0.023069
2002-01-04,27.699894,19.137033,0.016803,0.015462,-0.681895,0.016803,0.001341
2002-01-07,28.571007,18.909319,0.030964,-0.011971,0.310170,-0.011971,-0.042934
2002-01-08,28.167946,18.606695,-0.014208,-0.016133,0.067850,-0.016133,-0.001926
...,...,...,...,...,...,...,...
2022-12-23,44.312298,32.260765,0.003815,0.002467,8.959743,0.002467,-0.001348
2022-12-27,44.481010,32.320374,0.003800,0.001846,9.096795,0.001846,-0.001954
2022-12-28,44.292450,32.558830,-0.004248,0.007351,8.781576,0.007351,0.011599
2022-12-29,45.314655,32.926445,0.022816,0.011228,9.608520,0.011228,-0.011589
