In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

# Input location: <data_dir>/<exchange>/all-1min.h5
data_dir = 'data'
exchange = 'test'

# Load the frame and discard every row before the first non-missing EOS/USDT close.
df_1m = pd.read_hdf(os.path.join(data_dir, exchange, 'all-1min.h5'))
first_idx = df_1m['close_test_EOS_USDT'].first_valid_index()
df_1m = df_1m.loc[first_idx:]
# Keep only the columns whose name contains "close" or "volume".
df_1m = df_1m.filter(regex=r'(close|volume).*')
# Put the rows on a regular 1-minute grid, forward-filling the slots that the
# resampling introduces. Resampler.ffill() is the supported spelling of the
# deprecated Resampler.fillna(method='ffill').
df_1m = df_1m.resample("1Min").ffill()

# Overview plot: one subplot per column, using the first row of each 15-minute bucket.
df_1m.resample("15Min").first().plot(subplots=True, figsize=(10, 10))

In [None]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import pickle

def orthogonal_projection(a, b):
    """Project ``a`` orthogonally onto the direction of the vector ``b``.

    Parameters
    ----------
    a : 1-D or 2-D array-like, pandas.Series or pandas.DataFrame
        A single vector, or a matrix whose rows are each projected onto ``b``.
    b : 1-D array-like
        Direction to project onto; it is converted with ``np.array``.

    Returns
    -------
    Same shape as ``a``. A DataFrame input yields a DataFrame with the same
    index and columns, a Series input yields a Series with the same index,
    anything else yields a numpy array.

    Raises
    ------
    AssertionError
        If ``a`` is neither 1- nor 2-dimensional.
    """
    b = np.array(b)
    dims = len(np.shape(a))
    assert dims == 1 or dims == 2
    # Compute on the raw values: for a DataFrame, ``a @ b`` would return a
    # Series, and the ``[:, np.newaxis]`` broadcast below is multi-dimensional
    # indexing, which pandas >= 2.0 no longer accepts on a Series.
    values = np.asarray(a)
    # Scalar coefficient (one per row in the 2-D case): <a, b> / <b, b>.
    s = values @ b / np.sum(b * b)
    if dims == 2:
        # Scale b by each row's coefficient -> one projected row per input row.
        result = b * s[:, np.newaxis]
        if isinstance(a, pd.DataFrame):
            return pd.DataFrame(result, index=a.index, columns=a.columns)
        return result
    result = s * b
    if isinstance(a, pd.Series):
        return pd.Series(result, index=a.index)
    return result

def hyperplane_projection(a, b):
    """Return ``a`` with its orthogonal projection onto ``b`` subtracted.

    ``a`` and ``b`` are passed unchanged to ``orthogonal_projection``; the
    result is whatever ``a - <that projection>`` evaluates to.
    """
    along_b = orthogonal_projection(a, b)
    return a - along_b

# Close columns only, renamed to the last eight characters of their name
# (e.g. 'close_test_EOS_USDT' -> 'EOS_USDT').
# NOTE(review): this assumes every pair name is exactly eight characters long.
m1 = df_1m.filter(regex=r'close.*')
m1 = m1.rename(columns=lambda x: x[-8:])
m1 = m1.loc['20190101':]
m1_mean = m1.mean()
# Relative deviation of each price from its own mean.
m1_norm = m1 / m1_mean - 1

# Johansen test with no deterministic term (det_order=-1) and one lagged difference.
c = coint_johansen(m1_norm, det_order=-1, k_ar_diff=1)

# Keep the eigenvectors whose trace statistic (lr1) and maximum-eigenvalue
# statistic (lr2) both exceed column 1 of their critical-value tables.
significant_results = (c.lr1 > c.cvt[:, 1]) & (c.lr2 > c.cvm[:, 1])
print(significant_results)
# One row per significant eigenvector, one column per price series.
A = pd.DataFrame(c.evec[:, significant_results].T, columns=m1.columns)
residuals = [orthogonal_projection(m1_norm, vec) for vec in A.values]

# Outer product of the mean prices, used to rescale covariances of the
# normalised data back to price units. Built from the raw values because
# indexing a Series with [None, :] is not supported by pandas >= 2.0.
mean_outer = np.outer(m1_mean.values, m1_mean.values)
price_var_diag = np.diag(m1.var() / 4)
# Add absolute price variance to residual covariance to reduce overconfidence. Would be nice to give this a more
# rigorous treatment
covs = [rs.cov() * mean_outer + price_var_diag for rs in residuals]
print(covs)

# Combined vector: the sum of all significant eigenvectors.
x = A.sum()
cov_x = orthogonal_projection(m1_norm, x).cov() * mean_outer

# Persist the results; the context manager closes the file even if dump() fails.
with open('coint.p', 'wb') as coint_file:
    pickle.dump({
        'cointegrated_vectors': A,
        'mean_prices': m1_mean,
        'most_recent_data': m1.index[-1],
        'residual_covariances': covs,
        'combined_vector': x,
        'combined_covariance': cov_x
    }, coint_file)

# Normalised prices combined with the summed vector, then with every eigenvector.
C = m1_norm @ x
C.plot()
C2 = m1_norm @ c.evec
C2.plot()

# C.plot(figsize=(16,10))
# C.plot(subplots=True, figsize=(16,16))
# print(x.lr1)
# print(x.cvt)
# print(x.evec)
# print(x.lr2)
# print(x.cvm)
# print(np.cov(x.r0t, rowvar=False))
# print(np.cov(x.rkt, rowvar=False))
# print(np.linalg.norm(x.r0t, axis=1))
# print(x.rkt.sum(axis=0))

In [None]:
# Two-series example: the BTC_USDT and ETH_USDT columns of m1, taking the
# first row of every 15-minute bucket.
pair_closes = m1.filter(regex=r'BTC_USDT|ETH_USDT')
m2 = pair_closes.resample("15Min").first()
m2_center = m2.mean()
m2_norm = m2 - m2_center

c = coint_johansen(m2_norm, det_order=-1, k_ar_diff=1)
# Mean-centred prices combined with each column of c.evec.
C = m2_norm @ c.evec
C.plot()
# An entry is flagged when both lr1 and lr2 exceed column 1 of their
# critical-value tables.
trace_flags = c.lr1 > c.cvt[:, 1]
maxeig_flags = c.lr2 > c.cvm[:, 1]
significant_results = trace_flags * maxeig_flags
print(significant_results)
# C.plot(figsize=(16,10))
# C.plot(subplots=True, figsize=(16,16))
# print(x.lr1)
# print(x.cvt)
# print(x.lr2)
# print(x.cvm)
# print(x.r0t.sum(axis=0))
# print(x.rkt.sum(axis=0))

# Scatter of the raw prices ...
m2.plot.scatter(x='BTC_USDT', y='ETH_USDT')
# ... and of the prices after removing the component along `removed_direction`
# and adding the means back.
# NOTE(review): `m2_norm @ c.evec` above treats the columns of c.evec as the
# vectors, but axis=0 adds up the entries inside each column instead of adding
# the columns together (which would be axis=1) - confirm this is intended.
removed_direction = np.sum(c.evec, axis=0)
m2_ = hyperplane_projection(m2_norm, removed_direction) + m2_center
m2_.plot.scatter(x='BTC_USDT', y='ETH_USDT')