In [None]:
import quandl
import pandas as pd
from matplotlib import pyplot as plt
import requests
import statsmodels.tsa.stattools as ts 
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import scipy.stats as st
from numpy_ext import rolling_apply # multipler parameters
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the list of tickers, then read each ticker's OHLC file and keep only its
# close column, renamed to the ticker so the columns of `df` are symbols.
symbols = pd.read_csv("crypto_ohlc/symbols.csv")["symbol"].tolist()

close_by_symbol = [
    pd.read_csv(f"crypto_ohlc/{symbol}.csv", index_col="date", parse_dates=True)["close"].rename(symbol)
    for symbol in symbols
]

# Align all close series side by side on their date index (sorted).
df = pd.concat(close_by_symbol, axis=1, sort=True)

# A ticker listed more than once yields repeated column names; keep the first.
is_repeat = df.columns.duplicated()
df = df.loc[:, ~is_repeat]
df.head()

In [None]:
# Rebase every series to 1.0 at its own first available observation.  Dividing
# by `df.iloc[0]` directly would turn a whole column into NaN whenever that
# symbol has no price on the very first date; back-filling only the divisor
# leaves the result unchanged for columns that do start on the first row.
norm_prices = df.divide(df.bfill().iloc[0])

plt.figure(figsize = (15, 10))
plt.plot(norm_prices)
plt.xlabel('days')
plt.title('Performance of cryptocurrencies')
# Label from the columns that were actually plotted: duplicate columns were
# dropped from `df`, so the raw `symbols` list can be longer and misaligned.
plt.legend(list(norm_prices.columns))
plt.show()

# Engle-Granger cointegration test on every ordered pair (the test is not
# symmetric, so (a1, a2) and (a2, a1) are both evaluated).  Pairs whose
# p-value is below 5% are printed and collected as (a1, a2, p_value).
result_list = []
for a1 in df.columns:
    for a2 in df.columns:
        if a1 == a2:
            continue
        # Test only on dates where both series have a price; NaNs would
        # otherwise propagate into the regression inside `coint`.
        pair = df[[a1, a2]].dropna()
        test_result = ts.coint(pair[a1], pair[a2])
        p_value = test_result[1]
        if p_value < 0.05:
            print(a1 + ' and ' + a2 + ': p-value = ' + str(p_value))
            result_list.append((a1, a2, p_value))

In [None]:
import statsmodels.formula.api as sm
import statsmodels.tsa.stattools as ts

# def regression(df):
#     """
#     beta
#     """
    
#     regress = sm.ols(formula="{} ~ {}".format(df.columns[0], df.columns[1]), data = df).fit()
#     hedgeRatio = regress.params[1]
#     return hedgeRatio

# def yport(df):
#     """
#     y-hit
#     """
#     yport= df.iloc[:, 0] - df['hedgeRatio'] * df.iloc[:, 0]
#     return (ratio.iloc[-1] - ratio.mean()) / ratio.std()

def z_score(yport):
    """
    Z-score of the most recent observation relative to its own window.

    yport -- one-dimensional window of values (NumPy array or pandas Series);
             the last element is treated as the current observation.

    Returns ``(last - mean) / std`` using the container's own ``mean()`` and
    ``std()`` methods, so the ddof convention is whatever that container
    defaults to (NumPy: population std, pandas: sample std).
    """
    # Pick the last element by POSITION.  On a pandas Series `yport[-1]` is a
    # label lookup: it raises KeyError on an integer index and only works on
    # other indexes through a deprecated positional fallback.
    latest = yport.iloc[-1] if hasattr(yport, 'iloc') else yport[-1]

    return (latest - yport.mean()) / yport.std()

def upToOut(aPrice, bPrice, aPos, bPos):
    """
    Transition (1, 0): close the "up" book (long A, short B) and go flat.

    Works on the module-level ``balanceDict``.  Each leg's ``available`` is
    credited with price times position size (the B size is taken as an
    absolute value), after which each position is reduced by itself.

    aPrice, bPrice -- prices of A and B used to value the exit.
    aPos, bPos     -- not used here; present so all transition handlers share
                      one call signature.
    """
    legA = balanceDict['A']
    legB = balanceDict['B']

    # Leg A
    legA['available'] += aPrice * legA['position']
    legA['position'] -= legA['position']

    # Leg B
    legB['available'] += bPrice * abs(legB['position'])
    legB['position'] -= legB['position']
    
def upToDown(aPrice, bPrice, aPos, bPos):
    """
    Transition (1, -1): close the "up" book, then open the "down" book.

    Works on the module-level ``balanceDict``.

    aPrice, bPrice -- prices of A and B used for both the exit and the entry.
    aPos           -- signed size of the new A position.
    bPos           -- size multiplier for B; the new B position is
                      ``-(bPos * aPos)``.
    """
    legA = balanceDict['A']
    legB = balanceDict['B']

    # --- close the existing book -------------------------------------------
    legA['available'] += aPrice * legA['position']
    legA['position'] -= legA['position']
    # The B credit uses the absolute size, exactly as upToOut does.  Without
    # abs() a negative (short) B position was subtracted from `available` on
    # close, even though its entry had already debited `available`.
    legB['available'] += bPrice * abs(legB['position'])
    legB['position'] -= legB['position']

    # --- open the new book ---------------------------------------------------
    legA['available'] -= aPrice * abs(aPos)
    legA['position'] += aPos
    legB['available'] -= bPrice * abs(bPos)
    legB['position'] += -(bPos * aPos)
    
def outToUp(aPrice, bPrice, aPos, bPos):
    """
    Transition (0, 1): open the "up" book from a flat state.

    Works on the module-level ``balanceDict``.  Each leg's ``available`` is
    debited by price times the absolute size requested; A's position grows by
    ``aPos`` and B's by ``-(bPos * aPos)``.

    aPrice, bPrice -- entry prices of A and B.
    aPos           -- signed size added to the A position.
    bPos           -- size multiplier for the B leg.
    """
    legA = balanceDict['A']
    legB = balanceDict['B']

    costA = aPrice * abs(aPos)
    legA['available'] -= costA
    legA['position'] += aPos

    costB = bPrice * abs(bPos)
    legB['available'] -= costB
    legB['position'] += -(bPos * aPos)
    
def outToDown(aPrice, bPrice, aPos, bPos):
    """
    Transition (0, -1): open the "down" book from a flat state.

    Works on the module-level ``balanceDict``.  Both legs pay price times the
    absolute requested size out of ``available``; the A position changes by
    ``aPos`` and the B position by ``-(bPos * aPos)``.

    aPrice, bPrice -- entry prices of A and B.
    aPos           -- signed size added to the A position.
    bPos           -- size multiplier for the B leg.
    """
    accountA, accountB = balanceDict['A'], balanceDict['B']

    # Leg A: pay for the entry, then record the signed size.
    accountA['available'] -= aPrice * abs(aPos)
    accountA['position'] += aPos

    # Leg B: pay for the entry, then record the opposite-signed, scaled size.
    accountB['available'] -= bPrice * abs(bPos)
    accountB['position'] += -(bPos * aPos)
    
def downToOut(aPrice, bPrice, aPos, bPos):
    """
    Transition (-1, 0): close the "down" book and go flat.

    Works on the module-level ``balanceDict``.  Each leg's ``available`` is
    credited with its own price times the absolute position size, mirroring
    the ``price * abs(size)`` debit taken when the book was opened; each
    position is then reduced by itself.

    aPrice, bPrice -- prices of A and B used to value the exit.
    aPos, bPos     -- not used here; present so all transition handlers share
                      one call signature.
    """
    legA = balanceDict['A']
    legB = balanceDict['B']

    legA['available'] += aPrice * abs(legA['position'])
    legA['position'] -= legA['position']

    # Value the B leg at B's price.  It was previously multiplied by aPrice,
    # a copy-paste slip that credited B's units at A's price.
    legB['available'] += bPrice * abs(legB['position'])
    legB['position'] -= legB['position']
    
def downToUp(aPrice, bPrice, aPos, bPos):
    """
    Transition (-1, 1): close the "down" book, then open the "up" book.

    Works on the module-level ``balanceDict``.

    aPrice, bPrice -- prices of A and B used for both the exit and the entry.
    aPos           -- signed size of the new A position.
    bPos           -- size multiplier for B; the new B position is
                      ``-(bPos * aPos)``.
    """
    legA = balanceDict['A']
    legB = balanceDict['B']

    # --- close the existing book -------------------------------------------
    legA['available'] += aPrice * abs(legA['position'])
    legA['position'] -= legA['position']
    # Value the B leg at B's price (it was multiplied by aPrice before, a
    # copy-paste slip) and by absolute size, mirroring the entry debit.
    legB['available'] += bPrice * abs(legB['position'])
    legB['position'] -= legB['position']

    # --- open the new book ---------------------------------------------------
    legA['available'] -= aPrice * abs(aPos)
    legA['position'] += aPos
    legB['available'] -= bPrice * abs(bPos)
    legB['position'] += -(bPos * aPos)


# Dispatch table for position changes.  The key is the pair
# (previous status, new status); the value is the handler that applies that
# transition to `balanceDict`.  Same-status pairs are deliberately absent.
strategy = {
    # leaving the "up" state
    (1, 0): upToOut,
    (1, -1): upToDown,
    # leaving the flat state
    (0, 1): outToUp,
    (0, -1): outToDown,
    # leaving the "down" state
    (-1, 0): downToOut,
    (-1, 1): downToUp,
}

In [None]:
# import statsmodels.formula.api as sm
# import statsmodels.tsa.stattools as ts

# for i in result_list:
# Pair under test.  A is the dependent variable of the rolling regression and
# B the regressor, so the spread is  A - hedgeRatio * B.
A_symbol, B_symbol = 'BTCUSDT', "ETHUSDT"

# mergin = 1000
pastStatus = 0  # state carried from the previous row: 1, -1 or 0 (flat)
aPastOrderPrice = None
bPastOrderPrice = None
lookback = 20  # rows in the rolling regression window and in the z-score window

balanceDict = {'A':{'available': 1000000, 'position':0}, 'B':{'available': 1000000, 'position':0}}
# Combined starting cash, derived from balanceDict so the two cannot drift apart.
initialBalance = balanceDict['A']['available'] + balanceDict['B']['available']
entryZscore = 1
exitZscore = 0
miniSpread = 0.01
# Explicit copy: columns are added below, which must not write through to
# (or raise chained-assignment warnings about) the shared `df`.
df1 = df[[A_symbol, B_symbol]].copy()

hedgeRatio = np.full(df1.shape[0], 0.0)

# Rolling OLS of A on B over the `lookback` rows ending at row t-1; the slope
# is stored on row t-1.  The range runs to len + 1 so that the final row also
# receives a ratio (stopping at len left it at 0.0, which made the last spread
# equal to the raw A price).  Rows before lookback-1 keep the 0.0 placeholder.
for t in range(lookback, len(hedgeRatio) + 1):
    regress_results = sm.ols(formula="{} ~ {}".format(A_symbol, B_symbol), data=df1.iloc[(t - lookback):t]).fit() # Note this can deal with NaN in top row
    # params is labelled (Intercept, <B_symbol>); take the slope by position.
    hedgeRatio[t - 1] = regress_results.params.iloc[1] ## beta1

df1['hedgeRatio'] = hedgeRatio
df1['yport'] = df1[A_symbol] - df1['hedgeRatio'] * df1[B_symbol]
df1['zscore'] = rolling_apply(z_score, lookback, df1['yport'])

# df1['yport'].plot()

statusList = []
pl = []
date, A_available, A_position, B_available, B_position, init, Balance , A_price, B_price= [], [], [], [], [], [], [], [], []
for index, row in df1.iterrows():
    zscoreNow = row['zscore']

    # Decide the target state for this row.  A NaN z-score fails every
    # comparison and therefore falls through to 0 (flat).
    # NOTE(review): the long side exits as soon as z > -entryZscore, while the
    # short side exits at z < exitZscore.  This asymmetry is preserved from
    # the original rule; confirm whether -exitZscore was intended.
    if (pastStatus == 1 and zscoreNow > -entryZscore) or (pastStatus == -1 and zscoreNow < exitZscore):
        currStatus = 0
    elif zscoreNow < -entryZscore:
        currStatus = 1
    elif (pastStatus == -1 and zscoreNow > exitZscore) or zscoreNow > entryZscore:
        currStatus = -1
    else:
        currStatus = 0

    con = (pastStatus, currStatus)
    if pastStatus != currStatus:
        # The new status doubles as the signed A size; the row's hedge ratio
        # scales the B leg.
        strategy[con](row[A_symbol], row[B_symbol], currStatus, row['hedgeRatio'])
        aPastOrderPrice, bPastOrderPrice = row[A_symbol], row[B_symbol]
    pastStatus = currStatus

    # Cash only: open positions are not marked to market in this balance.
    cashNow = balanceDict['A']['available'] + balanceDict['B']['available']
    pl.append(cashNow)
    statusList.append(currStatus)

    date.append(index)
    A_price.append(row[A_symbol])
    B_price.append(row[B_symbol])
    A_available.append(balanceDict['A']['available'])
    A_position.append(balanceDict['A']['position'])
    B_available.append(balanceDict['B']['available'])
    B_position.append(balanceDict['B']['position'])
    init.append(initialBalance)
    Balance.append(cashNow)
    print('balanceDict:', balanceDict)

# One row per bar; column names and order match the original frame
# (the first column is named 'data' and holds the bar dates).
history = pd.DataFrame({
    'data': date,
    'A_available': A_available,
    'A_position': A_position,
    'A_price': A_price,
    'B_available': B_available,
    'B_position': B_position,
    'B_price': B_price,
    'init': init,
    'balance': Balance,
})

df1['statusList'] = statusList
df1['balance'] = pl
ret = df1['balance'] / initialBalance
# if ret[-1] > 1:
# Positional access to the last value; `ret[-1]` is a label lookup on a Series.
print('[pair trade]{}-{} {}%'.format(A_symbol, B_symbol, ret.iloc[-1]*100))
#     ret.plot()