In [60]:
import pandas as pd
from datetime import timedelta
import numpy as np
from dateutil.relativedelta import *
from collections import defaultdict

### Import Data

In [4]:
# Get historical crypto market cap rank data
cryptoMarketCapRankDf = pd.read_csv('data\CryptoMarketCap.csv')
cryptoMarketCapRankDf['Date'] = pd.to_datetime(cryptoMarketCapRankDf['Date'])

In [5]:
# Get historical crypto price data
cryptoPriceDf = pd.read_csv('data\TradingViewCryptoPrice.csv', index_col=0)
cryptoPriceDf.index = pd.to_datetime(cryptoPriceDf.index)

### Formation Period

In [6]:
# Formation period paramenter
cutoffDate = '2020-01-01'
cutoffRank = 100

In [7]:
# Get formation period sample crypto list
marketCapCutoffDate = pd.to_datetime(cutoffDate) - timedelta(days=1)
sampleCrypto = cryptoMarketCapRankDf[(cryptoMarketCapRankDf['Date'] == marketCapCutoffDate) & (cryptoMarketCapRankDf['Rank'] <= cutoffRank)]
sampleCrypto = list(sampleCrypto['Symbol'])

In [8]:
# Fliter the crypto with formation period and availiablity of crpyto price
sampleCryptoPrice = cryptoPriceDf.loc[pd.to_datetime(cutoffDate) + relativedelta(months=-12): marketCapCutoffDate]
sampleCrypto = set(sampleCrypto).intersection([x[7:-3] for x in sampleCryptoPrice.columns])

# Data Cleaning
sampleCryptoPrice = sampleCryptoPrice[["CRYPTO:" + x + "USD" for x in sampleCrypto]]
sampleCryptoPrice = sampleCryptoPrice.ffill(axis=0)
sampleCryptoPrice = sampleCryptoPrice.dropna(axis=1)

print('Remaining number of crpyto: ', len(sampleCryptoPrice.columns))

Remaining number of crpyto:  46


#### Distance Method

In [9]:
# Normalized the log price
sampleCryptoLogPrice = np.log(sampleCryptoPrice)
sampleCryptoNormalizedLogPrice = (sampleCryptoLogPrice - sampleCryptoLogPrice.mean())/sampleCryptoLogPrice.std()

In [10]:
# SSD calculation for each pair
SSDResults = []
for i in range(len(sampleCryptoNormalizedLogPrice.columns)):
    for j in range(i+1, len(sampleCryptoNormalizedLogPrice.columns)):
        SSD = ((sampleCryptoNormalizedLogPrice[sampleCryptoNormalizedLogPrice.columns[i]] - sampleCryptoNormalizedLogPrice[sampleCryptoNormalizedLogPrice.columns[j]]) ** 2).sum()
        SSDResults.append([sampleCryptoNormalizedLogPrice.columns[i], sampleCryptoNormalizedLogPrice.columns[j], SSD])

In [25]:
# Print out the SSD results
SSDResults = pd.DataFrame(SSDResults, columns=['Crypto 1', 'Crypto 2', 'SSD'])
SSDResults = SSDResults.sort_values('SSD', ascending=True)
SSDResults = SSDResults.reindex()
SSDResults

Unnamed: 0,Crypto 1,Crypto 2,SSD
0,CRYPTO:LSKUSD,CRYPTO:BCDUSD,19.497151
1,CRYPTO:OMGUSD,CRYPTO:BCDUSD,20.357515
2,CRYPTO:LSKUSD,CRYPTO:ICXUSD,20.667822
3,CRYPTO:XRPUSD,CRYPTO:MIOTAUSD,23.009346
4,CRYPTO:LSKUSD,CRYPTO:OMGUSD,26.456084
...,...,...,...
1030,CRYPTO:TUSDUSD,CRYPTO:OKBUSD,1352.361155
1031,CRYPTO:BTCUSD,CRYPTO:TUSDUSD,1365.086738
1032,CRYPTO:LINKUSD,CRYPTO:TUSDUSD,1384.314325
1033,CRYPTO:WAVESUSD,CRYPTO:SNXUSD,1385.484417


### Trading Period

In [95]:
noTradingPairs = 50

In [101]:
DistanceMethodReturns = defaultdict(list)
# get trading crpyto price
tradingCryptoPrice = cryptoPriceDf.loc[pd.to_datetime(cutoffDate): pd.to_datetime(cutoffDate) + relativedelta(days=59)]
logTradingCryptoPrice = np.log(tradingCryptoPrice)

for i in range(noTradingPairs):
    
    # parameter
    crypto1 = SSDResults.loc[i, 'Crypto 1']
    crypto2 = SSDResults.loc[i, 'Crypto 2']
    crypto1Mean = sampleCryptoLogPrice.mean()[crypto1]
    crypto2Mean = sampleCryptoLogPrice.mean()[crypto2]
    crypto1SD = sampleCryptoLogPrice.std()[crypto1]
    crypto2SD = sampleCryptoLogPrice.std()[crypto2]

    # calculate spread
    normalizedCrypto1LogPrice = (logTradingCryptoPrice[crypto1] - crypto1Mean)/crypto1SD
    normalizedCrypto2LogPrice = (logTradingCryptoPrice[crypto2] - crypto2Mean)/crypto2SD
    Spread = normalizedCrypto1LogPrice - normalizedCrypto2LogPrice

    # check if there is any trading opportunity
    if len(Spread[(Spread >= 2) | (Spread <= -2)]) > 0:

        Spread.name = 'spread'
        Spread = Spread.to_frame()
        Spread['position'] = 0

        for date in Spread.index:
            # When the trading date is not the last day
            if date != (pd.to_datetime(cutoffDate) + relativedelta(days=59)):
                # short crypto 1 and long crypto 2 if spread >= 2
                if Spread.loc[date, 'spread'] >= 2:
                    Spread.loc[date + relativedelta(days=1), 'position'] = -1
                # long crypto 1 and short crypto 2 if spread <= -2
                elif Spread.loc[date, 'spread'] <= -2:
                    Spread.loc[date + relativedelta(days=1), 'position'] = 1
                # continuous the position if the spread do not cross zero
                elif Spread.loc[date, 'position'] == -1 and Spread.loc[date, 'spread'] > 0:
                    Spread.loc[date + relativedelta(days=1), 'position'] = -1
                # continuous the position if the spread do not cross zero
                elif Spread.loc[date, 'position'] == 1 and Spread.loc[date, 'spread'] < 0:
                    Spread.loc[date + relativedelta(days=1), 'position'] = 1
        
        # Spread[crypto1] = tradingCryptoPrice[crypto1]
        # Spread[crypto2] = tradingCryptoPrice[crypto2]
        Spread[crypto1 + ' Log Return'] = (logTradingCryptoPrice[crypto1] - logTradingCryptoPrice[crypto1].shift(1)) * Spread['position']
        Spread[crypto2 + ' Log Return'] = (logTradingCryptoPrice[crypto2] - logTradingCryptoPrice[crypto2].shift(1)) * -Spread['position']
        Spread[crypto1 + ' cum Log Return'] = Spread[crypto1 + ' Log Return'].cumsum()
        Spread[crypto2 + ' cum Log Return'] = Spread[crypto2 + ' Log Return'].cumsum()
        
        # Add Transaction records [Crypto1, Crypto2, transaction date, long/short]
        # return calculation  np.exp(Spread[crypto2 + ' cum Log Return'][-1])-1
        DistanceMethodReturns.append()

