In [1]:
import pandas as pd
from datetime import timedelta
import numpy as np
from dateutil.relativedelta import *

### Import Data

In [2]:
# Get historical crypto market cap rank data
cryptoMarketCapRankDf = pd.read_csv('data\CryptoMarketCap.csv')
cryptoMarketCapRankDf['Date'] = pd.to_datetime(cryptoMarketCapRankDf['Date'])

In [3]:
# Get historical crypto price data
cryptoPriceDf = pd.read_csv('data\TradingViewCryptoPrice.csv', index_col=0)
cryptoPriceDf.index = pd.to_datetime(cryptoPriceDf.index)

### Formation Period

In [4]:
# Formation period paramenter
cutoffDate = '2020-01-01'
cutoffRank = 100

In [5]:
# Get formation period sample crypto list
marketCapCutoffDate = pd.to_datetime(cutoffDate) - timedelta(days=1)
sampleCrypto = cryptoMarketCapRankDf[(cryptoMarketCapRankDf['Date'] == marketCapCutoffDate) & (cryptoMarketCapRankDf['Rank'] <= cutoffRank)]
sampleCrypto = list(sampleCrypto['Symbol'])

In [6]:
# Fliter the crypto with formation period and availiablity of crpyto price
sampleCryptoPrice = cryptoPriceDf.loc[pd.to_datetime(cutoffDate) + relativedelta(months=-12): marketCapCutoffDate]
sampleCrypto = set(sampleCrypto).intersection([x[7:-3] for x in sampleCryptoPrice.columns])

# Data Cleaning
sampleCryptoPrice = sampleCryptoPrice[["CRYPTO:" + x + "USD" for x in sampleCrypto]]
sampleCryptoPrice = sampleCryptoPrice.ffill(axis=0)
sampleCryptoPrice = sampleCryptoPrice.dropna(axis=1)

print('Remaining number of crpyto: ', len(sampleCryptoPrice.columns))

Remaining number of crpyto:  46


#### Distance Method

In [7]:
# Normalized the log price
sampleCryptoLogPrice = np.log(sampleCryptoPrice)
sampleCryptoNormalizedLogPrice = (sampleCryptoLogPrice - sampleCryptoLogPrice.mean())/sampleCryptoLogPrice.std()

In [8]:
# SSD calculation for each pair
SSDResults = []
for i in range(len(sampleCryptoNormalizedLogPrice.columns)):
    for j in range(i+1, len(sampleCryptoNormalizedLogPrice.columns)):
        SSD = ((sampleCryptoNormalizedLogPrice[sampleCryptoNormalizedLogPrice.columns[i]] - sampleCryptoNormalizedLogPrice[sampleCryptoNormalizedLogPrice.columns[j]]) ** 2).sum()
        SSDResults.append([sampleCryptoNormalizedLogPrice.columns[i], sampleCryptoNormalizedLogPrice.columns[j], SSD])

In [9]:
# Print out the SSD results
SSDResults = pd.DataFrame(SSDResults, columns=['Crypto 1', 'Crypto 2', 'SSD'])
SSDResults = SSDResults.sort_values('SSD', ascending=True)
SSDResults

Unnamed: 0,Crypto 1,Crypto 2,SSD
607,CRYPTO:BCDUSD,CRYPTO:LSKUSD,19.497151
627,CRYPTO:BCDUSD,CRYPTO:OMGUSD,20.357515
152,CRYPTO:ICXUSD,CRYPTO:LSKUSD,20.667822
666,CRYPTO:XRPUSD,CRYPTO:MIOTAUSD,23.009346
823,CRYPTO:LSKUSD,CRYPTO:OMGUSD,26.456084
...,...,...,...
510,CRYPTO:TUSDUSD,CRYPTO:OKBUSD,1352.361155
56,CRYPTO:BTCUSD,CRYPTO:TUSDUSD,1365.086738
407,CRYPTO:LINKUSD,CRYPTO:TUSDUSD,1384.314325
464,CRYPTO:SNXUSD,CRYPTO:WAVESUSD,1385.484417


### Trading Period

In [None]:
spread