In [37]:
import pandas as pd
from datetime import timedelta
import numpy as np
from dateutil.relativedelta import *
from collections import defaultdict

### Import Data

In [38]:
# Get historical crypto market cap rank data
cryptoMarketCapRankDf = pd.read_csv('data\CryptoMarketCap.csv')
cryptoMarketCapRankDf['Date'] = pd.to_datetime(cryptoMarketCapRankDf['Date'])

In [39]:
# Get historical crypto price data
cryptoPriceDf = pd.read_csv('data\TradingViewCryptoPrice.csv', index_col=0)
cryptoPriceDf.index = pd.to_datetime(cryptoPriceDf.index)

### Formation Period

In [40]:
# Formation period paramenter
cutoffDate = '2020-03-01'
lookback =365
forward = 120
cutoffRank = 100

In [41]:
def marketCapCryptoSelection(cryptoMarketCapRankDf, cutoffDate, cutoffRank):
    # Get formation period sample crypto list
    marketCapCutoffDate = pd.to_datetime(cutoffDate) - timedelta(days=1)
    sampleCrypto = cryptoMarketCapRankDf[(cryptoMarketCapRankDf['Date'] == marketCapCutoffDate) & (cryptoMarketCapRankDf['Rank'] <= cutoffRank)]
    sampleCrypto = list(sampleCrypto['Symbol'])
    return sampleCrypto

In [42]:
def cryptoPriceCleaning(cryptoPriceDf, sampleCrypto, cutoffDate, lookback):
    # Fliter the crypto with formation period
    cutoffRowIdx = cryptoPriceDf.index.get_loc(cutoffDate)
    if cutoffRowIdx < lookback:
        # if there is not enough got the whole lookback period, just get all the availiable data
        sampleCryptoPrice = cryptoPriceDf.iloc[:cutoffRowIdx]
    else:
        sampleCryptoPrice = cryptoPriceDf.iloc[cutoffRowIdx-lookback:cutoffRowIdx]
    
    # Filter based on the availiablity of crpyto price
    sampleCrypto = set(sampleCrypto).intersection([x[7:-3] for x in sampleCryptoPrice.columns])

    # Data Cleaning
    sampleCryptoPrice = sampleCryptoPrice[["CRYPTO:" + x + "USD" for x in sampleCrypto]]
    sampleCryptoPrice = sampleCryptoPrice.ffill(axis=0)
    sampleCryptoPrice = sampleCryptoPrice.dropna(axis=1)

    print('Remaining number of crpyto: ', len(sampleCryptoPrice.columns))

    return sampleCryptoPrice

In [43]:
sampleCrypto = marketCapCryptoSelection(cryptoMarketCapRankDf, cutoffDate, cutoffRank)

In [44]:
sampleCryptoPrice = cryptoPriceCleaning(cryptoPriceDf, sampleCrypto, cutoffDate, lookback)

Remaining number of crpyto:  47


#### Distance Method

In [45]:
def distanceMethodSelection(sampleCryptoPrice):
    # Normalized the log price
    sampleCryptoLogPrice = np.log(sampleCryptoPrice)
    sampleCryptoNormalizedLogPrice = (sampleCryptoLogPrice - sampleCryptoLogPrice.mean())/sampleCryptoLogPrice.std()

    # SSD calculation for each pair
    SSDResults = []
    for i in range(len(sampleCryptoNormalizedLogPrice.columns)):
        for j in range(i+1, len(sampleCryptoNormalizedLogPrice.columns)):
            SSD = ((sampleCryptoNormalizedLogPrice[sampleCryptoNormalizedLogPrice.columns[i]] - sampleCryptoNormalizedLogPrice[sampleCryptoNormalizedLogPrice.columns[j]]) ** 2).sum()
            SSDResults.append([sampleCryptoNormalizedLogPrice.columns[i], sampleCryptoNormalizedLogPrice.columns[j], SSD])
    
    # Return the SSD results
    SSDResults = pd.DataFrame(SSDResults, columns=['Crypto 1', 'Crypto 2', 'SSD'])
    SSDResults = SSDResults.sort_values('SSD', ascending=True)
    SSDResults = SSDResults.reindex()
    
    return SSDResults

In [46]:
SSDResults = distanceMethodSelection(sampleCryptoPrice)

### Trading Period

In [47]:
noTradingPairs = 50
spreadThreshold = 2.5
closeThreshold = 0 # same sign as spreadThreshold

In [48]:
def distanceMethodTrading(cryptoPriceDf, sampleCryptoPrice, SSDResults, cutoffDate, forward, noTradingPairs, spreadThreshold, closeThreshold, constantModel=True):
    
    # initialize the records dataframe
    TransactionRecords = pd.DataFrame(columns=['Date', 'Crypto', 'Long/Short', 'Price', "Open/Close", "Transaction pair", "Round Trip No.", "Pair No."])
    SpreadRecords = pd.DataFrame()
    PairNo = 0

    # get trading crpyto price
    cutoffRowIdx = cryptoPriceDf.index.get_loc(cutoffDate)
    if cutoffRowIdx + forward > len(cryptoPriceDf):
        # if there is not enough got the whole forward period, just get all the availiable data
        tradingCryptoPrice = cryptoPriceDf.iloc[cutoffRowIdx:]
    else:
        tradingCryptoPrice = cryptoPriceDf.iloc[cutoffRowIdx:cutoffRowIdx+forward]

    logSampleCrpytoPrice = np.log(sampleCryptoPrice)
    logTradingCryptoPrice = np.log(tradingCryptoPrice)

    for i in range(noTradingPairs):
        ############ Trading Signal Calculation ############

        # Parameter
        crypto1 = SSDResults.loc[i, 'Crypto 1']
        crypto2 = SSDResults.loc[i, 'Crypto 2']

        # Calculate normalized spread
        if constantModel:
            # constant model parameter
            crypto1Mean = logSampleCrpytoPrice.mean()[crypto1]
            crypto2Mean = logSampleCrpytoPrice.mean()[crypto2]
            crypto1SD = logSampleCrpytoPrice.std()[crypto1]
            crypto2SD = logSampleCrpytoPrice.std()[crypto2]

            # calculate spread
            normalizedCrypto1LogPrice = (logTradingCryptoPrice[crypto1] - crypto1Mean)/crypto1SD
            normalizedCrypto2LogPrice = (logTradingCryptoPrice[crypto2] - crypto2Mean)/crypto2SD
            Spread = normalizedCrypto1LogPrice - normalizedCrypto2LogPrice

        else:
            # Expending window model
            logCrypto1Price = pd.concat([logSampleCrpytoPrice[crypto1],logTradingCryptoPrice[crypto1]], axis=0)
            logCrypto2Price = pd.concat([logSampleCrpytoPrice[crypto2],logTradingCryptoPrice[crypto2]], axis=0)
            crypto1Mean = logCrypto1Price.expanding(min_periods=1).mean() # or df.rolling(window=len(df), min_periods=1).mean()
            crypto2Mean = logCrypto2Price.expanding(min_periods=1).mean()
            crypto1SD = logCrypto1Price.expanding(min_periods=1).std()
            crypto2SD = logCrypto2Price.expanding(min_periods=1).std()

            # calculate spread
            normalizedCrypto1LogPrice = (logCrypto1Price- crypto1Mean)/crypto1SD
            normalizedCrypto2LogPrice = (logCrypto2Price - crypto2Mean)/crypto2SD
            Spread = normalizedCrypto1LogPrice - normalizedCrypto2LogPrice
            Spread = Spread.loc[cutoffDate:]

        ############ Trading Execution ############
        # check if there is any trading opportunity
        if len(Spread[(Spread >= spreadThreshold) | (Spread <= -spreadThreshold)]) > 0:
            
            # save the spread records
            Spread.name = crypto1 + " " + crypto2
            SpreadRecords = SpreadRecords.merge(Spread, how='outer', left_index=True, right_index=True)

            # initialize before the transaction
            PairNo += 1
            Spread.name = 'spread'
            Spread = Spread.to_frame()
            Opened = False
            long = None
            # Spread['position'] = 0
            RoundTripNo = 1

            for date in Spread.index:

                # When the trading date is not the last day
                if date != Spread.index[-1]:
                    # If there is an open position before that date
                    if Opened:
                        # Close postion if the spread cross closeThreshold
                        if not long and Spread.loc[date, 'spread'] <= closeThreshold:
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Long",  tradingCryptoPrice.loc[date, crypto1], "Close", crypto2, RoundTripNo, PairNo]
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Short",  tradingCryptoPrice.loc[date, crypto2], "Close", crypto1, RoundTripNo, PairNo]
                            RoundTripNo += 1
                            long = None
                            Opened = False
                            
                        elif long and Spread.loc[date, 'spread'] >= -closeThreshold:
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Short",  tradingCryptoPrice.loc[date, crypto1], "Close", crypto2, RoundTripNo, PairNo]
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Long",  tradingCryptoPrice.loc[date, crypto2], "Close", crypto1, RoundTripNo, PairNo]
                            RoundTripNo += 1
                            long = None
                            Opened = False
                            
                    
                    # Check again if there is any position, if no and fulfil the criteria, then open position 
                    if not Opened:
                        # short crypto 1 and long crypto 2 if spread >= spreadThreshold
                        if Spread.loc[date, 'spread'] >= spreadThreshold:
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Short",  tradingCryptoPrice.loc[date, crypto1], "Open", crypto2, RoundTripNo, PairNo]
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Long",  tradingCryptoPrice.loc[date, crypto2], "Open", crypto1, RoundTripNo, PairNo]
                            long = False
                            Opened = True
                            
                        # long crypto 1 and short crypto 2 if spread <= -spreadThreshold
                        elif Spread.loc[date, 'spread'] <= -spreadThreshold:
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Long",  tradingCryptoPrice.loc[date, crypto1], "Open", crypto2, RoundTripNo, PairNo]
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Short",  tradingCryptoPrice.loc[date, crypto2], "Open", crypto1, RoundTripNo, PairNo]
                            long = True
                            Opened = True
                            

                # For last day closing position
                else:
                    if Opened:
                        if not long:
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Long",  tradingCryptoPrice.loc[date, crypto1], "Close", crypto2, RoundTripNo, PairNo]
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Short",  tradingCryptoPrice.loc[date, crypto2], "Close", crypto1, RoundTripNo, PairNo]
                            long = None
                            Opened = False
                            
                        else:
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Short",  tradingCryptoPrice.loc[date, crypto1], "Close", crypto2, RoundTripNo, PairNo]
                            TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Long",  tradingCryptoPrice.loc[date, crypto2], "Close", crypto1, RoundTripNo, PairNo]
                            long = None
                            Opened = False
                            
    return (TransactionRecords, SpreadRecords)

In [49]:
TransactionRecords, SpreadRecords = distanceMethodTrading(cryptoPriceDf, sampleCryptoPrice, SSDResults, cutoffDate, forward, noTradingPairs, spreadThreshold, closeThreshold, False)

### Transform transaction records to Result

In [50]:
SpreadRecords

Unnamed: 0_level_0,CRYPTO:DCRUSD CRYPTO:TUSDUSD,CRYPTO:DCRUSD CRYPTO:WAVESUSD,CRYPTO:DCRUSD CRYPTO:ENJUSD,CRYPTO:DCRUSD CRYPTO:BSVUSD,CRYPTO:DCRUSD CRYPTO:LINKUSD,CRYPTO:DCRUSD CRYPTO:XTZUSD,CRYPTO:DCRUSD CRYPTO:LSKUSD,CRYPTO:DCRUSD CRYPTO:SCUSD,CRYPTO:DCRUSD CRYPTO:ETCUSD,CRYPTO:DCRUSD CRYPTO:USDTUSD,...,CRYPTO:DCRUSD CRYPTO:KCSUSD,CRYPTO:DCRUSD CRYPTO:STEEMUSD,CRYPTO:DCRUSD CRYPTO:ZENUSD,CRYPTO:DCRUSD CRYPTO:KNCUSD,CRYPTO:DCRUSD CRYPTO:ICXUSD,CRYPTO:DCRUSD CRYPTO:THETAUSD,CRYPTO:DCRUSD CRYPTO:OKBUSD,CRYPTO:MKRUSD CRYPTO:TUSDUSD,CRYPTO:MKRUSD CRYPTO:ETHUSD,CRYPTO:MKRUSD CRYPTO:WAVESUSD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03-01,-0.267716,-0.745156,-1.229148,-2.402699,-2.407882,-3.169761,-1.461160,-1.219015,-1.960161,-0.989437,...,-1.062366,-0.646573,-2.082909,-5.573526,-1.594954,-1.546998,-3.289701,0.639128,-0.688229,0.161687
2020-03-02,-0.235560,-0.562722,-1.109797,-2.259345,-2.233407,-3.015566,-1.225077,-0.941094,-1.946560,-1.113921,...,-0.918164,-0.355066,-1.861993,-4.764283,-1.530222,-1.793253,-3.093601,0.591604,-0.753823,0.264443
2020-03-03,0.556021,-0.490286,-1.040257,-2.175742,-2.306447,-2.925349,-1.114492,-0.868531,-1.920296,-1.054791,...,-0.956454,-0.344927,-1.746592,-4.442231,-1.648709,-1.886294,-2.913755,0.954072,-0.977725,-0.092235
2020-03-04,0.033966,-0.386717,-0.835573,-1.939324,-2.156332,-3.053350,-0.925323,-0.613827,-1.559879,-0.869352,...,-0.755190,-0.251605,-1.664142,-4.305522,-1.392672,-1.740932,-2.701709,0.364303,-0.900850,-0.056380
2020-03-05,-0.044977,-0.599638,-0.949380,-1.950993,-2.169928,-3.012136,-1.010098,-0.638905,-1.630959,-1.091711,...,-0.986585,-0.309360,-1.673511,-4.296722,-1.854953,-2.144229,-2.707018,0.333703,-0.927235,-0.220959
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-24,0.184618,-0.597330,-2.070417,-1.243095,-2.051175,-2.119669,-0.812357,-1.895929,-0.856656,-0.507750,...,0.388310,-0.662393,-0.705910,-3.412668,-1.438135,-3.098449,-2.057699,0.709916,-1.077418,-0.072033
2020-06-25,0.056821,-0.733521,-2.124355,-1.383913,-2.218906,-2.215923,-0.908825,-1.861666,-0.958296,-0.672081,...,0.315338,-0.768463,-0.922695,-3.496478,-1.519258,-3.116673,-2.225597,0.565216,-1.189231,-0.225126
2020-06-26,-0.496058,-0.776907,-2.083929,-1.421715,-2.271444,-2.230895,-0.955845,-1.923301,-0.991943,-0.845497,...,0.226806,-0.876892,-1.155397,-3.578184,-1.522348,-3.129825,-2.294925,0.048727,-1.192822,-0.232122
2020-06-27,-0.770001,-0.858892,-2.132280,-1.399843,-2.343402,-2.241165,-0.996002,-1.776693,-0.873234,-0.959556,...,0.145409,-0.995696,-1.103992,-3.572305,-1.520605,-2.996020,-2.389850,-0.195675,-1.154245,-0.284566


In [51]:
TransactionRecords

Unnamed: 0,Date,Crypto,Long/Short,Price,Open/Close,Transaction pair,Round Trip No.,Pair No.
0,2020-03-13,CRYPTO:DCRUSD,Long,10.775826,Open,CRYPTO:TUSDUSD,1,1
1,2020-03-13,CRYPTO:TUSDUSD,Short,1.023590,Open,CRYPTO:DCRUSD,1,1
2,2020-04-07,CRYPTO:DCRUSD,Short,12.388623,Close,CRYPTO:TUSDUSD,1,1
3,2020-04-07,CRYPTO:TUSDUSD,Long,0.944311,Close,CRYPTO:DCRUSD,1,1
4,2020-03-12,CRYPTO:DCRUSD,Long,9.340784,Open,CRYPTO:WAVESUSD,1,2
...,...,...,...,...,...,...,...,...
83,2020-06-09,CRYPTO:ETHUSD,Long,244.100000,Close,CRYPTO:MKRUSD,1,21
84,2020-03-12,CRYPTO:MKRUSD,Long,240.446866,Open,CRYPTO:WAVESUSD,1,22
85,2020-03-12,CRYPTO:WAVESUSD,Short,0.783889,Open,CRYPTO:MKRUSD,1,22
86,2020-05-29,CRYPTO:MKRUSD,Short,488.457282,Close,CRYPTO:WAVESUSD,1,22


In [52]:
result = pd.DataFrame(columns=['Pair No.', 'Round Trip No.', 'Start Date', 'End Date', 'long crypto', 'short crypto', 'long crypto return', 'short crypto return'])
# loop each pair of transactions
for i in range(1, TransactionRecords['Pair No.'].max() + 1):
    pair = TransactionRecords[TransactionRecords['Pair No.'] == i]

    # loop each Round Trip in pair
    for j in range(1, pair['Round Trip No.'].max() + 1):
        roundTrip = pair[pair['Round Trip No.'] == j]

        returnResult = dict()

        # loop each crypto in the round trip
        for crypto in set(roundTrip['Crypto']):
            
            # prepare the specific round trip transaction record
            record = roundTrip[roundTrip['Crypto'] == crypto]
            record = record.reset_index(drop=True)

            # Calculate the return of the specific round trip
            returns = record['Price'][1] / record['Price'][0] - 1
            if record['Long/Short'][0] == 'Short':
                returns = -returns 
            
            # Insert Record
            if len(returnResult) == 0:
                returnResult['Pair No.'] = i
                returnResult['Round Trip No.'] = j
                returnResult['Start Date'] = record['Date'][0]
                returnResult['End Date'] = record['Date'][1]
            
            if record['Long/Short'][0] == 'Long':
                returnResult['long crypto'] = crypto
                returnResult['long crypto return'] = returns
            else:
                returnResult['short crypto'] = crypto
                returnResult['short crypto return'] = returns

        result.loc[len(result)] = returnResult  


In [53]:
# Remark: return can be more than -100% for the short selling position
result['Total Return'] = result['long crypto return'] + result['short crypto return']
result

Unnamed: 0,Pair No.,Round Trip No.,Start Date,End Date,long crypto,short crypto,long crypto return,short crypto return,Total Return
0,1,1,2020-03-13,2020-04-07,CRYPTO:DCRUSD,CRYPTO:TUSDUSD,0.149668,0.077452,0.22712
1,2,1,2020-03-12,2020-06-04,CRYPTO:DCRUSD,CRYPTO:WAVESUSD,0.962417,-0.439019,0.523398
2,3,1,2020-04-24,2020-06-28,CRYPTO:DCRUSD,CRYPTO:ENJUSD,0.148011,-0.196278,-0.048267
3,4,1,2020-03-12,2020-06-28,CRYPTO:DCRUSD,CRYPTO:BSVUSD,0.53421,-0.524668,0.009542
4,5,1,2020-03-08,2020-06-28,CRYPTO:DCRUSD,CRYPTO:LINKUSD,-0.123299,-0.127222,-0.250521
5,6,1,2020-03-01,2020-06-28,CRYPTO:DCRUSD,CRYPTO:XTZUSD,-0.163074,0.096168,-0.066906
6,7,1,2020-03-16,2020-06-28,CRYPTO:DCRUSD,CRYPTO:LSKUSD,0.538651,-0.405196,0.133454
7,8,1,2020-06-16,2020-06-28,CRYPTO:DCRUSD,CRYPTO:SCUSD,-0.091736,0.272547,0.180811
8,9,1,2020-03-13,2020-06-28,CRYPTO:DCRUSD,CRYPTO:ETCUSD,0.329895,-0.024416,0.305479
9,10,1,2020-03-12,2020-06-28,CRYPTO:DCRUSD,CRYPTO:USDTUSD,0.53421,0.004978,0.539188


In [54]:
result['Total Return'].mean()

0.09447403583126084

### TO-DO list:
1) create a time series chart (input pair No.) and show the pair daily return over the period and spread over the period (SpreadRecords)
2) make a function to loop the above process each two months and return both the transaction records and transform it to result
3) Price should be normalized to the first day of the trading period?
