In [93]:
import pandas as pd
from datetime import timedelta
import numpy as np
from dateutil.relativedelta import *
from collections import defaultdict

### Import Data

In [94]:
# Get historical crypto market cap rank data
cryptoMarketCapRankDf = pd.read_csv('data\CryptoMarketCap.csv')
cryptoMarketCapRankDf['Date'] = pd.to_datetime(cryptoMarketCapRankDf['Date'])

In [95]:
# Get historical crypto price data
cryptoPriceDf = pd.read_csv('data\TradingViewCryptoPrice.csv', index_col=0)
cryptoPriceDf.index = pd.to_datetime(cryptoPriceDf.index)

### Formation Period

In [96]:
# Formation period paramenter
cutoffDate = '2023-01-01'
cutoffRank = 100

In [97]:
# Get formation period sample crypto list
marketCapCutoffDate = pd.to_datetime(cutoffDate) - timedelta(days=1)
sampleCrypto = cryptoMarketCapRankDf[(cryptoMarketCapRankDf['Date'] == marketCapCutoffDate) & (cryptoMarketCapRankDf['Rank'] <= cutoffRank)]
sampleCrypto = list(sampleCrypto['Symbol'])

In [98]:
# Fliter the crypto with formation period and availiablity of crpyto price
sampleCryptoPrice = cryptoPriceDf.loc[pd.to_datetime(cutoffDate) + relativedelta(months=-12): marketCapCutoffDate]
sampleCrypto = set(sampleCrypto).intersection([x[7:-3] for x in sampleCryptoPrice.columns])

# Data Cleaning
sampleCryptoPrice = sampleCryptoPrice[["CRYPTO:" + x + "USD" for x in sampleCrypto]]
sampleCryptoPrice = sampleCryptoPrice.ffill(axis=0)
sampleCryptoPrice = sampleCryptoPrice.dropna(axis=1)

print('Remaining number of crpyto: ', len(sampleCryptoPrice.columns))

Remaining number of crpyto:  82


#### Distance Method

In [99]:
# Normalized the log price
sampleCryptoLogPrice = np.log(sampleCryptoPrice)
sampleCryptoNormalizedLogPrice = (sampleCryptoLogPrice - sampleCryptoLogPrice.mean())/sampleCryptoLogPrice.std()

In [100]:
# SSD calculation for each pair
SSDResults = []
for i in range(len(sampleCryptoNormalizedLogPrice.columns)):
    for j in range(i+1, len(sampleCryptoNormalizedLogPrice.columns)):
        SSD = ((sampleCryptoNormalizedLogPrice[sampleCryptoNormalizedLogPrice.columns[i]] - sampleCryptoNormalizedLogPrice[sampleCryptoNormalizedLogPrice.columns[j]]) ** 2).sum()
        SSDResults.append([sampleCryptoNormalizedLogPrice.columns[i], sampleCryptoNormalizedLogPrice.columns[j], SSD])

In [101]:
# Print out the SSD results
SSDResults = pd.DataFrame(SSDResults, columns=['Crypto 1', 'Crypto 2', 'SSD'])
SSDResults = SSDResults.sort_values('SSD', ascending=True)
SSDResults = SSDResults.reindex()
SSDResults

Unnamed: 0,Crypto 1,Crypto 2,SSD
2932,CRYPTO:WBTCUSD,CRYPTO:BTCUSD,0.165664
999,CRYPTO:SANDUSD,CRYPTO:ENJUSD,4.186039
485,CRYPTO:XEMUSD,CRYPTO:AVAXUSD,4.266660
1159,CRYPTO:1INCHUSD,CRYPTO:DOTUSD,5.173343
1819,CRYPTO:FILUSD,CRYPTO:DOTUSD,5.276612
...,...,...,...
206,CRYPTO:XTZUSD,CRYPTO:TWTUSD,1165.157620
1271,CRYPTO:KCSUSD,CRYPTO:TWTUSD,1180.497630
2767,CRYPTO:TWTUSD,CRYPTO:SOLUSD,1187.711354
941,CRYPTO:MANAUSD,CRYPTO:TWTUSD,1188.185045


### Trading Period

In [102]:
noTradingPairs = 50
spreadThreshold = 2.5
closeThreshold = 0

In [103]:
TransactionRecords = pd.DataFrame(columns=['Date', 'Crypto', 'Long/Short', 'Price', "Open/Close", "Transaction pair", "Round Trip No.", "Pair No."])
SpreadRecords = pd.DataFrame()
PairNo = 0

# get trading crpyto price
tradingCryptoPrice = cryptoPriceDf.loc[pd.to_datetime(cutoffDate): pd.to_datetime(cutoffDate) + relativedelta(days=59)]
logTradingCryptoPrice = np.log(tradingCryptoPrice)

for i in range(noTradingPairs):
    
    # parameter
    crypto1 = SSDResults.loc[i, 'Crypto 1']
    crypto2 = SSDResults.loc[i, 'Crypto 2']
    crypto1Mean = sampleCryptoLogPrice.mean()[crypto1]
    crypto2Mean = sampleCryptoLogPrice.mean()[crypto2]
    crypto1SD = sampleCryptoLogPrice.std()[crypto1]
    crypto2SD = sampleCryptoLogPrice.std()[crypto2]

    # calculate spread
    normalizedCrypto1LogPrice = (logTradingCryptoPrice[crypto1] - crypto1Mean)/crypto1SD
    normalizedCrypto2LogPrice = (logTradingCryptoPrice[crypto2] - crypto2Mean)/crypto2SD
    Spread = normalizedCrypto1LogPrice - normalizedCrypto2LogPrice

    # check if there is any trading opportunity
    if len(Spread[(Spread >= spreadThreshold) | (Spread <= -spreadThreshold)]) > 0:
        
        # save the spread records
        Spread.name = crypto1 + " " + crypto2
        SpreadRecords = SpreadRecords.merge(Spread, how='outer', left_index=True, right_index=True)

        # initialize before the transaction
        PairNo += 1
        Spread.name = 'spread'
        Spread = Spread.to_frame()
        Spread['position'] = 0
        RoundTripNo = 1

        for date in Spread.index:
            # When the trading date is not the last day
            if date != (pd.to_datetime(cutoffDate) + relativedelta(days=59)):
                
                # continuous the position if the spread do not cross closeThreshold
                if Spread.loc[date, 'position'] == -1 and Spread.loc[date, 'spread'] > closeThreshold:
                    Spread.loc[date + relativedelta(days=1), 'position'] = -1
                
                # continuous the position if the spread do not cross closeThreshold
                elif Spread.loc[date, 'position'] == 1 and Spread.loc[date, 'spread'] < closeThreshold:
                    Spread.loc[date + relativedelta(days=1), 'position'] = 1
                
                # short crypto 1 and long crypto 2 if spread >= spreadThreshold
                elif Spread.loc[date, 'spread'] >= spreadThreshold:
                    Spread.loc[date + relativedelta(days=1), 'position'] = -1
                    # Long/Short with tomorrow open price  i.e. today close price
                    TransactionRecords.loc[len(TransactionRecords)] = [date + relativedelta(days=1), crypto1, "Short",  tradingCryptoPrice.loc[date, crypto1], "Open", crypto2, RoundTripNo, PairNo]
                    TransactionRecords.loc[len(TransactionRecords)] = [date + relativedelta(days=1), crypto2, "Long",  tradingCryptoPrice.loc[date, crypto2], "Open", crypto1, RoundTripNo, PairNo]
                
                # long crypto 1 and short crypto 2 if spread <= -spreadThreshold
                elif Spread.loc[date, 'spread'] <= -spreadThreshold:
                    Spread.loc[date + relativedelta(days=1), 'position'] = 1
                    # Long/Short with tomorrow open price  i.e. today close price
                    TransactionRecords.loc[len(TransactionRecords)] = [date + relativedelta(days=1), crypto1, "Long",  tradingCryptoPrice.loc[date, crypto1], "Open", crypto2, RoundTripNo, PairNo]
                    TransactionRecords.loc[len(TransactionRecords)] = [date + relativedelta(days=1), crypto2, "Short",  tradingCryptoPrice.loc[date, crypto2], "Open", crypto1, RoundTripNo, PairNo]
                
                # Close the position if the spread cross closeThreshold
                elif Spread.loc[date, 'position'] == -1 and Spread.loc[date, 'spread'] <= closeThreshold:
                    # Long/Short with today close price
                    TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Long",  tradingCryptoPrice.loc[date, crypto1], "Close", crypto2, RoundTripNo, PairNo]
                    TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Short",  tradingCryptoPrice.loc[date, crypto2], "Close", crypto1, RoundTripNo, PairNo]
                    RoundTripNo += 1

                elif Spread.loc[date, 'position'] == 1 and Spread.loc[date, 'spread'] >= closeThreshold:
                    # Long/Short with today close price
                    TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Short",  tradingCryptoPrice.loc[date, crypto1], "Close", crypto2, RoundTripNo, PairNo]
                    TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Long",  tradingCryptoPrice.loc[date, crypto2], "Close", crypto1, RoundTripNo, PairNo]
                    RoundTripNo += 1
            
            # For last day closing position
            else:
                if Spread.loc[date, 'position'] == -1:
                    # Long/Short with today close price
                    TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Long",  tradingCryptoPrice.loc[date, crypto1], "Close", crypto2, RoundTripNo, PairNo]
                    TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Short",  tradingCryptoPrice.loc[date, crypto2], "Close", crypto1, RoundTripNo, PairNo]
                    RoundTripNo += 1
                elif Spread.loc[date, 'position'] == 1:
                    # Long/Short with today close price
                    TransactionRecords.loc[len(TransactionRecords)] = [date, crypto1, "Short",  tradingCryptoPrice.loc[date, crypto1], "Close", crypto2, RoundTripNo, PairNo]
                    TransactionRecords.loc[len(TransactionRecords)] = [date, crypto2, "Long",  tradingCryptoPrice.loc[date, crypto2], "Close", crypto1, RoundTripNo, PairNo]
                    RoundTripNo += 1
        
        # Spread[crypto1] = tradingCryptoPrice[crypto1]
        # Spread[crypto2] = tradingCryptoPrice[crypto2]
        # Spread[crypto1 + ' Log Return'] = (logTradingCryptoPrice[crypto1] - logTradingCryptoPrice[crypto1].shift(1)) * Spread['position']
        # Spread[crypto2 + ' Log Return'] = (logTradingCryptoPrice[crypto2] - logTradingCryptoPrice[crypto2].shift(1)) * -Spread['position']
        # Spread[crypto1 + ' cum Log Return'] = Spread[crypto1 + ' Log Return'].cumsum()
        # Spread[crypto2 + ' cum Log Return'] = Spread[crypto2 + ' Log Return'].cumsum()

        # print(crypto1)
        # print(np.exp(Spread[crypto1 + ' cum Log Return'][-1])-1)
        # print(crypto2)
        # print(np.exp(Spread[crypto2 + ' cum Log Return'][-1])-1)
        
        # return calculation (short selling position incorrect if use log)

        # short sell return is normal return calculation but add a negative sign to the return



### Transform transaction records to Result

In [104]:
result = pd.DataFrame(columns=['Pair No.', 'Round Trip No.', 'Start Date', 'End Date', 'crypto 1', 'crypto 2', 'crypto 1 return', 'crypto 2 return'])
# loop each pair of transactions
for i in range(1, TransactionRecords['Pair No.'].max() + 1):
    pair = TransactionRecords[TransactionRecords['Pair No.'] == i]

    # loop each Round Trip in pair
    for j in range(1, pair['Round Trip No.'].max() + 1):
        roundTrip = pair[pair['Round Trip No.'] == j]

        returnResult = dict()

        # loop each crypto in the round trip
        for crypto in set(roundTrip['Crypto']):
            
            # prepare the specific round trip transaction record
            record = roundTrip[roundTrip['Crypto'] == crypto]
            record = record.reset_index(drop=True)

            # Calculate the return of the specific round trip
            returns = record['Price'][1] / record['Price'][0] - 1
            if record['Long/Short'][0] == 'Short':
                returns = -returns 
            
            # Insert Record
            if len(returnResult) == 0:
                returnResult['Pair No.'] = i
                returnResult['Round Trip No.'] = j
                returnResult['Start Date'] = record['Date'][0]
                returnResult['End Date'] = record['Date'][1]
                returnResult['crypto 1'] = crypto
                returnResult['crypto 1 return'] = returns
            else:
                returnResult['crypto 2'] = crypto
                returnResult['crypto 2 return'] = returns

        result.loc[len(result)] = returnResult  


In [105]:
# Remark: return can be more than -100% for the short selling position
result['Total Return'] = result['crypto 1 return'] + result['crypto 2 return']
result

Unnamed: 0,Pair No.,Round Trip No.,Start Date,End Date,crypto 1,crypto 2,crypto 1 return,crypto 2 return,Total Return
0,1,1,2023-01-02,2023-03-01,CRYPTO:TUSDUSD,CRYPTO:XTZUSD,0.003591,0.672958,0.67655
1,2,1,2023-01-15,2023-03-01,CRYPTO:TUSDUSD,CRYPTO:OKBUSD,0.000205,-0.586354,-0.586149
2,3,1,2023-01-20,2023-01-21,CRYPTO:TUSDUSD,CRYPTO:USDCUSD,0.000163,0.000844,0.001008
3,3,2,2023-02-07,2023-02-07,CRYPTO:TUSDUSD,CRYPTO:USDCUSD,0.000892,0.001169,0.002061
4,4,1,2023-01-07,2023-02-02,CRYPTO:TUSDUSD,CRYPTO:USDDUSD,0.000599,0.025052,0.025651


In [106]:
result['Total Return'].mean()

0.023824099182591964

### TO-DO list:
1) create a time series chart (input pair No.) and show the pair daily return over the period and spread over the period (SpreadRecords)
2) make a function to loop the above process each two months and return both the transaction records and transform it to result
