In [1]:
import pandas as pd
from datetime import timedelta
import numpy as np
from dateutil.relativedelta import *
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
from collections import defaultdict

### Import Data

In [2]:
# Get historical crypto market cap rank data.
# A forward slash is used as the separator: the previous 'data\C...' literal relied on
# the invalid escape sequence "\C" and only resolved as a path on Windows.
cryptoMarketCapRankDf = pd.read_csv('data/CryptoMarketCap.csv')
# Parse the 'Date' column so it can be compared against Timestamp cutoff dates later.
cryptoMarketCapRankDf['Date'] = pd.to_datetime(cryptoMarketCapRankDf['Date'])

In [3]:
# Get historical crypto price data (first CSV column becomes the index).
# A forward slash is used as the separator: the previous 'data\T...' literal relied on
# the invalid escape sequence "\T" and only resolved as a path on Windows.
cryptoPriceDf = pd.read_csv('data/TradingViewCryptoPrice.csv', index_col=0)
# Convert the index to datetimes so cutoff dates can be located with index.get_loc.
cryptoPriceDf.index = pd.to_datetime(cryptoPriceDf.index)

### Formation Period

In [4]:
def marketCapCryptoSelection(cryptoMarketCapRankDf, cutoffDate, cutoffRank):
    """Return the symbols ranked within `cutoffRank` on the day before `cutoffDate`.

    Parameters
    ----------
    cryptoMarketCapRankDf : DataFrame with 'Date', 'Rank' and 'Symbol' columns.
    cutoffDate : anything accepted by ``pd.to_datetime``; first day of the trading period.
    cutoffRank : largest rank (inclusive) that is still selected.

    Returns
    -------
    list of symbols, in the row order of `cryptoMarketCapRankDf`.
    """
    # The ranking snapshot is taken one day before the cutoff date
    snapshotDate = pd.to_datetime(cutoffDate) - timedelta(days=1)

    isSnapshotDay = cryptoMarketCapRankDf['Date'] == snapshotDate
    isWithinRank = cryptoMarketCapRankDf['Rank'] <= cutoffRank

    return cryptoMarketCapRankDf.loc[isSnapshotDay & isWithinRank, 'Symbol'].tolist()

In [5]:
def cryptoPriceCleaning(cryptoPriceDf, sampleCrypto, cutoffDate, lookback):
    """Build the cleaned formation-period price table for the selected cryptos.

    Parameters
    ----------
    cryptoPriceDf : DataFrame of prices indexed by date, with columns named
        "CRYPTO:<symbol>USD".
    sampleCrypto : iterable of symbols to keep.
    cutoffDate : label present in ``cryptoPriceDf.index``; the formation period ends
        on the row just before it.
    lookback : number of rows to take before the cutoff row.

    Returns
    -------
    DataFrame holding up to `lookback` rows before the cutoff, forward-filled, with
    every column that still contains a missing value dropped. Columns follow the order
    of `sampleCrypto` (duplicates removed), so the output is reproducible between runs.

    Raises
    ------
    KeyError if `cutoffDate` is not in the price index.
    """
    # Filter the prices down to the formation period
    cutoffRowIdx = cryptoPriceDf.index.get_loc(cutoffDate)
    # If there is not enough history for the whole lookback, take everything available
    startRowIdx = max(cutoffRowIdx - lookback, 0)
    sampleCryptoPrice = cryptoPriceDf.iloc[startRowIdx:cutoffRowIdx]

    # Keep only the symbols that have a price column; iterate the symbols in their
    # given order (instead of through a set) so the column order is deterministic
    availableColumns = set(sampleCryptoPrice.columns)
    selectedColumns = []
    for symbol in dict.fromkeys(sampleCrypto):
        column = f"CRYPTO:{symbol}USD"
        if column in availableColumns:
            selectedColumns.append(column)

    # Data cleaning: fill gaps with the previous price, then drop every column that
    # still has a missing value (i.e. no price at the start of the window)
    sampleCryptoPrice = sampleCryptoPrice[selectedColumns]
    sampleCryptoPrice = sampleCryptoPrice.ffill(axis=0)
    sampleCryptoPrice = sampleCryptoPrice.dropna(axis=1)

    print('Remaining number of crpyto: ', len(sampleCryptoPrice.columns))

    return sampleCryptoPrice

#### Cointegration Method

In [6]:
def cointegrationMethodSelection(sampleCryptoPrice, ADFtestThreshold):
    """Find cointegrated crypto pairs in the formation-period prices.

    Steps:
    1. Run an ADF test on each log-price series and discard the series whose p-value
       is below `ADFtestThreshold` (they look stationary already).
    2. For every ordered pair of the remaining cryptos, regress crypto 1 on crypto 2
       (OLS with a constant) and run an ADF test on the residuals. The pair is kept
       when the residual p-value is below `ADFtestThreshold`.

    Parameters
    ----------
    sampleCryptoPrice : DataFrame of formation-period prices, one column per crypto.
    ADFtestThreshold : p-value threshold used for both ADF tests.

    Returns
    -------
    DataFrame with columns 'Crypto 1', 'Crypto 2' and 'Beta' (the OLS slope), one row
    per retained ordered pair, indexed 0..n-1.
    """
    # Take log of the price series (used only for the stationarity screen below)
    sampleCryptoLogPrice = np.log(sampleCryptoPrice)

    # Series whose log price already rejects the unit root are treated as I(0)
    I0Series = [
        crypto
        for crypto in sampleCryptoLogPrice.columns
        if adfuller(sampleCryptoLogPrice[crypto])[1] < ADFtestThreshold
    ]

    # Remove the I(0) price series from the sample
    sampleCryptoPrice = sampleCryptoPrice.drop(I0Series, axis=1)

    print('I0Series: ', I0Series)
    print('Remaining number of crpyto: ', len(sampleCryptoPrice.columns))

    # NOTE(review): the screen above uses log prices while the regression below uses
    # raw prices -- confirm this mix is intended.
    # NOTE(review): a plain ADF p-value on estimated residuals is not the Engle-Granger
    # test statistic distribution; statsmodels.tsa.stattools.coint may be the intended
    # tool -- confirm.
    pairColumns = ['Crypto 1', 'Crypto 2', 'Beta']
    pairRecords = []
    for crypto1 in sampleCryptoPrice.columns:
        y = sampleCryptoPrice[crypto1]
        for crypto2 in sampleCryptoPrice.columns:
            if crypto1 == crypto2:
                continue

            # OLS regression of crypto 1 on crypto 2 with an intercept
            x = sampleCryptoPrice[crypto2]
            model = sm.OLS(y, sm.add_constant(x)).fit()

            # params is label-indexed ('const', <crypto2>); use .iloc for positional
            # access, since integer keys on a labelled Series are deprecated
            const = model.params.iloc[0]
            beta = model.params.iloc[1]
            residuals = y - (x * beta + const)

            # The residuals are tested for stationarity with the ADF test
            if adfuller(residuals)[1] < ADFtestThreshold:
                pairRecords.append({'Crypto 1': crypto1, 'Crypto 2': crypto2, 'Beta': beta})

    # Build the frame once instead of appending row by row
    CointegratedPairs = pd.DataFrame(pairRecords, columns=pairColumns)

    return CointegratedPairs


### Trading Period (Need to consider close and open new positions in the same day)

In [7]:
def _pairLegRecords(date, prices, crypto1, crypto2, beta, crypto1Side, action, roundTripNo, pairNo):
    """Build the two transaction rows (crypto 1 leg, then crypto 2 leg) for one event.

    crypto 1 always trades 1 unit on `crypto1Side`. crypto 2 trades |beta| units: on the
    opposite side when beta >= 0, on the same side otherwise.
    """
    oppositeSide = "Short" if crypto1Side == "Long" else "Long"
    if beta >= 0:
        crypto2Side, crypto2Quantity = oppositeSide, beta
    else:
        crypto2Side, crypto2Quantity = crypto1Side, -beta
    return [
        [date, crypto1, crypto1Side, prices.loc[date, crypto1], action, crypto2, roundTripNo, pairNo, 1],
        [date, crypto2, crypto2Side, prices.loc[date, crypto2], action, crypto1, roundTripNo, pairNo, crypto2Quantity],
    ]


def cointegrationMethodTrading(cryptoPriceDf, sampleCryptoPrice, CointegratedPairs, cutoffDate, forward, spreadThreshold, closeThreshold, constantModel=True):
    """Simulate the pairs-trading rule over the trading period for every cointegrated pair.

    For each pair the spread ``crypto1 - beta * crypto2`` is normalized and traded:
    open when it reaches +/- `spreadThreshold`, close when it crosses back over
    `closeThreshold`, and force-close any open position on the last trading day.
    No new position is opened on the last day. A close and a new open may happen on
    the same date.

    Parameters
    ----------
    cryptoPriceDf : DataFrame of prices indexed by date (must contain `cutoffDate`).
    sampleCryptoPrice : formation-period prices (output of the cleaning step).
    CointegratedPairs : DataFrame with 'Crypto 1', 'Crypto 2', 'Beta', indexed 0..n-1.
    cutoffDate : first date of the trading period.
    forward : number of rows in the trading period (fewer if the data ends earlier).
    spreadThreshold : absolute normalized-spread level that opens a position.
    closeThreshold : a short-spread position closes when the spread is <= closeThreshold,
        a long-spread position closes when it is >= -closeThreshold.
    constantModel : if True, normalize with the formation-period mean/std; if False,
        use an expanding-window mean/std over formation + trading data.

    Returns
    -------
    (TransactionRecords, SpreadRecords)
        TransactionRecords has one row per traded leg with columns 'Date', 'Crypto',
        'Long/Short', 'Price', 'Open/Close', 'Transaction pair', 'Round Trip No.',
        'Pair No.', 'Quantity'. SpreadRecords has one column "<crypto1> <crypto2>" of
        normalized spreads for every pair that had a trading signal.
    """
    transactionColumns = ['Date', 'Crypto', 'Long/Short', 'Price', "Open/Close", "Transaction pair", "Round Trip No.", "Pair No.", "Quantity"]
    transactionRows = []   # collected as plain lists; the DataFrame is built once at the end
    spreadSeries = []
    PairNo = 0

    # Get the trading-period prices. Slicing past the end is safe: iloc simply returns
    # whatever rows are available.
    cutoffRowIdx = cryptoPriceDf.index.get_loc(cutoffDate)
    tradingCryptoPrice = cryptoPriceDf.iloc[cutoffRowIdx:cutoffRowIdx + forward]

    for i in range(len(CointegratedPairs)):
        ############ Trading Signal Calculation ############
        crypto1 = CointegratedPairs.loc[i, 'Crypto 1']
        crypto2 = CointegratedPairs.loc[i, 'Crypto 2']
        beta = CointegratedPairs.loc[i, 'Beta']

        if constantModel:
            # Spread mean and std from the formation period.
            # (Fixed: the second term must use crypto 2, not crypto 1.)
            formationSpread = sampleCryptoPrice[crypto1] - sampleCryptoPrice[crypto2] * beta
            formationSpreadMean = formationSpread.mean()
            formationSpreadStd = formationSpread.std()

            # Spread in the trading period, normalized with the formation statistics
            spread = tradingCryptoPrice[crypto1] - tradingCryptoPrice[crypto2] * beta
            normalizedSpread = (spread - formationSpreadMean) / formationSpreadStd
        else:
            # Expanding window model: statistics use all data up to each date,
            # starting from the beginning of the formation period
            crypto1Price = pd.concat([sampleCryptoPrice[crypto1], tradingCryptoPrice[crypto1]], axis=0)
            crypto2Price = pd.concat([sampleCryptoPrice[crypto2], tradingCryptoPrice[crypto2]], axis=0)

            spread = crypto1Price - crypto2Price * beta
            spreadMean = spread.expanding(min_periods=1).mean()
            spreadStd = spread.expanding(min_periods=1).std()

            normalizedSpread = (spread - spreadMean) / spreadStd
            # Keep only the trading-period part of the normalized spread
            normalizedSpread = normalizedSpread.loc[cutoffDate:]

        ############ Trading Execution ############
        # A pair is traded only if a signal occurs before the last day (positions
        # cannot be opened on the last day)
        SpreadWithoutLastDay = normalizedSpread.iloc[:-1]
        hasSignal = ((SpreadWithoutLastDay >= spreadThreshold) | (SpreadWithoutLastDay <= -spreadThreshold)).any()
        if not hasSignal:
            continue

        # Save the spread record under the pair's name
        spreadSeries.append(normalizedSpread.rename(crypto1 + " " + crypto2))

        # Initialize the state for this pair
        PairNo += 1
        Opened = False
        long = None          # True = long the spread, False = short the spread
        RoundTripNo = 1
        lastDate = normalizedSpread.index[-1]

        for date, spreadValue in normalizedSpread.items():

            # Last day: only close whatever is still open
            if date == lastDate:
                if Opened:
                    closingSide = "Short" if long else "Long"
                    transactionRows.extend(_pairLegRecords(date, tradingCryptoPrice, crypto1, crypto2, beta, closingSide, "Close", RoundTripNo, PairNo))
                    long = None
                    Opened = False
                continue

            # Close an existing position once the spread crosses the close threshold
            if Opened:
                closingSide = None
                if not long and spreadValue <= closeThreshold:
                    closingSide = "Long"     # buy back crypto 1 to close the short spread
                elif long and spreadValue >= -closeThreshold:
                    closingSide = "Short"    # sell crypto 1 to close the long spread
                if closingSide is not None:
                    transactionRows.extend(_pairLegRecords(date, tradingCryptoPrice, crypto1, crypto2, beta, closingSide, "Close", RoundTripNo, PairNo))
                    RoundTripNo += 1
                    long = None
                    Opened = False

            # Check again: if flat (possibly just closed) and the entry rule is met, open
            if not Opened:
                if spreadValue >= spreadThreshold:
                    # short crypto 1 / long crypto 2 (for beta >= 0)
                    transactionRows.extend(_pairLegRecords(date, tradingCryptoPrice, crypto1, crypto2, beta, "Short", "Open", RoundTripNo, PairNo))
                    long = False
                    Opened = True
                elif spreadValue <= -spreadThreshold:
                    # long crypto 1 / short crypto 2 (for beta >= 0)
                    transactionRows.extend(_pairLegRecords(date, tradingCryptoPrice, crypto1, crypto2, beta, "Long", "Open", RoundTripNo, PairNo))
                    long = True
                    Opened = True

    TransactionRecords = pd.DataFrame(transactionRows, columns=transactionColumns)
    # One column per traded pair; all spreads share the trading-period index
    SpreadRecords = pd.concat(spreadSeries, axis=1) if spreadSeries else pd.DataFrame()

    return (TransactionRecords, SpreadRecords)

### Rolling Window

In [8]:
# General parameters
# startDate / endDate bound the sequence of cutoff dates generated by the rolling window.
startDate = '2019-01-01'
endDate = '2023-07-01'
# lookback / forward are counted in rows of the price table (they are used as iloc
# offsets around the cutoff row) -- presumably one row per day; confirm against the data.
lookback =365
forward = 60

# Formation period parameters
# cutoffRank: largest market-cap rank (inclusive) kept in the sample.
cutoffRank = 100
# ADFtestThreshold: p-value threshold used for both the I(0) screen and the residual test.
ADFtestThreshold = 0.01

# Trading period parameters
# spreadThreshold: a position opens when the normalized spread is >= +threshold or <= -threshold.
spreadThreshold = 2.5
# closeThreshold: a short-spread position closes at spread <= closeThreshold and a
# long-spread position closes at spread >= -closeThreshold.
closeThreshold = 0 # same sign as spreadThreshold

In [9]:
# Rolling window: for each cutoff date, run selection -> cleaning -> pair finding -> trading,
# then stack the per-period records. Frames are collected in lists and concatenated once,
# which avoids the quadratic cost of concatenating onto a growing (initially empty) frame.
transactionColumns = ['Date', 'Crypto', 'Long/Short', 'Price', "Open/Close", "Transaction pair", "Period No.", "Pair No.", "Round Trip No.", "Quantity"]
transactionFrames = []
spreadFrames = []

# Period numbers start at 1; cutoff dates are every second month start
for periodNo, cutoffDate in enumerate(pd.date_range(startDate, endDate, freq='2MS'), start=1):
    sampleCrypto = marketCapCryptoSelection(cryptoMarketCapRankDf, cutoffDate, cutoffRank)
    sampleCryptoPrice = cryptoPriceCleaning(cryptoPriceDf, sampleCrypto, cutoffDate, lookback)
    CointegratedPairs = cointegrationMethodSelection(sampleCryptoPrice, ADFtestThreshold)
    # constantModel=False selects the expanding-window normalization
    Transaction, Spread = cointegrationMethodTrading(cryptoPriceDf, sampleCryptoPrice, CointegratedPairs, cutoffDate, forward, spreadThreshold, closeThreshold, constantModel=False)
    Transaction['Period No.'] = periodNo
    # Periods without trades / spreads contribute nothing, so skip their empty frames
    if not Transaction.empty:
        transactionFrames.append(Transaction)
    if not Spread.empty:
        spreadFrames.append(Spread)

if transactionFrames:
    # Reorder to the documented column layout after stacking
    TransactionRecords = pd.concat(transactionFrames, ignore_index=True)[transactionColumns]
else:
    TransactionRecords = pd.DataFrame(columns=transactionColumns)

SpreadRecords = pd.concat(spreadFrames) if spreadFrames else pd.DataFrame()

Remaining number of crpyto:  45
I0Series:  []
Remaining number of crpyto:  45
Remaining number of crpyto:  51
I0Series:  []
Remaining number of crpyto:  51
Remaining number of crpyto:  55
I0Series:  []
Remaining number of crpyto:  55
Remaining number of crpyto:  49
I0Series:  ['CRYPTO:USDTUSD']
Remaining number of crpyto:  48
Remaining number of crpyto:  47
I0Series:  []
Remaining number of crpyto:  47
Remaining number of crpyto:  48
I0Series:  ['CRYPTO:SCUSD', 'CRYPTO:USDTUSD', 'CRYPTO:ZRXUSD']
Remaining number of crpyto:  45
Remaining number of crpyto:  46
I0Series:  ['CRYPTO:USDTUSD']
Remaining number of crpyto:  45
Remaining number of crpyto:  47
I0Series:  ['CRYPTO:USDTUSD', 'CRYPTO:KCSUSD']
Remaining number of crpyto:  45
Remaining number of crpyto:  49
I0Series:  ['CRYPTO:USDTUSD', 'CRYPTO:TUSDUSD', 'CRYPTO:LINKUSD']
Remaining number of crpyto:  46
Remaining number of crpyto:  52
I0Series:  ['CRYPTO:USDTUSD', 'CRYPTO:TUSDUSD']
Remaining number of crpyto:  50
Remaining number of 

In [10]:
# TransactionRecords.to_csv('Transactions_cointegration.csv')

In [11]:
# SpreadRecords.to_csv('Spread_cointegration.csv')

### Transform transaction records to Result

In [17]:
def roundTripReturns(TransactionRecords):
    """Collapse the leg-level transaction records into one row per round trip.

    Each round trip (identified by 'Period No.', 'Pair No.', 'Round Trip No.') consists
    of an 'Open' and a 'Close' row for each of the two cryptos. For each crypto the
    return is ``close price / open price - 1``, negated when the opening leg is 'Short'.

    crypto 1 is taken from the first opening row of the round trip (the trading step
    writes the crypto 1 leg before the crypto 2 leg), and crypto 2 from that row's
    'Transaction pair'. This does not rely on 'Quantity' being 1, so pairs whose beta
    happens to equal 1 are handled correctly.

    Returns a DataFrame with columns 'Period No.', 'Pair No.', 'Round Trip No.',
    'Start Date', 'End Date', 'crypto 1', 'crypto 2', 'crypto 1 return',
    'crypto 2 return', 'Quantity' (the crypto 2 quantity), ordered by period, pair and
    round trip. An empty input yields an empty frame.
    """
    resultColumns = ['Period No.', 'Pair No.', 'Round Trip No.', 'Start Date', 'End Date', 'crypto 1', 'crypto 2', 'crypto 1 return', 'crypto 2 return', 'Quantity']
    groupKeys = ['Period No.', 'Pair No.', 'Round Trip No.']
    resultRecords = []

    # groupby keeps the original row order inside each round trip and sorts the keys,
    # which matches looping over periods, pairs and round trips in ascending order
    for (periodNo, pairNo, roundTripNo), roundTrip in TransactionRecords.groupby(groupKeys, sort=True):
        openLegs = roundTrip[roundTrip['Open/Close'] == 'Open']
        closeLegs = roundTrip[roundTrip['Open/Close'] == 'Close']

        crypto1 = openLegs['Crypto'].iloc[0]
        crypto2 = openLegs['Transaction pair'].iloc[0]

        legReturns = {}
        for crypto in (crypto1, crypto2):
            openRow = openLegs[openLegs['Crypto'] == crypto].iloc[0]
            closeRow = closeLegs[closeLegs['Crypto'] == crypto].iloc[0]
            returns = closeRow['Price'] / openRow['Price'] - 1
            # A short leg profits when the price falls
            if openRow['Long/Short'] == 'Short':
                returns = -returns
            legReturns[crypto] = returns

        crypto2OpenRow = openLegs[openLegs['Crypto'] == crypto2].iloc[0]
        resultRecords.append({
            'Period No.': periodNo,
            'Pair No.': pairNo,
            'Round Trip No.': roundTripNo,
            'Start Date': openLegs['Date'].iloc[0],
            'End Date': closeLegs['Date'].iloc[0],
            'crypto 1': crypto1,
            'crypto 2': crypto2,
            'crypto 1 return': legReturns[crypto1],
            'crypto 2 return': legReturns[crypto2],
            'Quantity': crypto2OpenRow['Quantity'],
        })

    # Build the frame once instead of appending row by row
    return pd.DataFrame(resultRecords, columns=resultColumns)


result = roundTripReturns(TransactionRecords)


In [18]:
# Display the per-round-trip result table (one row per period / pair / round trip)
result

Unnamed: 0,Period No.,Pair No.,Round Trip No.,Start Date,End Date,crypto 1,crypto 2,crypto 1 return,crypto 2 return,Quantity
0,1,1,1,2019-02-23,2019-03-01,CRYPTO:STEEMUSD,CRYPTO:MKRUSD,0.125404,0.086981,0.005043
1,1,2,1,2019-02-23,2019-03-01,CRYPTO:XMRUSD,CRYPTO:MKRUSD,-0.106877,0.086981,0.276486
2,1,3,1,2019-01-15,2019-03-01,CRYPTO:KMDUSD,CRYPTO:LINKUSD,0.599276,0.155894,8.711759
3,1,4,1,2019-02-23,2019-03-01,CRYPTO:MKRUSD,CRYPTO:STEEMUSD,0.086981,0.125404,164.854994
4,1,5,1,2019-02-23,2019-03-01,CRYPTO:MKRUSD,CRYPTO:XLMUSD,0.086981,-0.088494,2034.912173
...,...,...,...,...,...,...,...,...,...,...
4748,28,123,1,2023-07-20,2023-08-28,CRYPTO:LINKUSD,CRYPTO:CRVUSD,0.285057,-0.379910,2.278256
4749,28,124,1,2023-07-20,2023-08-17,CRYPTO:LINKUSD,CRYPTO:SHIBUSD,0.258578,0.058748,310645.950249
4750,28,125,1,2023-07-20,2023-08-16,CRYPTO:LINKUSD,CRYPTO:RUNEUSD,0.191754,0.454208,0.887802
4751,28,126,1,2023-07-20,2023-08-17,CRYPTO:LINKUSD,CRYPTO:UNIUSD,0.258578,-0.167918,0.561871


In [20]:
# Remark: return can be more than -100% for the short selling position
# Total return = crypto 1 return + crypto 2 return scaled by the crypto 2 quantity.
# NOTE(review): 'Quantity' is the number of crypto 2 units traded per unit of crypto 1
# (|beta|), not a capital weight. Multiplying a percentage return by a unit count mixes
# units: pairs with a large beta produce values far beyond +/-100% (see the rows with
# Quantity in the thousands). Weighting each leg by its opening notional
# (price x quantity) is presumably what is intended -- TODO confirm and fix.
result['Total Return'] = result['crypto 1 return'] + result['crypto 2 return'] * result['Quantity']
result

Unnamed: 0,Period No.,Pair No.,Round Trip No.,Start Date,End Date,crypto 1,crypto 2,crypto 1 return,crypto 2 return,Quantity,Total Return
0,1,1,1,2019-02-23,2019-03-01,CRYPTO:STEEMUSD,CRYPTO:MKRUSD,0.125404,0.086981,0.005043,0.125842
1,1,2,1,2019-02-23,2019-03-01,CRYPTO:XMRUSD,CRYPTO:MKRUSD,-0.106877,0.086981,0.276486,-0.082828
2,1,3,1,2019-01-15,2019-03-01,CRYPTO:KMDUSD,CRYPTO:LINKUSD,0.599276,0.155894,8.711759,1.957388
3,1,4,1,2019-02-23,2019-03-01,CRYPTO:MKRUSD,CRYPTO:STEEMUSD,0.086981,0.125404,164.854994,20.760413
4,1,5,1,2019-02-23,2019-03-01,CRYPTO:MKRUSD,CRYPTO:XLMUSD,0.086981,-0.088494,2034.912173,-179.990414
...,...,...,...,...,...,...,...,...,...,...,...
4748,28,123,1,2023-07-20,2023-08-28,CRYPTO:LINKUSD,CRYPTO:CRVUSD,0.285057,-0.379910,2.278256,-0.580475
4749,28,124,1,2023-07-20,2023-08-17,CRYPTO:LINKUSD,CRYPTO:SHIBUSD,0.258578,0.058748,310645.950249,18250.207113
4750,28,125,1,2023-07-20,2023-08-16,CRYPTO:LINKUSD,CRYPTO:RUNEUSD,0.191754,0.454208,0.887802,0.595001
4751,28,126,1,2023-07-20,2023-08-17,CRYPTO:LINKUSD,CRYPTO:UNIUSD,0.258578,-0.167918,0.561871,0.164230


In [21]:
# Average 'Total Return' across all round trips.
# NOTE(review): this mean inherits the unit-mixing issue of 'Total Return' and appears
# to be dominated by a few large-Quantity rows -- interpret with caution.
result['Total Return'].mean()

-4069.793473131829