In [2]:
import numpy as np
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

In [3]:
# Symbols to look up through yfinance. Every entry is a "<COIN>-USD" string;
# the order of this list fixes the position of each coin in the lists built from it.
ticker = [ 
    "ZRX-USD", "1INCH-USD", "AAVE-USD", "ADX-USD", "ACH-USD",
    "ALGO-USD", "TLM-USD", "FORTH-USD", "ANKR-USD", 'ALPINE-USD',
    "APE-USD", "API3-USD", "APT-USD", "ANT-USD", "ARB-USD", 
    "ASTR-USD", "AUDIO-USD", "AVAX-USD", "AXS-USD", 
    "BAL-USD", "BNT-USD", "BAND-USD", "BOND-USD", "BAT-USD", 
    "BICO-USD", "BTC-USD", "BCH-USD", 
    "BNB-USD", "BOSON-USD", "BTRST-USD", "ADA-USD", "CTSI-USD", 
    "CELR-USD", "CELO-USD", "LINK-USD", "CHZ-USD", "CLV-USD", 
    "ATOM-USD", "COTI-USD", "CUDOS-USD", "CRV-USD", 
    "DASH-USD", "MANA-USD", "DIA-USD", "DGB-USD", 
    "DOGE-USD", "XEC-USD", "EGLD-USD", "ENJ-USD", "EOS-USD", 
    "ETH-USD", "ETC-USD", "ENS-USD", "FTM-USD", "PORTO-USD", 
    "FET-USD", "FIL-USD", "FLOKI-USD", "FLOW-USD", "FLUX-USD", 
    "GAL-USD", "JAM-USD", "GTC-USD", 
    "GLM-USD", "ONE-USD", "HBAR-USD", "ZEN-USD", "ICX-USD", 
    "RLC-USD", "ILV-USD", "ICP-USD", "IOST-USD", 
    "KDA-USD", "KAVA-USD", "KNC-USD", "LAZIO-USD", 
    "LOKA-USD", "LDO-USD", "LSK-USD", "LTC-USD", "LPT-USD", 
    "LOOM-USD", "LRC-USD", "LTO-USD", "MKR-USD", "POND-USD", 
    "DAR-USD", "MXC-USD", "ALICE-USD", "XNO-USD", 
    "NEAR-USD", "NEO-USD", "NMR-USD", "ROSE-USD", "OCEAN-USD", 
    "ONT-USD", "OP-USD", "ORBS-USD", "OXT-USD", "OGN-USD", 
    "TRAC-USD", "PAXG-USD", "DOT-USD", "MATIC-USD", "POLYX-USD",
    "PROM-USD", "QTUM-USD", "QNT-USD", "RAD-USD", "RVN-USD", 
    "REEF-USD", "REN-USD", "RNDR-USD", "REQ-USD", "SANTOS-USD", 
    "SHIB-USD", "SKL-USD", "SLP-USD", "SOL-USD",
    "XLM-USD", "STMX-USD", "STORJ-USD", "RARE-USD", "SUSHI-USD", 
    "SNX-USD", "SYS-USD", "USDT-USD", "XRP-USD", "XTZ-USD", 
    "SAND-USD", "TFUEL-USD", "THETA-USD", "T-USD", 
    "TUSD-USD", "UNI-USD", "VET-USD", "VTHO-USD", 
    "VITE-USD", "VOXEL-USD", "WAVES-USD", "WAXP-USD", "WBTC-USD",
    "YFI-USD", "ZEC-USD", "ZIL-USD"
]



# Build one yfinance Ticker handle per symbol, keeping the same order as `ticker`
Crypto = [yf.Ticker(symbol) for symbol in ticker]


In [4]:
# Download the 'Close' column of 5-minute bars for every coin, in `ticker` order.
# NOTE: despite "one_month" in the variable name, the requested period is "14d".
one_month_5min_close_price = [
    Crypto[position].history(period="14d", interval='5m')['Close']
    for position in range(len(ticker))
]

In [5]:
# Determine the global start and end timestamps across all downloaded series.
all_dates = [series.index for series in one_month_5min_close_price]
# A series with no rows has NaT as its min/max, and NaT compares False against every
# timestamp, so it could silently become the "global" bound; skip empty indexes here.
non_empty_dates = [dates for dates in all_dates if len(dates) > 0]
global_start = min(dates.min() for dates in non_empty_dates)
global_end = max(dates.max() for dates in non_empty_dates)

# Common 5-minute grid from global start to global end.
# 'min' is the supported spelling of the minute alias; the older 'T' spelling is deprecated.
full_date_range = pd.date_range(start=global_start, end=global_end, freq='5min')

# Put every series on the common grid and carry the previous price forward into gaps.
# Series.ffill() replaces the deprecated fillna(method='ffill'). Values before a series'
# first observation stay NaN because there is nothing to carry forward.
# Slice assignment keeps the same list object, as the original element-wise update did.
one_month_5min_close_price[:] = [
    series.reindex(full_date_range).ffill()
    for series in one_month_5min_close_price
]


  one_month_5min_close_price[index] = series.reindex(full_date_range).fillna(method='ffill')


## Splitting Data Into Time Rolling List

In [17]:
# Spot-check: display the close-price series stored at list position 140
one_month_5min_close_price[140]

2023-10-23 00:00:00+00:00    0.052276
2023-10-23 00:05:00+00:00    0.052271
2023-10-23 00:10:00+00:00    0.052169
2023-10-23 00:15:00+00:00    0.052149
2023-10-23 00:20:00+00:00    0.051978
                               ...   
2023-11-05 20:50:00+00:00    0.060388
2023-11-05 20:55:00+00:00    0.060368
2023-11-05 21:00:00+00:00    0.060345
2023-11-05 21:05:00+00:00    0.060341
2023-11-05 21:10:00+00:00    0.060301
Freq: 5T, Name: Close, Length: 3999, dtype: float64

In [18]:
# Splitting a 5-minute series into overlapping rolling windows (default: 12 h long, every 6 h)
def one_month_5_min_split_12_hrs(df, window=144, step=72):
    """Cut `df` into overlapping, fixed-length positional windows.

    Parameters
    ----------
    df : pandas Series or DataFrame
        Observations in time order; rows are sliced by position with `.iloc`.
    window : int, default 144
        Rows per window. With 5-minute bars, 144 rows span 12 hours.
    step : int, default 72
        Rows between consecutive window starts. With 5-minute bars, 72 rows are 6 hours,
        so consecutive windows overlap by half.

    Returns
    -------
    list
        Every complete window, in order. The list is empty when `df` has fewer than
        `window` rows; a trailing partial window is never returned.

    Raises
    ------
    ValueError
        If `window` or `step` is not positive.
    """
    if window <= 0 or step <= 0:
        raise ValueError("window and step must be positive")

    # Last position at which a full window still fits. The `+ 1` below makes that start
    # inclusive; without it the final complete window is dropped whenever
    # (len(df) - window) is an exact multiple of `step` (e.g. len(df) == window).
    last_start = len(df) - window
    return [df.iloc[start:start + window] for start in range(0, last_start + 1, step)]

In [19]:
# One entry per coin (in `ticker` order): that coin's price series cut into rolling windows
time_rolling_crypto = [
    one_month_5_min_split_12_hrs(one_month_5min_close_price[position])
    for position in range(len(ticker))
]

In [31]:
# Log return of each observation relative to the observation just before it
def calculate_log_returns(prices):
    """Return ln(prices[t] / prices[t-1]) for every row of `prices`.

    The first row has no predecessor, so its result is NaN; the output keeps the
    same index and length as the input.
    """
    previous_prices = prices.shift(1)
    price_ratio = prices / previous_prices
    return np.log(price_ratio)

# Crypto_log_returns[coin][window] holds the log returns of that window with the
# undefined (NaN) entries removed.
Crypto_log_returns = [
    [calculate_log_returns(window_prices).dropna() for window_prices in coin_windows]
    for coin_windows in time_rolling_crypto
]

## Helper Functions

In [34]:
# Stationarity check based on the p-value reported by adfuller
def check_for_stationary(X, critical_value = 0.00001):
    """Return True when the adfuller p-value for `X` is below `critical_value`.

    `critical_value` is compared against the p-value (element 1 of the adfuller result),
    so it acts as a significance threshold: smaller values make the check stricter.
    """
    adf_pvalue = adfuller(X)[1]
    # bool() keeps the return a plain Python bool rather than a NumPy boolean
    return bool(adf_pvalue < critical_value)

## Pairs Finding Function

In [35]:
# Spread between Y and its OLS-scaled regressor X1
def one_to_one_pairs_trading_diff(Y, X1):
    """Return the spread ``Y - b * X1`` where ``b`` is the OLS slope of Y on X1.

    Parameters
    ----------
    Y, X1 : pandas.Series
        Dependent series and regressor. They are aligned on their index, and rows
        where either value is missing are dropped before fitting.

    Returns
    -------
    pandas.Series
        The spread on the aligned, NaN-free index. The fitted intercept is not
        subtracted, so the spread is shifted by a constant relative to the
        regression residual.
    """
    # Match X with Y: keep only the rows where both series have a value
    aligned = pd.concat([Y, X1], axis=1, keys=['Y', 'X1']).dropna()

    # Fit Y = a + b * X1 by ordinary least squares
    regressors = sm.add_constant(aligned['X1'])
    result = sm.OLS(aligned['Y'], regressors).fit()

    # Look the slope up by column name rather than by position: add_constant may skip
    # adding the intercept when the data already has a constant column, in which case
    # position 1 does not exist.
    b = result.params['X1']

    # Use the aligned columns (not the raw inputs) so the result carries no NaN rows
    # for labels that exist in only one of the inputs.
    Diff = aligned['Y'] - b * aligned['X1']

    # Return the difference
    return Diff

In [36]:
# Defining Function that calculate difference between the actual difference and the predicted difference
def two_to_one_pairs_trading_diff(Y, X1, X2):
    # Match X with Y
    df = pd.concat([Y, X1, X2], axis=1, keys=['Y', 'X1', 'X2'])
    df = df.dropna()
    
    # Fitting linear regression using the training set
    X_add_cons = sm.add_constant(df[['X1', 'X2']])
    Y = df['Y']
    result = sm.OLS(Y, X_add_cons).fit()

    # Calculate difference between the actual and the prdicted difference
    b1 = result.params[result.params.index[1]]
    b2 = result.params[result.params.index[2]]
    Diff = Y - b1 * X1 - b2 * X2
    
    # Return the difference
    return Diff

## Find Stationary Pairs by Time-Rolling Log Return

In [124]:
# Stationarity screen for every Y ~ X1 + X2 combination, window by window.
#
# Result layout:
#   two_to_one_stationary_pair_timeroll[i][j][k - (j + 1)]
# is a list with one boolean per rolling window of coin i (True = spread judged
# stationary by check_for_stationary), for Y = coin i, X1 = coin j, X2 = coin k (k > j).
# When j == i or k == i the entry is an empty list, so positions stay aligned with
# the coin indices.
#
# NOTE: the work grows with (number of coins)^3 * (number of windows); each step fits a
# regression and runs a stationarity test, so a full run over the whole list is very slow.
n_coins = len(Crypto_log_returns)
two_to_one_stationary_pair_timeroll = []
for i in range(n_coins):
    y_windows = Crypto_log_returns[i]
    temp_Y = []
    for j in range(n_coins):
        # Whether X1 is a valid regressor does not depend on k, so test it once per j
        x1_is_not_y = j != i
        temp_X1 = []
        for k in range(j + 1, n_coins):
            temp_X2 = []
            if x1_is_not_y and k != i:
                for l in range(len(y_windows)):
                    # Setting Y and X for this window
                    Y = y_windows[l]
                    X1 = Crypto_log_returns[j][l]
                    X2 = Crypto_log_returns[k][l]
                    Diff = two_to_one_pairs_trading_diff(Y, X1, X2).dropna()

                    # Record whether the spread is stationary in this window
                    temp_X2.append(check_for_stationary(Diff))

            # One entry per X2 candidate (empty when the combination was skipped)
            temp_X1.append(temp_X2)

        # All X2 candidates for this X1
        temp_Y.append(temp_X1)

    # All (X1, X2) combinations for this Y
    two_to_one_stationary_pair_timeroll.append(temp_Y)


KeyboardInterrupt: 

In [136]:
# Stationarity screen for every Y ~ X1 pair, window by window.
#
# Result layout:
#   one_to_one_stationary_pair_timeroll[i][j]
# is a list with one boolean per rolling window of coin i (True = spread judged
# stationary by check_for_stationary), for Y = coin i and X1 = coin j.
# The entry for j == i is an empty list, so positions stay aligned with the coin indices.
#
# NOTE: the work grows with (number of coins)^2 * (number of windows); each step fits a
# regression and runs a stationarity test, so a full run over the whole list is slow.
n_coins = len(Crypto_log_returns)
one_to_one_stationary_pair_timeroll = []
for i in range(n_coins):
    y_windows = Crypto_log_returns[i]
    temp_Y = []
    for j in range(n_coins):
        temp_X = []
        if j != i:
            for k in range(len(y_windows)):
                # Setting Y and X for this window
                Y = y_windows[k]
                X1 = Crypto_log_returns[j][k]
                Diff = one_to_one_pairs_trading_diff(Y, X1).dropna()

                # Record whether the spread is stationary in this window
                temp_X.append(check_for_stationary(Diff))

        # One entry per X1 candidate (empty when X1 is Y itself)
        temp_Y.append(temp_X)

    # All X1 candidates for this Y
    one_to_one_stationary_pair_timeroll.append(temp_Y)

KeyboardInterrupt: 