In [1]:
# Load imports and configuration

%load_ext autoreload
%autoreload 2

import hydra
from hydra.core.global_hydra import GlobalHydra

GlobalHydra.instance().clear()
hydra.initialize(version_base=None, config_path="../conf")
cfg = hydra.compose(config_name="config")

tickers = cfg.market.tickers
interval = cfg.market.interval

from modules.data_services.data_loaders import load_data
from modules.data_services.data_utils import merge_by_pair
from modules.performance.statistical_tests import ssd_cumulative_returns, pearson_correlation, engle_granger_cointegration, johansen_cointegration

In [2]:
# Date window handed to load_data() below, as "YYYY-MM-DD" strings.
# NOTE(review): whether `end` is inclusive is decided by load_data — confirm.
start: str = "2024-01-01"
end: str = "2024-03-01"

In [3]:
# Load market data for the configured tickers over the start/end window at the
# configured interval, then preview the first five rows.

df = load_data(tickers=tickers, start=start, end=end, interval=interval)
df.head(5)

Unnamed: 0_level_0,BTCUSDT,ETHUSDT,BNBUSDT,SOLUSDT,XRPUSDT,ADAUSDT,AVAXUSDT,DOGEUSDT,TRXUSDT,DOTUSDT,LINKUSDT,SHIBUSDT,LTCUSDT,BCHUSDT,UNIUSDT
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-01-01 00:00:00,42475.23,2295.51,314.4,101.96,0.6162,0.5979,38.94,0.08983,0.10832,8.267,15.122,1e-05,73.06,259.9,7.313
2024-01-01 01:00:00,42613.56,2303.72,315.3,104.12,0.6185,0.6023,39.39,0.09016,0.1082,8.294,15.167,1e-05,73.34,260.6,7.352
2024-01-01 02:00:00,42581.1,2293.02,310.9,103.69,0.6154,0.5995,39.04,0.0898,0.10781,8.253,15.065,1e-05,73.16,260.0,7.307
2024-01-01 03:00:00,42330.49,2273.81,309.2,103.07,0.613,0.5956,38.51,0.08921,0.10711,8.148,14.931,1e-05,72.67,258.3,7.218
2024-01-01 04:00:00,42399.99,2279.55,309.2,102.62,0.6116,0.5953,38.21,0.0891,0.10654,8.14,14.92,1e-05,72.53,257.2,7.226


In [4]:
# SSD of cumulative returns, one row per pair; preview the first five rows.

ssd_df = ssd_cumulative_returns(df)
ssd_df.head(5)

Unnamed: 0,pair,ssd
0,DOGEUSDT-SHIBUSDT,1.316605
1,XRPUSDT-DOTUSDT,2.393193
2,ADAUSDT-DOTUSDT,2.591386
3,SHIBUSDT-LTCUSDT,3.702435
4,ADAUSDT-AVAXUSDT,3.805218


In [5]:
# Pearson correlation per pair, computed with source="prices".

corr_prices_df = pearson_correlation(df, source="prices")
corr_prices_df.head(5)

Unnamed: 0,pair,corr_prices
0,ETHUSDT-BNBUSDT,0.939692
1,DOGEUSDT-SHIBUSDT,0.935798
2,SOLUSDT-AVAXUSDT,0.926325
3,ADAUSDT-DOTUSDT,0.917982
4,BTCUSDT-ETHUSDT,0.909241


In [6]:
# Pearson correlation per pair, computed with source="returns".

corr_returns_df = pearson_correlation(df, source="returns")
corr_returns_df.head(5)

Unnamed: 0,pair,corr_returns
0,ADAUSDT-DOTUSDT,0.824841
1,SOLUSDT-AVAXUSDT,0.820279
2,DOGEUSDT-SHIBUSDT,0.812545
3,AVAXUSDT-DOTUSDT,0.784385
4,ADAUSDT-AVAXUSDT,0.76279


In [7]:
# Pearson correlation per pair, computed with source="log_returns".
# This is the correlation variant carried into the merged table further down.

corr_log_returns_df = pearson_correlation(df, source="log_returns")
corr_log_returns_df.head(5)

Unnamed: 0,pair,corr_log_returns
0,ADAUSDT-DOTUSDT,0.826138
1,SOLUSDT-AVAXUSDT,0.821408
2,DOGEUSDT-SHIBUSDT,0.81852
3,AVAXUSDT-DOTUSDT,0.78576
4,ADAUSDT-AVAXUSDT,0.764557


In [8]:
# Engle-Granger cointegration test per pair, run with source="log_prices".
# The preview exposes the test p-value (eg_p_value) and the ADF statistic.

eg_df = engle_granger_cointegration(df, source="log_prices")
eg_df.head(5)

Unnamed: 0,pair,eg_p_value,adf_stat
0,BNBUSDT-UNIUSDT,0.001301,-4.484008
1,ETHUSDT-BCHUSDT,0.00581,-4.063894
2,BTCUSDT-BCHUSDT,0.00834,-3.954092
3,BTCUSDT-ADAUSDT,0.01376,-3.795124
4,ETHUSDT-UNIUSDT,0.019243,-3.683448


In [9]:
# Johansen cointegration test per pair; the preview shows the trace statistic
# against its 95% / 99% critical values.
# NOTE(review): unlike the Engle-Granger cell, no `source=` is passed here, so
# the function's default input is used — confirm it matches "log_prices".

johansen_df = johansen_cointegration(df)
johansen_df.head(5)

Unnamed: 0,pair,trace_stat,crit_95,crit_99,trace_stat - crit_95,trace_stat - crit_99
0,BTCUSDT-BCHUSDT,20.874688,15.4943,19.9349,5.380388,0.939788
1,ETHUSDT-BCHUSDT,18.198021,15.4943,19.9349,2.703721,-1.736879
2,DOGEUSDT-SHIBUSDT,17.415646,15.4943,19.9349,1.921346,-2.519254
3,BTCUSDT-ADAUSDT,17.156938,15.4943,19.9349,1.662638,-2.777962
4,BNBUSDT-UNIUSDT,16.792436,15.4943,19.9349,1.298136,-3.142464


In [10]:
# Combine the per-pair metrics into a single table and rank it by the
# Engle-Granger p-value, smallest first, with a fresh 0..n-1 index.

merged_df = (
    merge_by_pair(
        dfs=[ssd_df, corr_log_returns_df, eg_df, johansen_df],
        # presumably keep_cols[i] names the columns retained from dfs[i] — verify
        # against merge_by_pair
        keep_cols=[['ssd'], ['corr_log_returns'], ['eg_p_value'], ['trace_stat - crit_99']],
    )
    .sort_values(by='eg_p_value', ascending=True)
    .reset_index(drop=True)
)
merged_df.head(5)

Unnamed: 0,pair,ssd,corr_log_returns,eg_p_value,trace_stat - crit_99
0,BNBUSDT-UNIUSDT,30.106199,0.356806,0.001301,-3.142464
1,ETHUSDT-BCHUSDT,35.760886,0.589011,0.00581,-1.736879
2,BTCUSDT-BCHUSDT,24.048188,0.633006,0.00834,0.939788
3,BTCUSDT-ADAUSDT,49.567267,0.711226,0.01376,-2.777962
4,ETHUSDT-UNIUSDT,49.86891,0.399035,0.019243,-5.729862
