In [9]:
# Import packages and set configuration
import pandas as pd
from IPython.display import display, Markdown

%load_ext autoreload
%autoreload 2

### Asset list
Market Cap Ranking Comparison Based on Historical CoinMarketCap Snapshots.

In [10]:
# Load tickers
tickers = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT", "XRPUSDT", "ADAUSDT", "AVAXUSDT", "DOGEUSDT", "TRXUSDT",
           "DOTUSDT", "LINKUSDT", "SHIBUSDT", "LTCUSDT", "BCHUSDT", "UNIUSDT"]
pd.read_excel('../data/asset_list.xlsm', dtype={'07.01.2024': 'Int64', '05.10.2025': 'Int64'}).iloc[:15, :2]

Unnamed: 0,Name,Symbol
0,Bitcoin,BTC
1,Ethereum,ETH
2,BNB,BNB
3,Solana,SOL
4,XRP,XRP
5,Cardano,ADA
6,Avalanche,AVAX
7,Dogecoin,DOGE
8,TRON,TRX
9,Polkadot,DOT


### Benchmark
#### Basic pair selection
- ##### Pair Selection Range: 01.01.2024 - 01.03.2024
- ##### Interval: 1h

In [11]:
# Load data
from modules.pair_selection.statistical_tests import perform_statistical_tests
from modules.data_services.data_pipeline import load_data

ps_start = "2024-01-01"
ps_end = "2024-03-01"
interval = "1h"

df = load_data(
    tickers=tickers,
    start=ps_start,
    end=ps_end,
    interval=interval
)

In [7]:
# Statistical tests
corr_coint_tests = perform_statistical_tests(df)
display(corr_coint_tests)

NameError: name 'perform_statistical_tests' is not defined

#### Global parameters
- ##### Trading Range: 01.03.2024 - 01.04.2024
- ##### Interval: 1h
- ##### Fee Rate: 0.05% per Transaction
- ##### Position Size: Static, Always 100% (50% Long, 50% Short)

In [12]:
# Set parameters
trading_start = "2024-03-01"
trading_end = "2024-04-01"
fee_rate = 0.0005  # 0.05%
position_size = 1  # always 100% of portfolio

#### 1. Benchmark (Gatev et al. 2006)
- ##### Pair Selection Method: Top 5 of SSD of Cumulative Returns Test
- ##### Z-Score: Calculated from Cumulative Returns
- ##### Entry Threshold: Static
- ##### Exit Threshold: Static

In [13]:
# Pair Selection
from modules.pair_selection.statistical_tests import sum_of_standard_deviation

ssd_df = sum_of_standard_deviation(df)
pairs_1 = ssd_df.iloc[0:5, 0].tolist()
display(ssd_df.head(5))

Unnamed: 0,pair,ssd
0,DOGEUSDT-SHIBUSDT,1.316605
1,XRPUSDT-DOTUSDT,2.393193
2,ADAUSDT-DOTUSDT,2.591386
3,SHIBUSDT-LTCUSDT,3.702435
4,ADAUSDT-AVAXUSDT,3.805218


In [9]:
# Optimize hyperparameters
from skopt.space import Real, Integer

from modules.data_services.param_optimization import bayesian_optimization
from modules.performance.debug import run_strategy


param_space = [
    Integer(2, 30*24, name='window_in_steps'),
    Real(0.0, 10.0, name='entry_threshold'),
    Real(0.0, 10.0, name='exit_threshold'),
]

static_params = {
    'pairs': pairs_1,
    'trading_start': trading_start,
    'trading_end': trading_end,
    'interval': interval,
    'position_size': position_size,
    'z_score_method': 'cum_returns',
    'fee_rate': fee_rate
}

best_params_1, best_score_1, res_1 = bayesian_optimization(
    strategy_func=run_strategy,
    param_space=param_space,
    static_params=static_params,
    n_calls=40,
    random_state=42,
    minimize=False,
    metric_path=('sharpe_ratio', '0.05% fee', 'Summary')
)
print(f"Parameters: {best_params_1}")
print(f"Score: {best_score_1}")

Parameters: {'window_in_steps': np.int64(70), 'entry_threshold': 5.790528112881987, 'exit_threshold': 4.443517073270073}
Score: 3.5200037429160247


In [14]:
# Perform strategy
from modules.visualization.plots import plot_summary_pnl
from modules.performance.debug import run_strategy


# window_in_steps = best_params_1['window_in_steps']
# entry_threshold = best_params_1['entry_threshold']
# exit_threshold = best_params_1['exit_threshold']
window_in_steps = 100
entry_threshold = 2
exit_threshold = 0.5

portfolio_1 = run_strategy(
    pairs=pairs_1,
    trading_start=trading_start,
    trading_end=trading_end,
    interval=interval,
    position_size = position_size,
    z_score_method='rolling_beta',
    fee_rate=fee_rate,
    window_in_steps=window_in_steps,
    entry_threshold=entry_threshold,
    exit_threshold=exit_threshold
)

UnboundLocalError: cannot access local variable 'w_x' where it is not associated with a value

In [54]:
portfolio_1.pairs_data[0].data.to_excel("wyniki.xlsx", index=True)

In [15]:
# Show benchmark
plot_summary_pnl(portfolio_1, '1')
summary_brutto = portfolio_1.summary.xs("0% fee", level=1, axis=1)
summary_netto = portfolio_1.summary.xs(f"{portfolio_1.fee_rate * 100}% fee", level=1, axis=1)
display(Markdown("#### 0% fee"))
display(summary_brutto)

display(Markdown(f"#### {portfolio_1.fee_rate * 100}% fee"))
display(summary_netto)

#### 0% fee

Metric,total_return,cagr,volatility,annual_volatility,max_drawdown,win_count,lose_count,win_rate,max_win_pos,max_lose_pos,avg_trade_return,sharpe_ratio,sortino_ratio,calmar_ratio
Pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
DOGEUSDT-SHIBUSDT,-0.002289,-0.024269,8e-05,0.007494,-0.002289,0.0,1.0,0.0,0.0,-0.002289,-0.002289,-3.27447,,-10.603654
XRPUSDT-DOTUSDT,0.005232,0.05755,0.000183,0.017133,0.0,1.0,0.0,1.0,0.005232,0.0,0.005232,3.27447,,
ADAUSDT-DOTUSDT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
SHIBUSDT-LTCUSDT,0.034046,0.431847,0.001191,0.111481,0.0,1.0,0.0,1.0,0.034046,0.0,0.034046,3.27447,,
ADAUSDT-AVAXUSDT,0.003833,0.041875,0.000134,0.012552,0.0,1.0,0.0,1.0,0.003833,0.0,0.003833,3.27447,,
Summary,0.040822,0.535729,0.001213,0.113562,-0.002213,3.0,1.0,0.75,0.006809,-0.000458,0.010206,3.833295,,242.042827


#### 0.05% fee

Metric,total_return,cagr,volatility,annual_volatility,max_drawdown,win_count,lose_count,win_rate,max_win_pos,max_lose_pos,avg_trade_return,sharpe_ratio,sortino_ratio,calmar_ratio
Pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
DOGEUSDT-SHIBUSDT,-0.003271,-0.034523,9.9e-05,0.009223,-0.003271,0.0,2.0,0.0,0.0,-0.002771,-0.001636,-3.804426,-0.233298,-10.553263
XRPUSDT-DOTUSDT,0.004223,0.046215,0.000166,0.01556,-0.0005,1.0,1.0,0.5,0.004723,-0.0005,0.002111,2.911324,,92.430283
ADAUSDT-DOTUSDT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
SHIBUSDT-LTCUSDT,0.03305,0.417142,0.001175,0.109929,-0.0005,1.0,1.0,0.5,0.03355,-0.0005,0.016525,3.225279,,834.284556
ADAUSDT-AVAXUSDT,0.002822,0.030681,0.000118,0.011009,-0.0005,1.0,1.0,0.5,0.003322,-0.0005,0.001411,2.750484,,61.361389
Summary,0.036824,0.473643,0.001196,0.111894,-0.003168,3.0,5.0,0.375,0.00671,-0.000554,0.004603,3.520004,4.037261,149.501752


#### 2. Benchmark (Yang & Malik, 2024)
- ##### Pair Selection Method: Top 5 of Correlation of Log Returns and Engle-Granger Cointegration Test (Yang & Malik 2024)
- ##### Z-Score: calculated from cumulative returns
- ##### Entry Threshold: static
- ##### Exit Threshold: static

In [None]:
# Pair Selection
from modules.data_services.data_pipeline import merge_by_pair
from modules.pair_selection.statistical_tests import pearson_correlation, engle_granger_cointegration

corr_df = pearson_correlation(df, source='prices')
eg_df = engle_granger_cointegration(df)
merged_df = merge_by_pair(
    dfs=[corr_df, eg_df],
    keep_cols=[
        ['corr_prices'],
        ['eg_p_value']
    ]
)
merged_df['corr_prices * (1 - eg_p_value)'] = merged_df['corr_prices'] * (1 - merged_df['eg_p_value'])
merged_df.sort_values(by=['corr_prices * (1 - eg_p_value)'], ascending=False, inplace=True)
merged_df.reset_index(drop=True, inplace=True)
pairs_2 = merged_df.iloc[0:5, 0].tolist()
display(merged_df.head(5))

In [None]:
# Optimize hyperparameters
best_params_2 = ...

In [None]:
# Perform strategy
from modules.visualization.plots import plot_summary_pnl


window_in_steps = best_params_2['window_in_steps']
entry_threshold = best_params_2['entry_threshold']
exit_threshold = best_params_2['exit_threshold']

portfolio_2 = run_strategy(
    pairs=pairs_2,
    trading_start=trading_start,
    trading_end=trading_end,
    interval=interval,
    position_size=position_size,
    z_score_method='rolling_beta',
    fee_rate=fee_rate,
    window_in_steps=window_in_steps,
    entry_threshold=entry_threshold,
    exit_threshold=exit_threshold
)

In [None]:
# Show benchmark
plot_summary_pnl(portfolio_2, '2')
summary_brutto = portfolio_2.summary.xs("0% fee", level=1, axis=1)
summary_netto = portfolio_2.summary.xs(f"{portfolio_2.fee_rate * 100}% fee", level=1, axis=1)
display(Markdown("#### 0% fee"))
display(summary_brutto)

display(Markdown(f"#### {portfolio_2.fee_rate * 100}% fee"))
display(summary_netto)