In [10]:
import pandas as pd
from IPython.display import display

from modules.data_services.data_models import PortfolioData
from modules.data_services.data_pipeline import load_pair, load_data
from modules.data_services.normalization import cumulative_returns_index
from modules.data_services.z_score_calculation import calculate_rolling_zscore
from modules.pair_selection.statistical_tests import perform_statistical_tests, sum_of_standard_deviation
from modules.performance.strategy import generate_action_space, benchmark_strategy, calculate_stats
from modules.visualization.plots import plot_zscore, plot_pnl, plot_positions

### Asset list
Market Cap Ranking Comparison Based on Historical CoinMarketCap Snapshots.

In [2]:
# Load the asset workbook; the two date-named columns are read as nullable Int64.
asset_list = pd.read_excel(
    '../data/asset_list.xlsm',
    dtype={'07.01.2024': 'Int64', '05.10.2025': 'Int64'},
)
# Show the first 15 rows, restricted to the first two columns.
asset_list.iloc[:15, :2]

Unnamed: 0,Name,Symbol
0,Bitcoin,BTC
1,Ethereum,ETH
2,BNB,BNB
3,Solana,SOL
4,XRP,XRP
5,Cardano,ADA
6,Avalanche,AVAX
7,Dogecoin,DOGE
8,TRON,TRX
9,Polkadot,DOT


In [3]:
# Symbols of the 15 instruments used throughout the notebook (five per row).
tickers = [
    "BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT", "XRPUSDT",
    "ADAUSDT", "AVAXUSDT", "DOGEUSDT", "TRXUSDT", "DOTUSDT",
    "LINKUSDT", "SHIBUSDT", "LTCUSDT", "BCHUSDT", "UNIUSDT",
]

### Benchmark

#### 1.1 Basic pair selection
- ##### Pair Selection Range: 01.01.2024 - 01.03.2024
- ##### Interval: 1h

In [4]:
# Pair-selection window and bar interval; `interval` is reused by the benchmark cell further down.
ps_start, ps_end = "2024-01-01", "2024-03-01"
interval = "1h"

# Load data for all tickers over the selection window.
df = load_data(tickers=tickers, start=ps_start, end=ps_end, interval=interval)

In [5]:
# Run the statistical tests on the frame returned by load_data and show the result.
# The recorded output has one row per pair with the columns corr_prices, corr_returns,
# corr_log_returns, eg_p_value and "trace_stat - crit_95".
corr_coint_tests = perform_statistical_tests(df)
display(corr_coint_tests)

Unnamed: 0,pair,corr_prices,corr_returns,corr_log_returns,eg_p_value,trace_stat - crit_95
0,ADAUSDT-AVAXUSDT,0.844651,0.762790,0.764557,0.299380,-5.955607
1,ADAUSDT-BCHUSDT,0.846158,0.599222,0.605413,0.060705,-0.529435
2,ADAUSDT-DOGEUSDT,0.734129,0.582758,0.596864,0.814421,-10.053250
3,ADAUSDT-DOTUSDT,0.917982,0.824841,0.826138,0.132649,-4.259059
4,ADAUSDT-LINKUSDT,0.544536,0.697393,0.700111,0.680250,-4.715633
...,...,...,...,...,...,...
100,XRPUSDT-LINKUSDT,-0.214749,0.625080,0.629183,0.903945,-1.871872
101,XRPUSDT-LTCUSDT,0.473618,0.658723,0.666960,0.912118,-6.890417
102,XRPUSDT-SHIBUSDT,0.703735,0.646653,0.654380,0.993766,-7.244699
103,XRPUSDT-TRXUSDT,-0.126484,0.411531,0.414024,0.990972,-6.429498


#### 1.2.1 Benchmark (Gatev et al. 2006)
- ##### Pair Selection Method: Top 5 of SSD of Cumulative Returns Test
- ##### Trading Range: 01.03.2024 - 01.04.2024
- ##### Interval: 1h
- ##### Z-Score: Calculated from Cumulative Returns
- ##### Entry Threshold: Static, 2σ / -2σ
- ##### Exit Threshold: Static, 0σ
- ##### Position Size: Static, Always 100% (50% Long, 50% Short)
- ##### Fee Rate: 0.05% per Position

In [11]:
# Build the SSD table from the cumulative-returns index of `df` and show its first five rows.
# FIXME: the last recorded run of this cell raised
#   AttributeError: 'DataFrame' object has no attribute 'data'
# NOTE(review): `df` is what load_data returned, whereas the benchmark loop passes the
# result of load_pair to cumulative_returns_index. Presumably one of the two helpers
# called here expects that pair object rather than a plain DataFrame - confirm and
# adapt the input before relying on `ssd_df` downstream.
ssd_df = sum_of_standard_deviation(cumulative_returns_index(df))
display(ssd_df.head(5))

AttributeError: 'DataFrame' object has no attribute 'data'

In [None]:
# Benchmark 1.2.1: run the static-threshold strategy on the first five pairs of `ssd_df`.
# All imports used here live in the notebook's top import cell.

# Trading window.
b_start = "2024-03-01"
b_end = "2024-04-01"

# Strategy parameters (see the markdown spec above this cell).
fee_rate = 0.0005
entry_threshold = 2
exit_threshold = 0
position_size = 1
initial_cash = 1_000_000

# First column of the first five rows of ssd_df; each entry is an "X-Y" label (split on '-' below).
pairs = ssd_df.iloc[0:5, 0].tolist()

portfolio = PortfolioData()
for pair in pairs:
    x, y = pair.split('-')
    data = load_pair(x=x, y=y, start=b_start, end=b_end, interval=interval)
    normalized_data = cumulative_returns_index(data)
    # FIXME: `rolling_window=...` passes the Ellipsis placeholder; set a real window length
    # before running this cell.
    calculate_rolling_zscore(normalized_data, rolling_window=...)
    # NOTE(review): the z-score and action space are applied to `normalized_data`, while the
    # strategy, stats and plots below receive `data`. That only lines up if
    # cumulative_returns_index returns the same object it was given - confirm.
    generate_action_space(
        pair_data=normalized_data,
        entry_threshold=entry_threshold,
        exit_threshold=exit_threshold,
        position_size=position_size,
    )
    benchmark_strategy(pair_data=data, initial_cash=initial_cash, fee_rate=fee_rate)
    data.fee_rate = fee_rate
    data.stats = calculate_stats(data)
    portfolio.pairs_data.append(data)
    plot_zscore(data, '1.2.1', True)
    plot_pnl(data, '1.2.1')
    plot_positions(data, '1.2.1')

In [None]:
# Inspect every pair stored in the portfolio: its two symbols, its data and its stats.
for pair_data in portfolio.pairs_data:
    print(pair_data.x, pair_data.y)
    # display() uses the notebook's rich representation instead of print()'s plain-text dump.
    display(pair_data.data)
    display(pair_data.stats)

In [None]:
# Collect the headline metrics of every pair into one summary table.
# The "_brutto"/"_netto" keys are presumably the before-/after-fee variants - confirm
# against calculate_stats.
metric_names = (
    "total_return_brutto",
    "total_return_netto",
    "sharpe_brutto",
    "sharpe_netto",
    "max_drawdown_brutto",
    "max_drawdown_netto",
)

rows = []
for pair_data in portfolio.pairs_data:
    x, y = pair_data.x, pair_data.y
    stats = pair_data.stats
    # One record per pair: the "X-Y" label first, then the metrics in the order listed above.
    row = {"pair": f"{x}-{y}"}
    row.update((name, stats[name]) for name in metric_names)
    rows.append(row)

results_df = pd.DataFrame(rows)
results_df

#### 1.2.2 Benchmark
- ##### Pair Selection Method: Top 5 of Correlation of Log Returns and Engle-Granger Cointegration Test (Yang & Malik 2024)
- ##### Trading Range: 01.03.2024 - 01.04.2024
- ##### Interval: 1h
- ##### Z-Score: Calculated from Cumulative Returns
- ##### Entry Threshold: Static, 2σ / -2σ
- ##### Exit Threshold: Static, 0σ
- ##### Position Size: Static, Always 100% (50% Long, 50% Short)
- ##### Fee Rate: 0.05% per Position

#### 2.1.1 Pair Selection
- ##### Pair Selection Method: Top 5 of Correlation of Log Returns and Engle-Granger Cointegration Test (Yang & Malik 2024)
- ##### Trading Range: 01.03.2024 - 01.04.2024
- ##### Interval: 1h
- ##### Z-Score: Calculated from Cumulative Returns
- ##### Entry Threshold: Static, 2σ / -2σ
- ##### Exit Threshold: Static, 0σ
- ##### Position Size: Static, Always 100% (50% Long, 50% Short)
- ##### Fee Rate: 0.05% per Position