# **Capstone project: Can ML Outperform Market-Neutral Trading in Structural Breaks?**

**Note:**

It is good practice to restart the session whenever changes are made to the GitHub repository.


## **Step 1:** GitHub setup

1) Clone/update the project's GitHub repository

In [None]:
# Run this on first use to clone the GitHub repo, and again whenever the repo has been updated.
# Delete any existing checkout first so the clone below starts from a clean directory.
%rm -rf /content/MarketNeutral_Trading_multiple_pairs/
%cd /content
!git clone https://github.com/WQU-Capstone-11205/MarketNeutral_Trading_multiple_pairs.git
# Switch into the repository root; later cells use paths relative to it (e.g. requirements.txt).
%cd /content/MarketNeutral_Trading_multiple_pairs/

/content
Cloning into 'MarketNeutral_Trading_multiple_pairs'...
remote: Enumerating objects: 87, done.
remote: Counting objects: 100% (87/87), done.
remote: Compressing objects: 100% (86/86), done.
remote: Total 87 (delta 27), reused 0 (delta 0), pack-reused 0 (from 0)
Receiving objects: 100% (87/87), 1.16 MiB | 6.97 MiB/s, done.
Resolving deltas: 100% (27/27), done.
/content/MarketNeutral_Trading_multiple_pairs


2) Add the project's GitHub repository path to the system path

In [None]:
import warnings
# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')
import sys
# Put the cloned repository on the import path so its packages can be imported by later cells.
sys.path.append('/content/MarketNeutral_Trading_multiple_pairs')

3) Install the project's required packages

In [None]:
# Install the packages listed in requirements.txt; the path is relative to the current working directory.
!pip install -r requirements.txt



## **Step 2:** Data loading

Load data and convert to distance spread

In [None]:
from util.ff_benchmark import get_ff_benchmark_returns
from data_loading.fetch_data import fetch_from_yfinance
from data_loading.PairsSpread import SP500PairSpread

# Full sample window and the boundary between in-sample and out-of-sample data.
start_date = '2005-01-01'  # alternative: '2019-01-01'
end_date = '2025-01-01'  # alternative: '2024-01-01'
in_sample_cutoff_date = '2022-01-01'  # alternative: '2017-01-01'; after this date the cointegration fails


def _split_at_cutoff(frame, cutoff):
    """Split a date-indexed Series/DataFrame into ``(train, test)`` with no shared rows.

    ``.loc`` label slices include both endpoints, so ``frame.loc[:cutoff]`` and
    ``frame.loc[cutoff:]`` would both contain a row dated exactly ``cutoff``.
    Such a row is kept in the test half only, so the training data never
    overlaps the evaluation data.
    """
    test = frame.loc[cutoff:]
    train = frame.loc[:cutoff]
    # Drop from train anything that also landed in test (at most the boundary date).
    train = train.loc[~train.index.isin(test.index)]
    return train, test


# Fetch a benchmark for alpha/beta calculation (e.g., SPY)
benchmark_data = fetch_from_yfinance(['SPY'], start_date, end_date)
benchmark_returns = benchmark_data.pct_change().dropna()
train_spy_returns, test_spy_returns = _split_at_cutoff(benchmark_returns, in_sample_cutoff_date)
ff_benchmark_returns = get_ff_benchmark_returns(in_sample_cutoff_date, end_date)

# NOTE(review): the selection window spans start_date..end_date, which includes the
# out-of-sample period; if pairs are chosen from data in that window this is
# look-ahead -- confirm against SP500PairSpread before relying on test results.
selector = SP500PairSpread(
    selection_start=start_date,
    selection_end=end_date,
    method="distance",
    pairs_per_sector=4
)

spread = selector.distance_spread()
train_spread, test_spread = _split_at_cutoff(spread, in_sample_cutoff_date)

# NOTE(review): the spreads take both signs (see train_spread.describe()), so a
# percentage change can blow up or flip sign near zero -- verify before using these.
spread_returns = spread.pct_change().dropna()
train_spread_returns, test_spread_returns = _split_at_cutoff(spread_returns, in_sample_cutoff_date)

[*********************100%***********************]  1 of 1 completed


Fetching SP500 pairs from Wikipedia...
Calculating spread...


[*********************100%***********************]  66 of 66 completed


In [None]:
# Summary statistics (count, mean, std, quantiles) for each pair's spread over the in-sample window.
train_spread.describe()

Unnamed: 0,AIG-C,HBAN-KEY,KEY-RF,HBAN-RF,GOOG-GOOGL,OMC-VZ,T-VZ,CMCSA-DIS,EXC-FE,AEP-DUK,...,RSG-WM,EMR-EXPD,HAL-SLB,APA-DVN,BKR-DVN,EQT-HAL,MDT-PFE,DGX-JNJ,DGX-PFE,BMY-CVS
count,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0,...,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0,4280.0
mean,5.821087,-0.307093,0.753031,0.242342,-0.163224,-2.681222,-0.062746,-1.206771,-1.054238,-0.563224,...,-1.263534,0.916288,-0.879826,4.878386,0.745935,-1.103581,2.109713,-0.537016,-2.42105,-0.739538
std,98.399855,1.185104,0.990434,1.528055,0.622632,8.67752,1.501129,2.192718,2.749002,4.820232,...,2.958463,7.409852,4.69264,9.03881,7.082437,5.723286,8.122337,9.232925,10.933464,5.156148
min,-271.078033,-5.222305,-3.516929,-4.796867,-1.672613,-37.062639,-4.61743,-10.614651,-8.777939,-9.771523,...,-11.60534,-28.720684,-15.819373,-22.612242,-20.299948,-17.43291,-52.575482,-32.526829,-38.596586,-13.499035
25%,-48.380334,-0.530445,0.058661,-0.509424,-0.339697,-6.534239,-0.646327,-2.119374,-2.774988,-3.808207,...,-2.680981,-2.790261,-4.48545,-1.931387,-4.188475,-4.967546,-2.890675,-7.778832,-11.254719,-3.573509
50%,-32.145687,0.019126,0.677491,0.656787,-0.065888,-0.730952,0.239755,-0.877538,-0.25224,-0.975035,...,-0.555367,0.81473,-0.723511,3.803082,1.64105,-0.854595,2.045907,2.1732,-0.206425,-1.492666
75%,-17.969971,0.415916,1.584427,1.353569,-0.045157,3.510241,0.828456,0.223641,0.744938,0.899417,...,0.731074,4.513111,2.915265,12.162998,6.124055,2.756196,5.711113,6.587581,5.723389,1.635664
max,362.497064,1.439274,2.987145,2.635334,5.59565,11.537453,3.387543,4.889337,6.950719,17.851456,...,4.91241,19.885394,11.205264,27.337386,16.524618,15.196343,28.354925,29.95372,18.874905,17.107266


## **Final Tuning/Training:**

In [None]:
from train.train_loop_rl import train_loop_rl

# Hyperparameter sets passed to the training loop below, one dict per
# train_loop_rl keyword (bocpd_params, vae_params, rl_params, joint_params).
bocpd_params2 = {
    "hazard": 20,
    "mu": 0,
    "kappa": 0.3,
    "alpha": 1.0,
    "beta": 0.8,
}

vae_params2 = {
    'input_dim': 2,
    'latent_dim': 12,
    'hidden_dim': 256,
    'lr': 0.001,
    'vae_seq_len': 1,
    'kl_wt': 0.01,
}

# Previously tried as an extra key: 'actor_l2': 1e-3
rl_params2 = {
    'state_dim': 12,
    'hidden_dim': 64,
    'lr': 1e-03,
    'gamma': 0.99,
    'cp_weight': 0.0,
    'var_penalty': 0.01,
    'var_window': 20,
    'dd_penalty': 0.0,
    'dd_threshold': 0.2,
    'entropy_coef': 0.01,
    'tau': 0.005,
}

# Previously tried: 'update_every': 50
joint_params2 = {
    'state_window': 25,
    'base_action_sigma': 0.0,
    'wt_multplier': 1.5,
    'buffer_size_updates': 128,
    'sample_batch_size': 16,
    'transaction_cost': 0.001,
    'tc_scale': 0.3,
    'exploration_alpha': 0.8,
    'update_every': 25,
}

# Train on the in-sample spreads for 20 epochs.
train_loop_rl(
    spreads=train_spread,
    bocpd_params=bocpd_params2,
    vae_params=vae_params2,
    rl_params=rl_params2,
    joint_params=joint_params2,
    num_epochs=20,
)

In [None]:
from backtest.evaluate_loop_rl import evaluate_loop_rl

# Evaluate on the out-of-sample spreads, reusing the hyperparameter dicts
# that were passed to the training loop.
test_metrics, results = evaluate_loop_rl(
    spreads=test_spread,
    bocpd_params=bocpd_params2,
    vae_params=vae_params2,
    rl_params=rl_params2,
    joint_params=joint_params2,
    use_trained_rms=True,
)

## **Step 6:** Test metrics for the BOCPD+VAE+RL pipeline:

In [None]:
import numpy as np
from metrics.stats import sharpe_ratio, compute_max_drawdown, sortino_ratio, annual_volatility, alpha_beta

print("Test metrics for BOCPD+VAE+RL model:")

# Portfolio PnL from the evaluation run and its running total; both are
# reused by every metric printed below.
portfolio_pnl = test_metrics['portfolio_pnl']
cum_pnl = np.cumsum(portfolio_pnl)

print(f"Cummulative profit and loss                       : {cum_pnl[-1]:.6f}")
print(f"Sharpe Ratio                                      : {sharpe_ratio(portfolio_pnl):.6f}")
print(f"Sortino ratio                                     : {sortino_ratio(portfolio_pnl):.6f}")
print(f"Max drawdown                                      : {compute_max_drawdown(cum_pnl):.6f}")
print(f"Annual Volatility                                 : {annual_volatility(portfolio_pnl):.6f}")

# Alpha/beta against the factor benchmark loaded for the out-of-sample window.
alpha_ff, beta_ff = alpha_beta(portfolio_pnl, ff_benchmark_returns)
print(f"Factor-adjusted Alpha-Beta                        : alpha = {alpha_ff:.6f} , beta = {beta_ff:.6f}")

# Alpha/beta against SPY returns over the out-of-sample window.
alpha_spy, beta_spy = alpha_beta(portfolio_pnl, test_spy_returns)
print(f"CAPM Alpha-Beta versus SPY                        : alpha = {alpha_spy:.6f} , beta = {beta_spy:.6f}")
