# Deep Reinforcement Learning for Portfolio Optimization


This experiment demonstrates the application of deep reinforcement learning (DRL) techniques for portfolio optimization.

- The experiment leverages the **Attention Feature Extractor** to integrate **temporal self-attention** and **inter-asset graph-attention** mechanisms. 
- This feature extractor processes historical data and asset relationships to generate embeddings that are used by reinforcement learning models for portfolio optimization.
- The **inter-asset graph-attention** captures cross-asset relationships.
- Applies a **Multi-Head self-attention** mechanism across the per-asset embeddings, enabling each asset to “attend” to its peers before passing through the final MLP for feature extraction.

Algorithms:
- **Off-policy algorithms**: `SAC`, `DDPG`, `TD3` (use Temporal Self Attention-based models).  
- **On-policy algorithms**: `A2C`, `PPO` (do not use Temporal Self Attention-based models).

## Dependencies


In [19]:
# ! pip install pandas numpy matplotlib \
#                ipywidgets \
#                xarray \
#                stable-baselines3 \
#                PyPortfolioOpt \
#                pandas_market_calendars quantstats gymnasium \
#                git+https://github.com/AI4Finance-Foundation/FinRL.git -q

In [20]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import time
from tqdm.auto import tqdm

import torch
import torch.nn as nn
from torch.nn import TransformerEncoderLayer, LayerNorm

import gymnasium as gym
from gymnasium import ObservationWrapper
 
from stable_baselines3 import A2C, PPO, SAC, DDPG, TD3
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

from finrl import config_tickers
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import (
    backtest_stats,
    get_daily_return,
    get_baseline, backtest_plot
)
from finrl.meta.env_portfolio_optimization.env_portfolio_optimization import (
    PortfolioOptimizationEnv,
)
from finrl.agents.portfolio_optimization.models import DRLAgent as PGAgent
from finrl.agents.portfolio_optimization.architectures import EIIE

from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models

## Configs

In [21]:
import warnings

# Suppress FutureWarning messages so they do not clutter the cell output.
warnings.filterwarnings("ignore", category=FutureWarning)

# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline

In [None]:
# Pick the GPU when torch reports one, otherwise fall back to the CPU.
_backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)
print(f"Using device: {device}")

# All artefacts of this run (saved models, CSV summaries) live under results_dir.
experiment_name = "attention_grapgh_multi_agent"
results_dir = "results/models/" + experiment_name
os.makedirs(results_dir, exist_ok=True)

Using device: cpu


## Data loading and pre-processing


Define training and trading/test periods


In [23]:
# Download window: fixed start, ending yesterday relative to when the cell runs.
start_date = "2015-01-01"
end_date = (datetime.now() - pd.Timedelta(days=1)).strftime("%Y-%m-%d")  # Yesterday

trade_period = 2  # 2 years for testing
train_period = 10  # 10 years for training

# Parse end_date once, do the arithmetic on datetime objects, and format at the
# end. A "year" here is exactly 365 days.
_date_fmt = "%Y-%m-%d"
_end_day = datetime.strptime(end_date, _date_fmt)
_train_end_day = _end_day - timedelta(days=trade_period * 365)

train_end_date = _train_end_day.strftime(_date_fmt)
train_start_date = (_train_end_day - timedelta(days=train_period * 365)).strftime(
    _date_fmt
)
# The test window starts the day after the training window ends (no overlap).
test_start_date = (_train_end_day + timedelta(days=1)).strftime(_date_fmt)

# NOTE(review): train_start_date is derived only from end_date, so it can fall
# before start_date (the earliest downloaded day); in that case the training
# split can only begin where the downloaded data begins.
train_dates = (train_start_date, train_end_date)
test_dates = (test_start_date, end_date)

print(f"Training period: {train_dates}")
print(f"Testing period: {test_dates}")

Training period: ('2013-05-05', '2023-05-03')
Testing period: ('2023-05-04', '2025-05-02')


- Fetch historical stock data for a given list of tickers within a specified date range.
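- By default we use a 7-stock subset (`AAPL`, `MSFT`, `GOOGL`, `AMZN`, `META`, `TSLA`, `JPM`); the full `DOW_30_TICKER` list can be enabled by uncommenting one line in the cell below.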
- The data includes `date`, `close`, `high`, `low`, `open`, `volume`, and `tic` (ticker symbol).


In [24]:
def download_data(tickers, start_date, end_date):
    """Download price history for ``tickers`` via ``YahooDownloader``.

    Args:
        tickers: List of ticker symbols, forwarded as ``ticker_list``.
        start_date: First day of the window, forwarded unchanged.
        end_date: Last day of the window, forwarded unchanged.

    Returns:
        Whatever ``YahooDownloader(...).fetch_data()`` returns.
    """
    print(f"Downloading {start_date} → {end_date}")
    downloader = YahooDownloader(
        start_date=start_date,
        end_date=end_date,
        ticker_list=tickers,
    )
    return downloader.fetch_data()


# Uncomment the following line to use the DOW 30 tickers
# tickers = config_tickers.DOW_30_TICKER
# NOTE: prepare_model_configs() hard-codes n_assets=7 for the feature extractor;
# keep that value in sync with the length of this list if it changes.
tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "META", "TSLA", "JPM"]
df = download_data(tickers, start_date, end_date)

Downloading 2015-01-01 → 2025-05-02


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (18186, 8)


### Feature Engineering

We apply feature engineering to the dataset of stock data:

- Add technical indicators (e.g., moving averages, RSI).
- Optionally calculate turbulence indicators, which measure market volatility (disabled in this run via `use_turbulence=False`).

This enhances the dataset with features that are critical for modeling market dynamics and making informed trading decisions.


In [25]:
def preprocess_data(df):
    """Add technical indicators to ``df`` with FinRL's ``FeatureEngineer``.

    The engineer is configured with the MACD, CCI, RSI and DX indicators and
    with turbulence disabled.

    Args:
        df: Raw price frame, passed to ``FeatureEngineer.preprocess_data``.

    Returns:
        The frame returned by ``FeatureEngineer.preprocess_data``.
    """
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=["macd", "cci", "rsi", "dx"],  # TODO: Check
        use_turbulence=False,
    )
    enriched = engineer.preprocess_data(df)
    return enriched


df_feat = preprocess_data(df)

# TODO: Normalise the data??

Successfully added technical indicators


## Covariance & Returns for State


- Calculate the rolling covariance matrices and daily returns for the given dataset of stock prices.
- This prepares the state representation (the state of the portfolio) for the RL models in the RL environments for portfolio optimization.
- The **rolling covariance matrices** (`cov_list`) capture the relationships between asset returns, while the daily returns (`return_list`) provide information about recent price movements.
- These metrics are critical for modeling the dynamics of the financial market and making informed trading decisions.


In [26]:
def compute_covariance_and_returns(df_feat, lookback=252):
    """Attach a rolling covariance matrix and return window to every date.

    For each date from position ``lookback`` onwards, the close prices of the
    current date and the ``lookback`` preceding dates are pivoted to a
    date x ticker table, turned into percentage returns, and summarised as a
    covariance matrix.

    Args:
        df_feat: Long-format frame with at least ``date``, ``tic`` and
            ``close`` columns.
        lookback: Number of preceding dates included in every window.

    Returns:
        ``df_feat`` left-merged with ``cov_list`` (covariance matrix as an
        array) and ``return_list`` (the window's return frame). Rows whose date
        has no covariance, i.e. the first ``lookback`` dates, are dropped.
    """
    print(f"Computing covariance and returns with lookback {lookback}")

    ordered = df_feat.sort_values(["date", "tic"], ignore_index=True)
    # Index rows by the ordinal of their date so .loc can slice whole days.
    ordered.index = ordered.date.factorize()[0]
    unique_dates = ordered.date.unique()

    covariances = []
    window_returns = []
    for pos in tqdm(
        range(lookback, len(unique_dates)),
        total=len(unique_dates) - lookback,
        desc="Computing cov/returns",
    ):
        # Label slicing is inclusive on both ends: lookback + 1 days of prices,
        # hence lookback rows of returns after the first NaN row is dropped.
        window = ordered.loc[pos - lookback : pos]
        closes = window.pivot_table(index="date", columns="tic", values="close")
        returns = closes.pct_change().dropna()
        covariances.append(returns.cov().values)
        window_returns.append(returns)

    df_cov = pd.DataFrame(
        {
            "date": unique_dates[lookback:],
            "cov_list": covariances,
            "return_list": window_returns,
        }
    )

    merged = pd.merge(df_feat, df_cov, on="date", how="left")
    return merged.dropna(subset=["cov_list"])


df_all = compute_covariance_and_returns(df_feat)

Computing covariance and returns with lookback 252


Computing cov/returns:   0%|          | 0/2346 [00:00<?, ?it/s]

In [27]:
# Sanity check: size of the frame after the covariance/return columns were merged in.
display(f"Shape of df_all: {df_all.shape}")

'Shape of df_all: (16422, 14)'

## Train/Trade split


In [28]:
def split_data(df_all, train_dates, test_dates):
    """Cut ``df_all`` into a training and a testing frame with ``data_split``.

    Args:
        df_all: Full feature frame.
        train_dates: Positional arguments for ``data_split`` after the frame
            (here a ``(start, end)`` pair).
        test_dates: Same, for the test window.

    Returns:
        ``(train, test)`` as returned by the two ``data_split`` calls.
    """
    print("Splitting data into train and test sets")

    train, test = (
        data_split(df_all, *window) for window in (train_dates, test_dates)
    )
    return train, test


train_df, test_df = split_data(df_all, train_dates, test_dates)

print(f"Train shape: {train_df.shape}")
print(f"Test shape: {test_df.shape}")

Splitting data into train and test sets
Train shape: (12915, 14)
Test shape: (3500, 14)


## Environment setup


Casts every observation to np.float16.

In [29]:
print("Setting up the environment")


class CastObservationToFloat16(ObservationWrapper):
    """Observation wrapper that converts every observation to ``np.float16``.

    The observation must expose ``astype`` (e.g. a NumPy array).

    NOTE(review): only the returned values are cast; ``observation_space`` is
    left as the wrapped env declares it.
    """

    def observation(self, obs):
        """Return ``obs`` cast to half precision."""
        return obs.astype(np.float16)

Setting up the environment


- Create instances of the **PortfolioOptimizationEnv** class for both training and testing datasets.
- It also wraps both environments (casting observations to `np.float16`) for use with Stable-Baselines3 (SB3).


In [30]:
def initialize_portfolio_env(df, time_window=30):
    """Build a float16-casting ``PortfolioOptimizationEnv`` over ``df``.

    Args:
        df: Feature frame for the period the environment should cover.
        time_window: History length handed to the environment.

    Returns:
        The environment wrapped in ``CastObservationToFloat16``.
    """
    env_settings = dict(
        initial_amount=100_000,
        comission_fee_pct=0.0025,  # keyword spelling must match the original call
        time_window=time_window,
        features=["close", "high", "low"],
        normalize_df=None,
        new_gym_api=True,
    )
    base_env = PortfolioOptimizationEnv(df, **env_settings)
    # Overwrite the env's frame with a copy of the input re-indexed from 0.
    base_env.df = df.reset_index(drop=True)

    # Observations leave the environment as float16.
    return CastObservationToFloat16(base_env)


train_env = initialize_portfolio_env(train_df, time_window=30)
test_env = initialize_portfolio_env(test_df, time_window=30)

In [31]:
# raise ValueError("Test error")

## Inter-Asset Graph Attention

Apply a Multi-Head self-attention across the per-asset embeddings, so each asset “attends” to its peers before the final MLP

In [32]:
class InterAssetGraphAttention(nn.Module):
    """Self-attention across assets followed by a residual LayerNorm.

    Every asset embedding acts as query, key and value, so each asset can
    attend to all others in the same sample.

    Args:
        d_model: Size of each asset embedding.
        nhead: Number of attention heads.
        dropout: Dropout probability inside the attention layer.
    """

    def __init__(self, d_model=64, nhead=4, dropout=0.1):
        super().__init__()
        # batch_first=True: inputs are (batch, n_assets, d_model).
        self.attn = nn.MultiheadAttention(
            d_model, nhead, dropout=dropout, batch_first=True
        )
        self.norm = LayerNorm(d_model)

    def forward(self, emb: torch.Tensor) -> torch.Tensor:
        """Return ``LayerNorm(emb + SelfAttention(emb))`` with the input's shape."""
        attended = self.attn(emb, emb, emb)[0]  # drop the attention weights
        return self.norm(emb + attended)

## Attention Feature Extractor


- Combines temporal self-attention per asset with inter-asset graph-attention,
- Then reduces via MLP to a fixed-size feature vector for SB3

In [33]:
class AttentionFeatureExtractor(BaseFeaturesExtractor):
    """SB3 feature extractor: temporal attention per asset, then attention across assets.

    Pipeline for an observation batch:
      1. the leading ``feature_dim * n_assets * time_window`` entries are
         reshaped to ``[batch, feature_dim, n_assets, time_window]``;
      2. each timestep is projected to ``d_model`` and every asset's sequence is
         passed through a one-layer Transformer encoder, then mean-pooled over
         time;
      3. the per-asset embeddings go through ``InterAssetGraphAttention``;
      4. the flattened embeddings are concatenated with the trailing
         ``n_assets`` entries of the observation and reduced by an MLP to 256
         features.

    NOTE(review): step 4 treats the trailing ``n_assets`` entries as the current
    portfolio weights. If the observation holds only the price history, that
    slice overlaps the tail of the history instead — TODO confirm against the
    environment's observation layout.
    """

    def __init__(
        self,
        observation_space,
        feature_dim: int,
        n_assets: int,
        time_window: int,
        d_model: int = 64,
        nhead: int = 4,
    ):
        """
        Args:
            observation_space: Space forwarded to ``BaseFeaturesExtractor``.
            feature_dim: Number of input features per asset (e.g. 3 for
                close/high/low).
            n_assets: Size of the portfolio (number of tickers).
            time_window: History length (number of timesteps).
            d_model: Embedding dimension used by both attention stages.
            nhead: Number of attention heads.
        """
        out_dim = 256  # size of the feature vector handed to the policy
        super().__init__(observation_space, features_dim=out_dim)

        self.feature_dim = feature_dim
        self.n_assets = n_assets
        self.time_window = time_window
        self.d_model = d_model

        # Per-timestep projection: feature_dim -> d_model.
        self.input_proj = nn.Linear(feature_dim, d_model)
        # Temporal encoder, applied to each asset's sequence independently.
        temporal_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, batch_first=True
        )
        self.temporal = nn.TransformerEncoder(temporal_layer, num_layers=1)
        # Attention across the per-asset embeddings.
        self.graph_attn = InterAssetGraphAttention(d_model=d_model, nhead=nhead)

        # MLP input: all asset embeddings plus the trailing n_assets entries.
        mlp_in_dim = n_assets * d_model + n_assets
        self.mlp = nn.Sequential(
            nn.Linear(mlp_in_dim, out_dim),
            nn.ReLU(),
            nn.Linear(out_dim, out_dim),
            nn.ReLU(),
        )

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        """Map a batch of observations to ``[batch, 256]`` features."""
        batch = obs.size(0)
        n, t = self.n_assets, self.time_window
        f, d = self.feature_dim, self.d_model
        flat = obs.reshape(batch, -1)

        # History block -> [batch, feature_dim, n_assets, time_window].
        history = flat[:, : f * n * t].reshape(batch, f, n, t)

        # 1) Temporal self-attention per asset.
        #    Move features last, project to d_model: [batch, n, t, d].
        tokens = self.input_proj(history.permute(0, 2, 3, 1))
        #    Fold assets into the batch so each asset sequence is encoded alone.
        tokens = tokens.reshape(batch * n, t, d)
        pooled = self.temporal(tokens).mean(dim=1)  # average over time
        asset_emb = pooled.reshape(batch, n, d)

        # 2) Attention across assets: [batch, n, d].
        asset_emb = self.graph_attn(asset_emb)

        # 3) Trailing n_assets entries of the observation (see class note).
        trailing = flat[:, -n:]

        # 4) Concatenate and 5) reduce with the MLP.
        merged = torch.cat([asset_emb.reshape(batch, n * d), trailing], dim=1)
        return self.mlp(merged)

## Model Configs


Configure model algorithms


In [34]:
# obs_shape = train_env.observation_space.shape  # (features, n_assets, time_window)


def prepare_model_configs(feature_dim=3, n_assets=7, time_window=30, d_model=64, nhead=4):
    """Return the list of algorithms to train with their hyper-parameters.

    The extractor dimensions used to be hard-coded; they are now parameters
    whose defaults reproduce the previous values, so calling the function with
    no arguments behaves as before.

    Args:
        feature_dim: Number of features per asset fed to the extractor.
        n_assets: Number of tickers; must match the environment's ticker count.
        time_window: History length; must match the environment's time window.
        d_model: Embedding size for the attention layers.
        nhead: Number of attention heads.

    Returns:
        A list of ``(model_class, model_name, model_kwargs)`` tuples. Only DDPG
        is active; the other entries are kept as commented-out alternatives.
    """

    policy_kwargs = dict(
        features_extractor_class=AttentionFeatureExtractor,
        features_extractor_kwargs=dict(
            feature_dim=feature_dim,  # number of features/asset
            n_assets=n_assets,  # your ticker count
            time_window=time_window,  # match your env
            d_model=d_model,
            nhead=nhead,
        ),
    )

    return [
        # (
        #     SAC,
        #     "SAC",
        #     dict(
        #         learning_rate=0.0007,
        #         gamma=0.99,
        #         buffer_size=50_000,
        #         tau=0.005,
        #         policy_kwargs=policy_kwargs,
        #     ),
        # ),
        # (
        #     TD3,
        #     "TD3",
        #     dict(
        #         learning_rate=0.0007,
        #         gamma=0.99,
        #         buffer_size=50_000,
        #         tau=0.005,
        #         policy_kwargs=policy_kwargs,
        #     ),
        # ),
        (
            DDPG,
            "DDPG",
            dict(
                learning_rate=0.0001,
                gamma=0.99,
                buffer_size=50_000,
                tau=0.005,
                policy_kwargs=policy_kwargs,
            ),
        ),
        # The A2C/PPO entries below do not use the attention extractor.
        # (
        #     A2C,
        #     "A2C",
        #     dict(
        #         learning_rate=0.0007,
        #         gamma=0.99,
        #         n_steps=5000,
        #         ent_coef=0.0001,
        #         policy_kwargs=dict(net_arch=[64, 64]),
        #     ),
        # ),
        # (
        #     PPO,
        #     "PPO",
        #     dict(
        #         learning_rate=0.0007,
        #         gamma=0.99,
        #         n_steps=2048,
        #         clip_range=0.2,
        #         policy_kwargs=dict(net_arch=[64, 64]),
        #     ),
        # ),
    ]


model_configs = prepare_model_configs()

## Training


Train multiple reinforcement learning (RL) models using the specified training environment and configuration.


In [35]:
def train_models(
    agent, model_configs, results_dir, total_timesteps=200_000, tb_log_prefix=None
):
    """Train, save and time every model listed in ``model_configs``.

    Args:
        agent: Object exposing ``get_model(model_name=, model_kwargs=,
            policy_kwargs=)`` and ``train_model(model, tb_log_name=,
            total_timesteps=)``.
        model_configs: Iterable of ``(model_class, model_name, model_kwargs)``.
            ``model_kwargs`` may contain a ``policy_kwargs`` entry. Neither dict
            is modified by this function.
        results_dir: Directory prefix for the saved model files.
        total_timesteps: Training budget per model.
        tb_log_prefix: Prefix for the TensorBoard run name. When ``None`` the
            module-level ``experiment_name`` is used, as before.

    Returns:
        ``(trained_models, training_times)``: two dicts keyed by model name,
        holding the trained model and the training duration in minutes.
    """
    if tb_log_prefix is None:
        tb_log_prefix = experiment_name

    training_times = {}
    trained_models = {}

    for _model_class, model_name, model_kwargs in model_configs:
        algo = model_name.lower()
        print(f"Training {model_name}...")
        start_time = time.time()

        # Work on private copies. Several configs may share one policy_kwargs
        # dict, so popping from the original would leak into the other entries
        # and into later calls.
        mk = dict(model_kwargs)
        pk = dict(mk.pop("policy_kwargs", None) or {})

        # For the off-policy algorithms, drop the SDE flags from policy_kwargs.
        if algo in ("sac", "td3", "ddpg"):
            pk.pop("use_sde", None)
            pk.pop("use_sde_at_warmup", None)

        model = agent.get_model(
            model_name=algo,
            model_kwargs=mk,
            policy_kwargs=pk,
        )

        trained_model = agent.train_model(
            model,
            tb_log_name=f"{tb_log_prefix}_{algo}",
            total_timesteps=total_timesteps,
        )

        # Save the model and record how long training took.
        model_path = f"{results_dir}/{algo}_model"
        trained_model.save(model_path)
        trained_models[model_name] = trained_model
        training_times[model_name] = (time.time() - start_time) / 60
        print(f"{model_name} training completed in {training_times[model_name]:.2f} minutes.")

    return trained_models, training_times


In [None]:
# Train every configured model on the training environment. The 50k-step
# budget overrides train_models' 200k default.
trained_models, training_times = train_models(
    DRLAgent(train_env), model_configs, results_dir, total_timesteps=50_000
)

Training DDPG...
{'learning_rate': 0.0001, 'gamma': 0.99, 'buffer_size': 50000, 'tau': 0.005}
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Initial portfolio value:100000
Final portfolio value: 522368.15625
Final accumulative portfolio value: 5.223681449890137
Maximum DrawDown: -0.4261020658406679
Sharpe ratio: 1.0658298148003997
Initial portfolio value:100000
Final portfolio value: 444811.6875
Final accumulative portfolio value: 4.448116779327393
Maximum DrawDown: -0.44828097333521755
Sharpe ratio: 0.9862650174738314
Initial portfolio value:100000
Final portfolio value: 501069.59375
Final accumulative portfolio value: 5.010695934295654
Maximum DrawDown: -0.4482803305300813
Sharpe ratio: 1.048955705071255
Initial portfolio value:100000
Final portfolio value: 431460.875
Final accumulative portfolio value: 4.314608573913574
Maximum DrawDown: -0.36687694625374445
Sharpe ratio: 1.0435834925190939
-------------------------------------
| rollou

In [None]:
# One row per trained model: its name and the training time in minutes.
training_times_df = pd.DataFrame(
    [(name, minutes) for name, minutes in training_times.items()],
    columns=["model", "training_duration (min)"],
)
# Persist the summary next to the saved models.
training_times_df.to_csv(f"{results_dir}/training_times.csv", index=False)

print("Training summary:")
display(training_times_df)

## Model loading


Load the trained models from disk for analysis, without the need for time-consuming retraining.


In [None]:
def load_models(model_configs, results_dir):
    """Load previously saved models from ``results_dir``.

    Args:
        model_configs: Iterable of ``(model_class, name, kwargs)``. Only the
            class (which must provide ``load(path)``) and the name are used.
        results_dir: Directory prefix that holds ``<name>_model.zip`` files,
            with ``<name>`` lower-cased.

    Returns:
        Dict mapping each name whose archive exists to the loaded model;
        names without an archive are reported and skipped.
    """
    loaded = {}
    for algo_cls, algo_name, _unused_kwargs in model_configs:
        archive = f"{results_dir}/{algo_name.lower()}_model.zip"
        if not os.path.exists(archive):
            print(f"No saved model found for {algo_name}.")
            continue
        print(f"Loading saved model for {algo_name}...")
        loaded[algo_name] = algo_cls.load(archive)
    return loaded


# If you already trained above you can skip this; otherwise:
# trained_models = load_models(model_configs, results_dir)

## Backtesting


- Evaluates the performance of the RL models/algorithms in a trading environment.
- We do this by calculating the **cumulative portfolio value** and **performance metrics** for each RL model.


In [None]:
# Starting capital shared by the RL backtests and the benchmark calculations.
env_kwargs = {"initial_amount": 100_000}

In [None]:
def manual_backtest(model, env, initial_amount):
    """Roll ``model`` through ``env`` once and record the resulting equity curve.

    Both environment APIs are accepted for ``reset`` as well as ``step``:
    gymnasium (``reset() -> (obs, info)``, 5-tuple ``step``) and legacy gym
    (``reset() -> obs``, 4-tuple ``step``).

    Args:
        model: Object with ``predict(obs, deterministic=True) -> (action, state)``.
        env: Environment to step through. Each ``info`` dict returned by
            ``step`` must contain an ``"end_time"`` entry.
        initial_amount: Starting capital of the simulated account.

    Returns:
        DataFrame with one row per step and the columns ``date`` (taken from
        ``info["end_time"]``), ``daily_return`` (the step reward) and
        ``account_value``.
    """
    reset_out = env.reset()
    # gymnasium returns (obs, info); legacy gym returns the observation alone.
    if (
        isinstance(reset_out, tuple)
        and len(reset_out) == 2
        and isinstance(reset_out[1], dict)
    ):
        obs = reset_out[0]
    else:
        obs = reset_out
    done = False

    # Start from the initial capital.
    portfolio_value = initial_amount

    dates, daily_rets, account_vals = [], [], []

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        result = env.step(action)

        # Handle the gymnasium vs. gym return signature.
        if len(result) == 5:
            obs, reward, terminated, truncated, info = result
            done = terminated or truncated
        else:
            obs, reward, done, info = result

        # NOTE(review): the reward is compounded as a simple return. If the
        # environment reports log returns this is only an approximation —
        # confirm against the environment's reward definition.
        portfolio_value *= 1 + reward

        daily_rets.append(reward)
        account_vals.append(portfolio_value)
        dates.append(info["end_time"])

    return pd.DataFrame(
        {
            "date": dates,
            "daily_return": daily_rets,
            "account_value": account_vals,
        }
    )


def backtest_rl_strategies_manual(models, test_env, env_kwargs):
    """Backtest every model in ``models`` on ``test_env``.

    Args:
        models: Dict mapping a display name to a trained model.
        test_env: Environment shared by all backtests.
        env_kwargs: Dict whose ``"initial_amount"`` entry is the starting
            capital.

    Returns:
        Dict mapping each name to ``{"df": equity curve sorted by date,
        "stats": result of backtest_stats on that curve}``.
    """
    summary = {}
    for label, policy in models.items():
        print(f"\nManual backtest: {label}")
        curve = manual_backtest(policy, test_env, env_kwargs["initial_amount"])
        # Sort chronologically and renumber the rows.
        curve = curve.sort_values("date").reset_index(drop=True)

        perf = backtest_stats(curve, value_col_name="account_value")
        summary[label] = {"df": curve, "stats": perf}
    return summary


results = backtest_rl_strategies_manual(trained_models, test_env, env_kwargs)

### Plotting


In [None]:
def plot_backtest_results():
    """Plot each entry of the global ``results`` against an SPY baseline.

    Reads the module-level ``results``, ``results_dir``, ``test_start_date``
    and ``end_date``. Also makes sure ``<results_dir>/backtest_plots`` exists.
    """
    plots_dir = f"{results_dir}/backtest_plots"
    os.makedirs(plots_dir, exist_ok=True)

    baseline_args = dict(
        baseline_start=test_start_date,
        baseline_end=end_date,
        baseline_ticker="SPY",
        value_col_name="account_value",
    )
    for label in results:
        print(f"Plotting {label}…")
        backtest_plot(account_value=results[label]["df"], **baseline_args)

plot_backtest_results()

In [None]:
def plot_cumulative_returns(results):
    """Draw one cumulative-return line per entry of ``results``.

    Args:
        results: Dict mapping a label to ``{"df": frame}``, where the frame has
            ``date`` and ``account_value`` columns.
    """
    plt.figure(figsize=(12, 8))
    for label, entry in results.items():
        curve = entry["df"]
        # Growth relative to the first recorded account value.
        start_value = curve["account_value"].iloc[0]
        growth = curve["account_value"] / start_value - 1
        plt.plot(curve["date"], growth, label=label)
    plt.title("Cumulative Returns vs. SPY")
    plt.xlabel("Date")
    plt.ylabel("Cumulative Return")
    plt.legend()
    plt.show()


plot_cumulative_returns(results)

## Benchmarks


In [None]:
def compute_mpt_benchmark(test, env_kwargs):
    """Simulate a daily re-optimised minimum-volatility portfolio.

    On every date except the last, the date's covariance matrix (first
    ``cov_list`` entry) is used to solve for long-only minimum-volatility
    weights. The whole account is invested at that date's closes and valued at
    the next date's closes.

    Args:
        test: Frame with ``date``, ``close`` and ``cov_list`` columns.
        env_kwargs: Dict whose ``"initial_amount"`` entry is the starting
            capital.

    Returns:
        ``{"df": frame with date and account_value, "stats": backtest_stats
        result}``.
    """
    trading_days = test.date.unique()
    account_curve = [env_kwargs["initial_amount"]]

    for today, tomorrow in zip(trading_days[:-1], trading_days[1:]):
        today_rows = test[test.date == today]
        tomorrow_rows = test[test.date == tomorrow]

        cov_matrix = np.array(today_rows.cov_list.values[0])
        frontier = EfficientFrontier(None, cov_matrix, weight_bounds=(0, 1))
        frontier.min_volatility()
        weights = np.array(list(frontier.clean_weights().values()))

        # Buy at today's closes, value the holdings at tomorrow's closes.
        holdings = weights * account_curve[-1] / today_rows.close.values
        account_curve.append(np.dot(holdings, tomorrow_rows.close.values))

    min_df = pd.DataFrame({"date": trading_days, "account_value": account_curve})
    stats_mpt = backtest_stats(min_df, value_col_name="account_value")
    return {"df": min_df, "stats": stats_mpt}


mpt_benchmark = compute_mpt_benchmark(test_df, env_kwargs)

In [None]:
def compute_equal_weighted_benchmark(test, env_kwargs):
    """Equal-weighted benchmark: average of the tickers' daily returns.

    The previous version grouped by date and called ``pct_change`` inside each
    group, which measures the price change from one ticker to the next on the
    same day. Returns are now computed per ticker over time and then averaged
    across tickers.

    Args:
        test: Long-format frame with ``date``, ``tic`` and ``close`` columns.
        env_kwargs: Dict whose ``"initial_amount"`` entry is the starting
            capital.

    Returns:
        ``{"df": frame with date, daily_return and account_value, "stats":
        backtest_stats result}``.
    """
    # One column per ticker, so pct_change runs along time for each ticker.
    prices = test.pivot_table(index="date", columns="tic", values="close").sort_index()
    # The first row has no previous day; treat its return as 0.
    ew_daily = prices.pct_change().fillna(0).mean(axis=1)

    ew_df = ew_daily.reset_index(name="daily_return")
    ew_df["account_value"] = (ew_df.daily_return + 1).cumprod() * env_kwargs[
        "initial_amount"
    ]
    stats_ew = backtest_stats(ew_df, value_col_name="account_value")
    return {"df": ew_df, "stats": stats_ew}


ew_benchmark = compute_equal_weighted_benchmark(test_df, env_kwargs)

In [None]:
def compute_equal_weighted_benchmark(df, initial_amount=100_000):
    """Equal-weighted benchmark built from per-ticker daily returns.

    Args:
        df: Long-format frame with ``date``, ``tic`` and ``close`` columns.
        initial_amount: Starting capital of the benchmark account.

    Returns:
        ``{"df": frame with date, daily_return and account_value, "stats":
        backtest_stats result}``.
    """
    # Wide table: one row per date, one column per ticker.
    closes = df.pivot_table(index="date", columns="tic", values="close")
    closes = closes.sort_index()

    # Daily return of every ticker (0 where undefined), averaged across tickers.
    per_ticker = closes.pct_change().fillna(0)
    portfolio_ret = per_ticker.mean(axis=1)

    # Equity curve.
    ew_df = pd.DataFrame(
        {"date": portfolio_ret.index, "daily_return": portfolio_ret.values}
    )
    ew_df["account_value"] = ew_df["daily_return"].add(1).cumprod().mul(initial_amount)

    # Performance statistics.
    stats_ew = backtest_stats(ew_df, value_col_name="account_value")

    return {"df": ew_df.reset_index(drop=True), "stats": stats_ew}


ew_benchmark = compute_equal_weighted_benchmark(test_df, env_kwargs["initial_amount"])

In [None]:
def compute_spy_benchmark(test, env_kwargs):
    """Buy-and-hold SPY benchmark over the date span of ``test``.

    Args:
        test: Frame with a ``date`` column; its min and max bound the download.
        env_kwargs: Dict whose ``"initial_amount"`` entry is the starting
            capital.

    Returns:
        ``{"df": frame with date, daily_return and account_value, "stats":
        backtest_stats result}``.
    """
    baseline = get_baseline("SPY", test.date.min(), test.date.max())
    # If the baseline keeps its dates in a column, index by it. Otherwise the
    # "date" column below would hold row numbers instead of dates.
    if "date" in baseline.columns:
        baseline = baseline.set_index("date")

    spy_ret = baseline["close"].pct_change().dropna()
    spy_df = pd.DataFrame({"date": spy_ret.index, "daily_return": spy_ret.values})
    spy_df["account_value"] = (spy_df.daily_return + 1).cumprod() * env_kwargs[
        "initial_amount"
    ]
    stats_spy = backtest_stats(spy_df, value_col_name="account_value")
    return {"df": spy_df, "stats": stats_spy}


spy_benchmark = compute_spy_benchmark(test_df, env_kwargs)

In [None]:
# Collect the three benchmarks under the labels used in the comparison tables.
benchmarks = dict(MPT=mpt_benchmark, EW=ew_benchmark, SPY=spy_benchmark)

# Merge them into the RL results so everything is compared side by side.
results.update(benchmarks)

## Performance Summary


In [None]:
# One column per strategy (label upper-cased), one row per performance statistic.
perf_stats = pd.DataFrame({key.upper(): res["stats"] for key, res in results.items()})
display(perf_stats)

In [None]:
# Statistics shown in the comparison chart (row labels of perf_stats).
metrics_to_include = [
    "Cumulative returns",
    "Annual return",
    "Annual volatility",
    "Sharpe ratio",
    "Max drawdown",
]
filtered_perf_stats = perf_stats.loc[metrics_to_include]

# Transpose so each strategy is a group of bars, one bar per metric.
filtered_perf_stats.transpose().plot.bar(figsize=(14, 8), legend=True)
plt.title("Performance Metrics Comparison")
plt.ylabel("Metric Value")
plt.xlabel("Models")
plt.xticks(rotation=45)
# Put the legend outside the axes so it does not cover the bars.
plt.legend(loc="upper left", bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.show()

In [None]:
def plot_cumulative_returns(results):
    """Plot cumulative returns of every entry from ``test_start_date`` onwards.

    Uses the ``daily_return`` column when present and otherwise derives the
    curve from ``account_value``. Each frame's ``date`` column is converted to
    datetime in place.

    Args:
        results: Dict mapping a label to ``{"df": frame}``.
    """
    plt.figure(figsize=(12, 8))
    for label, entry in results.items():
        frame = entry["df"]
        # Convert in place so later comparisons and plots see datetimes.
        frame["date"] = pd.to_datetime(frame["date"])
        # Keep only rows from the start of the trading window.
        recent = frame[frame["date"] >= test_start_date]
        if "daily_return" in recent:
            curve = (recent["daily_return"] + 1).cumprod() - 1
        else:
            curve = recent["account_value"] / recent["account_value"].iloc[0] - 1
        plt.plot(recent["date"], curve, label=label)
    plt.title("Cumulative Returns")
    plt.xlabel("Date")
    plt.ylabel("Cumulative Return")
    plt.legend()
    plt.show()


plot_cumulative_returns(results)