# Deep Reinforcement Learning for Portfolio Optimization - Constrained-DRL framework


This experiement demonstrates the application of deep reinforcement learning (DRL) techniques for portfolio optimization.

- Policy network architecture: **MLP backbone**
- Compares `A2C`, `PPO`, `SAC`, `DDPG`, `TD3` all with simple MLPs


## Dependencies


In [2]:
# ! pip install pandas numpy matplotlib \
#                stable-baselines3 \
#                PyPortfolioOpt \
#                pandas_market_calendars quantstats gymnasium \
#                git+https://github.com/AI4Finance-Foundation/FinRL.git -q

In [3]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import time

from tqdm.auto import tqdm

import torch

from stable_baselines3 import A2C, PPO, SAC, DDPG, TD3
from stable_baselines3.common.noise import NormalActionNoise

from finrl import config
from finrl import config_tickers
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.agents.stablebaselines3.models import DRLAgent, DRLEnsembleAgent
from finrl.plot import backtest_stats, get_daily_return, get_baseline, backtest_plot

from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models



In [4]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

%matplotlib inline

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [None]:
experiment_name = "mlp_constrained_drl"
results_dir = f"results/models/{experiment_name}"
os.makedirs(results_dir, exist_ok=True)

## Data loading and pre-processing


Define training and trading/test periods


In [7]:
start_date = "2015-01-01"
end_date = (datetime.now() - pd.Timedelta(days=1)).strftime("%Y-%m-%d")  # Yesterday

trade_period = 2  # 2 years for testing
train_period = 10  # 10 years for training

train_end_date = (
    datetime.strptime(end_date, "%Y-%m-%d") - timedelta(days=trade_period * 365)
).strftime("%Y-%m-%d")
train_start_date = (
    datetime.strptime(train_end_date, "%Y-%m-%d") - timedelta(days=train_period * 365)
).strftime("%Y-%m-%d")
test_start_date = (
    datetime.strptime(train_end_date, "%Y-%m-%d") + timedelta(days=1)
).strftime("%Y-%m-%d")

train_dates = (train_start_date, train_end_date)
test_dates = (test_start_date, end_date)

print(f"Training period: {train_dates}")
print(f"Testing period: {test_dates}")

Training period: ('2013-05-06', '2023-05-04')
Testing period: ('2023-05-05', '2025-05-03')


- Fetch historical stock data for a given list of tickers within a specified date range.
- We use the DOW_30_TICKER stocks
- The data includes `date`, `close`, `high`, `low`, `open`, `volume`, and `tic` (ticker symbol).


In [8]:
def download_data(tickers, start_date, end_date):
    print(f"Downloading {start_date} → {end_date}")
    return YahooDownloader(
        start_date=start_date, end_date=end_date, ticker_list=tickers
    ).fetch_data()


df = download_data(config_tickers.DOW_30_TICKER, start_date, end_date)

Downloading 2015-01-01 → 2025-05-03


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Shape of DataFrame:  (76911, 8)


---

We apply feature engineering to the dataset of stock data:

- Add technical indicators (e.g., moving averages, RSI).
- Calculate turbulence indicators, which measure market volatility.

This Enhance the dataset with features that are critical for modeling market dynamics and making informed trading decisions.


In [9]:
def preprocess_data(df):
    fe = FeatureEngineer(use_technical_indicator=True, use_turbulence=True)
    return fe.preprocess_data(df)


df_feat = preprocess_data(df)

# TODO: Normalise the data??

Successfully added technical indicators
Successfully added turbulence index


## Covariance & Returns for State


- Calculate the rolling covariance matrices and daily returns for the given dataset of stock prices.
- This prepares the state representation (the state of the portfolio) for the RL models in the RL environments for portfolio optimization.
- The **rolling covariance matrices** (`cov_list`) capture the relationships between asset returns, while the daily returns (`return_list`) provide information about recent price movements.
- These metrics are critical for modeling the dynamics of the financial market and making informed trading decisions.


In [10]:
def compute_covariance_and_returns(df_feat, lookback=252):
    df_sorted = df_feat.sort_values(["date", "tic"], ignore_index=True)
    df_sorted.index = df_sorted.date.factorize()[0]
    cov_list, return_list = [], []

    dates = df_sorted.date.unique()
    for i in tqdm(range(lookback, len(dates)), desc="Computing covariance and returns"):
        win = df_sorted.loc[i - lookback : i]
        pm = win.pivot_table(index="date", columns="tic", values="close")
        rm = pm.pct_change().dropna()
        cov_list.append(rm.cov().values)
        return_list.append(rm)
    df_cov = pd.DataFrame(
        {"date": dates[lookback:], "cov_list": cov_list, "return_list": return_list}
    )

    return pd.merge(df_feat, df_cov, on="date", how="left").dropna(subset=["cov_list"])


df_all = compute_covariance_and_returns(df_feat)

Computing covariance and returns:   0%|          | 0/2347 [00:00<?, ?it/s]

## Train/Trade split


In [11]:
def split_data(df_all, train_dates, test_dates):
    train = data_split(df_all, *train_dates)
    test = data_split(df_all, *test_dates)
    return train, test


train, test = split_data(df_all, train_dates, test_dates)

## Environment setup


**Constrained-DRL framework**


The key innovation of our constrained‐DRL framework is that it **bakes real trading frictions and budget dynamics directly into the MDP**, rather than treating them as post-hoc adjustments. Concretely:

1. **Transaction Costs & Slippage**

   * **Unconstrained DRL** typically assumes zero commissions and perfect execution.
   * **Our framework** charges the agent a proportional fee (α bps) and a slippage penalty (β bps) **every time it rebalances**. Those costs are subtracted from the simulated portfolio value *before* computing the reward.

2. **Liquidity Limits**

   * Rather than letting the agent instantly shift 100% of its capital between assets, we cap each trade to a fraction of the asset’s *average daily volume*. If the agent requests more than, say, 5% of ADV in a single day, the order is only partially filled—mirroring real market depth constraints.

3. **Dynamic Budget Tracking & Early Exit**

   * The agent’s *available capital* evolves with gains and losses. If its portfolio value ever falls below a preset threshold (e.g. 50% of the starting capital), the episode terminates with a large negative penalty—discouraging reckless strategies that risk ruin.

4. **Reward Function Redefined**

   * Instead of raw portfolio return, we use the **log net return**:

     $$
       R_t = \log\frac{V_{t+1} - \text{costs}_t}{V_t}\,,
     $$

     where $\text{costs}_t$ includes both transaction fees and slippage. This forces the agent to learn the trade-off between chasing small gains and paying execution costs.

5. **State & Action Adjustments**

   * **State** now includes not only price histories and technical indicators, but also the *current portfolio weights* (the Portfolio Vector Memory) and, optionally, market liquidity metrics (e.g. daily volume).
   * **Action** remains a target weight vector over the N assets (plus cash), enforced via a softmax so weights sum to 1. The environment’s `step()` method translates that weight shift into actual trades, applies the cost/slippage, enforces volume caps, updates the internal capital, and then returns the new state and log-return reward.

6. **Implementation in Code**

   * We extended the standard `PortfolioOptimizationEnv` (from FinRL) by overriding its `step()` function to:

     1. Compute desired weight change $\Delta w$.
     2. For each asset, calculate trade volume = $\Delta w · V_t$, cap it by `liquidity_limit · ADV`.
     3. Deduct `α|Δw|V_t + β|Δw|V_t` from the portfolio value.
     4. Update holdings and compute next-day portfolio value using actual market returns.
     5. Compute reward as the log of net portfolio change.
     6. Check for early termination if `V_{t+1} < bankruptcy_threshold`.

7. **Integrating with DRL Agents**

   * All of our agents (A2C, PPO, SAC, DDPG, TD3) interact with this modified environment exactly as before. The only difference is that the gym‐style interface now fully accounts for costs and liquidity when returning the `(obs, reward, done, info)` tuple.
   * Architectures like **EIIE** and **EI³** leverage the extra state inputs (previous weights, ADV data) to learn cost-aware policies that naturally avoid expensive churn and only trade when the expected gain exceeds the execution expense.

By implementing these changes at the environment level, we ensure that *every* DRL algorithm and network architecture is forced to internalize real-world trading constraints during training, resulting in strategies that are both profitable and feasible to deploy.


-----

**Exploring environment modifications for code**

- To fact in slippage and liquidity, we extend `StockPortfolioEnv` into a `ConstrainedPortfolioEnv`. 
- This should allow for the integration of transaction costs, liquidity capping, and memory of portfolio vectors while keeping the original MLP architecture.


We fold all of the “real-world” logic into a custom Gym env by subclassing FinRL’s `StockPortfolioEnv` (or `PortfolioOptimizationEnv`) and overriding its `step()` to:

1. Compute desired weight changes → translate into trade volumes
2. Cap each volume by `liquidity_limit × ADV`
3. Charge both a proportional commission and a slippage fee on that volume
4. Update cash and holdings, recompute portfolio value
5. Return the log net‐return as reward (with costs already deducted)
6. Early-stop if value < bankruptcy threshold

In [None]:
class ConstrainedPortfolioEnv(StockPortfolioEnv):
    def __init__(
        self,
        df,
        slippage_pct: float = 0.0005,
        liquidity_limit: float = 0.05,
        bankruptcy_threshold: float = 0.5,
        **kwargs,
    ):
        # note: pass df as first positional arg, all others via kwargs
        super().__init__(df, **kwargs)
        self.slippage_pct = slippage_pct
        self.liquidity_limit = liquidity_limit
        self.bankruptcy_threshold = bankruptcy_threshold

    def step(self, action: np.ndarray):
        # 1) record previous portfolio value
        prev_val = self.asset_memory[-1]

        # 2) get today’s prices & volumes
        today = self.date_list[self.day]    # this is the current date string
        df_today = self.df[self.df.date == today]
        prices = df_today.close.values      # shape (stock_dim,)
        vols   = df_today.volume.values     # shape (stock_dim,)

        # 3) cap the raw action (shares to trade) by liquidity_limit * volume
        capped_action = np.zeros_like(action)
        for i in range(self.stock_dim):
            max_shares = int(vols[i] * self.liquidity_limit)
            capped_action[i] = np.clip(action[i], -max_shares, max_shares)

        # 4) perform the trades & commission as usual
        obs, _, done, info = super().step(capped_action)

        # 5) deduct slippage cost = sum(|shares_traded| * price * slippage_pct)
        slippage_cost = np.sum(np.abs(capped_action) * prices * self.slippage_pct)
        self.cash -= slippage_cost

        # 6) compute new portfolio value & log-return reward
        new_val = self.asset_memory[-1]
        reward = np.log(new_val / prev_val)

        # 7) early termination if ruined
        if new_val < self.initial_amount * self.bankruptcy_threshold:
            done = True
            reward -= 10.0   # large penalty

        return obs, reward, done, info


In [None]:
def configure_environment(train, test, fe):
    stock_dim = train.tic.nunique()
    base_kwargs = dict(
        stock_dim=stock_dim,
        hmax=100,
        initial_amount=1e6,
        transaction_cost_pct=0.001,     # must match StockPortfolioEnv’s arg
        reward_scaling=1e-4,
        state_space=stock_dim,
        action_space=stock_dim,
        tech_indicator_list=fe.tech_indicator_list,
    )

    raw_train_env = ConstrainedPortfolioEnv(
        train,
        slippage_pct=0.0005,
        liquidity_limit=0.05,
        bankruptcy_threshold=0.5,
        **base_kwargs,
    )
    raw_test_env = ConstrainedPortfolioEnv(
        test,
        slippage_pct=0.0005,
        liquidity_limit=0.05,
        bankruptcy_threshold=0.5,
        **base_kwargs,
    )

    env_train_sb3, _ = raw_train_env.get_sb_env()
    return env_train_sb3, raw_train_env, raw_test_env, base_kwargs

env_train_sb3, raw_train_env, raw_test_env, env_kwargs = configure_environment(
    train, test, FeatureEngineer()
)


----

- Create instances of the StockPortfolioEnv class for both training and testing datasets.
- It also wrap the training environment for use with Stable-Baselines3 (SB3).


## Training


- We define the configuration for various RL models to be trained in the portfolio optimization environment.
- The training environment (`env_train_sb3`) is wrapped for use with Stable-Baselines3 (SB3).
- The SB3 environment provides the `state` and `action space` dimensions needed for configuring the models.


In [21]:
def prepare_models():
    model_configs = [
        (A2C, "A2C", {}),
        (PPO, "PPO", {}),
        (SAC, "SAC", {}),
        (DDPG, "DDPG", {}),
        (TD3, "TD3", {}),
    ]
    return model_configs


model_configs = prepare_models()

Train multiple reinforcement learning (RL) models using the specified training environment and configuration.


In [22]:
def train_models(env_train_sb3, model_configs, save_dir, timesteps_override=None):
    # Recommended timesteps
    timesteps_map = {
        "A2C": 150_000,  # on-policy, fewer passes
        "PPO": 250_000,  # on-policy, more stable
        "SAC": 1_000_000,  # off-policy, reuse via replay
        "DDPG": 1_000_000,  # off-policy
        "TD3": 1_000_000,  # off-policy with twin critics
    }

    models, training_times = {}, {}

    for cls, name, kwargs in model_configs:
        n_steps = (
            timesteps_override
            if timesteps_override is not None
            else timesteps_map[name]
        )
        print(f"Training {name} for {n_steps} timesteps…")
        start_time = time.time()

        model = cls("MlpPolicy", env_train_sb3, verbose=0, **kwargs)
        model.learn(total_timesteps=n_steps)

        end_time = time.time()
        training_times[name] = (end_time - start_time) / 60
        models[name] = model
        model.save(f"{save_dir}/{name}_mlp_model")

        print(f"{name} training completed in {training_times[name]:.2f} minutes.")

    return models, training_times

**Timesteps**
- On-policy methods (A2C, PPO)
    - 200 k–500 k timesteps (~80–200 passes):
    - PPO often converges around 200 k–300 k if you use a clip range of 0.2 and 10–20 epochs per rollout; A2C can need slightly fewer.
- Off-policy methods (SAC, DDPG, TD3)
    - 500 k–1 M+ timesteps:
    - These algorithms reuse experience via replay, so they typically benefit from 1 million+ steps to fully explore the state–action space.

**Guideline**:
- PPO/EIIE (on-policy): 250 k timesteps
- SAC/EI³ (off-policy): 500 k timesteps, bump to 1 M if you still see improvement
- A2C/MLP (on-policy): 150 k timesteps

In [24]:
models, training_times = train_models(
    env_train_sb3, model_configs, results_dir, timesteps_override=None
)

Training A2C for 150000 timesteps…


AttributeError: 'ConstrainedPortfolioEnv' object has no attribute 'date_list'

In [None]:
training_times_df = pd.DataFrame(
    list(training_times.items()), columns=["model", "training_duration (min)"]
)

training_times_df.to_csv(f"{results_dir}/training_times.csv", index=False)

print("Training summary:")
display(training_times_df)

## Model loading


Load the trained models from memory for analysis without the need for time consuming retraining


In [None]:
def load_models(model_configs, results_dir):
    models = {}
    for _, name, _ in model_configs:
        model_path = f"{results_dir}/{name}_mlp_model.zip"
        if os.path.exists(model_path):
            print(f"Loading saved model for {name}...")
            models[name] = globals()[name].load(model_path)
        else:
            print(f"No saved model found for {name}.")
    return models


# models = load_models(model_configs, results_dir)

## Backtesting


- Evaluates the performance of the RL models/algorithms in a trading environment.
- We do this by calculating the **cumulative portfolio value** and **performance metrics** for each RL model.


In [None]:
def backtest_rl_strategies(models, raw_env, env_kwargs):
    results = {}
    for name, model in models.items():
        print(f"Backtesting {name}…")
        # Simulate trading using the model in the raw_env environment
        df_ret, _ = DRLAgent.DRL_prediction(
            model=model, environment=raw_env, deterministic=True
        )
        df_ret["account_value"] = (df_ret.daily_return + 1).cumprod() * env_kwargs[
            "initial_amount"
        ]
        stats = backtest_stats(df_ret, value_col_name="account_value")
        results[name] = {"df": df_ret, "stats": stats}
    return results


results = backtest_rl_strategies(models, raw_test_env, env_kwargs)

### Plotting


In [None]:
def plot_backtest_results():
    for name, res in results.items():
        print(f"Plotting {name}…")
        backtest_plot(
            account_value=res["df"],
            baseline_start=test_start_date,
            baseline_end=end_date,
            baseline_ticker="SPY",
            value_col_name="account_value",
        )


plot_backtest_results()

## Benchmarks


These benchmarks will provide baseline performance metrics for comparison with the RL strategies.
We evaluate the performance of **Mean-Variance Optimization (MVO)** and simple benchmarks (**Equal-Weighted Portfolio** and **SPY**) in terms of returns, volatility, and cumulative portfolio value.


---

### Mean-Variance Optimization Benchmark

- **Objective**: Calculate the benchmark portfolio using **Mean-Variance Optimization (MVO)**.
- **Purpose**: This function benchmarks the performance of a portfolio optimized for minimum volatility using **Modern Portfolio Theory (MPT)**.
- **Comparison**: It allows us to compare the MPT strategy with other RL strategies by analyzing metrics like returns, volatility, and cumulative performance.

##### Workflow:

1. **Covariance Matrix**:

   - Extract the covariance matrix of asset returns for each trading day in the test period.
   - Use this matrix to model the relationships between asset returns.

2. **Optimization**:

   - Apply **Efficient Frontier** to minimize portfolio volatility.
   - Compute the optimal weights for each asset in the portfolio.

3. **Portfolio Value Calculation**:

   - Calculate the portfolio's account value over time using the optimized weights and asset prices.

4. **Performance Metrics**:
   - Evaluate the portfolio's performance using metrics such as annual return, cumulative return, and volatility.
   - Add the results to the `results` dictionary under the `"MPT"` key.


In [None]:
def compute_mpt_benchmark(test, env_kwargs):
    dates_test = test.date.unique()
    min_vals = [env_kwargs["initial_amount"]]
    for i in range(len(dates_test) - 1):
        curr = test[test.date == dates_test[i]]
        nxt = test[test.date == dates_test[i + 1]]
        covm = np.array(curr.cov_list.values[0])
        ef = EfficientFrontier(None, covm, weight_bounds=(0, 1))
        ef.min_volatility()
        w = ef.clean_weights()
        prices = curr.close.values
        nextp = nxt.close.values
        shares = np.array(list(w.values())) * min_vals[-1] / prices
        min_vals.append(np.dot(shares, nextp))
    min_df = pd.DataFrame({"date": dates_test, "account_value": min_vals})
    stats_mpt = backtest_stats(min_df, value_col_name="account_value")
    return {"df": min_df, "stats": stats_mpt}


mpt_benchmark = compute_mpt_benchmark(test, env_kwargs)

---

### Equal-Weighted Portfolio Benchmark

- Calculate the performance of an **equal-weighted portfolio** benchmark.
- This benchmark assumes that all assets in the portfolio are equally weighted, and their daily returns are averaged to compute the portfolio's overall return.

##### Workflow:

1. **Daily Returns Calculation**:

   - Group the test dataset by `date`.
   - Compute the percentage change (`pct_change`) in the `close` prices for each group.
   - Calculate the mean of the daily percentage changes to represent the portfolio's daily return.

2. **Cumulative Portfolio Value**:

   - Reset the index of the daily returns to create a DataFrame (`ew_df`).
   - Compute the cumulative product of the daily returns (`cumprod`) to calculate the portfolio's cumulative value over time.
   - Multiply the cumulative returns by the initial portfolio value (`initial_amount`) to get the portfolio's account value.

3. **Performance Metrics**:
   - Use the `backtest_stats` function to calculate performance metrics for the equal-weighted portfolio, such as annual return, cumulative return, and volatility.


In [None]:
def compute_equal_weighted_benchmark(df, initial_amount=100_000):
    # Pivot to have one column per ticker
    price_wide = df.pivot_table(
        index="date", columns="tic", values="close"
    ).sort_index()

    # Compute each ticker's daily return, then average equally
    daily_rets = price_wide.pct_change().fillna(0).mean(axis=1)

    # Build the equity curve
    ew_df = pd.DataFrame({"date": daily_rets.index, "daily_return": daily_rets.values})
    ew_df["account_value"] = (ew_df["daily_return"] + 1).cumprod() * initial_amount

    # Compute performance statistics
    stats_ew = backtest_stats(ew_df, value_col_name="account_value")

    return {"df": ew_df.reset_index(drop=True), "stats": stats_ew}


ew_benchmark = compute_equal_weighted_benchmark(test, env_kwargs["initial_amount"])

---

### SPY Benchmark

- **Objective**: Calculate the benchmark performance of the `SPY ETF`, which tracks the **S&P 500** index.
- **Purpose**: This function provides a baseline for comparing the performance of reinforcement learning models and other portfolio strategies.

##### Workflow:

1. **Data Retrieval**:
   - Use the `get_baseline` function to fetch the historical closing prices of the SPY ETF for the test period.
2. **Daily Returns Calculation**:
   - Compute the percentage change (`pct_change`) in the SPY closing prices to calculate daily returns.
3. **Cumulative Portfolio Value**:
   - Create a DataFrame (`spy_df`) with the daily returns and calculate the cumulative product (`cumprod`) of the daily returns to compute the portfolio's cumulative value over time.
   - Multiply the cumulative returns by the initial portfolio value (`initial_amount`) to get the portfolio's account value.
4. **Performance Metrics**:
   - Use the `backtest_stats` function to calculate performance metrics for the SPY benchmark, such as annual return, cumulative return, and volatility.


In [None]:
def compute_spy_benchmark(test, env_kwargs):
    spy_close = get_baseline("SPY", test_start_date, end_date)["close"]
    spy_ret = spy_close.pct_change().dropna()
    spy_df = pd.DataFrame({"date": spy_ret.index, "daily_return": spy_ret.values})
    spy_df["account_value"] = (spy_df.daily_return + 1).cumprod() * env_kwargs[
        "initial_amount"
    ]
    stats_spy = backtest_stats(spy_df, value_col_name="account_value")
    return {"df": spy_df, "stats": stats_spy}


spy_benchmark = compute_spy_benchmark(test, env_kwargs)

In [None]:
benchmarks = {
    "MPT": mpt_benchmark,
    "EW": ew_benchmark,
    "SPY": spy_benchmark,
}

results.update(benchmarks)

## Performance Summary


In [None]:
perf_stats = pd.DataFrame({key.upper(): res["stats"] for key, res in results.items()})
display(perf_stats)

In [None]:
comparison_metrics = [
    "Cumulative returns",
    "Annual return",
    "Annual volatility",
    "Sharpe ratio",
    "Max drawdown",
]

# Filter the performance statistics for the selected metrics
comparison_table = perf_stats.loc[comparison_metrics]


# Plot the comparison metrics as a bar chart
comparison_table.T.plot(kind="bar", figsize=(16, 8))
plt.title("Comparison of Key Metrics Across Models")
plt.ylabel("Metric Value")
plt.xlabel("Models")
plt.xticks(rotation=45)
plt.legend(title="Metrics", bbox_to_anchor=(1.05, 1), loc="upper left")
plt.tight_layout()
plt.show()

Visualize the cumulative returns of various strategies over time


In [None]:
def plot_cumulative_returns(results):
    plt.figure(figsize=(12, 8))
    for name, res in results.items():
        # Ensure the date column is converted to datetime
        res["df"]["date"] = pd.to_datetime(res["df"]["date"])
        # Filter data to start from the trade start date
        filtered_df = res["df"][res["df"]["date"] >= test_start_date]
        cum = (
            (filtered_df["daily_return"] + 1).cumprod() - 1
            if "daily_return" in filtered_df
            else filtered_df["account_value"] / filtered_df["account_value"].iloc[0] - 1
        )
        plt.plot(filtered_df["date"], cum, label=name)
    plt.title("Cumulative Returns")
    plt.xlabel("Date")
    plt.ylabel("Cumulative Return")
    plt.legend()
    plt.show()


plot_cumulative_returns(results)