# Backtest for 30 Stocks (1-minute interval)

# Part 1. Install Packages

In [10]:
# Setup and environment configuration
import sys
import os
from dotenv import load_dotenv

# Get relative paths
notebook_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in locals() else os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))

# Load environment variables
env_path = os.path.join(notebook_dir, '.env')
load_dotenv(env_path)

# Install FinRL package
%pip install -e {project_root} -q

# Add to path
if project_root not in sys.path:
    sys.path.insert(0, project_root)

import finrl
print(f'Using finrl from: {os.path.dirname(finrl.__file__)}')
print(f'Project root: {project_root}')


Note: you may need to restart the kernel to use updated packages.
Using finrl from: /Users/ayushraj/Documents/Python/FinRL/FinRL/finrl
Project root: /Users/ayushraj/Documents/Python/FinRL/FinRL


In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3
import seaborn as sns

from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.config import INDICATORS, TRAINED_MODEL_DIR
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader

# Set plotting style for better visualization with multiple stocks
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
%matplotlib inline

# Part 2. Backtesting

To backtest the agents, ensure you have train_data_30.csv and trade_data_30.csv files with 30 stocks and 1-minute interval data in the same directory of this notebook. The data should include the following 30 stocks or similar portfolio.

In [11]:
# Load training and trading data using relative paths
train_data_path = os.path.join(notebook_dir, 'train_data.csv')
trade_data_path = os.path.join(notebook_dir, 'trade_data.csv')

train = pd.read_csv(train_data_path)
trade = pd.read_csv(trade_data_path)

# If you are not using the data generated from part 1 of this tutorial, make sure 
# it has the columns and index in the form that could be make into the environment. 
# Then you can comment and skip the following lines.
train = train.set_index(train.columns[0])
train.index.names = ['']
trade = trade.set_index(trade.columns[0])
trade.index.names = ['']

print(f"✓ Loaded training data from: {os.path.basename(train_data_path)}")
print(f"✓ Loaded trading data from: {os.path.basename(trade_data_path)}")


✓ Loaded training data from: train_data.csv
✓ Loaded trading data from: trade_data.csv


Then, upload the trained agent to the same directory, and set the corresponding variable to True.

In [12]:
trade.head(15)

Unnamed: 0,date,open,high,low,close,volume,tic,vixy,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
,,,,,,,,,,,,,,,,
0.0,2025-08-01 13:30:00+00:00,210.95,212.08,210.2647,210.92,2196802.0,AAPL,43.6667,-0.016376,209.729366,206.474604,70.162321,349.612349,71.017047,208.289657,208.597832
0.0,2025-08-01 13:30:00+00:00,297.06,297.06,295.0001,296.0,45683.0,AMGN,43.6667,0.077751,295.521115,293.597135,57.885006,292.118872,58.827589,294.626417,295.191618
0.0,2025-08-01 13:30:00+00:00,217.21,218.14,215.25,218.045,4441419.0,AMZN,43.6667,-1.262524,240.96635,226.27306,9.506528,-999.188913,92.061077,233.690983,233.822755
0.0,2025-08-01 13:30:00+00:00,294.03,294.03,293.33,293.68,4011.0,AXP,43.6667,-0.782257,302.231459,296.419541,15.135586,-526.53211,77.377455,299.644583,300.187625
0.0,2025-08-01 13:30:00+00:00,219.45,219.89,218.27,218.34,242056.0,BA,43.6667,-0.420214,223.415415,219.993085,21.567821,-494.102827,70.81021,221.914667,222.03485
0.0,2025-08-01 13:30:00+00:00,426.97,428.82,426.5,428.62,59935.0,CAT,43.6667,-0.837849,441.737928,433.234672,19.221738,-912.838976,72.837896,437.59695,438.170475
0.0,2025-08-01 13:30:00+00:00,254.99,255.99,254.21,254.56,132001.0,CRM,43.6667,-0.642269,260.904625,256.401885,16.362219,-410.939938,78.889391,258.93831,259.373382
0.0,2025-08-01 13:30:00+00:00,67.51,67.61,67.06,67.065,572205.0,CSCO,43.6667,-0.105469,68.467819,67.531121,20.436286,-525.07205,71.801742,68.055323,68.162232
0.0,2025-08-01 13:30:00+00:00,152.545,154.025,152.16,153.31,242134.0,CVX,43.6667,0.031329,152.67147,150.77153,70.50935,306.52802,69.086969,151.81902,151.91723


In [None]:
# Make true for the model which you want to use for backtesting - ensure trained model zip file is present
if_using_a2c = False
if_using_ddpg = False
if_using_ppo = True
if_using_td3 = False
if_using_sac = False

Load the agents

In [None]:
# Load trained models using relative paths
trained_models_dir = os.path.join(project_root, 'trained_model_lab')

trained_a2c = A2C.load(os.path.join(trained_models_dir, "agent_a2c.zip")) if if_using_a2c else None
trained_ddpg = DDPG.load(os.path.join(trained_models_dir, "agent_ddpg")) if if_using_ddpg else None
trained_ppo = PPO.load(os.path.join(trained_models_dir, "agent_ppo.zip")) if if_using_ppo else None
trained_td3 = TD3.load(os.path.join(trained_models_dir, "agent_td3.zip")) if if_using_td3 else None
trained_sac = SAC.load(os.path.join(trained_models_dir, "agent_sac")) if if_using_sac else None

print(f"✓ Loaded models from: {os.path.basename(trained_models_dir)}/")
if if_using_a2c:
    print(f"  ✓ A2C model loaded")
if if_using_ddpg:
    print(f"  ✓ DDPG model loaded")
if if_using_ppo:
    print(f"  ✓ PPO model loaded")
if if_using_td3:
    print(f"  ✓ TD3 model loaded")
if if_using_sac:
    print(f"  ✓ SAC model loaded")


### Trading (Out-of-sample Performance)

We update periodically in order to take full advantage of the data, e.g., retrain quarterly, monthly or weekly. We also tune the parameters along the way, in this notebook we use the in-sample data from 2009-01 to 2020-07 to tune the parameters once, so there is some alpha decay here as the length of trade date extends. 

Numerous hyperparameters – e.g. the learning rate, the total number of samples to train on – influence the learning process and are usually determined by testing some variations.

In [None]:
stock_dimension = len(trade.tic.unique())
state_space = 1 + 2 * stock_dimension + len(INDICATORS) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

In [None]:
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

In [None]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70,risk_indicator_col='vixy', **env_kwargs)
# env_trade, obs_trade = e_trade_gym.get_sb_env()

In [None]:
df_account_value_a2c, df_actions_a2c = DRLAgent.DRL_prediction(
    model=trained_a2c, 
    environment = e_trade_gym) if if_using_a2c else (None, None)

In [None]:
df_account_value_ddpg, df_actions_ddpg = DRLAgent.DRL_prediction(
    model=trained_ddpg, 
    environment = e_trade_gym) if if_using_ddpg else (None, None)

In [None]:
df_account_value_ppo, df_actions_ppo = DRLAgent.DRL_prediction(
    model=trained_ppo, 
    environment = e_trade_gym) if if_using_ppo else (None, None)

In [None]:
df_account_value_td3, df_actions_td3 = DRLAgent.DRL_prediction(
    model=trained_td3, 
    environment = e_trade_gym) if if_using_td3 else (None, None)

In [None]:
df_account_value_sac, df_actions_sac = DRLAgent.DRL_prediction(
    model=trained_sac, 
    environment = e_trade_gym) if if_using_sac else (None, None)

# Part 3: Mean Variance Optimization

Mean Variance optimization is a very classic strategy in portfolio management. Here, we go through the whole process to do the mean variance optimization and add it as a baseline to compare.

First, process dataframe to the form for MVO weight calculation.

In [14]:
def process_df_for_mvo(df):
  return df.pivot(index="date", columns="tic", values="close")

### Helper functions for mean returns and variance-covariance matrix

In [15]:
# Codes in this section partially refer to Dr G A Vijayalakshmi Pai
# https://www.kaggle.com/code/vijipai/lesson-5-mean-variance-optimization-of-portfolios/notebook

def StockReturnsComputing(StockPrice, Rows, Columns): 
  import numpy as np 
  StockReturn = np.zeros([Rows-1, Columns]) 
  for j in range(Columns):        # j: Assets 
    for i in range(Rows-1):     # i: Daily Prices 
      StockReturn[i,j]=((StockPrice[i+1, j]-StockPrice[i,j])/StockPrice[i,j])* 100 
      
  return StockReturn

### Calculate the weights for mean-variance

In [16]:
StockData = process_df_for_mvo(train)
TradeData = process_df_for_mvo(trade)

TradeData.to_numpy()

array([[210.92  , 296.    , 218.045 , ..., 344.76  ,  43.33  ,  98.175 ],
       [213.1512, 296.21  , 217.87  , ..., 344.185 ,  43.015 ,  97.95  ],
       [212.1297, 297.295 , 217.41  , ..., 344.15  ,  43.    ,  97.88  ],
       ...,
       [269.97  , 318.71  , 249.98  , ..., 340.26  ,  39.705 , 101.485 ],
       [270.03  , 319.27  , 250.175 , ..., 340.215 ,  39.705 , 101.45  ],
       [270.1   , 320.    , 250.25  , ..., 340.07  ,  39.715 , 101.475 ]],
      shape=(26520, 30))

In [17]:
#compute asset returns
arStockPrices = np.asarray(StockData)
[Rows, Cols]=arStockPrices.shape
arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)

#compute mean returns and variance covariance matrix of returns
meanReturns = np.mean(arReturns, axis = 0)
covReturns = np.cov(arReturns, rowvar=False)
 
#set precision for printing results
np.set_printoptions(precision=3, suppress = True)

#display mean returns and variance-covariance matrix of returns
print('Mean returns of assets in k-portfolio 1\n', meanReturns)
print('Variance-Covariance matrix of returns\n', covReturns)

Mean returns of assets in k-portfolio 1
 [ 0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
 -0.001  0.001  0.     0.     0.    -0.     0.     0.     0.     0.
  0.    -0.     0.     0.     0.001 -0.    -0.001  0.     0.     0.   ]
Variance-Covariance matrix of returns
 [[ 0.011  0.001  0.007  0.004  0.005  0.004  0.005  0.003  0.002  0.004
   0.004  0.005  0.003  0.002  0.003  0.006  0.     0.003  0.     0.001
   0.003  0.001  0.004  0.005  0.009  0.     0.001  0.003  0.     0.002]
 [ 0.001  0.009  0.001  0.002  0.001  0.002  0.001  0.001  0.001  0.001
   0.002  0.002  0.002  0.002  0.002  0.002  0.002  0.001  0.001  0.002
   0.002  0.002  0.001  0.002  0.001  0.001  0.002  0.001  0.001  0.001]
 [ 0.007  0.001  0.014  0.006  0.006  0.006  0.006  0.004  0.003  0.004
   0.005  0.007  0.004  0.002  0.003  0.01  -0.001  0.005 -0.     0.001
   0.004 -0.     0.006  0.006  0.012 -0.     0.001  0.003 -0.001  0.003]
 [ 0.004  0.002  0.006  0.01   0.005  0.006  0.005  0.003  0.

### Use PyPortfolioOpt

In [18]:
from pypfopt.efficient_frontier import EfficientFrontier

ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=(0, 0.5))
raw_weights_mean = ef_mean.max_sharpe()
cleaned_weights_mean = ef_mean.clean_weights()
mvo_weights = np.array([1000000 * cleaned_weights_mean[i] for i in range(len(cleaned_weights_mean))])
mvo_weights

array([     0.,      0.,      0.,      0.,      0.,      0.,      0.,
       181350.,      0.,      0.,      0.,  57040.,      0.,      0.,
       153970.,      0., 110490.,  54320.,      0.,  52480.,  47840.,
            0.,      0.,      0.,  32040.,      0.,      0.,  27740.,
            0., 282720.])

In [19]:
LastPrice = np.array([1/p for p in StockData.tail(1).to_numpy()[0]])
Initial_Portfolio = np.multiply(mvo_weights, LastPrice)
Initial_Portfolio

array([   0.   ,    0.   ,    0.   ,    0.   ,    0.   ,    0.   ,
          0.   , 2655.975,    0.   ,    0.   ,    0.   ,   78.018,
          0.   ,    0.   ,  591.692,    0.   ,  660.588,  181.151,
          0.   ,  172.785,  326.174,    0.   ,    0.   ,    0.   ,
        178.705,    0.   ,    0.   ,   79.025,    0.   , 2895.831])

In [20]:
Portfolio_Assets = TradeData @ Initial_Portfolio
MVO_result = pd.DataFrame(Portfolio_Assets, columns=["Mean Var"])
MVO_result

Unnamed: 0_level_0,Mean Var
date,Unnamed: 1_level_1
2025-08-01 13:30:00+00:00,9.862299e+05
2025-08-01 13:31:00+00:00,9.835911e+05
2025-08-01 13:32:00+00:00,9.835846e+05
2025-08-01 13:33:00+00:00,9.837172e+05
2025-08-01 13:34:00+00:00,9.830497e+05
...,...
2025-11-05 20:55:00+00:00,1.077250e+06
2025-11-05 20:56:00+00:00,1.077454e+06
2025-11-05 20:57:00+00:00,1.076594e+06
2025-11-05 20:58:00+00:00,1.076421e+06


# Part 3.1: Classical Dynamic Portfolio Algorithms
Import and run classical algorithms for comparison


In [22]:
"""
Classical Algorithms for Optimal Dynamic Portfolio Management
Real implementations ready to use in your backtest
"""

import numpy as np
import pandas as pd
from typing import Tuple, List


class ExponentialGradientPortfolio:
    """
    Exponential Gradient (EG) Algorithm - RECOMMENDED
    Optimal for all market conditions
    Reference: Helmbold et al. (1998)
    """
    
    def __init__(self, num_stocks: int, learning_rate: float = 0.05):
        self.num_stocks = num_stocks
        self.eta = learning_rate
        self.weights = np.ones(num_stocks) / num_stocks
        self.portfolio_values = [1000000]
        
    def rebalance(self, price_changes: np.ndarray) -> np.ndarray:
        """Update weights based on price changes"""
        log_returns = np.log(price_changes)
        self.weights = self.weights * np.exp(self.eta * log_returns)
        self.weights = self.weights / np.sum(self.weights)
        return self.weights.copy()
    
    def step(self, price_changes: np.ndarray, current_value: float) -> float:
        """Execute one step of portfolio management"""
        weights = self.rebalance(price_changes)
        portfolio_return = np.dot(weights, price_changes)
        new_value = current_value * portfolio_return
        self.portfolio_values.append(new_value)
        return new_value
    
    def get_results(self) -> List[float]:
        return self.portfolio_values


class UniversalPortfolio:
    """
    Follow the Winner (FTW) - Universal Portfolio Algorithm
    Best for trending markets
    Reference: Cover (1991)
    """
    
    def __init__(self, num_stocks: int):
        self.num_stocks = num_stocks
        self.cumulative_returns = np.ones(num_stocks)
        self.portfolio_values = [1000000]
        
    def rebalance(self, current_prices: np.ndarray, 
                  initial_prices: np.ndarray) -> np.ndarray:
        """Weights proportional to cumulative returns"""
        self.cumulative_returns = current_prices / initial_prices
        weights = self.cumulative_returns / np.sum(self.cumulative_returns)
        return weights.copy()
    
    def step(self, current_prices: np.ndarray, 
             initial_prices: np.ndarray,
             previous_prices: np.ndarray,
             current_value: float) -> float:
        """Execute one step"""
        weights = self.rebalance(current_prices, initial_prices)
        price_changes = current_prices / (previous_prices + 1e-10)
        portfolio_return = np.dot(weights, price_changes)
        new_value = current_value * portfolio_return
        self.portfolio_values.append(new_value)
        return new_value
    
    def get_results(self) -> List[float]:
        return self.portfolio_values


class MeanReversionPortfolio:
    """
    Follow the Loser - Mean Reversion Strategy
    Best for oscillating markets
    """
    
    def __init__(self, num_stocks: int, lookback_window: int = 20):
        self.num_stocks = num_stocks
        self.lookback = lookback_window
        self.portfolio_values = [1000000]
        
    def rebalance(self, price_history: pd.DataFrame, 
                  current_time_idx: int) -> np.ndarray:
        """Inverse weights to recent returns"""
        if current_time_idx < self.lookback:
            return np.ones(self.num_stocks) / self.num_stocks
        
        start_idx = current_time_idx - self.lookback
        recent_start = price_history.iloc[start_idx].values
        recent_end = price_history.iloc[current_time_idx].values
        
        returns = recent_end / (recent_start + 1e-10)
        inverse_returns = 1.0 / (returns + 1e-10)
        weights = inverse_returns / np.sum(inverse_returns)
        return weights.copy()
    
    def step(self, price_history: pd.DataFrame,
             current_time_idx: int,
             current_value: float) -> float:
        """Execute one step"""
        if current_time_idx == 0:
            weights = np.ones(self.num_stocks) / self.num_stocks
        else:
            weights = self.rebalance(price_history, current_time_idx)
        
        current_prices = price_history.iloc[current_time_idx].values
        if current_time_idx > 0:
            previous_prices = price_history.iloc[current_time_idx - 1].values
        else:
            previous_prices = current_prices
        
        price_changes = current_prices / (previous_prices + 1e-10)
        portfolio_return = np.dot(weights, price_changes)
        new_value = current_value * portfolio_return
        self.portfolio_values.append(new_value)
        return new_value
    
    def get_results(self) -> List[float]:
        return self.portfolio_values


class DynamicMVOPortfolio:
    """
    Dynamic Mean Variance Optimization
    Recalculates MVO weights every period using rolling window
    """
    
    def __init__(self, num_stocks: int, rolling_window: int = 60):
        self.num_stocks = num_stocks
        self.rolling_window = rolling_window
        self.portfolio_values = [1000000]
        
        try:
            from pypfopt.efficient_frontier import EfficientFrontier
            self.EF = EfficientFrontier
        except ImportError:
            self.EF = None
            print("Warning: pypfopt not installed. Install with: pip install pypfopt")
    
    def calculate_mvo_weights(self, price_history: pd.DataFrame,
                             current_time_idx: int) -> np.ndarray:
        """Calculate MVO optimal weights for current period"""
        if current_time_idx < self.rolling_window:
            return np.ones(self.num_stocks) / self.num_stocks
        
        if self.EF is None:
            return np.ones(self.num_stocks) / self.num_stocks
        
        start_idx = current_time_idx - self.rolling_window
        end_idx = current_time_idx + 1
        window_prices = price_history.iloc[start_idx:end_idx].values
        
        returns = np.diff(window_prices, axis=0) / (window_prices[:-1] + 1e-10)
        mean_returns = np.mean(returns, axis=0)
        cov_returns = np.cov(returns, rowvar=False)
        cov_returns += np.eye(self.num_stocks) * 1e-5
        
        try:
            ef = self.EF(mean_returns, cov_returns, weight_bounds=(0, 0.5))
            ef.max_sharpe()
            weights = ef.get_weights()
            return np.array(weights)
        except:
            return np.ones(self.num_stocks) / self.num_stocks
    
    def step(self, price_history: pd.DataFrame,
             current_time_idx: int,
             current_value: float) -> float:
        """Execute one step"""
        weights = self.calculate_mvo_weights(price_history, current_time_idx)
        
        current_prices = price_history.iloc[current_time_idx].values
        if current_time_idx > 0:
            previous_prices = price_history.iloc[current_time_idx - 1].values
        else:
            previous_prices = current_prices
        
        price_changes = current_prices / (previous_prices + 1e-10)
        portfolio_return = np.dot(weights, price_changes)
        new_value = current_value * portfolio_return
        self.portfolio_values.append(new_value)
        return new_value
    
    def get_results(self) -> List[float]:
        return self.portfolio_values

print("✅ Classical algorithms loaded successfully")

✅ Classical algorithms loaded successfully


In [23]:
# Run Exponential Gradient (EG) Algorithm
print("Running Exponential Gradient Algorithm (EG)...")
eg = ExponentialGradientPortfolio(num_stocks=30, learning_rate=0.05)
portfolio_value_eg = 1000000

for t in range(1, len(TradeData)):
    price_changes = TradeData.iloc[t] / TradeData.iloc[t-1]
    portfolio_value_eg = eg.step(price_changes, portfolio_value_eg)

eg_results = eg.get_results()
print(f"✅ EG Final Value: ${eg_results[-1]:,.2f} ({((eg_results[-1]/1000000-1)*100):.2f}%)")


Running Exponential Gradient Algorithm (EG)...
✅ EG Final Value: $1,088,293.62 (8.83%)


In [24]:
# Run Follow the Winner (FTW) Algorithm
print("Running Follow the Winner Algorithm (FTW)...")
ftw = UniversalPortfolio(num_stocks=30)
portfolio_value_ftw = 1000000
initial_prices = TradeData.iloc[0]

for t in range(1, len(TradeData)):
    current = TradeData.iloc[t]
    prev = TradeData.iloc[t-1]
    portfolio_value_ftw = ftw.step(current, initial_prices, prev, portfolio_value_ftw)

ftw_results = ftw.get_results()
print(f"✅ FTW Final Value: ${ftw_results[-1]:,.2f} ({((ftw_results[-1]/1000000-1)*100):.2f}%)")


Running Follow the Winner Algorithm (FTW)...
✅ FTW Final Value: $1,114,983.96 (11.50%)


In [25]:
# Run Mean Reversion (Loser) Algorithm
print("Running Mean Reversion Algorithm...")
loser = MeanReversionPortfolio(num_stocks=30, lookback_window=20)
portfolio_value_loser = 1000000

for t in range(1, len(TradeData)):
    portfolio_value_loser = loser.step(TradeData, t, portfolio_value_loser)

loser_results = loser.get_results()
print(f"✅ Mean Reversion Final Value: ${loser_results[-1]:,.2f} ({((loser_results[-1]/1000000-1)*100):.2f}%)")


Running Mean Reversion Algorithm...
✅ Mean Reversion Final Value: $1,067,014.17 (6.70%)


# Part 4: DJIA index

In [26]:
import pandas as pd


# Dow divisor (approx 2025)
DOW_DIVISOR = 0.151727

# Group by minute timestamp → sum of 30 closes per minute
djia = (
    trade.groupby("date")["close"]
      .sum()
      .reset_index()
)

# Compute DJIA
djia["djia"] = djia["close"] / DOW_DIVISOR

# Save result
djia.to_csv("djia_minute.csv", index=False)

djia.head(10)

Unnamed: 0,date,close,djia
0,2025-08-01 13:30:00+00:00,6564.3075,43263.937862
1,2025-08-01 13:31:00+00:00,6551.102,43176.903254
2,2025-08-01 13:32:00+00:00,6552.2661,43184.575586
3,2025-08-01 13:33:00+00:00,6553.8558,43195.052957
4,2025-08-01 13:34:00+00:00,6552.8611,43188.497103
5,2025-08-01 13:35:00+00:00,6549.8075,43168.371483
6,2025-08-01 13:36:00+00:00,6550.8977,43175.556757
7,2025-08-01 13:37:00+00:00,6549.638,43167.254345
8,2025-08-01 13:38:00+00:00,6544.8002,43135.369446
9,2025-08-01 13:39:00+00:00,6539.2056,43098.496642


Add DJIA index as a baseline to compare with.

In [27]:
df_dji = djia[["date", "close"]]
fst_day = df_dji["close"][0]
dji = pd.merge(
    df_dji["date"],
    df_dji["close"].div(fst_day).mul(1000000),
    how="outer",
    left_index=True,
    right_index=True,
).set_index("date")

df_dji

Unnamed: 0,date,close
0,2025-08-01 13:30:00+00:00,6564.3075
1,2025-08-01 13:31:00+00:00,6551.1020
2,2025-08-01 13:32:00+00:00,6552.2661
3,2025-08-01 13:33:00+00:00,6553.8558
4,2025-08-01 13:34:00+00:00,6552.8611
...,...,...
26515,2025-11-05 20:55:00+00:00,7141.8983
26516,2025-11-05 20:56:00+00:00,7140.0066
26517,2025-11-05 20:57:00+00:00,7134.0170
26518,2025-11-05 20:58:00+00:00,7133.7208


<a id='4'></a>
# Part 5: Backtesting Results
Backtesting plays a key role in evaluating the performance of a trading strategy. Automated backtesting tool is preferred because it reduces the human error. We usually use the Quantopian pyfolio package to backtest our trading strategies. It is easy to use and consists of various individual plots that provide a comprehensive image of the performance of a trading strategy.

In [None]:
df_result_a2c = (
    df_account_value_a2c.set_index(df_account_value_a2c.columns[0])
    if if_using_a2c
    else None
)
df_result_ddpg = (
    df_account_value_ddpg.set_index(df_account_value_ddpg.columns[0])
    if if_using_ddpg
    else None
)
df_result_ppo = (
    df_account_value_ppo.set_index(df_account_value_ppo.columns[0])
    if if_using_ppo
    else None
)
df_result_td3 = (
    df_account_value_td3.set_index(df_account_value_td3.columns[0])
    if if_using_td3
    else None
)
df_result_sac = (
    df_account_value_sac.set_index(df_account_value_sac.columns[0])
    if if_using_sac
    else None
)

result = pd.DataFrame(
    {
        "a2c": df_result_a2c["account_value"] if if_using_a2c else None,
        "ddpg": df_result_ddpg["account_value"] if if_using_ddpg else None,
        "ppo": df_result_ppo["account_value"] if if_using_ppo else None,
        "td3": df_result_td3["account_value"] if if_using_td3 else None,
        "sac": df_result_sac["account_value"] if if_using_sac else None,
        "mvo": MVO_result["Mean Var"],
        "dji": dji["close"],
    }
)

In [None]:
# Add classical algorithms to results
result['EG'] = eg_results
result['FTW'] = ftw_results
result['MeanReversion'] = loser_results

# Remove any NaN columns
result = result.dropna(axis=1, how='all')

print("\n" + "="*80)
print("FINAL PORTFOLIO COMPARISON")
print("="*80)
print(f"Initial Capital: $1,000,000.00\n")

final_comparison = pd.DataFrame({
    'Strategy': [],
    'Final Value': [],
    'Return %': []
})

for col in result.columns:
    final_val = result[col].iloc[-1]
    roi = ((final_val / 1000000) - 1) * 100
    print(f"{col.upper():20}: ${final_val:>14,.2f}  ({roi:>7.2f}%)")

In [None]:
result

Now, everything is ready, we can plot the backtest results. We'll create:
1. Overall portfolio performance comparison
2. Individual stock performance plots (30 plots) showing each stock vs DJIA and all 5 algorithms

In [None]:
result.to_csv('comparison_all_algo.csv')

In [None]:
# Overall Portfolio Performance Plot
plt.rcParams["figure.figsize"] = (15,8)
plt.figure()
result.plot(title="Overall Portfolio Performance Comparison")
plt.xlabel("Date")
plt.ylabel("Portfolio Value")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# Part 6: Individual Stock Performance Analysis

Now let's create individual plots for each of the 30 stocks, showing how each stock performed with different algorithms compared to the DJIA index.

In [None]:
# First, let's prepare the data for individual stock analysis
def prepare_stock_data_for_individual_analysis(trade_df):
    """
    Prepare individual stock data for plotting
    Returns stock price data for each individual stock normalized to $1M starting value
    """
    stock_list = sorted(trade_df['tic'].unique())
    stock_data = {}
    
    print(f"Preparing data for {len(stock_list)} stocks: {stock_list}")
    
    for stock in stock_list:
        stock_df = trade_df[trade_df['tic'] == stock].copy()
        stock_df = stock_df.sort_values('date')
        stock_df = stock_df.set_index('date')
        
        # Get individual stock price evolution (normalized to start at $1,000,000)
        stock_prices = stock_df['close']
        if len(stock_prices) > 0:
            initial_price = stock_prices.iloc[0]
            stock_normalized = (stock_prices / initial_price) * 1000000
            
            stock_data[stock] = {
                'price': stock_normalized,
                'dates': stock_df.index
            }
        else:
            print(f"Warning: No data found for stock {stock}")
    
    return stock_data, stock_list

# Prepare the stock data
print("="*60)
print("PREPARING INDIVIDUAL STOCK DATA FOR ANALYSIS")
print("="*60)

stock_data, stock_list = prepare_stock_data_for_individual_analysis(trade)

print(f"\\nSuccessfully prepared data for {len(stock_list)} stocks")
print(f"Each stock's price data has been normalized to start at $1,000,000")
print(f"This allows direct comparison with the RL portfolio algorithms")
print("="*60)

### Important Note About the Plots

**What Each Plot Shows:**
- **Individual Stock Line**: Shows what would happen if you invested $1M in just that one stock (Buy & Hold strategy)
- **RL Algorithm Lines (A2C, DDPG, PPO, TD3, SAC)**: Shows the portfolio performance of each algorithm trading across all 30 stocks
- **DJIA Index Line**: Shows the DJIA index performance normalized to $1M starting value
- **MVO Line**: Shows Mean Variance Optimization portfolio performance

**Key Insight**: The RL algorithms manage a diversified portfolio of 30 stocks, while the individual stock line shows single-stock performance. This comparison helps understand:
1. How individual stocks performed vs the diversified RL portfolios
2. Which RL algorithm achieved the best portfolio performance
3. How both compare against market benchmarks (DJIA)

In [None]:
# Create individual plots for each stock (30 plots total)
# Each plot shows how each stock performed with different algorithms vs DJIA index

def create_individual_stock_plots(stock_data, result_data, stock_list, save_plots=True):
    """
    Create individual plots for each stock showing performance vs algorithms and DJIA
    Each plot will show:
    1. Individual stock price (Buy & Hold)
    2. All 5 RL algorithms portfolio performance (same for all stocks as it's portfolio-level)
    3. DJIA index performance
    """
    plt.rcParams["figure.figsize"] = (15, 8)
    
    # Common date index from result data
    common_dates = result_data.index
    
    for i, stock in enumerate(stock_list, 1):
        plt.figure(figsize=(15, 8))
        
        # Get individual stock price data aligned with common dates
        stock_price_data = stock_data[stock]['price']
        aligned_stock_data = stock_price_data.reindex(common_dates, method='ffill')
        
        # Plot individual stock performance (buy and hold strategy for this specific stock)
        plt.plot(common_dates, aligned_stock_data, 
                label=f'{stock} (Buy & Hold)', linewidth=2.5, alpha=0.9)
        
        # Plot RL algorithm portfolio performances (same for all stocks since it's portfolio level)
        if result_data['a2c'] is not None:
            plt.plot(common_dates, result_data['a2c'], 
                    label='A2C Portfolio', linewidth=2, alpha=0.8)
        
        if result_data['ddpg'] is not None:
            plt.plot(common_dates, result_data['ddpg'], 
                    label='DDPG Portfolio', linewidth=2, alpha=0.8)
        
        if result_data['ppo'] is not None:
            plt.plot(common_dates, result_data['ppo'], 
                    label='PPO Portfolio', linewidth=2, alpha=0.8)
        
        if result_data['td3'] is not None:
            plt.plot(common_dates, result_data['td3'], 
                    label='TD3 Portfolio', linewidth=2, alpha=0.8)
        
        if result_data['sac'] is not None:
            plt.plot(common_dates, result_data['sac'], 
                    label='SAC Portfolio', linewidth=2, alpha=0.8)
        
        # Plot DJIA index
        plt.plot(common_dates, result_data['dji'], 
                label='DJIA Index', linewidth=2.5, alpha=0.9, linestyle='--', color='black')
        
        # Plot MVO baseline
        if 'mvo' in result_data.columns and result_data['mvo'] is not None:
            plt.plot(common_dates, result_data['mvo'], 
                    label='Mean Variance Optimization', linewidth=2, alpha=0.8, linestyle=':')
        
        # Formatting
        plt.title(f'Performance Comparison: {stock} vs RL Algorithms & Benchmarks\\n' + 
                 f'(Individual Stock Buy & Hold vs Portfolio Strategies vs DJIA Index)', 
                 fontsize=14, fontweight='bold')
        plt.xlabel('Date', fontsize=12)
        plt.ylabel('Portfolio Value ($)', fontsize=12)
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=10)
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        
        # Save individual plots if requested
        if save_plots:
            plt.savefig(f'stock_performance_{stock}.png', dpi=300, bbox_inches='tight')
        
        plt.show()
        
        # Print performance summary for this stock
        final_values = {
            f'{stock} Buy & Hold': aligned_stock_data.iloc[-1] if not aligned_stock_data.empty else 1000000,
            'DJIA Index': result_data['dji'].iloc[-1]
        }
        
        # Add algorithm portfolio performances
        if result_data['a2c'] is not None:
            final_values['A2C Portfolio'] = result_data['a2c'].iloc[-1]
        if result_data['ddpg'] is not None:
            final_values['DDPG Portfolio'] = result_data['ddpg'].iloc[-1]
        if result_data['ppo'] is not None:
            final_values['PPO Portfolio'] = result_data['ppo'].iloc[-1]
        if result_data['td3'] is not None:
            final_values['TD3 Portfolio'] = result_data['td3'].iloc[-1]
        if result_data['sac'] is not None:
            final_values['SAC Portfolio'] = result_data['sac'].iloc[-1]
        if 'mvo' in result_data.columns and result_data['mvo'] is not None:
            final_values['MVO Portfolio'] = result_data['mvo'].iloc[-1]
        
        print(f"\\n{'='*60}")
        print(f"PERFORMANCE SUMMARY FOR {stock}")
        print(f"{'='*60}")
        
        for strategy, value in final_values.items():
            if pd.notna(value):
                roi = ((value - 1000000) / 1000000) * 100
                print(f"{strategy:20}: ${value:10,.2f} (ROI: {roi:6.2f}%)")
        
        best_strategy = max([(k, v) for k, v in final_values.items() if pd.notna(v)], 
                          key=lambda x: x[1])
        print(f"\\nBest Strategy: {best_strategy[0]} with ${best_strategy[1]:,.2f}")
        print(f"{'='*60}\\n")

# Create all individual stock plots
print("Creating individual stock performance plots...")
print("Note: Each plot shows individual stock buy & hold vs RL portfolio strategies vs benchmarks")
create_individual_stock_plots(stock_data, result, stock_list, save_plots=True)

# Part 7: Performance Summary and Statistics

Let's create a comprehensive summary table showing the performance of each algorithm across all stocks.

In [None]:
# Create comprehensive performance summary
def create_performance_summary(stock_data, result_data, stock_list):
    """
    Create a comprehensive performance summary table
    """
    summary_data = []
    
    # Common date index from result data
    common_dates = result_data.index
    
    for stock in stock_list:
        # Get stock price data aligned with common dates
        stock_price_data = stock_data[stock]['price']
        aligned_stock_data = stock_price_data.reindex(common_dates, method='ffill')
        
        # Calculate returns for each strategy
        initial_value = 1000000
        
        stock_final = aligned_stock_data.iloc[-1]
        stock_roi = ((stock_final - initial_value) / initial_value) * 100
        
        row_data = {
            'Stock': stock,
            'Buy_Hold_Final': stock_final,
            'Buy_Hold_ROI': stock_roi
        }
        
        # Add algorithm performance
        algorithms = ['a2c', 'ddpg', 'ppo', 'td3', 'sac']
        for algo in algorithms:
            if result_data[algo] is not None:
                final_val = result_data[algo].iloc[-1]
                roi = ((final_val - initial_value) / initial_value) * 100
                row_data[f'{algo.upper()}_Final'] = final_val
                row_data[f'{algo.upper()}_ROI'] = roi
            else:
                row_data[f'{algo.upper()}_Final'] = None
                row_data[f'{algo.upper()}_ROI'] = None
        
        # Add DJIA performance
        djia_final = result_data['dji'].iloc[-1]
        djia_roi = ((djia_final - initial_value) / initial_value) * 100
        row_data['DJIA_Final'] = djia_final
        row_data['DJIA_ROI'] = djia_roi
        
        summary_data.append(row_data)
    
    summary_df = pd.DataFrame(summary_data)
    return summary_df

# Create and display performance summary
performance_summary = create_performance_summary(stock_data, result, stock_list)

# Display the summary table
print("=== COMPREHENSIVE PERFORMANCE SUMMARY ===")
print("\\nFinal Portfolio Values:")
display_cols = ['Stock', 'Buy_Hold_Final', 'A2C_Final', 'DDPG_Final', 'PPO_Final', 'TD3_Final', 'SAC_Final', 'DJIA_Final']
available_cols = [col for col in display_cols if col in performance_summary.columns]
print(performance_summary[available_cols].round(2))

print("\\nReturn on Investment (ROI %):")
roi_cols = ['Stock', 'Buy_Hold_ROI', 'A2C_ROI', 'DDPG_ROI', 'PPO_ROI', 'TD3_ROI', 'SAC_ROI', 'DJIA_ROI']
available_roi_cols = [col for col in roi_cols if col in performance_summary.columns]
print(performance_summary[available_roi_cols].round(2))

# Save the summary
performance_summary.to_csv('individual_stock_performance_summary.csv', index=False)
print("\\nPerformance summary saved to 'individual_stock_performance_summary.csv'")

In [None]:
# Create algorithm ranking analysis
def analyze_algorithm_rankings(performance_summary):
    """
    Analyze which algorithms perform best across all stocks
    """
    algorithms = ['A2C', 'DDPG', 'PPO', 'TD3', 'SAC']
    available_algos = [algo for algo in algorithms if f'{algo}_ROI' in performance_summary.columns]
    
    print("=== ALGORITHM PERFORMANCE ANALYSIS ===")
    
    # Average ROI across all stocks
    print("\\nAverage ROI across all stocks:")
    avg_performance = {}
    for algo in available_algos:
        roi_col = f'{algo}_ROI'
        if performance_summary[roi_col].notna().any():
            avg_roi = performance_summary[roi_col].mean()
            avg_performance[algo] = avg_roi
            print(f"{algo}: {avg_roi:.2f}%")
    
    # Add buy and hold and DJIA averages
    if 'Buy_Hold_ROI' in performance_summary.columns:
        avg_performance['Buy_Hold'] = performance_summary['Buy_Hold_ROI'].mean()
        print(f"Buy & Hold Average: {avg_performance['Buy_Hold']:.2f}%")
    
    if 'DJIA_ROI' in performance_summary.columns:
        avg_performance['DJIA'] = performance_summary['DJIA_ROI'].mean()
        print(f"DJIA Average: {avg_performance['DJIA']:.2f}%")
    
    # Rank algorithms by average performance
    sorted_performance = sorted(avg_performance.items(), key=lambda x: x[1], reverse=True)
    print("\\nAlgorithm Ranking (by average ROI):")
    for i, (algo, roi) in enumerate(sorted_performance, 1):
        print(f"{i}. {algo}: {roi:.2f}%")
    
    # Count wins for each algorithm
    print("\\nNumber of stocks where each algorithm was the best performer:")
    algo_wins = {algo: 0 for algo in available_algos}
    algo_wins['Buy_Hold'] = 0
    algo_wins['DJIA'] = 0
    
    for idx, row in performance_summary.iterrows():
        best_algo = None
        best_roi = float('-inf')
        
        for algo in available_algos:
            roi_col = f'{algo}_ROI'
            if not pd.isna(row[roi_col]) and row[roi_col] > best_roi:
                best_roi = row[roi_col]
                best_algo = algo
        
        # Check buy and hold
        if not pd.isna(row['Buy_Hold_ROI']) and row['Buy_Hold_ROI'] > best_roi:
            best_roi = row['Buy_Hold_ROI']
            best_algo = 'Buy_Hold'
        
        # Check DJIA
        if not pd.isna(row['DJIA_ROI']) and row['DJIA_ROI'] > best_roi:
            best_roi = row['DJIA_ROI']
            best_algo = 'DJIA'
        
        if best_algo:
            algo_wins[best_algo] += 1
    
    for algo, wins in sorted(algo_wins.items(), key=lambda x: x[1], reverse=True):
        print(f"{algo}: {wins} stocks")
    
    return avg_performance, algo_wins

# Run the analysis
avg_perf, wins = analyze_algorithm_rankings(performance_summary)