# Tutorial on running backtesting

In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from quantrl_lab.data import (
    DataSourceRegistry,
    DataProcessor,
    IndicatorRegistry,
    AlpacaDataLoader,
    AlphaVantageDataLoader
)
from quantrl_lab.data.indicators.technical_indicators import *
from quantrl_lab.backtesting import BacktestRunner
from quantrl_lab.custom_envs.stock import SingleStockTradingEnv, SingleStockEnvConfig


# src/quantrl_lab/custom_envs/stock/strategies/actions/types/standard_market_action_strategy.py

# Note: Can extend more concrete action strategies 
# from src/quantrl_lab/custom_envs/stock/strategies/actions/base_action.py

from quantrl_lab.custom_envs.stock.strategies.actions.types import StandardMarketActionStrategy


# Not all the strategies are useful, you can cherry-pick the ones you need

from quantrl_lab.custom_envs.stock.strategies.rewards import (
    PortfolioValueChangeReward,
    InvalidActionPenalty,
    TrendFollowingReward,
    HoldPenalty,
    PositionSizingRiskReward,
    WeightedCompositeReward
)

# So far only implemented 1 observation strategy

from quantrl_lab.custom_envs.stock.strategies.observations import PortfolioWithTrendObservation


from stable_baselines3 import PPO, A2C, SAC

from typing import Optional, Dict, Any, List, Type

# Lift pandas' display limits (columns, rows, width) so frames are shown without truncation.
for _option_name in ('display.max_columns', 'display.max_rows', 'display.width'):
    pd.set_option(_option_name, None)


#### Using DataSourceRegistry as the loader to extract different sources of data

- Currently supports OHLCV data and news data from Alpaca and Alpha Vantage. However, do note that Alpha Vantage's news data coverage is limited, and free users are rate-limited to 25 API calls per day.

- Alpaca also provides trade counts and VWAP, which Alpha Vantage does not.

- Support for a wider variety of data sources that might be helpful for trading (at least for non-high-frequency trading) is in progress.

In [2]:
# No `sources` mapping is passed, so the registry falls back to its default sources.
# To override them explicitly, pass for example:
#     sources={"primary_source": AlpacaDataLoader, "news_source": AlpacaDataLoader}
data_loader = DataSourceRegistry()

In [3]:
# Parameters of the historical price request, gathered in one place.
ohlcv_query = {
    "symbols": "MU",
    "start": "2022-01-01",
    "end": "2025-07-31",
    "timeframe": "1d",  # can be adjusted based on the data granularity needed
}
olhcv_df = data_loader.get_historical_ohlcv_data(**ohlcv_query)

In [4]:
# Preview the first rows of the frame returned by get_historical_ohlcv_data.
olhcv_df.head()

Unnamed: 0,Symbol,Timestamp,Open,High,Low,Close,Volume,Trade_count,VWAP,Date
0,MU,2022-01-03 05:00:00+00:00,93.905,95.835,93.48,95.75,19907641.0,163032.0,95.028965,2022-01-03
1,MU,2022-01-04 05:00:00+00:00,95.6,96.59,93.54,96.34,20892833.0,192840.0,95.386837,2022-01-04
2,MU,2022-01-05 05:00:00+00:00,95.38,98.45,94.35,94.4,28880722.0,252641.0,96.712594,2022-01-05
3,MU,2022-01-06 05:00:00+00:00,95.1046,96.71,94.12,95.65,23737246.0,215101.0,95.715463,2022-01-06
4,MU,2022-01-07 05:00:00+00:00,96.09,97.19,93.94,94.45,16618859.0,153941.0,95.030314,2022-01-07


In [6]:
# News for the same ticker and date range as the price request.
news_window = {"start": "2022-01-01", "end": "2025-07-31"}
news_df = data_loader.get_news_data("MU", **news_window)

Output()

Remark: The window sizes and other parameters of the technical indicators can be specified in several ways. See data_processing.ipynb for more examples.

In [7]:
# Technical indicators to compute.
indicator_names = ["SMA", "EMA", "RSI", "MACD", "ATR", "BB", "STOCH", "OBV"]

# Per-indicator parameters, passed to the pipeline as `<INDICATOR>_params` keyword arguments.
# The window sizes can be adjusted based on the signals from feature importance analysis.
indicator_params = {
    "SMA_params": {"window": 20},
    "EMA_params": {"window": 9},
    "RSI_params": {"window": 7},
    "MACD_params": {"fast": 12, "slow": 26, "signal": 9},
    "ATR_params": {"window": 14},
    "BB_params": {"window": 20, "num_std": 2},
    "STOCH_params": {"k_window": 14, "d_window": 3, "smooth_k": 1},
    # "OBV_params": {},
}

# Combine price and news data, then run the processing pipeline with the settings above.
data_processor = DataProcessor(olhcv_data=olhcv_df, news_data=news_df)
processed_data = data_processor.data_processing_pipeline(
    indicators=indicator_names,
    fillna_strategy="neutral",
    **indicator_params,
)

Device set to use cpu


In [8]:
# Preview the first rows of the pipeline output (price columns plus indicator and sentiment columns).
processed_data.head()

Unnamed: 0,Open,High,Low,Close,Volume,Trade_count,VWAP,SMA_20,EMA_9,RSI_7,MACD_line_12_26,MACD_signal_9,ATR_14,BB_middle_20,BB_upper_20_2,BB_lower_20_2,BB_bandwidth_20,STOCH_%K_14_1,STOCH_%D_3,OBV,sentiment_score
0,78.96,82.31,78.02,82.27,25587263.0,177832.0,81.23175,89.427,83.289881,38.54667,-4.05688,-2.957446,3.908338,89.427,103.057519,75.796481,0.304841,29.887093,17.256112,29104168.0,0.615621
1,82.2,82.39,80.42,81.45,16677706.0,156159.0,81.282006,88.712,82.921905,36.195812,-4.012427,-3.168442,3.769885,88.712,102.445824,74.978176,0.309627,26.256365,24.249133,12426462.0,0.861282
2,82.92,84.7,82.54,84.51,21429140.0,174863.0,83.93071,88.1205,83.239524,49.58255,-3.68777,-3.272308,3.73275,88.1205,101.485089,74.755911,0.303325,39.80518,31.982879,33855602.0,0.0
3,82.67,85.18,81.71,81.97,21519262.0,183404.0,83.199568,87.499,82.985619,41.209527,-3.594005,-3.336647,3.713982,87.499,100.78991,74.20809,0.303796,29.438613,31.833386,12336340.0,0.0
4,81.222,82.19,79.84,81.17,16827299.0,153181.0,81.113986,86.775,82.622495,38.801794,-3.543403,-3.377998,3.616555,86.775,99.770654,73.779346,0.299525,27.655409,32.299734,-4490959.0,0.0


### Backtesting Examples

Remark:
- The environment is able to handle price in numpy array or dataframe with price column defined in config
- train-eval-test split example will be used in the optuna_tuning.ipynb

In [9]:
# Chronological split: the first 80% of rows are used for training, the remainder for testing.
train_ratio = 0.8
train_size = int(train_ratio * len(processed_data))

# Both halves stay DataFrames (no conversion to numpy).
train_data_df, test_data_df = processed_data[:train_size], processed_data[train_size:]

#### Strategy instances

In [10]:
# Penalty magnitudes used by the penalty-style reward components; free to adjust.
invalid_action_penalty_value = -1.0
hold_penalty_value = -0.5

# Action and observation strategies.
# StandardMarketActionStrategy: 7 actions: buy, sell, hold, limit buy, limit sell, take profit, stop loss
action_strategy = StandardMarketActionStrategy()
observation_strategy = PortfolioWithTrendObservation()

# Individual reward components that the composite reward strategies are built from.
portfolio_reward = PortfolioValueChangeReward()
invalid_penalty = InvalidActionPenalty(penalty=invalid_action_penalty_value)
trend_reward = TrendFollowingReward()
hold_penalty = HoldPenalty(penalty=hold_penalty_value)
position_sizing_reward = PositionSizingRiskReward()

#### Reward strategies with different emphasis based on investor's persona or risk profile

You can play around with the weights at will.

Some examples are shown below:

In [11]:
# Multiple reward strategies with different weights.
# These weights can be adjusted based on the emphasis on each strategy or the risk appetite
# of the trading strategy.

# Components shared by every profile; each weight list below follows this order.
reward_components = (
    portfolio_reward,
    invalid_penalty,
    trend_reward,
    hold_penalty,
    position_sizing_reward,
)

profile_weights = {
    "balanced": [
        1.0,   # portfolio value (primary objective)
        1.5,   # invalid actions (high penalty)
        0.5,   # trend following (low-med)
        0.1,   # hold penalty (low)
        0.7,   # position sizing (moderate importance)
    ],
    "conservative": [
        1.2,   # steady portfolio growth
        4.0,   # high invalid penalty
        0.1,   # low trend following
        0.05,  # very low hold penalty
        1.0,   # HIGH weight on risk management
    ],
    "aggressive": [
        1.0,   # portfolio growth still important
        1.0,   # moderate invalid penalty
        1.0,   # higher trend following
        0.4,   # higher hold penalty (forces action)
        0.2,   # lower position sizing weight (more risk tolerance)
    ],
    "risk_managed": [
        0.8,   # moderate portfolio focus
        3.0,   # high invalid penalty
        0.1,   # minimal trend following
        0.05,  # minimal hold penalty
        1.5,   # HIGHEST weight on position sizing
    ],
}

# One composite reward per profile; each gets its own list objects.
reward_strategies = {
    profile: WeightedCompositeReward(
        strategies=list(reward_components),
        weights=list(weights),
    )
    for profile, weights in profile_weights.items()
}

#### Different ways of creating BacktestRunner config

In [12]:
# 1. The standard way using dictionary

# I'm creating 2 sets of environment configs for demonstration.
# You can create more variations by changing the parameters in SingleStockEnvConfig
# or using different strategies


def _make_env_factory(data, transaction_cost_pct, slippage):
    """Return a zero-argument callable that builds a fresh SingleStockTradingEnv.

    Args:
        data: Dataset the environment runs on (here the train or test DataFrame).
            It is bound when the factory is created, so rebinding `train_data_df` /
            `test_data_df` later does not silently change an existing factory.
        transaction_cost_pct: Transaction cost passed to SingleStockEnvConfig.
        slippage: Slippage passed to SingleStockEnvConfig.

    Returns:
        A callable taking no arguments that constructs a new environment on each call,
        using the notebook's `action_strategy`, `observation_strategy` and the
        "balanced" reward strategy.
    """

    def _build_env():
        return SingleStockTradingEnv(
            data=data,
            config=SingleStockEnvConfig(
                initial_balance=100000.0,
                transaction_cost_pct=transaction_cost_pct,
                slippage=slippage,
                window_size=20,
                order_expiration_steps=5,
            ),
            action_strategy=action_strategy,
            reward_strategy=reward_strategies["balanced"],
            observation_strategy=observation_strategy,
        )

    return _build_env


# The only settings that differ between the two demonstration environments.
cost_profiles = {
    "standard": {"transaction_cost_pct": 0.001, "slippage": 0.0005},  # assuming a 0.1% transaction cost
    "low_cost": {"transaction_cost_pct": 0.0, "slippage": 0.001},     # assuming no transaction costs
}

# Each entry pairs a train and a test factory that share the same cost settings.
env_configs = {
    name: {
        'train_env_factory': _make_env_factory(train_data_df, **costs),
        'test_env_factory': _make_env_factory(test_data_df, **costs),
    }
    for name, costs in cost_profiles.items()
}

print("✅ Created environment configs:")
print(f"   Environment names: {list(env_configs.keys())}")


# To look at the config in detail, you can do:
sample_train_env_factory = env_configs['standard']['train_env_factory']
sample_env_instance = sample_train_env_factory()

print("\n--- Accessing Configuration Parameters ---")

# Parameters stored directly on the environment
print(f"  Window Size:          {sample_env_instance.window_size}")
print(f"  Max Episode Steps:    {sample_env_instance.max_episode_steps}")

# Parameters stored within the portfolio object
print(f"  Initial Balance:      {sample_env_instance.portfolio.initial_balance}")
print(f"  Transaction Cost Pct: {sample_env_instance.portfolio.transaction_cost_pct}")
print(f"  Slippage:             {sample_env_instance.portfolio.slippage}")


✅ Created environment configs:
   Environment names: ['standard', 'low_cost']

--- Accessing Configuration Parameters ---
  Window Size:          20
  Max Episode Steps:    681
  Initial Balance:      100000.0
  Transaction Cost Pct: 0.001
  Slippage:             0.0005


In [None]:
# 2. using the factory function, a staticmethod in BacktestRunner

# Inputs for the factory: the two datasets plus the three strategy objects.
factory_inputs = {
    "train_data": train_data_df,
    "test_data": test_data_df,
    "action_strategy": action_strategy,
    "reward_strategy": reward_strategies["conservative"],
    "observation_strategy": observation_strategy,
}
single_standard_config = BacktestRunner.create_env_config_factory(**factory_inputs)

config_keys = list(single_standard_config.keys())
print(f"Keys in standard_config: {config_keys}")

Keys in standard_config: ['train_env_factory', 'test_env_factory']


#### Running a single experiment

In [17]:
# Initialize BacktestRunner
runner = BacktestRunner(verbose=1)

# We can specify custom algorithm parameters if needed, otherwise the default parameters will be used
sac_hyperparams = {
    "learning_rate": 0.0001,   # Lower learning rate to stabilize training
    "batch_size": 256,         # Increased batch size for more stable updates
    "gamma": 0.995,            # Higher gamma for long-term focus
    "tau": 0.01,               # Slower soft updates
    "ent_coef": 0.2,           # Higher entropy for more exploration
    "gradient_steps": 1,
    "buffer_size": 500000,
    "verbose": 0,              # Set to 0 to suppress output
    "policy_kwargs": {
        "net_arch": [256, 256],
        "activation_fn": nn.ReLU,  # note that you cannot use str here, it must be a callable
    },
}
custom_sac_config = BacktestRunner.create_custom_config(SAC, **sac_hyperparams)

# Run single experiment using the single environment config created with the factory function
results = runner.run_single_experiment(
    SAC,                        # Algorithm to use
    single_standard_config,     # Use the single environment config we created
    # config=custom_sac_config,  # Custom algorithm configuration, this is an optional parameter
    total_timesteps=50000,      # Total timesteps for training
    num_eval_episodes=3,
)

print("✅ Single experiment completed!")

Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


✅ Single experiment completed!


In [18]:
# Pass the single-experiment results to BacktestRunner's inspection helper.
BacktestRunner.inspect_single_experiment(results)

In [21]:
# Action statistics recorded on the test run: total steps, per-action counts and percentages.
results["test_action_stats"]

{'total_steps': 465,
 'action_counts': {'Buy': 156, 'Sell': 60, 'Hold': 228, 'LimitBuy': 21},
 'action_percentages': {'Buy': 33.5483870967742,
  'Sell': 12.903225806451612,
  'Hold': 49.03225806451613,
  'LimitBuy': 4.516129032258064}}

In [20]:
# Test returns as a list; presumably one percentage value per evaluation episode
# (num_eval_episodes=3 in the run above) — confirm against BacktestRunner.
results["test_return_list_pct"]

[np.float32(9.012109), np.float32(9.012109), np.float32(9.012109)]

#### Running comprehensive backtesting for different combinations

The example below combines 3 algorithms (PPO, A2C, SAC) with 3 algorithm preset configs
("default", "explorative", "conservative") and 2 env configs ("standard", "low_cost"):

3 algos x 3 different sets of algo config x 2 sets of env configs = 18 combinations

Note: The following cell will take a long time to run

In [None]:
# Alternative to presets: hand-written per-algorithm configurations, for example
#
# custom_configs = {
#     'PPO': BacktestRunner.create_custom_config(
#         PPO,
#         learning_rate=0.001,
#         n_steps=2048,
#         batch_size=64,
#         gamma=0.99,
#         gae_lambda=0.95,
#         clip_range=0.2,
#         ent_coef=0.01
#     ),
#     'SAC': BacktestRunner.create_custom_config(
#         SAC,
#         learning_rate=0.0003,
#         batch_size=256,
#         gamma=0.99,
#         tau=0.005,
#         ent_coef='auto',
#         target_update_interval=1,
#         train_freq=1,
#         policy_kwargs={
#             'net_arch': [256, 256]
#         }
#     )
# }

# Every algorithm is run with every preset on every environment config.
presets = ["default", "explorative", "conservative"]
algorithms = [PPO, A2C, SAC]

backtest_settings = {
    "algorithms": algorithms,
    "env_configs": env_configs,
    "presets": presets,
    # "custom_configs": custom_configs,  # either use presets or customize config by yourself
    "total_timesteps": 50000,
    "n_envs": 4,
    "num_eval_episodes": 3,
}
comprehensive_results = runner.run_comprehensive_backtest(**backtest_settings)

print("\n✅ Comprehensive backtest completed!")

Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

In [None]:
# Look up one combination: algorithm name -> env config name -> preset name.
comprehensive_results["PPO"]["low_cost"]["conservative"]

In [None]:
# Same lookup for SAC: algorithm name -> env config name -> preset name.
comprehensive_results["SAC"]["low_cost"]["conservative"]