# Tutorial on running backtesting

In [1]:
import mlflow
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from quantrl_lab.data import (
    DataSourceRegistry,
    DataProcessor,
    IndicatorRegistry,
)
from quantrl_lab.data.indicators.technical_indicators import *
from quantrl_lab.backtesting.runner import BacktestRunner
from quantrl_lab.custom_envs.stock import SingleStockTradingEnv, SingleStockEnvConfig
from quantrl_lab.custom_envs.stock.strategies.actions.types import StandardMarketActionStrategy
from quantrl_lab.custom_envs.stock.strategies.rewards import (
    PortfolioValueChangeReward,
    InvalidActionPenalty,
    TrendFollowingReward,
    HoldPenalty,
    PositionSizingRiskReward,
    WeightedCompositeReward
)
from quantrl_lab.custom_envs.stock.strategies.observations import PortfolioWithTrendObservation
from quantrl_lab.tuning.mlflow_runner import (
    MLflowOptunaRunner, 
    create_sac_search_space, 
    create_ppo_search_space, 
    create_a2c_search_space
)
from stable_baselines3 import PPO, A2C, SAC

# Lift pandas' display limits so the wide indicator frames below are shown in full.
for display_option in ("display.max_columns", "display.max_rows", "display.width"):
    pd.set_option(display_option, None)


# Using DataSourceRegistry as the loader to extract different sources of data

Currently supports OHLCV data and sentiment data, primarily from Alpaca.
Integration of Alpha Vantage data is in progress (format standardization required).

In [2]:
# Loader object used below to fetch both the price bars and the news items.
data_loader = DataSourceRegistry()

In [3]:
# Fetch historical OHLCV bars for the MU ticker over the study period.
# The same symbol and date range are reused for the news request further down.
olhcv_df = data_loader.get_historical_ohlcv_data(
    symbols="MU",
    start="2022-01-01",
    end="2025-07-31",
    timeframe="1d", # can be adjusted based on the data granularity needed
)

In [4]:
# Preview the first rows to check the columns returned by the loader.
olhcv_df.head()

Unnamed: 0,Symbol,Timestamp,Open,High,Low,Close,Volume,Trade_count,VWAP,Date
0,MU,2022-01-03 05:00:00+00:00,93.905,95.835,93.48,95.75,19907641.0,163032.0,95.028965,2022-01-03
1,MU,2022-01-04 05:00:00+00:00,95.6,96.59,93.54,96.34,20892833.0,192840.0,95.386837,2022-01-04
2,MU,2022-01-05 05:00:00+00:00,95.38,98.45,94.35,94.4,28880722.0,252641.0,96.712594,2022-01-05
3,MU,2022-01-06 05:00:00+00:00,95.1046,96.71,94.12,95.65,23737246.0,215101.0,95.715463,2022-01-06
4,MU,2022-01-07 05:00:00+00:00,96.09,97.19,93.94,94.45,16618859.0,153941.0,95.030314,2022-01-07


In [5]:
# Fetch news items for the same symbol and date range as the OHLCV request.
# The result is handed to DataProcessor together with the price data.
news_df = data_loader.get_news_data(
    "MU",
    start="2022-01-01",
    end="2025-07-31",
)

Output()

Current list of technical indicators supported:
- SMA
- EMA
- RSI
- MACD
- ATR
- BB
- STOCH
- OBV

(Working on supporting more indicators)

In [6]:
# Combine the price bars and the news items in a single processor.
data_processor = DataProcessor(olhcv_data=olhcv_df, news_data=news_df)

# Per-indicator settings, forwarded to the pipeline as `<NAME>_params` keyword arguments.
# The window sizes can be adjusted based on the signals from feature importance analysis.
# These settings are optional: you could instead call
#   data_processor.append_technical_indicators(olhcv_df, indicators=["SMA", "EMA"])
# and the default parameters will be used.
indicator_params = {
    "SMA_params": {"window": 20},
    "EMA_params": {"window": 9},
    "RSI_params": {"window": 7},
    "MACD_params": {"fast": 12, "slow": 26, "signal": 9},
    "ATR_params": {"window": 14},
    "BB_params": {"window": 20, "num_std": 2},
    "STOCH_params": {"k_window": 14, "d_window": 3, "smooth_k": 1},
    # "OBV_params": {},
}

# Run the pipeline with every supported indicator enabled.
processed_data = data_processor.data_processing_pipeline(
    indicators=["SMA", "EMA", "RSI", "MACD", "ATR", "BB", "STOCH", "OBV"],
    # fillna_strategy="neutral",
    **indicator_params,
)

Device set to use cpu


In [7]:
# Preview the processed frame: price columns plus the indicator and sentiment columns.
processed_data.head()

Unnamed: 0,Open,High,Low,Close,Volume,Trade_count,VWAP,SMA_20,EMA_9,RSI_7,MACD_line_12_26,MACD_signal_9,MACD_histogram,ATR_14,BB_middle_20,BB_upper_20_2,BB_lower_20_2,BB_bandwidth_20,STOCH_%K_14,STOCH_%D_3,OBV,sentiment_score
0,78.96,82.31,78.02,82.27,25587263.0,177832.0,81.23175,89.427,83.289881,38.54667,-4.05688,-2.957446,-1.099434,3.908338,89.427,103.057519,75.796481,0.304841,29.887093,17.256112,29104168.0,0.615621
1,82.2,82.39,80.42,81.45,16677706.0,156159.0,81.282006,88.712,82.921905,36.195812,-4.012427,-3.168442,-0.843985,3.769885,88.712,102.445824,74.978176,0.309627,26.256365,24.249133,12426462.0,0.861282
2,82.92,84.7,82.54,84.51,21429140.0,174863.0,83.93071,88.1205,83.239524,49.58255,-3.68777,-3.272308,-0.415463,3.73275,88.1205,101.485089,74.755911,0.303325,39.80518,31.982879,33855602.0,0.0
3,82.67,85.18,81.71,81.97,21519262.0,183404.0,83.199568,87.499,82.985619,41.209527,-3.594005,-3.336647,-0.257358,3.713982,87.499,100.78991,74.20809,0.303796,29.438613,31.833386,12336340.0,0.0
4,81.222,82.19,79.84,81.17,16827299.0,153181.0,81.113986,86.775,82.622495,38.801794,-3.543403,-3.377998,-0.165404,3.616555,86.775,99.770654,73.779346,0.299525,27.655409,32.299734,-4490959.0,0.0


## Comprehensive Backtesting Example

#### Train-test split, keeping both parts as DataFrames. The environment will handle the price column detection and the NumPy conversion.

In [8]:
# 80/20 split by row position, without shuffling: the first 80% of the rows
# train the agent and the remaining rows are held out for testing.
# NOTE(review): this is a chronological split only if processed_data is ordered by time - confirm.
train_size = int(len(processed_data) * 0.8)

# .iloc makes the positional slicing explicit, independent of the index type.
train_data_df = processed_data.iloc[:train_size]  # Keep as DataFrame
test_data_df = processed_data.iloc[train_size:]   # Keep as DataFrame

# Fail early instead of building environments on an empty partition.
assert len(train_data_df) > 0 and len(test_data_df) > 0, "train/test split produced an empty partition"
assert len(train_data_df) + len(test_data_df) == len(processed_data)

#### Strategy instances

In [9]:
# Action and observation strategies used by the environments.
action_strategy = StandardMarketActionStrategy()
observation_strategy = PortfolioWithTrendObservation()

# Individual reward components that are blended by WeightedCompositeReward.
portfolio_reward = PortfolioValueChangeReward()
invalid_penalty = InvalidActionPenalty(penalty=-1.0)
trend_reward = TrendFollowingReward()
hold_penalty = HoldPenalty(penalty=-0.5)
position_sizing_reward = PositionSizingRiskReward()

# Component order; every weight list below follows this order.
reward_components = [
    portfolio_reward,
    invalid_penalty,
    trend_reward,
    hold_penalty,
    position_sizing_reward,
]

# Weight profiles. These weights can be adjusted based on the emphasis on each
# strategy or the risk appetite of the trading strategy.
# Columns: portfolio value | invalid actions | trend following | hold penalty | position sizing
reward_weight_profiles = {
    # portfolio value is the primary objective, high invalid-action penalty,
    # low-to-medium trend following, low hold penalty, moderate position sizing
    "balanced": [1.0, 2.0, 0.3, 0.1, 0.5],
    # steady portfolio growth, high invalid penalty, low trend following,
    # very low hold penalty, HIGH weight on risk management
    "conservative": [1.0, 3.0, 0.2, 0.05, 0.8],
    # portfolio growth still important, moderate invalid penalty, higher trend following,
    # higher hold penalty (forces action), lower position sizing weight (more risk tolerance)
    "aggressive": [0.8, 1.5, 0.6, 0.3, 0.3],
    # Optional risk-focused profile: moderate portfolio focus, high invalid penalty,
    # minimal trend following, minimal hold penalty, HIGHEST weight on position sizing
    "risk_managed": [0.7, 2.5, 0.1, 0.05, 1.0],
}

# One composite reward per profile; each gets its own copy of both lists.
reward_strategies = {
    profile_name: WeightedCompositeReward(
        strategies=list(reward_components),
        weights=list(profile_weights),
    )
    for profile_name, profile_weights in reward_weight_profiles.items()
}

In [10]:
# Create multiple environment configurations for comprehensive backtesting
# To run single experiment, you can use one of the keys in `env_configs`.
# For example, you can use "standard" config to run a standard backtest.


def make_env_factory(data, transaction_cost_pct, slippage):
    """Return a zero-argument callable that builds a new SingleStockTradingEnv.

    Args:
        data: DataFrame the environment is built on (train or test partition).
            It is bound when the factory is created, not when it is called.
        transaction_cost_pct: Transaction cost passed to SingleStockEnvConfig.
        slippage: Slippage passed to SingleStockEnvConfig.

    Returns:
        A callable taking no arguments; every call constructs a new environment
        with a new SingleStockEnvConfig.
    """

    def build_env():
        # Strategies are looked up at call time from the notebook namespace.
        return SingleStockTradingEnv(
            data=data,
            config=SingleStockEnvConfig(
                initial_balance=100000.0,
                transaction_cost_pct=transaction_cost_pct,
                slippage=slippage,
                window_size=20,
                order_expiration_steps=5,
            ),
            action_strategy=action_strategy,
            reward_strategy=reward_strategies["balanced"],
            observation_strategy=observation_strategy,
        )

    return build_env


def make_env_config(transaction_cost_pct, slippage):
    """Build a train/test factory pair that shares the same cost and slippage settings.

    Creating both factories from one set of arguments keeps the train and test
    environments from drifting apart when a setting is edited.
    """
    return {
        'train_env_factory': make_env_factory(train_data_df, transaction_cost_pct, slippage),
        'test_env_factory': make_env_factory(test_data_df, transaction_cost_pct, slippage),
    }


env_configs = {
    # assuming a 0.1% transaction cost
    "standard": make_env_config(transaction_cost_pct=0.001, slippage=0.0005),
    # assuming no transaction costs (slippage is set higher than in "standard")
    "low_cost": make_env_config(transaction_cost_pct=0.0, slippage=0.001),
}

print("✅ Created environment configs:")
print(f"   Environment names: {list(env_configs.keys())}")

✅ Created environment configs:
   Environment names: ['standard', 'low_cost']


In [11]:
# Pick one entry of env_configs: a dict holding a 'train_env_factory' and a
# 'test_env_factory' callable.
single_env_config = env_configs["low_cost"]  # Use the "low_cost" config for a single experiment

print("✅ Single environment config created for individual experiments")
# Use single_env_config for: run_single_experiment, run_algorithm_comparison, run_preset_comparison
# Use env_configs for: run_comprehensive_backtest, run_environment_comparison

✅ Single environment config created for individual experiments


In [12]:
# Single experiment using custom configuration

runner = BacktestRunner(verbose=1)

# TODO: read up on the algorithms again

# Network settings for the SAC policy.
sac_policy_kwargs = {
    'net_arch': [256, 256],
    # Note: this must be a callable such as nn.ReLU; a string is not accepted here.
    'activation_fn': nn.ReLU,
}

custom_config = BacktestRunner.create_custom_config(
    SAC,
    learning_rate=0.0001,   # below SB3's SAC default (3e-4) to stabilize training
    batch_size=256,         # 256 is also SB3's SAC default batch size
    gamma=0.995,            # above the default 0.99 for a longer-term focus
    tau=0.01,               # soft-update coefficient; above the default 0.005, so targets track faster
    ent_coef=0.2,           # fixed entropy coefficient (SB3's default is 'auto', i.e. learned)
    gradient_steps=1,
    buffer_size=500000,
    verbose=0,              # 0 suppresses training output
    policy_kwargs=sac_policy_kwargs,
)

# Train and evaluate SAC on the single environment config selected earlier.
results = runner.run_single_experiment(
    SAC,                    # algorithm to use
    single_env_config,      # train/test environment factories
    config=custom_config,   # optional custom algorithm configuration
    total_timesteps=50000,  # total timesteps for training
    num_eval_episodes=3,
)

print("✅ Single experiment completed!")

Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


✅ Single experiment completed!


In [14]:
# Close a run left open by an earlier cell so the tuner starts from a clean state.
# Checking explicitly (instead of try/except-pass) keeps real MLflow errors visible.
if mlflow.active_run() is not None:
    mlflow.end_run()

runner = BacktestRunner(verbose=1)

# Initialize the tuner with the runner
tuner = MLflowOptunaRunner(
    runner=runner,
    experiment_name="quantrl_hyperparameter_optimization",
    storage_url="sqlite:///optuna_studies.db"  # Optional: for persistent storage
)

# Example: Basic hyperparameter optimization for SAC
print("Starting SAC hyperparameter optimization...")

# Use the pre-defined search space or create a custom one
sac_search_space = create_sac_search_space()

# Metric the study optimizes; study.best_value below is the best value of this metric.
optimization_metric = "test_avg_return_pct"  # or "test_avg_reward"

# NOTE: the study is persisted in optuna_studies.db and re-used by name, so trials
# accumulate across notebook runs. The best trial reported below may come from an
# earlier session; use a new study_name (or a new database) for a clean study.
study = tuner.optimize_hyperparameters(
    algo_class=SAC,
    env_config=single_env_config,  # Use the existing env config
    search_space=sac_search_space,
    fixed_params={"verbose": 0},  # Optional: fixed parameters to keep constant
    study_name="sac_optimization",
    n_trials=2,  # Start with very few trials for quick testing on mlflow + optuna
    total_timesteps=50000,
    num_eval_episodes=5,
    optimization_metric=optimization_metric,
    direction="maximize"
)

print(f"✅ Best parameters: {study.best_params}")
print(f"✅ Best value: {study.best_value:.4f}")
# The metric is the objective itself, so report it via the best trial rather than
# via user_attrs (which does not contain it and printed 'N/A').
print(f"✅ Best trial: #{study.best_trial.number} (objective: {optimization_metric})")


2025/08/04 22:57:49 INFO mlflow.tracking.fluent: Experiment with name 'quantrl_hyperparameter_optimization' does not exist. Creating a new experiment.


  self.mlflow_callback = MLflowCallback(


Starting SAC hyperparameter optimization...


[I 2025-08-04 22:57:49,853] Using an existing study with name 'sac_optimization' instead of creating a new one.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


[I 2025-08-04 23:01:32,078] Trial 16 finished with value: -88.62322235107422 and parameters: {'learning_rate': 0.0009488847836753405, 'batch_size': 256, 'gamma': 0.9670296862598182, 'tau': 0.09254593878602649, 'train_freq': 4, 'gradient_steps': 8, 'target_update_interval': 1}. Best is trial 11 with value: 33.12828063964844.


Output()

SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.
SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


Output()

SingleStockTradingEnv closed.


[I 2025-08-04 23:02:02,014] Trial 17 finished with value: 0.3403593897819519 and parameters: {'learning_rate': 0.0002998541882170933, 'batch_size': 256, 'gamma': 0.9055448775320112, 'tau': 0.07176579021834961, 'train_freq': 8, 'gradient_steps': 1, 'target_update_interval': 8}. Best is trial 11 with value: 33.12828063964844.


✅ Best parameters: {'learning_rate': 0.0004588680478819734, 'batch_size': 256, 'gamma': 0.9949238538436462, 'tau': 0.07594400563035035}
✅ Best value: 33.1283
✅ test_avg_return_pct: N/A


# Running backtesting for different combinations

The example below shows 3 different algorithm configs x 2 different env configs

In [None]:
# Step 1: custom hyperparameters for PPO and SAC.
# A2C has no entry here, so it will use the presets defined in step 2.
ppo_custom_config = BacktestRunner.create_custom_config(
    PPO,
    learning_rate=0.001,
    n_steps=2048,
    batch_size=64,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.01,
)
sac_custom_config = BacktestRunner.create_custom_config(
    SAC,
    learning_rate=0.0003,
    batch_size=256,
    gamma=0.99,
    tau=0.005,
    ent_coef='auto',
    target_update_interval=1,
    train_freq=1,
    policy_kwargs={'net_arch': [256, 256]},
)
custom_configs = {
    'PPO': ppo_custom_config,
    'SAC': sac_custom_config,
}

# Step 2: presets for A2C (and as fallback).
# Alternatives are "default" and "conservative".
presets = ["explorative"]

# Step 3: run every algorithm on every environment config.
algorithms = [PPO, A2C, SAC]

comprehensive_results = runner.run_comprehensive_backtest(
    algorithms=algorithms,
    env_configs=env_configs,
    presets=presets,                # A2C will use these presets
    custom_configs=custom_configs,  # PPO and SAC will use custom configs
    total_timesteps=50000,
    n_envs=4,
    num_eval_episodes=5,
)

print("\n✅ Comprehensive backtest completed!")

In [None]:
# TODO: move the reporting helpers below into a separate module for better organization.
# They print the backtest results as rich panels and tables.
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich.columns import Columns
from rich.text import Text

# Shared console that all reporting helpers print to.
console = Console()

def print_comprehensive_results_summary(results):
    """
    Print one panel per (algorithm, environment, configuration) result.

    Args:
        results: Nested mapping of algorithm name -> environment name ->
            configuration name -> result dict (the comprehensive_results
            dictionary from run_comprehensive_backtest). A result that
            contains an 'error' key is reported on a single line instead
            of a panel.
    """
    banner = "=" * 80
    console.print("\n" + banner, style="bold blue")
    console.print("COMPREHENSIVE BACKTESTING RESULTS SUMMARY", style="bold blue", justify="center")
    console.print(banner, style="bold blue")

    for algo_name, per_environment in results.items():
        # Section header for the algorithm
        console.print(f"\n🤖 [bold cyan]{algo_name}[/bold cyan]")
        console.print("-" * 60, style="cyan")

        for env_name, per_config in per_environment.items():
            # Sub-header for the environment
            console.print(f"\n  🌍 [bold yellow]{env_name.upper()} Environment[/bold yellow]")

            for config_name, outcome in per_config.items():
                if 'error' in outcome:
                    # Failed runs carry no metrics; show the error and move on.
                    console.print(f"    ❌ [red]{config_name}: ERROR - {outcome['error']}[/red]")
                else:
                    body = create_result_panel_content(outcome, config_name)
                    # Green border for a non-negative test return, red otherwise.
                    border = "green" if outcome['test_avg_return_pct'] >= 0 else "red"
                    console.print(
                        Panel(
                            body,
                            title=f"[bold]{config_name.upper()} Configuration[/bold]",
                            border_style=border,
                            padding=(1, 2),
                        )
                    )

def create_result_panel_content(result, config_name):
    """Build the renderable body of a result panel.

    Args:
        result: Result dict of one run. Required keys: 'train_avg_return_pct',
            'test_avg_return_pct', 'train_avg_reward', 'test_avg_reward',
            'train_reward_std', 'test_reward_std', 'total_timesteps',
            'train_episodes', 'test_episodes'. Optional keys:
            'train_action_stats', 'test_action_stats', 'config', 'preset'.
        config_name: Name of the configuration. Currently unused; kept so
            existing callers keep working.

    Returns:
        A rich renderable (summary text followed by the performance table)
        that can be passed directly to ``Panel`` or ``console.print``.

    Raises:
        KeyError: If one of the required keys is missing from ``result``.
    """
    # Local import so this helper does not depend on edits to the import cell.
    from rich.console import Group

    # Performance metrics
    train_return = result['train_avg_return_pct']
    test_return = result['test_avg_return_pct']

    train_color = "green" if train_return >= 0 else "red"
    test_color = "green" if test_return >= 0 else "red"

    # Create performance table
    perf_table = Table(show_header=True, header_style="bold magenta", box=None)
    perf_table.add_column("Metric", style="cyan", no_wrap=True)
    perf_table.add_column("Training", justify="right")
    perf_table.add_column("Testing", justify="right")

    perf_table.add_row(
        "Return %",
        f"[{train_color}]{train_return:.2f}%[/{train_color}]",
        f"[{test_color}]{test_return:.2f}%[/{test_color}]"
    )

    perf_table.add_row(
        "Avg Reward",
        f"{result['train_avg_reward']:.3f}",
        f"{result['test_avg_reward']:.3f}"
    )

    perf_table.add_row(
        "Reward Std",
        f"{result['train_reward_std']:.3f}",
        f"{result['test_reward_std']:.3f}"
    )

    # Action statistics: report the most frequent action of each phase, if available.
    train_actions = result.get('train_action_stats', {}).get('action_percentages', {})
    test_actions = result.get('test_action_stats', {}).get('action_percentages', {})

    action_info = ""
    if train_actions:
        top_train_action = max(train_actions, key=train_actions.get)
        top_train_pct = train_actions[top_train_action]
        action_info += f"🎯 Top Train Action: [bold]{top_train_action}[/bold] ({top_train_pct:.1f}%)\n"

    if test_actions:
        top_test_action = max(test_actions, key=test_actions.get)
        top_test_pct = test_actions[top_test_action]
        action_info += f"🎯 Top Test Action: [bold]{top_test_action}[/bold] ({top_test_pct:.1f}%)\n"

    # Configuration info: custom config summary, or the preset name as fallback.
    config = result.get('config')
    if config:
        config_info = f"⚙️  Custom Config: [dim]{len(config)} parameters[/dim]\n"
        # Show at most three of the key parameters that are present.
        key_params = ['learning_rate', 'batch_size', 'gamma', 'n_steps', 'tau', 'ent_coef']
        shown_params = [f"{param}={config[param]}" for param in key_params if param in config]
        if shown_params:
            config_info += f"   [dim]{', '.join(shown_params[:3])}[/dim]\n"
    else:
        config_info = f"⚙️  Preset: [dim]{result.get('preset', 'N/A')}[/dim]\n"

    # Training info
    training_info = f"🔄 Training: [dim]{result['total_timesteps']:,} timesteps[/dim]\n"
    training_info += f"📊 Episodes: [dim]{len(result['train_episodes'])} train, {len(result['test_episodes'])} test[/dim]\n"

    # Interpolating the Table into an f-string would embed its object repr
    # ("<rich.table.Table object at 0x...>") instead of the rendered table, so
    # the text and the table are combined as a group of renderables instead.
    summary = Text.from_markup(f"{config_info}{training_info}{action_info}")

    return Group(summary, perf_table)

def print_algorithm_comparison_table(results):
    """
    Print one table row per (algorithm, environment, configuration) result.

    Rows are sorted by test return percentage, best first; results without
    that key (for example failed runs) sort last and are shown as ERROR rows
    when they contain an 'error' key.

    Args:
        results: Nested mapping of algorithm name -> environment name ->
            configuration name -> result dict.
    """
    rule = "=" * 100
    console.print("\n" + rule, style="bold magenta")
    console.print("ALGORITHM COMPARISON TABLE", style="bold magenta", justify="center")
    console.print(rule, style="bold magenta")

    # Table layout
    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Algorithm", style="cyan", no_wrap=True)
    table.add_column("Environment", style="blue", no_wrap=True)
    table.add_column("Config", style="yellow", no_wrap=True)
    table.add_column("Test Return %", justify="right", style="bold")
    table.add_column("Train Return %", justify="right", style="bold")
    table.add_column("Test Reward", justify="right")
    table.add_column("Top Test Action", style="magenta")
    table.add_column("Status", justify="center")

    # Flatten the nested mapping into (algorithm, environment, config, result) tuples.
    flat_rows = [
        (algo_name, env_name, config_name, outcome)
        for algo_name, per_environment in results.items()
        for env_name, per_config in per_environment.items()
        for config_name, outcome in per_config.items()
    ]

    def test_return_of(row):
        # Missing metric sorts after every real value.
        return row[3].get('test_avg_return_pct', -float('inf'))

    flat_rows.sort(key=test_return_of, reverse=True)

    for algo_name, env_name, config_name, outcome in flat_rows:
        if 'error' in outcome:
            error_cell = "[red]ERROR[/red]"
            table.add_row(
                algo_name, env_name, config_name,
                error_cell, error_cell, error_cell,
                error_cell, "❌"
            )
            continue

        test_return = outcome['test_avg_return_pct']
        train_return = outcome['train_avg_return_pct']

        test_color = "green" if test_return >= 0 else "red"
        train_color = "green" if train_return >= 0 else "red"

        # Most frequent action during testing, when action statistics exist.
        test_actions = outcome.get('test_action_stats', {}).get('action_percentages', {})
        if test_actions:
            top_action = max(test_actions, key=test_actions.get)
            top_test_action = f"{top_action} ({test_actions[top_action]:.1f}%)"
        else:
            top_test_action = "N/A"

        # Status emoji, from the best tier downwards.
        if test_return >= 5:
            status = "🚀"
        elif test_return >= 1:
            status = "📈"
        elif test_return >= 0:
            status = "✅"
        else:
            status = "❌"

        table.add_row(
            algo_name, env_name, config_name,
            f"[{test_color}]{test_return:.2f}%[/{test_color}]",
            f"[{train_color}]{train_return:.2f}%[/{train_color}]",
            f"{outcome['test_avg_reward']:.3f}",
            top_test_action,
            status
        )

    console.print(table)

def print_best_performers(results, top_n=3):
    """
    Print the top_n successful configurations ranked by test return percentage.

    Args:
        results: Nested mapping of algorithm name -> environment name ->
            configuration name -> result dict. Results containing an
            'error' key are ignored.
        top_n: Maximum number of entries to print.
    """
    console.print(f"\n🏆 [bold gold1]TOP {top_n} BEST PERFORMERS[/bold gold1]")
    console.print("="*50, style="gold1")

    # Keep only the runs that finished without an error.
    successful = [
        (algo_name, env_name, config_name, outcome)
        for algo_name, per_environment in results.items()
        for env_name, per_config in per_environment.items()
        for config_name, outcome in per_config.items()
        if 'error' not in outcome
    ]

    # Best test return first.
    successful.sort(key=lambda entry: entry[3]['test_avg_return_pct'], reverse=True)

    medals = ("🥇", "🥈", "🥉")
    leading_colors = ("gold1", "silver")

    for position, (algo_name, env_name, config_name, outcome) in enumerate(successful[:top_n]):
        # Medals for the first three places, a plain number afterwards.
        if position < 3:
            rank_emoji = medals[position]
        else:
            rank_emoji = f"{position+1}."

        # First and second place get their own color; everything else shares one.
        if position < 2:
            color = leading_colors[position]
        else:
            color = "orange3"

        test_return = outcome['test_avg_return_pct']

        console.print(f"{rank_emoji} [{color}]{algo_name}[/{color}] on [blue]{env_name}[/blue] "
                     f"with [yellow]{config_name}[/yellow]: [{color}]{test_return:.2f}%[/{color}]")

# Usage examples: per-run panels, then the sorted comparison table, then the top performers.
# Requires comprehensive_results from the comprehensive backtest cell.
print_comprehensive_results_summary(comprehensive_results)
print_algorithm_comparison_table(comprehensive_results)
print_best_performers(comprehensive_results)

In [None]:
# Inspect the test episodes of one run: results are keyed by algorithm, environment and configuration name.
comprehensive_results["SAC"]["standard"]["custom"]["test_episodes"]

## BacktestRunner API Summary

### Method Overview

| Method | Parameter | Type | Description | Use Case |
|--------|-----------|------|-------------|----------|
| `run_single_experiment` | `env_config` | `Dict[str, Callable]` | Single environment config | Test one algorithm on one environment |
| `run_algorithm_comparison` | `env_config` | `Dict[str, Callable]` | Single environment config | Compare multiple algorithms on same environment |
| `run_preset_comparison` | `env_config` | `Dict[str, Callable]` | Single environment config | Compare different presets for same algorithm |
| `run_environment_comparison` | `env_configs` | `Dict[str, Dict[str, Callable]]` | Multiple environment configs | Test one algorithm across different environments |
| `run_comprehensive_backtest` | `env_configs` | `Dict[str, Dict[str, Callable]]` | Multiple environment configs | Full comparison: multiple algorithms × environments × presets |