In [None]:
# Setup and environment configuration
import sys
import os
from dotenv import load_dotenv

# Get relative paths
notebook_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in locals() else os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))

# Load environment variables
env_path = os.path.join(notebook_dir, '.env')
load_dotenv(env_path)

# Install FinRL package
%pip install -e {project_root} -q

# Add to path
if project_root not in sys.path:
    sys.path.insert(0, project_root)

import finrl
print(f'Using finrl from: {os.path.dirname(finrl.__file__)}')
print(f'Project root: {project_root}')


Note: you may need to restart the kernel to use updated packages.
Using finrl from: /Users/ayushraj/Documents/Python/FinRL/FinRL/finrl


In [2]:
# Install required packages if not already available
%pip install optuna -q
%pip install stable-baselines3 -q

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Optuna Hyperparameter Tuning for FinRL
# This notebook demonstrates hyperparameter optimization for PPO, SAC, and TD3 algorithms
# Using real stock trading environments based on NeurIPS_test.ipynb

import sys
import os
import warnings
import numpy as np
import pandas as pd
import pickle
import json
import shutil
import optuna
from datetime import datetime
from stable_baselines3.common.logger import configure
warnings.filterwarnings('ignore')

# Add the parent directory to path to import finrl modules
sys.path.append('../')
sys.path.append('../finrl')

# Import FinRL modules (same as NeurIPS_test.ipynb)
from finrl.agents.stablebaselines3.tune_sb3 import TuneSB3Optuna, LoggingCallback
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.config import INDICATORS
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl import config
from finrl.main import check_and_make_directories

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Verify that all required imports are working
print("üîç VERIFYING IMPORTS:")
try:
    import optuna
    print(f"‚úÖ Optuna version: {optuna.__version__}")
except ImportError as e:
    print(f"‚ùå Optuna import failed: {e}")
    
try:
    from finrl.agents.stablebaselines3.tune_sb3 import TuneSB3Optuna
    print("‚úÖ TuneSB3Optuna imported successfully")
except ImportError as e:
    print(f"‚ùå TuneSB3Optuna import failed: {e}")

try:
    from stable_baselines3 import PPO, SAC, TD3
    print("‚úÖ Stable-Baselines3 algorithms imported successfully")
except ImportError as e:
    print(f"‚ùå Stable-Baselines3 import failed: {e}")

print("üéØ All required imports verified!")

üîç VERIFYING IMPORTS:
‚úÖ Optuna version: 4.6.0
‚úÖ TuneSB3Optuna imported successfully
‚úÖ Stable-Baselines3 algorithms imported successfully
üéØ All required imports verified!


In [5]:
# Test hyperparameter samplers with dummy trial to verify fixes
import optuna
from finrl.agents.stablebaselines3 import hyperparams_opt as hpt

# Create dummy trial-like object with required attributes
class TestTrial:
    def __init__(self):
        self.n_actions = 30
        self.using_her_replay_buffer = False
        self.her_kwargs = {}
    
    def suggest_categorical(self, name, choices):
        return choices[0]
    
    def suggest_loguniform(self, name, a, b):
        return (a + b) / 2
    
    def suggest_uniform(self, name, a, b):
        return (a + b) / 2
    
    def suggest_int(self, name, a, b):
        return a

print("üß™ Testing hyperparameter samplers...")

# Test TD3 sampler
trial = TestTrial()
try:
    td3_params = hpt.sample_td3_params(trial)
    print("‚úÖ TD3 sampler works!")
    print(f"   TD3 params keys: {list(td3_params.keys())}")
    if 'action_noise' in td3_params:
        print(f"   action_noise: {td3_params['action_noise']} (type: {type(td3_params['action_noise'])})")
        if 'noise_std' in td3_params:
            print(f"   noise_std: {td3_params['noise_std']}")
except Exception as e:
    print(f"‚ùå TD3 sampler failed: {e}")

# Test SAC sampler
trial = TestTrial()
try:
    sac_params = hpt.sample_sac_params(trial)
    print("‚úÖ SAC sampler works!")
    print(f"   SAC params keys: {list(sac_params.keys())}")
except Exception as e:
    print(f"‚ùå SAC sampler failed: {e}")

print("üéØ Hyperparameter sampler tests completed!")

üß™ Testing hyperparameter samplers...
‚úÖ TD3 sampler works!
   TD3 params keys: ['gamma', 'learning_rate', 'batch_size', 'buffer_size', 'train_freq', 'gradient_steps', 'policy_kwargs', 'tau', 'action_noise', 'noise_std']
   action_noise: ornstein_uhlenbeck (type: <class 'str'>)
   noise_std: 0.5
‚úÖ SAC sampler works!
   SAC params keys: ['gamma', 'learning_rate', 'batch_size', 'buffer_size', 'learning_starts', 'train_freq', 'gradient_steps', 'ent_coef', 'tau', 'target_entropy', 'policy_kwargs']
üéØ Hyperparameter sampler tests completed!


In [10]:
from finrl.main import check_and_make_directories
from finrl import config

# Create necessary directories
check_and_make_directories([
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
])

ALGORITHMS = ["ppo", "sac", "td3"]  # Algorithms to optimize

TOTAL_TIMESTEPS = 150000  # Balanced approach for 1-minute 6-month data

N_TRIALS = 15  # Number of optimization trials per algorithm

# Early stopping configuration
THRESHOLD = 0.01  # Sharpe ratio improvement threshold
TRIAL_NUMBER = 3  # Minimum trials before early stopping
PATIENCE = 2      # Patience for early stopping

In [11]:
# Model persistence and logging configuration
BEST_MODELS_DIR = os.path.join(config.TRAINED_MODEL_DIR, "best_optuna_models")
BEST_MODELS_METADATA_FILE = os.path.join(BEST_MODELS_DIR, "best_models_metadata.json")
LOGS_DIR = os.path.join(config.RESULTS_DIR, "optuna_logs")

# Create directories if they don't exist
os.makedirs(BEST_MODELS_DIR, exist_ok=True)
os.makedirs(LOGS_DIR, exist_ok=True)

def load_best_models_metadata():
    """Load metadata about previously saved best models."""
    if os.path.exists(BEST_MODELS_METADATA_FILE):
        with open(BEST_MODELS_METADATA_FILE, 'r') as f:
            return json.load(f)
    return {}

def save_best_models_metadata(metadata):
    """Save metadata about best models."""
    with open(BEST_MODELS_METADATA_FILE, 'w') as f:
        json.dump(metadata, f, indent=2, default=str)

def get_best_model_path(algo):
    """Get the path for the best model of a given algorithm."""
    return os.path.join(BEST_MODELS_DIR, f"best_{algo}_model.zip")

def get_model_log_path(algo):
    """Get the log directory path for a given algorithm."""
    return os.path.join(LOGS_DIR, f"{algo}_logs")

def backup_previous_best_model(algo):
    """Backup the previous best model before replacing it."""
    model_path = get_best_model_path(algo)
    if os.path.exists(model_path):
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        backup_path = os.path.join(BEST_MODELS_DIR, f"backup_{algo}_model_{timestamp}.zip")
        shutil.copy2(model_path, backup_path)
        print(f"  üì¶ Previous best {algo.upper()} model backed up to: {backup_path}")
        return backup_path
    return None

def setup_model_logger(algo, trial_number=None):
    """Set up logger for model training with proper directory structure."""
    log_suffix = f"_trial_{trial_number}" if trial_number is not None else ""
    log_path = os.path.join(LOGS_DIR, f"{algo}{log_suffix}")
    os.makedirs(log_path, exist_ok=True)
    
    logger = configure(log_path, ["stdout", "csv", "tensorboard"])
    return logger, log_path

print("‚úÖ Model persistence system initialized.")
print(f"üìÅ Best models directory: {BEST_MODELS_DIR}")
print(f"üìÑ Metadata file: {BEST_MODELS_METADATA_FILE}")
print(f"üìä Logs directory: {LOGS_DIR}")

‚úÖ Model persistence system initialized.
üìÅ Best models directory: trained_models/best_optuna_models
üìÑ Metadata file: trained_models/best_optuna_models/best_models_metadata.json
üìä Logs directory: results/optuna_logs


In [None]:
def create_finrl_environments():
    """
    Create real FinRL trading environments using processed stock data.
    This follows the same pattern as NeurIPS_test.ipynb
    Filters training data to 2025-02-01 to 2025-07-30
    """
    import pandas as pd
    from finrl.config import INDICATORS
    from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
    
    # Use relative paths
    train_data_path = os.path.join(notebook_dir, 'train_data.csv')
    trade_data_path = os.path.join(notebook_dir, 'trade_data.csv')
    
    # Load training and trading data
    train = pd.read_csv(train_data_path, index_col=0)
    trade = pd.read_csv(trade_data_path, index_col=0)
    
    # Convert date column to datetime and filter train data to 2025-02-01 to 2025-08-01
    train['date'] = pd.to_datetime(train['date'])
    train = train[(train['date'] >= '2025-02-01') & (train['date'] <= '2025-08-01')]
    
    trade['date'] = pd.to_datetime(trade['date'])
    trade = trade[(trade['date'] >= '2025-08-01') & (trade['date'] <= '2025-11-01')]
    
    # Get unique dates and create day mapping
    train_dates = sorted(train['date'].unique())
    trade_dates = sorted(trade['date'].unique())
    
    # Create day index for train and trade data
    train['day'] = train['date'].map({date: i for i, date in enumerate(train_dates)})
    trade['day'] = trade['date'].map({date: i for i, date in enumerate(trade_dates)})
    
    # Set the day as index - this is what StockTradingEnv expects
    train = train.set_index('day')
    trade = trade.set_index('day')
    
    # Handle NaN values
    train = train.ffill()
    train = train.bfill()
    trade = trade.ffill()
    trade = trade.bfill()

    # Verify no NaN values
    assert not train.isna().any().any(), "NaN values found in training data"
    assert not trade.isna().any().any(), "NaN values found in trading data"
    
    # Calculate environment parameters
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
    
    # Environment parameters
    buy_cost_list = sell_cost_list = [0.001] * stock_dimension
    num_stock_shares = [0] * stock_dimension
    
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "num_stock_shares": num_stock_shares,
        "buy_cost_pct": buy_cost_list,
        "sell_cost_pct": sell_cost_list,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": INDICATORS,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    
    # Create training environment (vectorized for SB3)
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    
    # Create trading environment (raw for backtesting)
    env_trade = StockTradingEnv(df=trade, turbulence_threshold=70, risk_indicator_col='vixy', **env_kwargs)
    
    return env_train, env_trade

env_train, env_trade = create_finrl_environments()


In [None]:
# DIMENSION VERIFICATION - Run this before training to ensure compatibility
print("="*70)
print("üîç VERIFYING ENVIRONMENT DIMENSIONS")
print("="*70)

# Load the same data that create_finrl_environments uses
import pandas as pd
from finrl.config import INDICATORS

# Use relative path
train_data_path = os.path.join(notebook_dir, 'train_data.csv')
train = pd.read_csv(train_data_path, index_col=0)
train['date'] = pd.to_datetime(train['date'])
train_filtered = train[(train['date'] >= '2025-02-01') & (train['date'] <= '2025-08-01')]

# Compute dimensions
stock_dimension = len(train_filtered['tic'].unique())
n_indicators = len(INDICATORS)
expected_state_space = 1 + 2*stock_dimension + n_indicators*stock_dimension

print(f"\nüìä COMPUTED DIMENSIONS:")
print(f"  Stock dimension: {stock_dimension}")
print(f"  Number of indicators: {n_indicators}")
print(f"  Expected state space: {expected_state_space}")
print(f"    Formula: 1 + 2√ó{stock_dimension} + {n_indicators}√ó{stock_dimension} = {expected_state_space}")

# Verify against actual environment
print(f"\nüîé VERIFYING AGAINST ENVIRONMENT:")
try:
    # Check vectorized training environment
    if hasattr(env_train, 'envs') and len(env_train.envs) > 0:
        # VecEnv wrapper
        inner_env = env_train.envs[0]
        obs_space = inner_env.observation_space
        act_space = inner_env.action_space
        print(f"  Training env type: VecEnv (vectorized)")
    else:
        # Direct env
        obs_space = env_train.observation_space
        act_space = env_train.action_space
        print(f"  Training env type: Direct environment")
    
    # Get observation space shape
    obs_shape = obs_space.shape[0] if hasattr(obs_space, 'shape') else None
    print(f"  Observation space shape: {obs_space.shape if hasattr(obs_space, 'shape') else 'N/A'}")
    
    # Get action space dimension
    if hasattr(act_space, 'shape') and act_space.shape:
        act_dim = act_space.shape[0]
    elif hasattr(act_space, 'n'):
        act_dim = act_space.n
    else:
        act_dim = None
    print(f"  Action space: {act_space}")
    print(f"  Action dimension: {act_dim}")
    
    # Verification checks
    print(f"\n‚úÖ VERIFICATION RESULTS:")
    
    if obs_shape is not None:
        if obs_shape == expected_state_space:
            print(f"  ‚úÖ Observation space matches: {obs_shape} == {expected_state_space}")
        else:
            print(f"  ‚ö†Ô∏è  MISMATCH: Observation space {obs_shape} != expected {expected_state_space}")
            print(f"      Difference: {obs_shape - expected_state_space}")
    else:
        print(f"  ‚ö†Ô∏è  Could not determine observation space shape")
    
    if act_dim is not None:
        if act_dim == stock_dimension:
            print(f"  ‚úÖ Action space matches: {act_dim} == {stock_dimension}")
        else:
            print(f"  ‚ö†Ô∏è  MISMATCH: Action space {act_dim} != expected {stock_dimension}")
    else:
        print(f"  ‚ö†Ô∏è  Could not determine action space dimension")
    
    # Display indicator list for reference
    print(f"\nüìã TECHNICAL INDICATORS ({n_indicators} total):")
    for i, ind in enumerate(INDICATORS, 1):
        print(f"  {i}. {ind}")
    
    # Test a sample observation reset
    print(f"\nüß™ TESTING ENVIRONMENT RESET:")
    if hasattr(env_train, 'reset'):
        sample_obs = env_train.reset()
        if isinstance(sample_obs, tuple):
            sample_obs = sample_obs[0]  # Handle gym/gymnasium API differences
        if hasattr(sample_obs, 'shape'):
            print(f"  Sample observation shape: {sample_obs.shape}")
            print(f"  Sample observation type: {type(sample_obs)}")
        else:
            print(f"  Sample observation: {sample_obs[:10] if len(sample_obs) > 10 else sample_obs}...")
    
except Exception as e:
    print(f"\n‚ùå Verification error: {e}")
    import traceback
    traceback.print_exc()

print("\n" + "="*70)
print("‚úÖ DIMENSION VERIFICATION COMPLETE")
print("="*70)


üîç VERIFYING ENVIRONMENT DIMENSIONS

üìä COMPUTED DIMENSIONS:
  Stock dimension: 30
  Number of indicators: 8
  Expected state space: 301
    Formula: 1 + 2√ó30 + 8√ó30 = 301

üîé VERIFYING AGAINST ENVIRONMENT:
  Training env type: VecEnv (vectorized)
  Observation space shape: (301,)
  Action space: Box(-1.0, 1.0, (30,), float32)
  Action dimension: 30

‚úÖ VERIFICATION RESULTS:
  ‚úÖ Observation space matches: 301 == 301
  ‚úÖ Action space matches: 30 == 30

üìã TECHNICAL INDICATORS (8 total):
  1. macd
  2. boll_ub
  3. boll_lb
  4. rsi_30
  5. cci_30
  6. dx_30
  7. close_30_sma
  8. close_60_sma

üß™ TESTING ENVIRONMENT RESET:
  Sample observation shape: (1, 301)
  Sample observation type: <class 'numpy.ndarray'>

‚úÖ DIMENSION VERIFICATION COMPLETE

üìä COMPUTED DIMENSIONS:
  Stock dimension: 30
  Number of indicators: 8
  Expected state space: 301
    Formula: 1 + 2√ó30 + 8√ó30 = 301

üîé VERIFYING AGAINST ENVIRONMENT:
  Training env type: VecEnv (vectorized)
  Observati

In [None]:
def run_optuna_optimization(env_train, env_trade, algorithms=None):
    """
    Run Optuna hyperparameter optimization for specified algorithms.
    Now includes comprehensive model saving, logging, and best model persistence.
    
    Args:
        env_train: Training environment
        env_trade: Trading/testing environment  
        algorithms: List of algorithms to optimize (default: ["ppo", "sac", "td3"])
    
    Returns:
        dict: Results for each algorithm
    """
    if algorithms is None:
        algorithms = ALGORITHMS
    
    if env_train is None or env_trade is None:
        return {}
    
    # Load existing best models metadata
    best_models_metadata = load_best_models_metadata()
    results = {}
    
    for algo in algorithms:
        print(f"\n{'='*60}")
        print(f"üöÄ Starting {algo.upper()} optimization...")
        print(f"{'='*60}")
        
        # Check if we have a previous best model
        previous_best_sharpe = None
        if algo in best_models_metadata:
            previous_best_sharpe = best_models_metadata[algo].get('best_sharpe', None)
            if previous_best_sharpe is not None:
                print(f"üìä Previous best {algo.upper()} Sharpe ratio: {previous_best_sharpe:.4f}")
                print(f"üìÖ Saved on: {best_models_metadata[algo].get('timestamp', 'Unknown')}")
        
        try:
            # Setup logging for this optimization run
            run_logger, log_path = setup_model_logger(algo, "optuna_run")
            print(f"üìù Logs will be saved to: {log_path}")
            
            logging_cb = LoggingCallback(
                threshold=THRESHOLD,
                trial_number=TRIAL_NUMBER,
                patience=PATIENCE
            )
            
            # Create custom TuneSB3Optuna with enhanced logging and model saving
            class EnhancedTuneSB3Optuna(TuneSB3Optuna):
                def __init__(self, *args, **kwargs):
                    super().__init__(*args, **kwargs)
                    self.algo_name = kwargs.get('model_name', 'unknown')
                    self.run_logger = run_logger
                    self.trial_models = {}  # Store trial models for later reference
                
                def objective(self, trial: optuna.Trial):
                    """Override objective to add enhanced logging and model saving for each trial."""
                    print(f"  üîÑ Trial {trial.number}: Starting {self.algo_name.upper()} optimization...")

                    # Ensure trial has environment-dependent attributes that some samplers expect
                    try:
                        n_actions = None
                        if hasattr(self.env_train, "action_space") and getattr(self.env_train.action_space, "shape", None) is not None:
                            n_actions = int(self.env_train.action_space.shape[-1])
                        else:
                            if hasattr(self.env_train, "envs") and len(self.env_train.envs) > 0:
                                n_actions = int(self.env_train.envs[0].action_space.shape[-1])
                    except Exception:
                        n_actions = None

                    try:
                        setattr(trial, "n_actions", n_actions)
                    except Exception:
                        pass

                    # Default to not using HER unless explicitly set elsewhere
                    try:
                        if not hasattr(trial, "using_her_replay_buffer"):
                            setattr(trial, "using_her_replay_buffer", False)
                    except Exception:
                        pass

                    try:
                        if not hasattr(trial, "her_kwargs"):
                            setattr(trial, "her_kwargs", {})
                    except Exception:
                        pass

                    # Sample hyperparameters (same as original)
                    hyperparameters = self.default_sample_hyperparameters(trial)
                    policy_kwargs = hyperparameters.get("policy_kwargs", {})
                    if "policy_kwargs" in hyperparameters:
                        del hyperparameters["policy_kwargs"]
                    
                    print(f"  üìä Trial {trial.number} hyperparameters: {hyperparameters}")
                    
                    # Create model with trial hyperparameters
                    model = self.agent.get_model(
                        self.model_name, policy_kwargs=policy_kwargs, model_kwargs=hyperparameters
                    )
                    
                    # Setup trial-specific logger
                    trial_logger, trial_log_path = setup_model_logger(self.algo_name, trial.number)
                    model.set_logger(trial_logger)
                    
                    # Train the model
                    print(f"  üéØ Trial {trial.number}: Training for {self.total_timesteps:,} timesteps...")
                    trained_model = self.agent.train_model(
                        model=model,
                        tb_log_name=f"{self.model_name}_trial_{trial.number}",
                        total_timesteps=self.total_timesteps,
                    )
                    
                    # === SAVE TRIAL MODEL BEFORE ANY DELETION ===
                    # Save trial model (use same format as original tune_sb3.py - .pth files)
                    # This ensures compatibility with the original backtest method
                    trial_model_path = f"./{config.TRAINED_MODEL_DIR}/{self.model_name}_{trial.number}.pth"
                    print(f"  üíæ Trial {trial.number}: Saving model to {trial_model_path}...")
                    trained_model.save(trial_model_path)
                    
                    # Verify primary model file was saved successfully
                    if not os.path.exists(trial_model_path):
                        raise RuntimeError(f"Failed to save trial model to {trial_model_path}")
                    
                    print(f"  ‚úÖ Trial {trial.number} model saved to: {trial_model_path}")
                    
                    # Also save a backup copy in our enhanced format for better organization
                    backup_trial_path = os.path.join(BEST_MODELS_DIR, f"trial_{self.algo_name}_{trial.number}.zip")
                    print(f"  üíæ Trial {trial.number}: Saving backup to {backup_trial_path}...")
                    trained_model.save(backup_trial_path)
                    
                    # Verify backup was saved successfully
                    if not os.path.exists(backup_trial_path):
                        print(f"  ‚ö†Ô∏è  Warning: Backup save to {backup_trial_path} may have failed")
                    else:
                        print(f"  ‚úÖ Backup saved successfully")
                    
                    # Store model reference for potential later use
                    self.trial_models[trial.number] = {
                        'model_path': trial_model_path,  # Use original format path for compatibility
                        'backup_path': backup_trial_path,  # Our enhanced backup
                        'hyperparameters': hyperparameters,
                        'policy_kwargs': policy_kwargs
                    }
                    
                    # Backtest this trial's model on trade environment
                    print(f"  üìà Trial {trial.number}: Backtesting on trade data...")
                    df_account_value, _ = DRLAgent.DRL_prediction(
                        model=trained_model, environment=self.env_trade
                    )
                    
                    # Calculate Sharpe ratio for this trial
                    sharpe = self.calculate_sharpe(df_account_value)
                    print(f"  üìä Trial {trial.number}: Sharpe ratio = {sharpe:.4f}")
                    
                    # === MEMORY CLEANUP AFTER SAVING AND BACKTESTING ===
                    print(f"  üßπ Trial {trial.number}: Cleaning up memory...")
                    
                    # Delete model objects to free memory (models already saved to disk)
                    del trained_model
                    del model
                    if 'df_account_value' in locals():
                        del df_account_value
                    
                    # Force Python garbage collection
                    import gc
                    gc.collect()
                    
                    # Clear CUDA cache if available
                    try:
                        import torch
                        if torch.cuda.is_available():
                            torch.cuda.empty_cache()
                            torch.cuda.synchronize()
                            print(f"  ‚úÖ CUDA memory cleared")
                    except Exception as e:
                        pass  # No CUDA or torch not available
                    
                    # Additional aggressive cleanup for PyTorch
                    try:
                        import torch
                        # Clear all cached memory
                        if hasattr(torch.cuda, 'ipc_collect'):
                            torch.cuda.ipc_collect()
                        # Reset peak memory stats
                        if hasattr(torch.cuda, 'reset_peak_memory_stats'):
                            torch.cuda.reset_peak_memory_stats()
                    except Exception:
                        pass
                    
                    print(f"  ‚úÖ Memory cleanup complete for trial {trial.number}")
                    
                    return sharpe
                
                def get_best_trial_model(self, study):
                    """Load and return the best trial model using the same method as original backtest."""
                    best_trial_num = study.best_trial.number
                    # Use the same model loading approach as the original backtest method
                    best_model_path = f"./{config.TRAINED_MODEL_DIR}/{self.model_name}_{best_trial_num}.pth"
                    
                    # Verify the best model file exists before loading
                    if not os.path.exists(best_model_path):
                        raise FileNotFoundError(f"Best trial model not found at {best_model_path}")
                    
                    # Load using the same method as original TuneSB3Optuna.backtest()
                    best_model = self.MODELS[self.model_name].load(
                        best_model_path,
                        env=self.env_train
                    )
                    return best_model
            
            tuner = EnhancedTuneSB3Optuna(
                env_train=env_train,
                model_name=algo,
                env_trade=env_trade,
                logging_callback=logging_cb,
                total_timesteps=TOTAL_TIMESTEPS,
                n_trials=N_TRIALS,
            )
            
            # Run optimization (this will call our enhanced objective method for each trial)
            print(f"üîç Running {N_TRIALS} trials for {algo.upper()}...")
            study = tuner.run_optuna()
            
            print(f"‚úÖ {algo.upper()} optimization completed!")
            print(f"üìä Best trial: {study.best_trial.number} with Sharpe: {study.best_value:.4f}")
            
            # Get the actual best model (not re-backtest, just load it)
            best_model = tuner.get_best_trial_model(study)
            
            # Perform final backtesting on best model (this is what the original backtest method does)
            print(f"üìà Final backtesting of best {algo.upper()} model...")
            df_account_value, df_actions, perf_stats = tuner.backtest(study)
            
            current_best_sharpe = study.best_value
            
            # Determine if this is a new best model
            is_new_best = True
            improvement = 0
            if previous_best_sharpe is not None:
                is_new_best = current_best_sharpe > previous_best_sharpe
                improvement = current_best_sharpe - previous_best_sharpe
            else:
                improvement = current_best_sharpe
            
            if is_new_best:
                print(f"üéâ NEW BEST {algo.upper()} MODEL FOUND!")
                print(f"   Current Sharpe: {current_best_sharpe:.4f}")
                if previous_best_sharpe is not None:
                    print(f"   Previous best: {previous_best_sharpe:.4f}")
                    print(f"   Improvement: +{improvement:.4f}")
                
                # Backup previous model if it exists
                backup_path = backup_previous_best_model(algo)
                
                # Save the actual best model (from best trial)
                best_model_path = get_best_model_path(algo)
                best_model.save(best_model_path)
                print(f"üíæ New best {algo.upper()} model saved to: {best_model_path}")
                
                # Save Optuna study
                study_path = os.path.join(BEST_MODELS_DIR, f"best_{algo}_study.pkl")
                with open(study_path, 'wb') as f:
                    pickle.dump(study, f)
                print(f"üìä Optuna study saved to: {study_path}")
                
                # Update metadata
                best_models_metadata[algo] = {
                    'best_sharpe': current_best_sharpe,
                    'best_params': study.best_params,
                    'best_trial': study.best_trial.number,
                    'model_path': best_model_path,
                    'study_path': study_path,
                    'backup_path': backup_path,
                    'log_path': log_path,
                    'timestamp': datetime.now().isoformat(),
                    'total_trials': len(study.trials),
                    'improvement': improvement,
                    'training_timesteps': TOTAL_TIMESTEPS,
                    'performance_stats': perf_stats
                }
                
                status = "NEW_BEST"
                print(f"‚úÖ {algo.upper()} optimization completed successfully!")
                
            else:
                print(f"üìâ No improvement for {algo.upper()}")
                print(f"   Current Sharpe: {current_best_sharpe:.4f}")
                print(f"   Previous best: {previous_best_sharpe:.4f}")
                print(f"   Difference: {improvement:.4f}")
                print(f"üîÑ Keeping previous best model")
                
                status = "NO_IMPROVEMENT"
            
            # Store results
            results[algo] = {
                'study': study,
                'best_params': study.best_params,
                'best_value': current_best_sharpe,
                'best_trial': study.best_trial,
                'account_value': df_account_value,
                'actions': df_actions,
                'performance_stats': perf_stats,
                'is_new_best': is_new_best,
                'previous_best_sharpe': previous_best_sharpe,
                'improvement': improvement,
                'status': status,
                'model_saved': is_new_best,
                'log_path': log_path,
                'trial_models': tuner.trial_models  # Include trial model info
            }
            
            print(f"üìã {algo.upper()} Summary: Sharpe {current_best_sharpe:.4f} ({status})")
            
            # Clean up after algorithm optimization completes
            print(f"üßπ Cleaning up memory after {algo.upper()} optimization...")
            del best_model
            import gc
            gc.collect()
            try:
                import torch
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    torch.cuda.synchronize()
            except Exception:
                pass
            
        except Exception as e:
            print(f"‚ùå {algo.upper()} optimization failed: {str(e)}")
            print(f"üìã Error details saved to logs")
            
            # Save error information
            error_info = {
                'error': str(e),
                'timestamp': datetime.now().isoformat(),
                'algorithm': algo,
                'total_timesteps': TOTAL_TIMESTEPS,
                'n_trials': N_TRIALS
            }
            
            error_log_path = os.path.join(LOGS_DIR, f"{algo}_error_log.json")
            with open(error_log_path, 'w') as f:
                json.dump(error_info, f, indent=2)
            
            results[algo] = {
                'error': str(e),
                'error_log_path': error_log_path,
                'status': 'FAILED'
            }
            continue
    
    # Save updated metadata
    save_best_models_metadata(best_models_metadata)
    print(f"\n{'='*60}")
    print("üíæ Best models metadata updated")
    print(f"üìÑ Metadata file: {BEST_MODELS_METADATA_FILE}")
    print(f"{'='*60}")
    
    return results

if env_train is not None and env_trade is not None:
    print("‚úÖ Environments ready for optimization.")
    
    # Display current best models info
    current_metadata = load_best_models_metadata()
    if current_metadata:
        print(f"\nüìä CURRENT BEST MODELS:")
        print(f"{'Algorithm':<10} {'Sharpe Ratio':<12} {'Date Saved':<12} {'Trials':<8}")
        print("-" * 50)
        for algo, info in current_metadata.items():
            date_saved = info.get('timestamp', '')[:10] if info.get('timestamp') else 'Unknown'
            trials = info.get('total_trials', 'N/A')
            sharpe = info.get('best_sharpe', 0)
            print(f"{algo.upper():<10} {sharpe:<12.4f} {date_saved:<12} {trials:<8}")
    else:
        print("‚ÑπÔ∏è  No previous best models found. Starting fresh optimization.")


‚úÖ Environments ready for optimization.
‚ÑπÔ∏è  No previous best models found. Starting fresh optimization.


In [19]:
optimization_results = run_optuna_optimization(env_train, env_trade, ALGORITHMS)

print("\nOptimization Summary:")
for algo, result in optimization_results.items():
    if 'error' in result:
        print(f"{algo.upper()}: Failed - {result['error']}")
    else:
        print(f"{algo.upper()}: Best Sharpe = {result['best_value']:.4f} (Trial {result['best_trial'].number})")

print(f"\nResults saved in:")
print(f"  Trained models: {config.TRAINED_MODEL_DIR}")
print(f"  Study files: Current directory (*_study.pkl)")
print(f"  Performance stats: {config.RESULTS_DIR}")


üöÄ Starting PPO optimization...
Logging to results/optuna_logs/ppo_trial_optuna_run


[I 2025-11-23 23:48:39,260] A new study created in memory with name: ppo_study


üìù Logs will be saved to: results/optuna_logs/ppo_trial_optuna_run
üîç Running 15 trials for PPO...
  üîÑ Trial 0: Starting PPO optimization...
  üìä Trial 0 hyperparameters: {'n_steps': 2048, 'batch_size': 256, 'gamma': 0.999, 'learning_rate': 0.00011526449540315612, 'ent_coef': 1.8740223688836284e-07, 'clip_range': 0.3, 'n_epochs': 5, 'gae_lambda': 0.92, 'max_grad_norm': 0.8, 'vf_coef': 0.4401524937396013}
{'n_steps': 2048, 'batch_size': 256, 'gamma': 0.999, 'learning_rate': 0.00011526449540315612, 'ent_coef': 1.8740223688836284e-07, 'clip_range': 0.3, 'n_epochs': 5, 'gae_lambda': 0.92, 'max_grad_norm': 0.8, 'vf_coef': 0.4401524937396013}
Using mps device
Logging to results/optuna_logs/ppo_trial_0
  üéØ Trial 0: Training for 150,000 timesteps...
  üéØ Trial 0: Training for 150,000 timesteps...
--------------------------------------
| time/              |               |
|    fps             | 18            |
|    iterations      | 1             |
|    time_elapsed    | 111    

[W 2025-11-24 00:46:58,527] Trial 0 failed with parameters: {'batch_size': 256, 'n_steps': 2048, 'gamma': 0.999, 'learning_rate': 0.00011526449540315612, 'ent_coef': 1.8740223688836284e-07, 'clip_range': 0.3, 'n_epochs': 5, 'gae_lambda': 0.92, 'max_grad_norm': 0.8, 'vf_coef': 0.4401524937396013, 'net_arch': 'medium', 'activation_fn': 'relu'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/ayushraj/Documents/Python/FinRL/venv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 205, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/qr/80j1cy4s5q542dmdc5lwzzqc0000gn/T/ipykernel_76885/3147036171.py", line 108, in objective
    trained_model = self.agent.train_model(
                    ^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/ayushraj/Documents/Python/FinRL/FinRL/finrl/agents/stablebaselines3/models.py", line 147, in train_model
    model = model.learn(
            ^^^^^^^^^^

KeyboardInterrupt: 

In [None]:
# Results Analysis and Visualization
# Analyze the optimization results and create visualizations

import matplotlib.pyplot as plt
import seaborn as sns

def analyze_optimization_results(results):
    """
    Analyze and visualize optimization results.
    """
    if not results:
        print("‚ùå No results to analyze")
        return
    
    print("üìä Detailed Results Analysis:")
    print("="*60)
    
    # Create comparison dataframe
    comparison_data = []
    
    for algo, result in results.items():
        if 'error' not in result:
            comparison_data.append({
                'Algorithm': algo.upper(),
                'Best_Sharpe': result['best_value'],
                'Best_Trial': result['best_trial'].number,
                'Total_Trials': len(result['study'].trials)
            })
            
            print(f"\\nüéØ {algo.upper()} Results:")
            print(f"   - Best Sharpe Ratio: {result['best_value']:.4f}")
            print(f"   - Best Trial Number: {result['best_trial'].number}")
            print(f"   - Total Trials: {len(result['study'].trials)}")
            print(f"   - Best Parameters:")
            for param, value in result['best_params'].items():
                print(f"     ‚Ä¢ {param}: {value}")
    
    if comparison_data:
        comparison_df = pd.DataFrame(comparison_data)
        
        # Create visualization
        plt.figure(figsize=(12, 8))
        
        # Plot 1: Best Sharpe Ratio Comparison
        plt.subplot(2, 2, 1)
        sns.barplot(data=comparison_df, x='Algorithm', y='Best_Sharpe')
        plt.title('Best Sharpe Ratio by Algorithm')
        plt.ylabel('Sharpe Ratio')
        
        # Plot 2: Trial Convergence (if available)
        plt.subplot(2, 2, 2)
        for algo, result in results.items():
            if 'error' not in result:
                trial_values = [trial.value for trial in result['study'].trials if trial.value is not None]
                plt.plot(trial_values, label=algo.upper(), marker='o', alpha=0.7)
        plt.title('Optimization Progress')
        plt.xlabel('Trial')
        plt.ylabel('Sharpe Ratio')
        plt.legend()
        
        # Plot 3: Best Trial Numbers
        plt.subplot(2, 2, 3)
        sns.barplot(data=comparison_df, x='Algorithm', y='Best_Trial')
        plt.title('Best Trial Number by Algorithm')
        plt.ylabel('Trial Number')
        
        # Plot 4: Total Trials
        plt.subplot(2, 2, 4)
        sns.barplot(data=comparison_df, x='Algorithm', y='Total_Trials')
        plt.title('Total Trials by Algorithm')
        plt.ylabel('Number of Trials')
        
        plt.tight_layout()
        plt.show()
        
        # Display comparison table
        print(f"\\nüìã Summary Table:")
        print(comparison_df.to_string(index=False))
        
        # Find best overall algorithm
        best_algo = comparison_df.loc[comparison_df['Best_Sharpe'].idxmax()]
        print(f"\\nüèÜ Best Overall Algorithm: {best_algo['Algorithm']}")
        print(f"   - Sharpe Ratio: {best_algo['Best_Sharpe']:.4f}")

# Analyze results if optimization was run
if 'optimization_results' in locals() and optimization_results:
    analyze_optimization_results(optimization_results)
else:
    print("‚ö†Ô∏è  No optimization results found. Run the optimization first.")

In [None]:
# Model Management and Utilities
def load_best_model(algo):
    """Load the best saved model for a given algorithm."""
    from stable_baselines3 import PPO, SAC, TD3
    
    model_classes = {
        'ppo': PPO,
        'sac': SAC, 
        'td3': TD3
    }
    
    if algo.lower() not in model_classes:
        raise ValueError(f"Unsupported algorithm: {algo}")
    
    model_path = get_best_model_path(algo.lower())
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"No best model found for {algo.upper()}")
    
    ModelClass = model_classes[algo.lower()]
    model = ModelClass.load(model_path)
    print(f"‚úÖ Loaded best {algo.upper()} model from: {model_path}")
    return model

def get_model_performance_summary():
    """Get a comprehensive performance summary of all best models."""
    metadata = load_best_models_metadata()
    
    if not metadata:
        print("‚ÑπÔ∏è  No best models found.")
        return None
    
    print(f"\n{'='*80}")
    print("üìä COMPREHENSIVE MODEL PERFORMANCE SUMMARY")
    print(f"{'='*80}")
    
    summary_data = []
    
    for algo, info in metadata.items():
        summary_data.append({
            'Algorithm': algo.upper(),
            'Best_Sharpe': info.get('best_sharpe', 0),
            'Improvement': info.get('improvement', 0),
            'Best_Trial': info.get('best_trial', 'N/A'),
            'Total_Trials': info.get('total_trials', 'N/A'),
            'Training_Steps': info.get('training_timesteps', 'N/A'),
            'Date_Saved': info.get('timestamp', '')[:19] if info.get('timestamp') else 'Unknown'
        })
        
        print(f"\nüéØ {algo.upper()} DETAILS:")
        print(f"   Best Sharpe Ratio: {info.get('best_sharpe', 0):.4f}")
        print(f"   Improvement: +{info.get('improvement', 0):.4f}")
        print(f"   Best Trial: {info.get('best_trial', 'N/A')}")
        print(f"   Total Trials: {info.get('total_trials', 'N/A')}")
        print(f"   Training Steps: {info.get('training_timesteps', 'N/A'):,}")
        print(f"   Date Saved: {info.get('timestamp', 'Unknown')[:19]}")
        print(f"   Model Path: {info.get('model_path', 'N/A')}")
        print(f"   Log Path: {info.get('log_path', 'N/A')}")
        
        # Show best hyperparameters
        best_params = info.get('best_params', {})
        if best_params:
            print(f"   Best Parameters:")
            for param, value in best_params.items():
                print(f"     ‚Ä¢ {param}: {value}")
    
    # Create summary DataFrame
    summary_df = pd.DataFrame(summary_data)
    
    if len(summary_df) > 0:
        print(f"\nüìã SUMMARY TABLE:")
        print(summary_df.to_string(index=False))
        
        # Find best overall algorithm
        best_idx = summary_df['Best_Sharpe'].idxmax()
        best_algo_info = summary_df.loc[best_idx]
        
        print(f"\nüèÜ BEST OVERALL ALGORITHM: {best_algo_info['Algorithm']}")
        print(f"   Sharpe Ratio: {best_algo_info['Best_Sharpe']:.4f}")
        print(f"   Date Achieved: {best_algo_info['Date_Saved']}")
    
    return summary_df

def clean_old_trial_models(keep_best_n=5):
    """Clean up old trial models, keeping only the best N trials for each algorithm."""
    print(f"üßπ Cleaning old trial models (keeping top {keep_best_n} for each algorithm)...")
    
    trial_files = [f for f in os.listdir(BEST_MODELS_DIR) if f.startswith('trial_') and f.endswith('.zip')]
    
    # Group by algorithm
    algo_trials = {}
    for file in trial_files:
        parts = file.replace('trial_', '').replace('.zip', '').split('_')
        if len(parts) >= 2:
            algo = parts[0]
            trial_num = parts[1]
            if algo not in algo_trials:
                algo_trials[algo] = []
            algo_trials[algo].append((file, trial_num))
    
    cleaned_count = 0
    for algo, trials in algo_trials.items():
        if len(trials) > keep_best_n:
            # Sort by trial number (assuming higher trial numbers are more recent)
            trials.sort(key=lambda x: int(x[1]) if x[1].isdigit() else 0, reverse=True)
            
            # Remove older trials
            for file, trial_num in trials[keep_best_n:]:
                file_path = os.path.join(BEST_MODELS_DIR, file)
                if os.path.exists(file_path):
                    os.remove(file_path)
                    cleaned_count += 1
                    print(f"  üóëÔ∏è  Removed {file}")
    
    print(f"‚úÖ Cleanup complete. Removed {cleaned_count} old trial models.")

# Display current status
print("üîß Model management utilities loaded:")
print("  ‚Ä¢ load_best_model(algo) - Load best saved model")
print("  ‚Ä¢ get_model_performance_summary() - Show comprehensive performance summary")  
print("  ‚Ä¢ clean_old_trial_models(keep_best_n=5) - Clean up old trial models")

In [None]:
# Example Usage and Quick Start Guide

print("üöÄ FINRL OPTUNA ENHANCED - QUICK START GUIDE")
print("=" * 60)

print("\n1Ô∏è‚É£ RUN OPTIMIZATION:")
print("   optimization_results = run_optuna_optimization(env_train, env_trade, ALGORITHMS)")
print("   # This will automatically save best models and logs")

print("\n2Ô∏è‚É£ ANALYZE RESULTS:")
print("   analyze_optimization_results(optimization_results)")
print("   # Shows detailed analysis with visualizations")

print("\n3Ô∏è‚É£ GET PERFORMANCE SUMMARY:")
print("   summary_df = get_model_performance_summary()")
print("   # Shows comprehensive model performance across all runs")

print("\n4Ô∏è‚É£ LOAD BEST MODEL:")
print("   best_ppo = load_best_model('ppo')")
print("   # Loads the best saved PPO model")

print("\n5Ô∏è‚É£ CLEAN OLD FILES:")
print("   clean_old_trial_models(keep_best_n=3)")
print("   # Keeps only top 3 trial models per algorithm")

print("\nüìÅ KEY DIRECTORIES:")
print(f"   Best Models: {BEST_MODELS_DIR}")
print(f"   Logs: {LOGS_DIR}")
print(f"   Metadata: {BEST_MODELS_METADATA_FILE}")

print("\n‚úÖ Ready to run! Execute the cells above to start optimization.")

# Show current system status
if len(load_best_models_metadata()) > 0:
    print(f"\nüìä Current best models summary:")
    get_model_performance_summary()
else:
    print(f"\n‚ÑπÔ∏è  No previous models found. Ready for first optimization run!")

In [None]:
# INSPECT TRAINED MODEL DIMENSIONS
# Analyze the dimensions of a saved model

from stable_baselines3 import PPO, SAC
import os

# Use relative path - update this to point to your model file
model_filename = 'sac_best_model.zip'  # Change this to your model filename
model_path = os.path.join(project_root, 'trained_models', model_filename)

print("="*70)
print("üîç ANALYZING TRAINED MODEL DIMENSIONS")
print("="*70)

# Check if model file exists
if not os.path.exists(model_path):
    print(f"‚ùå Model file not found: {model_path}")
    print(f"   Please update the model_filename variable to point to your trained model")
else:
    print(f"‚úÖ Model file found: {os.path.basename(model_path)}")
    file_size = os.path.getsize(model_path) / (1024 * 1024)  # Convert to MB
    print(f"   File size: {file_size:.2f} MB")
    
    try:
        # Load the model without environment (to inspect saved parameters)
        print("\nüì¶ Loading model...")
        model = SAC.load(model_path)  # Change to PPO or TD3 if needed
        
        # Get observation and action space from model
        print("\nüìä MODEL DIMENSIONS:")
        
        # Observation space
        if hasattr(model, 'observation_space'):
            obs_space = model.observation_space
            print(f"  Observation space: {obs_space}")
            if hasattr(obs_space, 'shape'):
                print(f"  Observation dimension: {obs_space.shape[0]}")
        else:
            print("  ‚ö†Ô∏è  Could not determine observation space")
        
        # Action space
        if hasattr(model, 'action_space'):
            act_space = model.action_space
            print(f"  Action space: {act_space}")
            if hasattr(act_space, 'shape') and act_space.shape:
                print(f"  Action dimension: {act_space.shape[0]}")
            elif hasattr(act_space, 'n'):
                print(f"  Action dimension: {act_space.n}")
        else:
            print("  ‚ö†Ô∏è  Could not determine action space")
        
        # Policy network architecture
        print("\nüèóÔ∏è  POLICY NETWORK ARCHITECTURE:")
        if hasattr(model, 'policy'):
            policy = model.policy
            
            # Check for actor network (policy network)
            if hasattr(policy, 'mlp_extractor'):
                mlp = policy.mlp_extractor
                print(f"  Policy type: {type(policy).__name__}")
                
                # Get network layers
                if hasattr(mlp, 'policy_net'):
                    print(f"  Policy network: {mlp.policy_net}")
                if hasattr(mlp, 'value_net'):
                    print(f"  Value network: {mlp.value_net}")
            
            # Get network features
            if hasattr(policy, 'features_dim'):
                print(f"  Features dimension: {policy.features_dim}")
        
        # Model hyperparameters
        print("\n‚öôÔ∏è  MODEL HYPERPARAMETERS:")
        if hasattr(model, 'learning_rate'):
            lr = model.learning_rate
            if callable(lr):
                print(f"  Learning rate: {lr(1.0)} (schedule)")
            else:
                print(f"  Learning rate: {lr}")
        
        if hasattr(model, 'n_steps'):
            print(f"  N steps: {model.n_steps}")
        if hasattr(model, 'batch_size'):
            print(f"  Batch size: {model.batch_size}")
        if hasattr(model, 'n_epochs'):
            print(f"  N epochs: {model.n_epochs}")
        if hasattr(model, 'gamma'):
            print(f"  Gamma (discount): {model.gamma}")
        if hasattr(model, 'gae_lambda'):
            print(f"  GAE lambda: {model.gae_lambda}")
        if hasattr(model, 'clip_range'):
            clip = model.clip_range
            if callable(clip):
                print(f"  Clip range: {clip(1.0)} (schedule)")
            else:
                print(f"  Clip range: {clip}")
        
        # Compute expected state space from observation dimension
        if hasattr(model, 'observation_space') and hasattr(model.observation_space, 'shape'):
            obs_dim = model.observation_space.shape[0]
            
            print(f"\nüßÆ REVERSE ENGINEERING ENVIRONMENT PARAMETERS:")
            print(f"  Observation dimension: {obs_dim}")
            
            # Try common indicator counts (from FinRL)
            from finrl.config import INDICATORS
            n_indicators = len(INDICATORS)
            
            # Formula: state_space = 1 + 2*stock_dim + n_indicators*stock_dim
            # Solving for stock_dim: obs_dim = 1 + 2*stock_dim + n_indicators*stock_dim
            #                        obs_dim - 1 = stock_dim * (2 + n_indicators)
            #                        stock_dim = (obs_dim - 1) / (2 + n_indicators)
            
            if n_indicators > 0:
                stock_dim_computed = (obs_dim - 1) / (2 + n_indicators)
                print(f"  Number of indicators: {n_indicators}")
                print(f"  Computed stock dimension: {stock_dim_computed:.2f}")
                
                if stock_dim_computed == int(stock_dim_computed):
                    stock_dim = int(stock_dim_computed)
                    print(f"  ‚úÖ Stock dimension (exact): {stock_dim}")
                    print(f"  Formula check: 1 + 2√ó{stock_dim} + {n_indicators}√ó{stock_dim} = {1 + 2*stock_dim + n_indicators*stock_dim}")
                else:
                    print(f"  ‚ö†Ô∏è  Non-integer stock dimension - formula may not match")
            
            # Compare with current environment if available
            if 'env_train' in globals():
                print(f"\nüîÑ COMPARING WITH CURRENT ENVIRONMENT:")
                try:
                    if hasattr(env_train, 'envs') and len(env_train.envs) > 0:
                        current_obs = env_train.envs[0].observation_space.shape[0]
                        current_act = env_train.envs[0].action_space.shape[0]
                    else:
                        current_obs = env_train.observation_space.shape[0]
                        current_act = env_train.action_space.shape[0]
                    
                    print(f"  Current env observation dim: {current_obs}")
                    print(f"  Current env action dim: {current_act}")
                    
                    if current_obs == obs_dim:
                        print(f"  ‚úÖ Model observation matches current environment!")
                    else:
                        print(f"  ‚ö†Ô∏è  MISMATCH: Model has {obs_dim}, current env has {current_obs}")
                        print(f"      Difference: {abs(obs_dim - current_obs)}")
                    
                    model_act_dim = model.action_space.shape[0] if hasattr(model.action_space, 'shape') else None
                    if model_act_dim and model_act_dim == current_act:
                        print(f"  ‚úÖ Model action space matches current environment!")
                    elif model_act_dim:
                        print(f"  ‚ö†Ô∏è  MISMATCH: Model has {model_act_dim}, current env has {current_act}")
                except Exception as e:
                    print(f"  ‚ö†Ô∏è  Could not compare with current environment: {e}")
        
        print("\n" + "="*70)
        print("‚úÖ MODEL ANALYSIS COMPLETE")
        print("="*70)
        
    except Exception as e:
        print(f"\n‚ùå Error loading model: {e}")
        import traceback
        traceback.print_exc()


üîç ANALYZING TRAINED PPO MODEL DIMENSIONS
‚úÖ Model file found: /Users/ayushraj/Documents/Python/FinRL/FinRL/examples/trained_models/sac_best_model.zip
   File size: 0.90 MB

üì¶ Loading model...

üìä MODEL DIMENSIONS:
  Observation space: Box(-inf, inf, (211,), float32)
  Observation dimension: 211
  Action space: Box(-1.0, 1.0, (30,), float32)
  Action dimension: 30

üèóÔ∏è  POLICY NETWORK ARCHITECTURE:

‚öôÔ∏è  MODEL HYPERPARAMETERS:
  Learning rate: 0.002800682007731022
  N steps: 1
  Batch size: 128
  Gamma (discount): 0.995

üßÆ REVERSE ENGINEERING ENVIRONMENT PARAMETERS:
  Observation dimension: 211
  Number of indicators: 8
  Computed stock dimension: 21.00
  ‚úÖ Stock dimension (exact): 21
  Formula check: 1 + 2√ó21 + 8√ó21 = 211

üîÑ COMPARING WITH CURRENT ENVIRONMENT:
  Current env observation dim: 301
  Current env action dim: 30
  ‚ö†Ô∏è  MISMATCH: Model has 211, current env has 301
      Difference: 90
  ‚úÖ Model action space matches current environment!

‚úÖ MODE