# FINM RL Trading - Main Analysis Notebook

This notebook provides comprehensive analysis of all experimental results:
- Core baselines comparison (PPO vs BuyAndHold vs MACrossover)
- Hyperparameter sensitivity analysis
- State/Environment ablation studies
- Regime-based performance analysis
- Robustness testing results
- Summary dashboard


In [None]:
# Setup: Paths and Imports
import sys
from pathlib import Path
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, Any, List

# Resolve the project layout from the notebook's working directory:
# the parent of the current directory is treated as the project root.
CURRENT_DIR = Path().resolve()
PROJECT_ROOT = CURRENT_DIR.parent
RESULTS_DIR = PROJECT_ROOT / "results"

# Put the project root at the front of sys.path (once) so that
# project-local packages can be imported from this notebook.
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(PROJECT_ROOT))

# Report the resolved locations and whatever the results folder contains.
print(f"PROJECT_ROOT = {PROJECT_ROOT}")
print(f"RESULTS_DIR = {RESULTS_DIR}")
if RESULTS_DIR.exists():
    print(f"Results available: {list(RESULTS_DIR.iterdir())}")
else:
    print("Results available: Not found")


In [None]:
# Plot appearance shared by every figure in this notebook:
# dark-grid style sheet, seaborn "husl" palette, and default figure/font sizes.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 10,
})


In [None]:
# Export utilities
def export_table_csv(df: pd.DataFrame, path: Path) -> None:
    """Write ``df`` to ``path`` as CSV without the index column, then print the destination."""
    df.to_csv(path_or_buf=path, index=False)
    message = f"Exported table to {path}"
    print(message)

def export_table_latex(df: pd.DataFrame, path: Path, **kwargs) -> None:
    """Export DataFrame to LaTeX format.

    Args:
        df: Table to render; the index is not included in the output.
        path: Destination file; it is overwritten if it already exists.
        **kwargs: Forwarded unchanged to ``DataFrame.to_latex``
            (for example ``float_format``).
    """
    latex_str = df.to_latex(index=False, **kwargs)
    # Explicit UTF-8 so non-ASCII labels are written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(latex_str)
    print(f"Exported LaTeX table to {path}")

def export_table_html(df: pd.DataFrame, path: Path, **kwargs) -> None:
    """Export DataFrame to HTML format.

    Args:
        df: Table to render; the index is not included in the output.
        path: Destination file; it is overwritten if it already exists.
        **kwargs: Forwarded unchanged to ``DataFrame.to_html``
            (for example ``classes``).
    """
    html_str = df.to_html(index=False, **kwargs)
    # Explicit UTF-8 so non-ASCII labels are written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(html_str)
    print(f"Exported HTML table to {path}")

# Create exports directory
EXPORTS_DIR = PROJECT_ROOT / "notebooks" / "exports"
# parents=True also creates the intermediate "notebooks" folder when it is
# missing; without it mkdir raises FileNotFoundError in that situation.
EXPORTS_DIR.mkdir(parents=True, exist_ok=True)
print(f"Export directory: {EXPORTS_DIR}")


## 1. Core Baselines Comparison

Compare PPO performance against simple baselines: BuyAndHold and MACrossover.


# Load core baselines metrics
core_baselines_path = RESULTS_DIR / "core_baselines" / "core_metrics.json"

if core_baselines_path.exists():
    # Read as UTF-8 explicitly rather than relying on the locale default.
    with open(core_baselines_path, 'r', encoding='utf-8') as f:
        core_metrics = json.load(f)

    # Convert to DataFrame for easier analysis: one row per strategy, holding
    # the strategy name followed by that strategy's metric values.
    baselines_data = [
        {"strategy": strategy, **metrics}
        for strategy, metrics in core_metrics.items()
    ]

    baselines_df = pd.DataFrame(baselines_data)
    print("Core Baselines Comparison:")
    print(baselines_df.to_string(index=False))
else:
    # Report the missing file instead of raising, as the other result
    # sections do; the master comparison cell already checks
    # `baselines_df is not None` before using it.
    print("Core baselines results not found")
    baselines_df = None


# Visualize baselines comparison: one bar chart per metric on a 2x2 grid.
# Skipped when the baselines table was not loaded (same availability check
# the master comparison cell uses).
if 'baselines_df' in locals() and baselines_df is not None:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    metrics_to_plot = ['total_return', 'annualized_return', 'sharpe', 'max_drawdown']
    for idx, metric in enumerate(metrics_to_plot):
        ax = axes[idx // 2, idx % 2]
        pretty_name = metric.replace("_", " ").title()
        bars = ax.bar(baselines_df['strategy'], baselines_df[metric])
        ax.set_title(pretty_name)
        ax.set_ylabel(pretty_name)
        ax.tick_params(axis='x', rotation=45)

        # Add value labels on bars. Negative bars extend downwards, so their
        # label is anchored below the tip instead of being drawn over the bar.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height,
                    f'{height:.3f}',
                    ha='center', va='bottom' if height >= 0 else 'top')

    plt.tight_layout()
    plt.savefig(EXPORTS_DIR / "baselines_comparison.png", dpi=150, bbox_inches='tight')
    plt.show()
    print(f"Saved baselines comparison plot to {EXPORTS_DIR / 'baselines_comparison.png'}")
else:
    print("Core baselines not loaded; skipping baselines comparison plot")


# Export baselines comparison table (CSV, LaTeX and HTML).
# Guarded like the hyperparameter export so a missing baselines table does
# not abort the notebook here.
if 'baselines_df' in locals() and baselines_df is not None:
    export_table_csv(baselines_df, EXPORTS_DIR / "baselines_comparison.csv")
    export_table_latex(baselines_df, EXPORTS_DIR / "baselines_comparison.tex", float_format="%.4f")
    export_table_html(baselines_df, EXPORTS_DIR / "baselines_comparison.html", classes='table table-striped')


## 2. Hyperparameter Sensitivity Analysis


In [None]:
# Load hyperparameter results
hyperparams_dir = RESULTS_DIR / "ppo_hyperparams"
hyperparams_summary_path = hyperparams_dir / "summary_test_metrics.csv"

# Bind both tables up front. Later cells test them with `is not None`, and
# the detailed table would otherwise stay undefined when the summary exists
# but no run file carries a 'config' entry.
hyperparams_df = None
hyperparams_detailed_df = None

if hyperparams_summary_path.exists():
    hyperparams_df = pd.read_csv(hyperparams_summary_path)
    print("Hyperparameter Summary:")
    print(hyperparams_df.to_string(index=False))

    # Load individual run JSONs to extract config details.
    # sorted() keeps the row order reproducible between runs.
    run_configs = []
    for json_file in sorted(hyperparams_dir.glob("*.json")):
        if json_file.name == "summary_test_metrics.json":
            continue
        with open(json_file, 'r', encoding='utf-8') as f:
            run_data = json.load(f)
        if 'config' not in run_data:
            continue
        config = run_data['config']
        name = run_data.get('name', json_file.stem)
        metrics = run_data.get('test_metrics', {})
        # Metric keys are merged last, so they win on a name collision.
        run_configs.append({
            'name': name,
            'clip_epsilon': config.get('clip_epsilon', None),
            'entropy_coef': config.get('entropy_coef', None),
            'gamma': config.get('gamma', None),
            'gae_lambda': config.get('gae_lambda', None),
            **metrics
        })

    if run_configs:
        hyperparams_detailed_df = pd.DataFrame(run_configs)
        print("\nDetailed Hyperparameter Configurations:")
        print(hyperparams_detailed_df.to_string(index=False))
else:
    print("Hyperparameter results not found")


In [None]:
# Hyperparameter sensitivity figure (2x2): two Sharpe-vs-hyperparameter
# scatters on the top row, two per-configuration bar charts on the bottom row.
if hyperparams_df is not None and hyperparams_detailed_df is not None:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Top row. A panel is left empty when its column is absent or has no values.
    scatter_panels = [
        (axes[0, 0], 'clip_epsilon', 'Clip Epsilon', 'Sharpe Ratio vs Clip Epsilon'),
        (axes[0, 1], 'entropy_coef', 'Entropy Coefficient', 'Sharpe Ratio vs Entropy Coefficient'),
    ]
    for ax, column, x_label, title in scatter_panels:
        if column not in hyperparams_detailed_df.columns:
            continue
        known = hyperparams_detailed_df[hyperparams_detailed_df[column].notna()]
        if known.empty:
            continue
        ax.scatter(known[column], known['sharpe'], s=100, alpha=0.7)
        ax.set_xlabel(x_label)
        ax.set_ylabel('Sharpe Ratio')
        ax.set_title(title)
        ax.grid(True, alpha=0.3)

    # Bottom row. Configurations are sorted by the plotted metric, descending.
    bar_panels = [
        (axes[1, 0], 'total_return', 'Total Return', 'Total Return by Hyperparameter Configuration'),
        (axes[1, 1], 'sharpe', 'Sharpe Ratio', 'Sharpe Ratio by Hyperparameter Configuration'),
    ]
    for ax, column, x_label, title in bar_panels:
        ranked = hyperparams_df.sort_values(column, ascending=False)
        bars = ax.barh(ranked['name'], ranked[column])
        ax.set_xlabel(x_label)
        ax.set_title(title)
        ax.grid(True, alpha=0.3, axis='x')

    plt.tight_layout()
    plt.savefig(EXPORTS_DIR / "hyperparams_sensitivity.png", dpi=150, bbox_inches='tight')
    plt.show()
    print(f"Saved hyperparameter sensitivity plot to {EXPORTS_DIR / 'hyperparams_sensitivity.png'}")

    # Print the summary rows with the highest and the lowest Sharpe ratio.
    sharpe_by_config = hyperparams_df['sharpe']

    print("\nBest Configuration (by Sharpe):")
    best_sharpe = hyperparams_df.loc[sharpe_by_config.idxmax()]
    print(best_sharpe)

    print("\nWorst Configuration (by Sharpe):")
    worst_sharpe = hyperparams_df.loc[sharpe_by_config.idxmin()]
    print(worst_sharpe)


In [None]:
# Write the hyperparameter tables to the exports folder:
# the summary as CSV and LaTeX, the per-run detail table as CSV only.
if hyperparams_df is not None:
    summary_csv = EXPORTS_DIR / "hyperparams_summary.csv"
    summary_tex = EXPORTS_DIR / "hyperparams_summary.tex"
    export_table_csv(hyperparams_df, summary_csv)
    export_table_latex(hyperparams_df, summary_tex, float_format="%.4f")

if hyperparams_detailed_df is not None:
    detailed_csv = EXPORTS_DIR / "hyperparams_detailed.csv"
    export_table_csv(hyperparams_detailed_df, detailed_csv)


## 3. State/Environment Ablation Analysis

Compare performance across different state representations and environment configurations.


In [None]:
# Load state/env ablation results: one "<config>_metrics.json" file per configuration.
states_envs_dir = RESULTS_DIR / "states_envs"
states_envs_data = []

# sorted() fixes the configuration order so tables and bar charts come out
# the same on every run.
for json_file in sorted(states_envs_dir.glob("*_metrics.json")):
    # Read as UTF-8 explicitly rather than relying on the locale default.
    with open(json_file, 'r', encoding='utf-8') as f:
        metrics = json.load(f)
    name = json_file.stem.replace("_metrics", "")
    # Remove shaping_config if present so it does not become a table column.
    metrics.pop('shaping_config', None)
    states_envs_data.append({"config": name, **metrics})

if states_envs_data:
    states_envs_df = pd.DataFrame(states_envs_data)
    print("State/Environment Ablation Results:")
    print(states_envs_df.to_string(index=False))

    # Visualize comparison: one bar chart per metric on a 2x2 grid.
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    metrics_to_plot = ['total_return', 'annualized_return', 'sharpe', 'max_drawdown']

    for idx, metric in enumerate(metrics_to_plot):
        ax = axes[idx // 2, idx % 2]
        pretty_name = metric.replace("_", " ").title()
        bars = ax.bar(states_envs_df['config'], states_envs_df[metric])
        ax.set_title(pretty_name)
        ax.set_ylabel(pretty_name)
        ax.tick_params(axis='x', rotation=45)

        # Add value labels. Negative bars extend downwards, so their label is
        # anchored below the tip instead of being drawn over the bar.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height,
                    f'{height:.3f}',
                    ha='center', va='bottom' if height >= 0 else 'top')

    plt.tight_layout()
    plt.savefig(EXPORTS_DIR / "states_envs_comparison.png", dpi=150, bbox_inches='tight')
    plt.show()
    print(f"Saved state/env comparison plot to {EXPORTS_DIR / 'states_envs_comparison.png'}")

    # Export table
    export_table_csv(states_envs_df, EXPORTS_DIR / "states_envs_comparison.csv")
    export_table_latex(states_envs_df, EXPORTS_DIR / "states_envs_comparison.tex", float_format="%.4f")
else:
    print("State/Environment results not found")
    states_envs_df = None


In [None]:
# Load robustness results
robustness_dir = RESULTS_DIR / "ppo_seed_pretrain_compare"
robustness_summary_path = robustness_dir / "summary_test_metrics.csv"
robustness_group_stats_path = robustness_dir / "summary_group_stats.csv"

# Bind both result tables up front so they are defined on every path,
# including "run data present but group-statistics CSV missing".
robustness_df = None
group_stats_df = None

if robustness_summary_path.exists():
    from eval.summarize import summarize_runs

    # Load individual run JSONs (everything except the summary file).
    # sorted() keeps the per-run row order reproducible between runs.
    run_payloads = []
    for json_file in sorted(robustness_dir.glob("*.json")):
        if json_file.name != "summary_test_metrics.json":
            with open(json_file, 'r', encoding='utf-8') as f:
                run_payloads.append(json.load(f))

    if run_payloads:
        robustness_df = summarize_runs(run_payloads, metric_key="test_metrics")
        print("Robustness Summary (per run):")
        print(robustness_df.to_string(index=False))

        # Load group statistics
        if robustness_group_stats_path.exists():
            group_stats_df = pd.read_csv(robustness_group_stats_path)
            print("\nGroup Statistics (mean/std by pretrained vs non-pretrained):")
            print(group_stats_df.to_string(index=False))

            # Visualize group statistics: per metric, one bar per group at the
            # group's mean with its std as the error bar.
            fig, axes = plt.subplots(2, 2, figsize=(14, 10))
            metrics = ['total_return', 'annualized_return', 'sharpe', 'max_drawdown']

            for idx, metric in enumerate(metrics):
                ax = axes[idx // 2, idx % 2]
                pretty_name = metric.replace("_", " ").title()
                metric_data = group_stats_df[group_stats_df['metric'] == metric]

                # First matching row per group supplies that group's mean/std.
                groups = metric_data['group'].unique()
                means = [metric_data[metric_data['group'] == g]['mean'].values[0] for g in groups]
                stds = [metric_data[metric_data['group'] == g]['std'].values[0] for g in groups]

                x_pos = np.arange(len(groups))
                bars = ax.bar(x_pos, means, yerr=stds, capsize=5, alpha=0.7)
                ax.set_xticks(x_pos)
                ax.set_xticklabels(groups)
                ax.set_ylabel(pretty_name)
                # Title previously contained a mis-decoded plus-minus sign.
                ax.set_title(f'{pretty_name} by Group (mean ± std)')
                ax.grid(True, alpha=0.3, axis='y')

            plt.tight_layout()
            plt.savefig(EXPORTS_DIR / "robustness_comparison.png", dpi=150, bbox_inches='tight')
            plt.show()
            print(f"Saved robustness comparison plot to {EXPORTS_DIR / 'robustness_comparison.png'}")

            # Export tables
            export_table_csv(group_stats_df, EXPORTS_DIR / "robustness_group_stats.csv")
            export_table_latex(group_stats_df, EXPORTS_DIR / "robustness_group_stats.tex", float_format="%.4f")
        else:
            print("Robustness group statistics not found")
    else:
        print("No robustness run data found")
else:
    print("Robustness results not found")


In [None]:
# Create master comparison table: every baseline plus the best-by-Sharpe
# hyperparameter and state/env configurations, compared on four metrics.
metrics_to_plot = ['total_return', 'annualized_return', 'sharpe', 'max_drawdown']


def _master_row(experiment_type, name, source):
    """Build one master-table record.

    Args:
        experiment_type: Label stored in the 'experiment_type' column.
        name: Display name stored in the 'name' column.
        source: Row (Series or mapping) holding the four summary metrics.

    Returns:
        Dict with 'experiment_type', 'name' and one entry per summary metric.

    Raises:
        KeyError: If ``source`` lacks one of the summary metrics.
    """
    record = {'experiment_type': experiment_type, 'name': name}
    for metric in metrics_to_plot:
        record[metric] = source[metric]
    return record


master_data = []

# Add baselines
if 'baselines_df' in locals() and baselines_df is not None:
    for _, row in baselines_df.iterrows():
        master_data.append(_master_row('baseline', row['strategy'], row))

# Add best hyperparameter config
if 'hyperparams_df' in locals() and hyperparams_df is not None:
    best_hyperparam = hyperparams_df.loc[hyperparams_df['sharpe'].idxmax()]
    master_data.append(
        _master_row('hyperparams', f"Best HP: {best_hyperparam['name']}", best_hyperparam)
    )

# Add best state/env config
if 'states_envs_df' in locals() and states_envs_df is not None:
    best_state_env = states_envs_df.loc[states_envs_df['sharpe'].idxmax()]
    master_data.append(
        _master_row('state_env', f"Best State/Env: {best_state_env['config']}", best_state_env)
    )

if master_data:
    master_df = pd.DataFrame(master_data)
    master_df = master_df.sort_values('sharpe', ascending=False)

    print("Master Comparison Table (sorted by Sharpe Ratio):")
    print(master_df.to_string(index=False))

    # Visualize master comparison: one horizontal bar chart per metric.
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    for idx, metric in enumerate(metrics_to_plot):
        ax = axes[idx // 2, idx % 2]
        pretty_name = metric.replace("_", " ").title()
        master_df_sorted = master_df.sort_values(metric, ascending=False)
        bars = ax.barh(master_df_sorted['name'], master_df_sorted[metric])
        ax.set_xlabel(pretty_name)
        ax.set_title(f'{pretty_name} Comparison')
        ax.grid(True, alpha=0.3, axis='x')

    plt.tight_layout()
    plt.savefig(EXPORTS_DIR / "master_comparison.png", dpi=150, bbox_inches='tight')
    plt.show()
    print(f"Saved master comparison plot to {EXPORTS_DIR / 'master_comparison.png'}")

    # Export master table
    export_table_csv(master_df, EXPORTS_DIR / "master_comparison.csv")
    export_table_latex(master_df, EXPORTS_DIR / "master_comparison.tex", float_format="%.4f")
    export_table_html(master_df, EXPORTS_DIR / "master_comparison.html", classes='table table-striped')

    # Best of summary
    best_sharpe_idx = master_df['sharpe'].idxmax()
    best_return_idx = master_df['total_return'].idxmax()
    # Pick the drawdown with the smallest magnitude. This is the shallowest
    # drawdown whether values are stored as positive fractions or as negative
    # numbers; a plain min() would select the deepest one under the negative
    # convention. NOTE(review): confirm which sign convention the metrics use.
    best_drawdown_idx = master_df['max_drawdown'].abs().idxmin()

    print("\n=== Best Performers ===")
    print(f"Best Sharpe Ratio: {master_df.loc[best_sharpe_idx, 'name']} ({master_df.loc[best_sharpe_idx, 'sharpe']:.4f})")
    print(f"Best Total Return: {master_df.loc[best_return_idx, 'name']} ({master_df.loc[best_return_idx, 'total_return']:.4f})")
    print(f"Lowest Max Drawdown: {master_df.loc[best_drawdown_idx, 'name']} ({master_df.loc[best_drawdown_idx, 'max_drawdown']:.4f})")
else:
    print("No data available for master comparison")
