# TFT Hyperparameter Tuning Results Analysis

This notebook provides a comprehensive analysis of all TFT (Temporal Fusion Transformer) experiments run using Optuna for Henry Hub natural gas price prediction.

## Contents
1. **Experiment Overview** — Summary statistics for each experiment
2. **Best Model Comparison** — Metrics comparison across experiments
3. **Pruning Analysis** — Pruning rates and efficiency per experiment
4. **Hyperparameter Analysis** — Distributions and impact of parameters
5. **Cross-Experiment Parameter Comparison** — What works best overall
6. **Visualizations** — Charts and graphs for all comparisons

### Experiments Analysed:
- **Price Only** — Univariate model using only historical prices
- **Price + Storage** — Prices with gas storage levels
- **Price + Production** — Prices with US dry gas production
- **Price + USD** — Prices with USD index
- **Price + Weather** — Prices with HDD/CDD weather data
- **All Features** — All features combined

In [None]:
# === Cell 1: Setup and Imports ===
import os
import json
import ast
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Matplotlib defaults are set first; the seaborn style is applied afterwards
plt.rcParams.update({
    "figure.figsize": (12, 6),
    "axes.grid": True,
    "font.size": 10,
})
sns.set_style("whitegrid")

# Fixed hex color per experiment label, shared by every chart in the notebook
EXPERIMENT_COLORS = {
    "Price Only": "#1f77b4",
    "Price + Storage": "#ff7f0e",
    "Price + Production": "#2ca02c",
    "Price + USD": "#d62728",
    "Price + Weather": "#9467bd",
    "All Features": "#8c564b",
}

print("‚úÖ Libraries loaded successfully")

## 1. Load All Experiment Data

Define paths to each experiment's trial summary CSV and load the data.

In [None]:
# === Cell 2: Define Experiment Paths and Load Data ===

# Base folder scanned for experiment sub-folders; relative to the current working directory
SAVED_RESULTS_DIR = Path("saved_results")

# Locate TFT experiment folders automatically
# Expected folder name: <timestamp>_<experimentname>_tft
def discover_tft_experiments(base_dir: Path) -> Dict[str, str]:
    """Map experiment display names to the newest matching folder in ``base_dir``.

    Only direct sub-directories whose name contains ``_tft`` are considered.
    A folder is assigned to the first known name fragment it contains; when
    several folders map to the same experiment, the one whose name compares
    highest as a string (i.e. the latest timestamp prefix) is kept.

    Args:
        base_dir: Directory holding one sub-folder per tuning run.

    Returns:
        ``{display name: folder name}``; empty when ``base_dir`` does not
        exist or nothing matches.
    """
    if not base_dir.exists():
        print(f"‚ö†Ô∏è Directory not found: {base_dir}")
        return {}

    # Folder-name fragment -> human-readable experiment label
    label_by_fragment = {
        "PriceOnly_tft": "Price Only",
        "Price+Storage_tft": "Price + Storage",
        "Price+Production_tft": "Price + Production",
        "Price+USD_tft": "Price + USD",
        "Price+Weather_tft": "Price + Weather",
        "AllFeatures_tft": "All Features",
    }

    latest_by_label = {}
    for entry in sorted(base_dir.iterdir()):
        if not entry.is_dir() or "_tft" not in entry.name:
            continue

        # The first fragment (in declaration order) found in the name decides the label
        label = next(
            (lbl for fragment, lbl in label_by_fragment.items() if fragment in entry.name),
            None,
        )
        if label is None:
            continue

        # Keep the lexicographically largest folder name per experiment
        current = latest_by_label.get(label)
        if current is None or entry.name > current:
            latest_by_label[label] = entry.name

    return latest_by_label

# Run discovery once; the resulting name -> folder mapping is what gets loaded
EXPERIMENTS = discover_tft_experiments(SAVED_RESULTS_DIR)
print(f"Discovered TFT experiments: {EXPERIMENTS}")

# Nothing found: print a template the user can fill in by hand
if len(EXPERIMENTS) == 0:
    print(
        "\n‚ö†Ô∏è No TFT experiments discovered. Please specify manually:",
        """EXPERIMENTS = {
    "Price Only": "YYYYMMDD-HHMMSS_PriceOnly_tft",
    "Price + Storage": "YYYYMMDD-HHMMSS_Price+Storage_tft",
    ....
}""",
        sep="\n",
    )

def _parse_params(raw):
    """Decode one raw ``params`` cell; missing cells become an empty dict.

    JSON is tried first so that ``true`` / ``false`` / ``null`` are accepted;
    a Python-literal repr (single-quoted dict) is handled by the fallback.
    """
    if pd.isna(raw):
        return {}
    try:
        return json.loads(raw)
    except (TypeError, ValueError):
        # Not valid JSON (json.JSONDecodeError is a ValueError) - try a Python literal
        return ast.literal_eval(raw)


def load_trial_summary(experiment_dir: Path) -> "pd.DataFrame | None":
    """Load ``trial_summary.csv`` from a run folder and unpack its hyperparameters.

    When the CSV has a ``params`` column, each cell is decoded into a dict
    (stored in ``params_dict``) and the TFT hyperparameters are copied into
    their own columns. ``max_encoder_length`` is exposed as ``lookback``;
    every other column keeps the parameter's own name. Parameters missing
    from a trial yield ``None`` for that row.

    Args:
        experiment_dir: Folder expected to contain ``trial_summary.csv``.

    Returns:
        The loaded DataFrame, or ``None`` (after printing a notice) when the
        CSV file does not exist.

    Raises:
        ValueError, SyntaxError: If a ``params`` cell is neither valid JSON
            nor a valid Python literal.
    """
    csv_path = experiment_dir / "trial_summary.csv"
    if not csv_path.exists():
        print(f"‚ö†Ô∏è Not found: {csv_path}")
        return None

    df = pd.read_csv(csv_path)

    if 'params' in df.columns:
        df['params_dict'] = df['params'].apply(_parse_params)

        # Output column -> key inside the params dict (order defines column order)
        column_to_key = {
            'lookback': 'max_encoder_length',
            'batch_size': 'batch_size',
            'hidden_size': 'hidden_size',
            'attention_head_size': 'attention_head_size',
            'hidden_continuous_size': 'hidden_continuous_size',
            'dropout': 'dropout',
            'lstm_layers': 'lstm_layers',
            'learning_rate': 'learning_rate',
            'gradient_clip_val': 'gradient_clip_val',
            'weight_decay': 'weight_decay',
        }
        for column, key in column_to_key.items():
            # key=key binds the current key; a plain closure would see only the last one
            df[column] = df['params_dict'].apply(lambda p, key=key: p.get(key))

    return df

# Read the trial summary of every discovered experiment; runs without a CSV are left out
experiment_data = {}
for name, folder in EXPERIMENTS.items():
    exp_path = SAVED_RESULTS_DIR / folder
    df = load_trial_summary(exp_path)
    if df is None:
        continue
    experiment_data[name] = df
    print(f"‚úÖ Loaded {name}: {len(df)} trials")

print(f"\nüìä Total experiments loaded: {len(experiment_data)}")

## 2. Experiment Overview & Summary Statistics

Summary of each experiment including total trials, completion rates, and basic statistics.

In [None]:
# === Cell 3: Generate Experiment Summary Statistics ===

def get_experiment_summary(df: pd.DataFrame, name: str) -> dict:
    """Build a one-row summary (as a dict of display strings) for an experiment.

    Counts and rates are based on the ``state`` column (``COMPLETE`` /
    ``PRUNED``); loss statistics use the ``value`` column of completed trials.
    For each test-metric column present in ``df``, the metric of the completed
    trial with the lowest ``value`` is reported. Anything that cannot be
    computed (no trials, no completed trials, NaN) is reported as ``"N/A"``.

    Args:
        df: Trial table with at least ``state`` and ``value`` columns.
        name: Display name stored under ``'Experiment'``.

    Returns:
        Dict with the experiment name, integer trial counts and formatted
        rate / loss / test-metric strings.
    """
    completed = df[df['state'] == 'COMPLETE']
    pruned = df[df['state'] == 'PRUNED']
    total = len(df)

    def pct(count):
        # Share of all trials, guarded against an empty table
        return f"{count / total * 100:.1f}%" if total > 0 else "N/A"

    def fmt(value, spec, suffix=""):
        # None / NaN (e.g. std of a single trial) become "N/A" instead of "nan"
        return f"{value:{spec}}{suffix}" if pd.notna(value) else "N/A"

    # Only finite losses can define a best trial
    losses = completed['value'].dropna()
    has_losses = len(losses) > 0

    summary = {
        'Experiment': name,
        'Total Trials': total,
        'Completed': len(completed),
        'Pruned': len(pruned),
        'Completion Rate': pct(len(completed)),
        'Pruning Rate': pct(len(pruned)),
        'Best Val Loss': fmt(losses.min(), ".6f") if has_losses else "N/A",
        'Mean Val Loss': fmt(losses.mean(), ".6f") if has_losses else "N/A",
        'Std Val Loss': fmt(losses.std(), ".6f") if has_losses else "N/A",
    }

    # (summary key, source column, format spec, suffix)
    test_metrics = (
        ('Best Test MAE', 'test_mae', '.4f', ''),
        ('Best Test RMSE', 'test_rmse', '.4f', ''),
        ('Best Test MAPE', 'test_mape', '.2f', '%'),
        ('Best Dir Acc', 'test_directional_accuracy', '.2f', '%'),
    )

    # idxmin() raises on an empty Series, so only look up the best row when one exists
    best_row = completed.loc[losses.idxmin()] if has_losses else None
    for label, column, spec, suffix in test_metrics:
        if column not in df.columns:
            continue
        value = best_row[column] if best_row is not None else None
        summary[label] = fmt(value, spec, suffix)

    return summary

# Generate summary table
# One summary dict per loaded experiment, in the insertion order of experiment_data
summaries = []
for name, df in experiment_data.items():
    summaries.append(get_experiment_summary(df, name))

# Each dict becomes one row of the overview table
summary_df = pd.DataFrame(summaries)
print("\nüìä EXPERIMENT OVERVIEW")
print("=" * 80)
# NOTE(review): `display` is neither defined nor imported in this file -
# presumably the IPython/Jupyter builtin; confirm before running as a plain script
display(summary_df)

## 3. Pruning Analysis

Analyze pruning rates and efficiency across experiments.

In [None]:
# === Cell 4: Pruning Analysis ===

def _compute_pruning_stats(experiment_data):
    """Count total / completed / pruned trials per experiment as a DataFrame."""
    columns = ['Experiment', 'Total', 'Completed', 'Pruned', 'Pruning Rate (%)']
    rows = []
    for name, df in experiment_data.items():
        total = len(df)
        pruned = len(df[df['state'] == 'PRUNED'])
        completed = len(df[df['state'] == 'COMPLETE'])
        rows.append({
            'Experiment': name,
            'Total': total,
            'Completed': completed,
            'Pruned': pruned,
            'Pruning Rate (%)': pruned / total * 100 if total > 0 else 0,
        })
    # Explicit columns keep the schema stable even when there are no rows
    return pd.DataFrame(rows, columns=columns)


def analyze_pruning(experiment_data: Dict[str, pd.DataFrame]):
    """Compute per-experiment pruning statistics and chart them.

    Draws two panels: a stacked bar of completed vs. pruned trial counts and
    a horizontal bar of the pruning rate. When ``experiment_data`` is empty
    no figure is created.

    Args:
        experiment_data: Experiment name -> trial table with a ``state`` column.

    Returns:
        DataFrame with columns ``Experiment``, ``Total``, ``Completed``,
        ``Pruned`` and ``Pruning Rate (%)`` (zero rows for empty input).
    """
    pruning_df = _compute_pruning_stats(experiment_data)

    # Without rows there is nothing to chart
    if len(pruning_df) == 0:
        print("No experiments to analyze")
        return pruning_df

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: stacked bar chart (pruned stacked on top of completed)
    x = range(len(pruning_df))
    width = 0.6

    axes[0].bar(x, pruning_df['Completed'], width, label='Completed', color='#2ecc71')
    axes[0].bar(x, pruning_df['Pruned'], width, bottom=pruning_df['Completed'], label='Pruned', color='#e74c3c')
    axes[0].set_xticks(x)
    axes[0].set_xticklabels(pruning_df['Experiment'], rotation=45, ha='right')
    axes[0].set_ylabel('Number of Trials')
    axes[0].set_title('Trial Outcomes by Experiment')
    axes[0].legend()

    # Right panel: pruning rate, colored per experiment (grey for unknown names)
    colors = [EXPERIMENT_COLORS.get(name, '#333333') for name in pruning_df['Experiment']]
    axes[1].barh(pruning_df['Experiment'], pruning_df['Pruning Rate (%)'], color=colors)
    axes[1].set_xlabel('Pruning Rate (%)')
    axes[1].set_title('Pruning Rate by Experiment')
    axes[1].set_xlim(0, 100)

    # Value label just to the right of each bar
    for i, v in enumerate(pruning_df['Pruning Rate (%)']):
        axes[1].text(v + 1, i, f'{v:.1f}%', va='center')

    plt.tight_layout()
    plt.show()

    return pruning_df

# Draw the pruning charts and keep the statistics table for display and later export
pruning_df = analyze_pruning(experiment_data)
print("\nüìä Pruning Statistics:")
# NOTE(review): `display` is not imported here - presumably the Jupyter builtin; confirm
display(pruning_df)

## 4. Best Model Comparison

Compare the best models from each experiment using key metrics.

In [None]:
# === Cell 5: Best Model Comparison with Zoomed Charts ===

def get_best_models(experiment_data: Dict[str, pd.DataFrame]) -> pd.DataFrame:
    """Collect, per experiment, the completed trial with the lowest ``value``.

    Experiments without any ``COMPLETE`` trial are left out. Optional columns
    missing from a trial table yield ``None`` (``'N/A'`` for the trial number).

    Args:
        experiment_data: Experiment name -> trial table with ``state`` and
            ``value`` columns.

    Returns:
        DataFrame with one row per experiment holding the validation loss,
        test metrics and hyperparameters of its best trial.
    """
    # Output column -> source column of the trial table
    optional_fields = {
        'Test MAE': 'test_mae',
        'Test RMSE': 'test_rmse',
        'Test MAPE': 'test_mape',
        'Dir Accuracy': 'test_directional_accuracy',
        'Lookback': 'lookback',
        'Batch Size': 'batch_size',
        'Hidden Size': 'hidden_size',
        'Attention Heads': 'attention_head_size',
        'LSTM Layers': 'lstm_layers',
        'Dropout': 'dropout',
        'Learning Rate': 'learning_rate',
    }

    rows = []
    for name, df in experiment_data.items():
        finished = df[df['state'] == 'COMPLETE']
        if len(finished) == 0:
            continue

        # Row of the trial with the smallest validation loss
        winner = finished.loc[finished['value'].idxmin()]

        row = {
            'Experiment': name,
            'Trial': winner.get('trial_number', 'N/A'),
            'Val Loss': winner['value'],
        }
        row.update({label: winner.get(source) for label, source in optional_fields.items()})
        rows.append(row)

    return pd.DataFrame(rows)

# One row per experiment that has at least one completed trial
best_models_df = get_best_models(experiment_data)
print("\nüèÜ BEST MODELS BY EXPERIMENT")
print("=" * 80)
# NOTE(review): `display` is not imported here - presumably the Jupyter builtin; confirm
display(best_models_df)

# Visualization with zoomed axes
# One horizontal bar panel per metric in a 2x2 grid; skipped when no best models exist
if len(best_models_df) > 0:
    metrics = ['Test MAE', 'Test RMSE', 'Test MAPE', 'Dir Accuracy']
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    # Flatten the 2x2 grid so panels can be addressed by metric index
    axes = axes.flatten()
    
    for i, metric in enumerate(metrics):
        if metric in best_models_df.columns:
            # Experiments without a value for this metric are left out of the panel
            data = best_models_df[['Experiment', metric]].dropna()
            if len(data) > 0:
                colors = [EXPERIMENT_COLORS.get(exp, '#333333') for exp in data['Experiment']]
                bars = axes[i].barh(data['Experiment'], data[metric], color=colors)
                axes[i].set_xlabel(metric)
                axes[i].set_title(f'{metric} by Experiment')
                
                # Zoom to data range for better visibility
                data_min = data[metric].min()
                data_max = data[metric].max()
                data_range = data_max - data_min
                
                # Zoom only when values differ and are all positive
                if data_range > 0 and data_min > 0:
                    # Zoom: start from slightly below min
                    zoom_min = max(0, data_min - data_range * 0.5)
                    zoom_max = data_max + data_range * 0.2
                    axes[i].set_xlim(zoom_min, zoom_max)
                    # The x-axis no longer starts at zero, so say so on the chart
                    axes[i].annotate('‚ö†Ô∏è Axis zoomed to show differences', 
                                     xy=(0.02, 0.98), xycoords='axes fraction',
                                     fontsize=8, va='top', alpha=0.7)
                
                # Value label just past the end of each bar
                for bar, val in zip(bars, data[metric]):
                    axes[i].text(bar.get_width() + (data_range * 0.02 if data_range > 0 else 0.01), 
                                bar.get_y() + bar.get_height()/2, 
                                f'{val:.4f}' if metric != 'Dir Accuracy' else f'{val:.2f}%',
                                va='center', fontsize=9)
    
    plt.tight_layout()
    # Path is relative to the working directory; the folder is not created here
    plt.savefig('saved_results/tft_best_model_comparison.png', dpi=150, bbox_inches='tight')
    plt.show()

## 5. Relative Performance Comparison

Show percentage differences from the best model for each metric.

In [None]:
# === Cell 6: Relative Performance Charts ===

def plot_relative_performance(best_models_df: pd.DataFrame):
    """Chart how far each experiment's best model is from the best value per metric.

    For Test MAE, Test RMSE and Test MAPE (lower is better) the percentage
    difference to the smallest value is drawn as a horizontal bar; the best
    experiment is labelled ``BEST``. The figure is saved to
    ``saved_results/tft_relative_performance.png`` and shown.

    Args:
        best_models_df: One row per experiment with an ``Experiment`` column
            and the metric columns. Nothing is drawn when it has no rows.
    """
    if not len(best_models_df):
        print("No data to plot")
        return

    lower_is_better = ['Test MAE', 'Test RMSE', 'Test MAPE']
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    for ax, metric in zip(axes, lower_is_better):
        if metric not in best_models_df.columns:
            continue

        subset = best_models_df[['Experiment', metric]].dropna()
        if len(subset) == 0:
            continue

        # Relative gap to the smallest (best) value, in percent
        reference = subset[metric].min()
        subset['Pct Worse'] = ((subset[metric] - reference) / reference * 100).round(2)

        bar_colors = [EXPERIMENT_COLORS.get(exp, '#333333') for exp in subset['Experiment']]
        rects = ax.barh(subset['Experiment'], subset['Pct Worse'], color=bar_colors)
        ax.set_xlabel('% Worse Than Best')
        ax.set_title(f'{metric} - Relative Performance')
        ax.axvline(x=0, color='green', linestyle='--', linewidth=2, label='Best')

        # Label each bar; the winner gets a highlighted 'BEST' tag
        for rect, pct in zip(rects, subset['Pct Worse']):
            is_best = pct == 0
            ax.text(
                rect.get_width() + 0.5,
                rect.get_y() + rect.get_height()/2,
                'BEST' if is_best else f'+{pct:.1f}%',
                va='center',
                fontsize=9,
                color='green' if is_best else 'black',
                fontweight='bold' if is_best else 'normal',
            )

    plt.tight_layout()
    plt.savefig('saved_results/tft_relative_performance.png', dpi=150, bbox_inches='tight')
    plt.show()

# Draw and save the relative-performance charts for the per-experiment best models
plot_relative_performance(best_models_df)

## 6. Hyperparameter Analysis

Analyze the distribution and impact of different hyperparameters across experiments.

In [None]:
# === Cell 7: Hyperparameter Distributions ===

def plot_hyperparameter_distributions(experiment_data: Dict[str, pd.DataFrame]):
    """Plot overlaid histograms of key TFT hyperparameters for completed trials.

    One panel per hyperparameter (2x3 grid), one semi-transparent histogram
    per experiment. The figure is saved to
    ``saved_results/tft_hyperparameter_distributions.png`` and shown.
    Nothing is drawn when no experiment has a ``COMPLETE`` trial.

    Args:
        experiment_data: Experiment name -> trial table with a ``state``
            column and (optionally) the hyperparameter columns.
    """
    # Gather completed trials, tagging each row with its experiment name.
    # Experiments without completed trials are skipped so the guard below
    # really fires when there is nothing to plot.
    completed_frames = []
    for name, df in experiment_data.items():
        completed = df[df['state'] == 'COMPLETE'].copy()
        if len(completed) == 0:
            continue
        completed['Experiment'] = name
        completed_frames.append(completed)

    if not completed_frames:
        print("No completed trials to analyze")
        return

    combined_df = pd.concat(completed_frames, ignore_index=True)

    # TFT-specific hyperparameters to analyze (one panel each)
    params_to_plot = ['lookback', 'hidden_size', 'lstm_layers', 'dropout', 'learning_rate', 'batch_size']

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    for i, param in enumerate(params_to_plot):
        if param not in combined_df.columns:
            continue
        data = combined_df[[param, 'Experiment']].dropna()
        if len(data) == 0:
            continue

        for exp_name in data['Experiment'].unique():
            exp_data = data[data['Experiment'] == exp_name][param]
            if len(exp_data) > 0:
                # Never use more bins than there are distinct values (capped at 20)
                axes[i].hist(exp_data, alpha=0.5, label=exp_name,
                             color=EXPERIMENT_COLORS.get(exp_name, '#333333'),
                             bins=min(20, len(exp_data.unique())))

        axes[i].set_xlabel(param)
        axes[i].set_ylabel('Count')
        axes[i].set_title(f'{param} Distribution')
        axes[i].legend(fontsize=8)

    plt.tight_layout()
    plt.savefig('saved_results/tft_hyperparameter_distributions.png', dpi=150, bbox_inches='tight')
    plt.show()

# Draw and save the hyperparameter histograms for all loaded experiments
plot_hyperparameter_distributions(experiment_data)

## 7. Hyperparameter Impact Analysis

Analyze how different hyperparameter values correlate with model performance.

In [None]:
# === Cell 8: Hyperparameter Impact on Val Loss ===

def analyze_param_impact(experiment_data: Dict[str, pd.DataFrame]):
    """Chart how hyperparameter values relate to validation loss.

    Completed trials of all experiments are pooled. Discrete parameters get a
    bar chart of the mean loss per value (error bars = standard deviation);
    continuous parameters get a scatter plot with a fitted linear trend line.
    The figure is saved to ``saved_results/tft_param_impact.png`` and shown.
    Nothing is drawn when no experiment has a ``COMPLETE`` trial.

    Args:
        experiment_data: Experiment name -> trial table with ``state`` and
            ``value`` columns and (optionally) the hyperparameter columns.
    """
    # Gather completed trials; experiments without any are skipped so the
    # guard below really fires when there is nothing to plot.
    completed_frames = []
    for name, df in experiment_data.items():
        completed = df[df['state'] == 'COMPLETE'].copy()
        if len(completed) == 0:
            continue
        completed['Experiment'] = name
        completed_frames.append(completed)

    if not completed_frames:
        print("No completed trials to analyze")
        return

    combined_df = pd.concat(completed_frames, ignore_index=True)

    # Key TFT parameters to analyze
    categorical_params = ['lookback', 'hidden_size', 'lstm_layers', 'batch_size']
    continuous_params = ['dropout', 'learning_rate']

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    # Index of the next free panel; advanced only when something was drawn
    plot_idx = 0

    # Bar charts (mean +/- std of the loss) for discrete params
    for param in categorical_params:
        if param not in combined_df.columns or plot_idx >= len(axes):
            continue
        data = combined_df[[param, 'value']].dropna()
        if len(data) == 0:
            continue

        grouped = data.groupby(param)['value'].agg(['mean', 'std', 'count']).reset_index()
        grouped = grouped.sort_values(param)

        # Values are cast to str so the bars are evenly spaced categories
        axes[plot_idx].bar(grouped[param].astype(str), grouped['mean'],
                           yerr=grouped['std'], capsize=5, color='steelblue', alpha=0.7)
        axes[plot_idx].set_xlabel(param)
        axes[plot_idx].set_ylabel('Mean Val Loss')
        axes[plot_idx].set_title(f'Val Loss by {param}')
        axes[plot_idx].tick_params(axis='x', rotation=45)
        plot_idx += 1

    # Scatter plots for continuous params
    for param in continuous_params:
        if param not in combined_df.columns or plot_idx >= len(axes):
            continue
        data = combined_df[[param, 'value']].dropna()
        if len(data) == 0:
            continue

        axes[plot_idx].scatter(data[param], data['value'], alpha=0.3, s=10)
        axes[plot_idx].set_xlabel(param)
        axes[plot_idx].set_ylabel('Val Loss')
        axes[plot_idx].set_title(f'Val Loss vs {param}')

        # The trend line is best-effort: a failed fit must not abort the figure
        try:
            z = np.polyfit(data[param], data['value'], 1)
            p = np.poly1d(z)
            x_line = np.linspace(data[param].min(), data[param].max(), 100)
            axes[plot_idx].plot(x_line, p(x_line), 'r--', alpha=0.8, label='Trend')
            axes[plot_idx].legend()
        except (np.linalg.LinAlgError, TypeError, ValueError) as exc:
            print(f"Skipping trend line for {param}: {exc}")
        plot_idx += 1

    plt.tight_layout()
    plt.savefig('saved_results/tft_param_impact.png', dpi=150, bbox_inches='tight')
    plt.show()

# Draw and save the hyperparameter-vs-validation-loss charts
analyze_param_impact(experiment_data)

## 8. Cross-Experiment Optimal Parameters

Find the optimal hyperparameters that work best across all experiments.

In [None]:
# === Cell 9: Cross-Experiment Optimal Parameters ===

def find_optimal_params(experiment_data: Dict[str, pd.DataFrame]):
    """Report the most frequent hyperparameter values among each experiment's top trials.

    The 10 completed trials with the lowest ``value`` are taken from every
    experiment and pooled. For each discrete hyperparameter the mode and mean
    are printed and a value-count bar chart is drawn; the figure is saved to
    ``saved_results/tft_optimal_params.png`` and shown.

    Args:
        experiment_data: Experiment name -> trial table with ``state`` and
            ``value`` columns and (optionally) the hyperparameter columns.

    Returns:
        Dict mapping parameter name to its most common value, or ``None``
        when no experiment has a completed trial.
    """
    top_n = 10

    # Pool the best `top_n` completed trials of every experiment
    leaders = []
    for name, df in experiment_data.items():
        finished = df[df['state'] == 'COMPLETE'].copy()
        if len(finished) == 0:
            continue
        best_rows = finished.nsmallest(min(top_n, len(finished)), 'value')
        best_rows['Experiment'] = name
        leaders.append(best_rows)

    if len(leaders) == 0:
        print("No completed trials to analyze")
        return

    top_df = pd.concat(leaders, ignore_index=True)

    # Discrete hyperparameters whose most common value is of interest
    params = ['lookback', 'hidden_size', 'lstm_layers', 'batch_size', 'attention_head_size']

    print("\nüéØ OPTIMAL PARAMETER VALUES (from top performers)")
    print("=" * 60)

    optimal_params = {}
    for param in params:
        if param not in top_df.columns:
            continue
        observed = top_df[param].dropna()
        if len(observed) == 0:
            continue
        modes = observed.mode()
        most_common = modes.iloc[0] if len(modes) > 0 else observed.iloc[0]
        mean_val = observed.mean()
        optimal_params[param] = most_common
        print(f"{param}: Most common = {most_common}, Mean = {mean_val:.2f}")

    # One value-count panel per parameter
    fig, axes = plt.subplots(1, len(params), figsize=(4*len(params), 4))
    if len(params) == 1:
        # A single subplot is returned bare; wrap it so it can be iterated
        axes = [axes]

    for ax, param in zip(axes, params):
        if param not in top_df.columns:
            continue
        observed = top_df[param].dropna()
        if len(observed) == 0:
            continue
        counts = observed.value_counts().sort_index()
        ax.bar(counts.index.astype(str), counts.values, color='steelblue')
        ax.set_xlabel(param)
        ax.set_ylabel('Count in Top Models')
        ax.set_title(f'{param} in Top {top_n} Models')
        ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.savefig('saved_results/tft_optimal_params.png', dpi=150, bbox_inches='tight')
    plt.show()

    return optimal_params

# Parameter name -> most common value among the pooled top trials (None when no trial completed)
optimal_params = find_optimal_params(experiment_data)

## 9. Convergence Analysis

Analyze how quickly each experiment converged to good solutions.

In [None]:
# === Cell 10: Convergence Analysis ===

def plot_convergence(experiment_data: Dict[str, pd.DataFrame]):
    """Plot, per experiment, the best validation loss reached so far against trial number.

    Only ``COMPLETE`` trials are used; experiments without any are skipped.
    The figure is saved to ``saved_results/tft_convergence.png`` and shown.

    Args:
        experiment_data: Experiment name -> trial table with ``state``,
            ``trial_number`` and ``value`` columns.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    for name, df in experiment_data.items():
        is_complete = df['state'] == 'COMPLETE'
        history = df[is_complete].copy()
        if len(history) == 0:
            continue

        # Order trials by number, then track the running minimum of the loss
        history = history.sort_values('trial_number')
        running_best = history['value'].cummin()

        ax.plot(
            history['trial_number'],
            running_best,
            label=name,
            color=EXPERIMENT_COLORS.get(name, '#333333'),
            linewidth=2,
        )

    ax.set_xlabel('Trial Number')
    ax.set_ylabel('Best Val Loss So Far')
    ax.set_title('Convergence Analysis - Best Val Loss Over Trials')
    # Legend sits outside the axes, to the right of the plot
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('saved_results/tft_convergence.png', dpi=150, bbox_inches='tight')
    plt.show()

# Draw and save the best-loss-so-far curves for all loaded experiments
plot_convergence(experiment_data)

## 10. Final Summary & Recommendations

Summary of findings and recommendations for model selection.

In [None]:
# === Cell 11: Final Summary ===

def generate_final_summary(best_models_df: pd.DataFrame, experiment_data: Dict[str, pd.DataFrame]):
    """Print the overall winners and trial counts across all experiments.

    Reports the best experiment by validation loss, by test MAE and by
    directional accuracy (where those columns have data), followed by the
    number of completed and pruned trials. Trials in any other state are
    listed separately instead of being counted as pruned.

    Args:
        best_models_df: One row per experiment with ``Experiment`` and
            ``Val Loss`` columns and optional ``Test MAE`` / ``Dir Accuracy``.
        experiment_data: Experiment name -> trial table with a ``state`` column.

    Returns:
        ``best_models_df`` unchanged, or ``None`` when it has no rows.
    """
    if len(best_models_df) == 0:
        print("No data available for summary")
        return

    print("\n" + "=" * 80)
    print("üìä FINAL SUMMARY & RECOMMENDATIONS")
    print("=" * 80)

    # Best overall model (by val loss)
    best_val = best_models_df.loc[best_models_df['Val Loss'].idxmin()]
    print(f"\nüèÜ Best by Validation Loss: {best_val['Experiment']}")
    print(f"   Val Loss: {best_val['Val Loss']:.6f}")
    if pd.notna(best_val.get('Test MAE')):
        print(f"   Test MAE: {best_val['Test MAE']:.4f}")

    # Best by test metrics (rows without the metric are ignored)
    if 'Test MAE' in best_models_df.columns:
        mae_valid = best_models_df.dropna(subset=['Test MAE'])
        if len(mae_valid) > 0:
            best_mae = mae_valid.loc[mae_valid['Test MAE'].idxmin()]
            print(f"\nüéØ Best by Test MAE: {best_mae['Experiment']}")
            print(f"   Test MAE: {best_mae['Test MAE']:.4f}")

    if 'Dir Accuracy' in best_models_df.columns:
        da_valid = best_models_df.dropna(subset=['Dir Accuracy'])
        if len(da_valid) > 0:
            # Higher directional accuracy is better, hence idxmax
            best_da = da_valid.loc[da_valid['Dir Accuracy'].idxmax()]
            print(f"\nüéØ Best by Directional Accuracy: {best_da['Experiment']}")
            print(f"   Dir Accuracy: {best_da['Dir Accuracy']:.2f}%")

    # Trial counts by state; pruned trials are counted explicitly rather than
    # derived as "everything that did not complete"
    total_trials = sum(len(df) for df in experiment_data.values())
    total_completed = sum(int((df['state'] == 'COMPLETE').sum()) for df in experiment_data.values())
    total_pruned = sum(int((df['state'] == 'PRUNED').sum()) for df in experiment_data.values())
    total_other = total_trials - total_completed - total_pruned

    def share(count):
        # Percentage of all trials, guarded against an empty trial set
        return f"{count / total_trials * 100:.1f}%" if total_trials > 0 else "N/A"

    print(f"\nüìà Total Trials Analyzed: {total_trials}")
    print(f"   Completed: {total_completed} ({share(total_completed)})")
    print(f"   Pruned: {total_pruned} ({share(total_pruned)})")
    if total_other > 0:
        print(f"   Other states: {total_other} ({share(total_other)})")

    print("\n" + "=" * 80)

    # Returned unchanged so the caller can keep or export the table
    return best_models_df

# Prints the summary; the return value is best_models_df itself (None when it has no rows)
final_summary = generate_final_summary(best_models_df, experiment_data)

## 11. Radar Chart Comparison

Visual comparison of best models using a radar/spider chart.

In [None]:
# === Cell 12: Radar Chart ===

def plot_radar_chart(best_models_df: pd.DataFrame):
    """Draw a radar chart comparing the best model of each experiment.

    Test MAE, Test RMSE and Test MAPE are min-max scaled and inverted so that
    1 is the best experiment on every axis. Directional accuracy is added as
    a fourth axis (scaled, not inverted) when the column holds data. The
    figure is saved to ``saved_results/tft_radar_chart.png`` and shown.

    Args:
        best_models_df: One row per experiment with ``Experiment`` and the
            metric columns. Nothing is drawn when required columns are
            missing or no row has all three error metrics.
    """
    metrics = ['Test MAE', 'Test RMSE', 'Test MAPE']

    # A frame built from zero rows has no columns at all; dropna(subset=...)
    # would raise KeyError on it, so check for the columns first
    required = ['Experiment'] + metrics
    if any(column not in best_models_df.columns for column in required):
        print("Not enough data for radar chart")
        return

    # Filter to experiments with all metrics
    valid_df = best_models_df.dropna(subset=metrics)
    if len(valid_df) == 0:
        print("Not enough data for radar chart")
        return

    # Normalize metrics (lower is better, so invert)
    normalized = valid_df.copy()
    for metric in metrics:
        max_val = normalized[metric].max()
        min_val = normalized[metric].min()
        if max_val != min_val:
            # Invert so higher is better on the chart
            normalized[metric] = 1 - (normalized[metric] - min_val) / (max_val - min_val)
        else:
            # All experiments tie on this metric
            normalized[metric] = 1

    # Add directional accuracy (higher is better, no inversion needed), but
    # only when there is at least one value to plot on that axis
    if 'Dir Accuracy' in valid_df.columns:
        da_data = valid_df['Dir Accuracy'].dropna()
        if len(da_data) > 0:
            metrics.append('Dir Accuracy')
            max_da = da_data.max()
            min_da = da_data.min()
            if max_da != min_da:
                normalized['Dir Accuracy'] = (valid_df['Dir Accuracy'] - min_da) / (max_da - min_da)
            else:
                normalized['Dir Accuracy'] = 1

    # One evenly spaced angle per metric; the first is repeated to close the polygon
    num_vars = len(metrics)
    angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

    for _, row in normalized.iterrows():
        values = [row[m] for m in metrics]
        values += values[:1]
        color = EXPERIMENT_COLORS.get(row['Experiment'], '#333333')
        ax.plot(angles, values, linewidth=2, label=row['Experiment'], color=color)
        ax.fill(angles, values, alpha=0.1, color=color)

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(metrics)
    ax.set_title('TFT Model Comparison (Higher = Better)', size=14, y=1.1)
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))

    plt.tight_layout()
    plt.savefig('saved_results/tft_radar_chart.png', dpi=150, bbox_inches='tight')
    plt.show()

# Draw and save the radar comparison of the per-experiment best models
plot_radar_chart(best_models_df)

## 12. Export Results

Export summary tables and charts for the thesis.

In [None]:
# === Cell 13: Export Results ===

# Create output directory
# parents/exist_ok: missing parent folders are created and re-running the cell is harmless
output_dir = Path('saved_results/tft_analysis_output')
output_dir.mkdir(parents=True, exist_ok=True)

# Each table below is written only when it has rows; the index is never written

# Export summary dataframe
if len(summary_df) > 0:
    summary_df.to_csv(output_dir / 'tft_experiment_summary.csv', index=False)
    print(f"‚úÖ Saved experiment summary to {output_dir / 'tft_experiment_summary.csv'}")

# Export best models dataframe
if len(best_models_df) > 0:
    best_models_df.to_csv(output_dir / 'tft_best_models.csv', index=False)
    print(f"‚úÖ Saved best models to {output_dir / 'tft_best_models.csv'}")

# Export pruning stats
if len(pruning_df) > 0:
    pruning_df.to_csv(output_dir / 'tft_pruning_stats.csv', index=False)
    print(f"‚úÖ Saved pruning stats to {output_dir / 'tft_pruning_stats.csv'}")

# Only the CSV tables are written in this cell
print(f"\nüìÅ All results exported to: {output_dir}")