# Hyperparameter Search Results Analysis

This notebook analyzes the results from the hyperparameter search experiments.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
from pathlib import Path

# Put the project's `src` folder at the front of the import path. The folder is
# resolved against the current working directory, so this only points at the
# right place when the kernel's working directory contains `src/`.
sys.path.insert(0, str(Path.cwd() / 'src'))

# Project-local import; presumably this module lives in `src/` and therefore
# has to be imported after the sys.path change above - TODO confirm.
from experiment_tracker import ExperimentTracker

# Plot defaults for the whole notebook: seaborn's 'whitegrid' style and a
# 14x6 default figure size (individual cells override figsize where needed).
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# pandas / seaborn / matplotlib; consider narrowing it to specific categories.
import warnings
warnings.filterwarnings('ignore')

## 1. Load Experiment Results

In [None]:
# Create the tracker object that the following cells use to load results.
# 'experiments' is a relative path; presumably it is resolved against the
# kernel's working directory - TODO confirm in ExperimentTracker.
tracker = ExperimentTracker(experiment_dir='experiments')

# Let the tracker print its own overview of the stored experiments
# (the exact content is defined by ExperimentTracker.print_summary).
tracker.print_summary()

In [None]:
# Fetch every recorded run from the tracker. The rest of the notebook reads
# `results` as a pandas DataFrame (it uses .columns, .head(), .groupby(), ...).
results = tracker.load_results()

# Quick sanity report: number of runs, then the available column names.
print(
    f"Total experiments: {len(results)}",
    f"\nColumns: {results.columns.tolist()}",
    sep="\n",
)

# Last expression of the cell, so the first rows are rendered as the cell output.
results.head()

## 2. Overall Best Models

In [None]:
# Top-10 regression runs, ranked by test MAE (lower is better).
print("="*80)
print("TOP 10 REGRESSION MODELS (by Test MAE)")
print("="*80)

# A run is treated as a regression run when its target name mentions
# intensity, count or time; rows with a missing target are excluded (na=False).
# `reg_results` is also read by the model-comparison and recommendation cells.
is_regression_target = results['target_type'].str.contains('intensity|count|time', na=False)
reg_results = results[is_regression_target]

# Rank only runs that actually recorded a test MAE, so a run with a missing
# metric can never show up in the "top 10" table.
if 'metric_test_mae' in reg_results.columns:
    reg_scored = reg_results.dropna(subset=['metric_test_mae'])
else:
    reg_scored = reg_results.iloc[0:0]

if len(reg_scored) > 0:
    top_reg = reg_scored.nsmallest(10, 'metric_test_mae')
    display_cols = ['run_id', 'model_name', 'target_type', 'metric_test_mae', 'metric_test_rmse']
    # Show only the columns that exist in this result set.
    display_cols = [c for c in display_cols if c in top_reg.columns]
    print(top_reg[display_cols].to_string(index=False))
else:
    print("No regression results found.")

In [None]:
# Top-10 classification runs, ranked by test F1 (higher is better).
print("="*80)
print("TOP 10 CLASSIFICATION MODELS (by Test F1)")
print("="*80)

# Classification targets mention 'high' but NOT 'count': a name such as
# 'target_high_count_next_7d' is a count (regression) target, which is the
# same rule the export cell at the end of the notebook applies.
# `clf_results` is also read by the model-comparison and recommendation cells.
target_names = results['target_type']
is_high_target = target_names.str.contains('high', na=False)
is_count_target = target_names.str.contains('count', na=False)
clf_results = results[is_high_target & ~is_count_target]

# Rank only runs that actually recorded a test F1, so a run with a missing
# metric can never show up in the "top 10" table.
if 'metric_test_f1' in clf_results.columns:
    clf_scored = clf_results.dropna(subset=['metric_test_f1'])
else:
    clf_scored = clf_results.iloc[0:0]

if len(clf_scored) > 0:
    top_clf = clf_scored.nlargest(10, 'metric_test_f1')
    display_cols = ['run_id', 'model_name', 'target_type', 'metric_test_f1', 'metric_test_precision', 'metric_test_recall']
    # Show only the columns that exist in this result set.
    display_cols = [c for c in display_cols if c in top_clf.columns]
    print(top_clf[display_cols].to_string(index=False))
else:
    print("No classification results found.")

## 3. Model Comparison

In [None]:
# Side-by-side box plots per model: regression error on the left panel,
# classification F1 on the right. Uses `reg_results` and `clf_results`
# produced by the two "best models" cells above.
required_cols = ('model_name', 'target_type')
if all(col in results.columns for col in required_cols):
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # (axis, data, metric column, panel title, y-axis label)
    panels = (
        (axes[0], reg_results, 'metric_test_mae', 'Test MAE by Model (Regression Tasks)', 'Test MAE'),
        (axes[1], clf_results, 'metric_test_f1', 'Test F1 by Model (Classification Tasks)', 'Test F1'),
    )
    for ax, frame, metric, title, ylabel in panels:
        # A panel is left blank when its metric column or its rows are missing.
        if metric not in frame.columns or len(frame) == 0:
            continue
        sns.boxplot(data=frame, x='model_name', y=metric, ax=ax)
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.set_xlabel('Model')
        ax.set_ylabel(ylabel)
        ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

## 4. Hyperparameter Sensitivity Analysis

In [None]:
# Hyperparameter sensitivity for Random Forest runs: how test MAE moves with
# n_estimators (left panel) and max_depth (right panel).

rf_results = results[results['model_name'] == 'random_forest']

if len(rf_results) > 0 and 'config_n_estimators' in rf_results.columns:
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # Only the 'intensity' targets are plotted in this cell.
    rf_reg = rf_results[rf_results['target_type'].str.contains('intensity', na=False)]

    # Both panels put test MAE on the y-axis, so both need the column to exist;
    # checking once keeps the second panel from failing when it is absent.
    has_mae = 'metric_test_mae' in rf_reg.columns

    # MAE vs n_estimators
    if has_mae and len(rf_reg) > 0:
        sns.scatterplot(data=rf_reg, x='config_n_estimators', y='metric_test_mae',
                        hue='target_type', ax=axes[0], alpha=0.6)
        axes[0].set_title('Random Forest: MAE vs n_estimators', fontsize=14, fontweight='bold')
        axes[0].set_xlabel('n_estimators')
        axes[0].set_ylabel('Test MAE')

    # MAE vs max_depth (runs without a recorded max_depth are left out)
    if has_mae and 'config_max_depth' in rf_reg.columns:
        rf_reg_depth = rf_reg[rf_reg['config_max_depth'].notna()]
        if len(rf_reg_depth) > 0:
            sns.scatterplot(data=rf_reg_depth, x='config_max_depth', y='metric_test_mae',
                            hue='target_type', ax=axes[1], alpha=0.6)
            axes[1].set_title('Random Forest: MAE vs max_depth', fontsize=14, fontweight='bold')
            axes[1].set_xlabel('max_depth')
            axes[1].set_ylabel('Test MAE')

    plt.tight_layout()
    plt.show()
else:
    print("No Random Forest results found for hyperparameter analysis.")

## 5. Feature Set Comparison

In [None]:
# Compare performance across feature sets: test MAE for the 'intensity'
# targets (left panel) and test F1 for the classification targets (right panel).
if 'config_feature_set' in results.columns:
    # Runs that did not record a feature set cannot be grouped.
    feature_set_results = results[results['config_feature_set'].notna()]

    if len(feature_set_results) > 0:
        fig, axes = plt.subplots(1, 2, figsize=(16, 6))
        fs_targets = feature_set_results['target_type']

        # Regression
        fs_reg = feature_set_results[fs_targets.str.contains('intensity', na=False)]
        if len(fs_reg) > 0 and 'metric_test_mae' in fs_reg.columns:
            sns.boxplot(data=fs_reg, x='config_feature_set', y='metric_test_mae', ax=axes[0])
            axes[0].set_title('Test MAE by Feature Set', fontsize=14, fontweight='bold')
            axes[0].set_xlabel('Feature Set')
            axes[0].set_ylabel('Test MAE')
            axes[0].tick_params(axis='x', rotation=45)

        # Classification: 'high' targets, excluding '*count*' targets, which the
        # export cell of this notebook classifies as regression.
        fs_clf = feature_set_results[
            fs_targets.str.contains('high', na=False)
            & ~fs_targets.str.contains('count', na=False)
        ]
        if len(fs_clf) > 0 and 'metric_test_f1' in fs_clf.columns:
            sns.boxplot(data=fs_clf, x='config_feature_set', y='metric_test_f1', ax=axes[1])
            axes[1].set_title('Test F1 by Feature Set', fontsize=14, fontweight='bold')
            axes[1].set_xlabel('Feature Set')
            axes[1].set_ylabel('Test F1')
            axes[1].tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.show()
    else:
        print("No feature set comparison data available.")
else:
    print("Feature set information not found in results.")

## 6. Prediction Window Analysis

In [None]:
# How does performance vary with prediction window (k days)?
# Extract k from target_type (e.g., target_high_count_next_7d -> 7)

import re

def extract_k_days(target):
    """Return the prediction window encoded in a target name.

    The input is converted with ``str()`` and searched for the first run of
    digits immediately followed by the letter ``d`` (for example
    ``'target_high_count_next_7d'`` gives ``7``).

    Returns:
        The digits as an ``int``, or ``None`` when no such pattern occurs.
    """
    hit = re.search(r'(\d+)d', str(target))
    if hit is None:
        return None
    return int(hit.group(1))

# Store the prediction window of every run on `results` as the 'k_days' column.
results['k_days'] = results['target_type'].apply(extract_k_days)

# Only runs whose target name carried a window can be placed on the x-axis.
has_window = results['k_days'].notna()
k_results = results[has_window]

if len(k_results) == 0 or 'metric_test_mae' not in k_results.columns:
    print("No prediction window data available.")
else:
    # One line per model: test MAE as a function of the window length.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.lineplot(data=k_results, x='k_days', y='metric_test_mae',
                 hue='model_name', marker='o', ci=95, ax=ax)
    ax.set_title('Test MAE vs Prediction Window (k days)', fontsize=14, fontweight='bold')
    ax.set_xlabel('Prediction Window (k days)')
    ax.set_ylabel('Test MAE')
    ax.legend(title='Model')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

## 7. Target Type Comparison

In [None]:
# Per-target summary table: MAE statistics, F1 statistics and the run count.
if 'target_type' in results.columns:
    # Aggregations we would like to show for each column.
    wanted_aggs = {
        'metric_test_mae': ['mean', 'std', 'min'],
        'metric_test_f1': ['mean', 'std', 'max'],
        'run_id': 'count',
    }
    # Keep only the columns that exist: asking groupby().agg() for a missing
    # column raises a KeyError, e.g. when a search contained no classification
    # runs and therefore never produced 'metric_test_f1'.
    agg_spec = {col: funcs for col, funcs in wanted_aggs.items() if col in results.columns}

    print("="*80)
    print("PERFORMANCE BY TARGET TYPE")
    print("="*80)

    if agg_spec:
        target_summary = results.groupby('target_type').agg(agg_spec).round(4)
        print(target_summary)
    else:
        print("No metric columns available to summarize.")

## 8. Training Time Analysis

In [None]:
# Training time vs test MAE trade-off, one point per run, coloured by model.
required_cols = ('metric_train_time_seconds', 'metric_test_mae')
if all(col in results.columns for col in required_cols):
    # Runs without a recorded training time cannot be placed on the x-axis.
    time_results = results[results['metric_train_time_seconds'].notna()]
else:
    time_results = results.iloc[0:0]

if len(time_results) > 0:
    scatter_kwargs = dict(data=time_results, x='metric_train_time_seconds', y='metric_test_mae',
                          hue='model_name', alpha=0.6)
    # Point size encodes the training-set size, but only when that column was
    # recorded; naming a missing column makes seaborn raise.
    if 'config_n_train_samples' in time_results.columns:
        scatter_kwargs['size'] = 'config_n_train_samples'

    plt.figure(figsize=(10, 6))
    sns.scatterplot(**scatter_kwargs)
    plt.title('Training Time vs Test MAE', fontsize=14, fontweight='bold')
    plt.xlabel('Training Time (seconds)')
    plt.ylabel('Test MAE')
    # Log-scaled x-axis for the training times.
    plt.xscale('log')
    # Legend is anchored outside the axes, to the right of the plot.
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()
else:
    # Reached both when the columns are missing and when no run has a timing
    # value, so the cell never finishes silently.
    print("Training time data not available.")

## 9. Export Best Models for Further Analysis

In [None]:
# Export the top 5 runs for each target type to experiments/best_models.csv.
best_models = []

# dropna(): a missing target name cannot be matched with `==` and has no
# meaningful "best" runs.
for target in results['target_type'].dropna().unique():
    target_results = results[results['target_type'] == target]

    # Classification targets mention 'high' but not 'count'; everything else is
    # ranked as regression.
    target_name = str(target).lower()
    is_classification = 'high' in target_name and 'count' not in target_name
    metric = 'metric_test_f1' if is_classification else 'metric_test_mae'

    if metric not in target_results.columns:
        continue

    # Rank only runs that recorded the metric, so a run with a missing score is
    # never exported as a "best" model when fewer than 5 scored runs exist.
    scored = target_results.dropna(subset=[metric])
    if scored.empty:
        continue

    if is_classification:
        best = scored.nlargest(5, metric)   # higher F1 is better
    else:
        best = scored.nsmallest(5, metric)  # lower MAE is better
    best_models.append(best)

if best_models:
    best_models_df = pd.concat(best_models)
    best_models_df.to_csv('experiments/best_models.csv', index=False)
    print(f"Exported {len(best_models_df)} best models to experiments/best_models.csv")
    print(f"\nBest models:")
    display_cols = ['run_id', 'model_name', 'target_type', 'metric_test_mae', 'metric_test_f1']
    # Show only the columns that exist in this result set.
    display_cols = [c for c in display_cols if c in best_models_df.columns]
    print(best_models_df[display_cols].to_string(index=False))
else:
    print("No best models to export.")

## 10. Recommendations

In [None]:
# Textual wrap-up: best regression run, best classification run, best feature
# set and suggested next steps. Relies on `reg_results` and `clf_results` from
# the "Overall Best Models" cells.
print("="*80)
print("RECOMMENDATIONS BASED ON RESULTS")
print("="*80)

if len(results) > 0:
    # Best regression run = lowest test MAE. The notna().any() guard skips the
    # section when no run recorded the metric, where idxmin() has no valid answer.
    if 'metric_test_mae' in reg_results.columns and reg_results['metric_test_mae'].notna().any():
        best_reg = reg_results.loc[reg_results['metric_test_mae'].idxmin()]
        print(f"\n✅ Best Regression Model:")
        print(f"   Model: {best_reg['model_name']}")
        print(f"   Target: {best_reg['target_type']}")
        print(f"   Test MAE: {best_reg['metric_test_mae']:.4f}")
        if 'config_n_estimators' in best_reg:
            print(f"   Key params: n_estimators={best_reg.get('config_n_estimators', 'N/A')}, "
                  f"max_depth={best_reg.get('config_max_depth', 'N/A')}")

    # Best classification run = highest test F1 (same guard as above).
    if 'metric_test_f1' in clf_results.columns and clf_results['metric_test_f1'].notna().any():
        best_clf = clf_results.loc[clf_results['metric_test_f1'].idxmax()]
        print(f"\n✅ Best Classification Model:")
        print(f"   Model: {best_clf['model_name']}")
        print(f"   Target: {best_clf['target_type']}")
        print(f"   Test F1: {best_clf['metric_test_f1']:.4f}")
        if 'config_n_estimators' in best_clf:
            print(f"   Key params: n_estimators={best_clf.get('config_n_estimators', 'N/A')}, "
                  f"max_depth={best_clf.get('config_max_depth', 'N/A')}")

    # Feature set recommendation: lowest mean test MAE. Both columns must exist,
    # and feature sets whose mean is NaN (no MAE recorded) are dropped so they
    # cannot be reported as "best".
    if 'config_feature_set' in results.columns and 'metric_test_mae' in results.columns:
        fs_performance = (
            results.groupby('config_feature_set')['metric_test_mae']
            .mean()
            .dropna()
            .sort_values()
        )
        if len(fs_performance) > 0:
            best_fs = fs_performance.index[0]
            print(f"\n✅ Best Feature Set: {best_fs}")
            print(f"   Average Test MAE: {fs_performance.iloc[0]:.4f}")

    print("\n📊 Next Steps:")
    print("   1. Retrain best models on full dataset")
    print("   2. Perform per-user error analysis")
    print("   3. Investigate feature importance for best models")
    print("   4. Test on holdout set or new data")
    print("   5. Consider ensemble of top models")
else:
    print("\n⚠️  No results available. Run hyperparameter search first.")
    print("   python run_hyperparameter_search.py --mode quick")