In [17]:
import pandas as pd
import numpy as np
import glob
import os
import shutil

# Selection based on validation RMSE + generalization gap (lambda = 1)
selection_basis = 'Training-Validation Delta'
lambda_val = 1.0
n_top = 3
# Setup paths: metric CSVs live under <parent of cwd>/<runtyp>/metric_csv
runtyp = 'RAS_multirun_pinn'
current_directory = os.getcwd()
working_dir = os.path.dirname(current_directory)
csv_directory = f"{working_dir}/{runtyp}/metric_csv"

# Load all CSV files. glob returns paths in arbitrary order, so sort them to
# make the load order (and any tie-breaking in later rankings) reproducible.
csv_files = sorted(glob.glob(os.path.join(csv_directory, "*.csv")))
if not csv_files:
    # Without this guard pd.concat fails later with "No objects to concatenate",
    # which gives no hint about which directory was searched.
    raise FileNotFoundError(f"No metric CSV files found in: {csv_directory}")

all_results = []

for file_path in csv_files:
    filename = os.path.basename(file_path)
    # The name is split on '_': field 1 is the seed, fields 2 and 3 together
    # form the timestamp.
    parts = filename.replace('.csv', '').split('_')

    try:
        seed = int(parts[1])
        timestamp = parts[2] + '_' + parts[3]
    except (IndexError, ValueError):
        # Too few fields or a non-numeric seed: keep the file, but identify it
        # by its full filename instead of a parsed seed/timestamp.
        seed = None
        timestamp = filename

    df = pd.read_csv(file_path)
    # Tag every row so it can be traced back to its source file and model
    df['filename'] = filename
    df['seed'] = seed
    df['model_id'] = f"model_{seed}_{timestamp}"

    all_results.append(df)

# Combine all results into one long table (one row per model/dataset pair)
combined_df = pd.concat(all_results, ignore_index=True)

# Score each model from the first Training / Validation / Test row it has
model_scores = []

# sort=False keeps models in order of first appearance in combined_df
for model_id, model_data in combined_df.groupby('model_id', sort=False):
    training_rows = model_data.loc[model_data['Dataset'] == 'Training']
    validation_rows = model_data.loc[model_data['Dataset'] == 'Validation']
    test_rows = model_data.loc[model_data['Dataset'] == 'Test']

    # .iloc[0] raises IndexError when a model has no row for that dataset
    train_rmse, train_r2 = training_rows['RMSE'].iloc[0], training_rows['R2'].iloc[0]
    val_rmse, val_r2 = validation_rows['RMSE'].iloc[0], validation_rows['R2'].iloc[0]
    test_rmse, test_r2 = test_rows['RMSE'].iloc[0], test_rows['R2'].iloc[0]

    # Ranking criterion for the top-N pick: validation error penalised by the
    # absolute train/validation RMSE difference, weighted by lambda_val
    generalization_gap = abs(train_rmse - val_rmse)
    selection_score = val_rmse + lambda_val * generalization_gap

    # Train/validation averages used for the median pick (test metrics are
    # recorded but do not enter either criterion)
    overall_rmse = (train_rmse + val_rmse) / 2
    overall_r2 = (train_r2 + val_r2) / 2

    model_scores.append(dict(
        model_id=model_id,
        seed=model_data['seed'].iloc[0],
        train_rmse=train_rmse,
        train_r2=train_r2,
        val_rmse=val_rmse,
        val_r2=val_r2,
        test_rmse=test_rmse,
        test_r2=test_r2,
        generalization_gap=generalization_gap,
        selection_score=selection_score,
        overall_rmse=overall_rmse,
        overall_r2=overall_r2,
    ))

# One row per model
scores_df = pd.DataFrame(model_scores)

# Report how many models were scored and the two selection rules in use
header_lines = [
    f"Total models analyzed: {len(scores_df)}",
    "Selection Strategy:",
    f"  - Top {n_top}: Validation RMSE + {lambda_val} × |Train RMSE - Val RMSE|",
    "  - Median: Overall RMSE = (Train RMSE + Val RMSE) / 2",
    "=" * 80,
]
print("\n".join(header_lines))

# TOP N MODELS: the n_top rows with the smallest selection score
top_scores = scores_df.nsmallest(n_top, 'selection_score').reset_index(drop=True)
top_n_models = list(top_scores['model_id'])

print(f"TOP {n_top} MODELS (by Selection Score):")
print("=" * 50)
for position, row in top_scores.iterrows():
    # After reset_index the index label is the 0-based rank
    print("\n".join([
        f"{position + 1}. {row['model_id']} (seed {row['seed']}):",
        f"   Selection Score: {row['selection_score']:.6f}",
        f"   Validation RMSE: {row['val_rmse']:.6f}",
        f"   Generalization Gap: {row['generalization_gap']:.6f}",
        f"   Overall RMSE (train+val): {row['overall_rmse']:.6f}",
        f"   Test RMSE: {row['test_rmse']:.6f}",
    ]))
    print()

# MEDIAN MODEL: middle row after sorting by overall (train+val) RMSE.
# Integer division picks the upper-middle row for an even count
# (e.g. 20 models -> index 10, i.e. rank 11).
total_models = len(scores_df)
median_rank = total_models // 2
median_scores = scores_df.sort_values('overall_rmse').reset_index(drop=True)
median_model_info = median_scores.iloc[median_rank]
median_model_id = median_model_info['model_id']

print("=" * 80)
print(f"MEDIAN MODEL (by Overall RMSE - Rank {median_rank+1}/{total_models}):")
print("=" * 50)
print("\n".join([
    f"Model: {median_model_info['model_id']} (seed {median_model_info['seed']})",
    f"Overall RMSE (train+val): {median_model_info['overall_rmse']:.6f}",
    f"Overall R² (train+val): {median_model_info['overall_r2']:.6f}",
    f"Train RMSE: {median_model_info['train_rmse']:.6f}",
    f"Validation RMSE: {median_model_info['val_rmse']:.6f}",
    f"Test RMSE: {median_model_info['test_rmse']:.6f}",
    f"Selection Score: {median_model_info['selection_score']:.6f}",
]))
print()

# Combine selected models. dict.fromkeys removes a duplicate (the median model
# may also be in the top N) while keeping the top-N-then-median order; a set
# would give an order that changes from run to run.
all_selected_models = list(dict.fromkeys(top_n_models + [median_model_id]))

# Rows of the combined table that belong to any selected model
selected_combined_df = combined_df[combined_df['model_id'].isin(all_selected_models)]


def _print_dataset_stats(frame):
    """Print mean±std of R² and RMSE for each dataset split in *frame*.

    Args:
        frame: DataFrame with 'Dataset', 'R2' and 'RMSE' columns.
    """
    for dataset in ['Training', 'Validation', 'Test']:
        subset = frame[frame['Dataset'] == dataset]
        r2_mean = subset['R2'].mean()
        r2_std = subset['R2'].std()
        rmse_mean = subset['RMSE'].mean()
        rmse_std = subset['RMSE'].std()
        print(f"{dataset:12} | R²: {r2_mean:.4f}±{r2_std:.4f} | RMSE: {rmse_mean:.4f}±{rmse_std:.4f}")


print("="*80)
print("PERFORMANCE STATISTICS (ALL MODELS)")
print("="*80)

# ALL MODELS summary
print(f"\nALL {total_models} MODELS SUMMARY:")
print("-" * 40)
_print_dataset_stats(combined_df)

print("="*80)
print("SELECTED MODELS COMPARISON")
print("="*80)

# Top N summary
top_n_subset = combined_df[combined_df['model_id'].isin(top_n_models)]
print(f"\nTOP {n_top} MODELS:")
print("-" * 40)
_print_dataset_stats(top_n_subset)

# Median model summary: a single model, so print raw values rather than mean±std
median_subset = combined_df[combined_df['model_id'] == median_model_id]
print(f"\nMEDIAN MODEL PERFORMANCE:")
print("-" * 40)
for dataset in ['Training', 'Validation', 'Test']:
    subset = median_subset[median_subset['Dataset'] == dataset]
    if subset.empty:
        # No row for this split; skip it instead of failing on .iloc[0]
        continue
    r2_val = subset['R2'].iloc[0]
    rmse_val = subset['RMSE'].iloc[0]
    print(f"{dataset:12} | R²: {r2_val:.4f}        | RMSE: {rmse_val:.4f}")

# Directory searched for model weight files: <working_dir>/<runtyp>/model_weights
model_weights_dir = os.path.join(working_dir, runtyp, "model_weights")

# Section banner: blank line, then the title framed by two rules
print("\n" + "=" * 80, "RETRIEVING MODEL WEIGHT FILES", "=" * 80, sep="\n")

def find_weights(model_ids, category_name, weights_dir=None):
    """Locate the ``.pth`` weight file for each model ID and report the result.

    Prints a heading for the category and one found/missing line per model.

    Args:
        model_ids: Iterable of model ID strings, normally shaped like
            ``model_<seed>_<date>_<time>``.
        category_name: Label stored on every returned record and printed
            (upper-cased) as the section heading.
        weights_dir: Directory to search. When omitted, the module-level
            ``model_weights_dir`` is used.

    Returns:
        A ``(found_weights, missing_weights)`` tuple of lists of dicts. Found
        records carry ``model_id``, ``seed``, ``weight_file``, ``weight_path``
        and ``category``; missing records carry ``model_id``, ``seed``,
        ``expected_file`` and ``category``.
    """
    if weights_dir is None:
        weights_dir = model_weights_dir

    found_weights = []
    missing_weights = []

    print(f"\n{category_name.upper()}:")
    print("-" * 50)

    for model_id in model_ids:
        parts = model_id.split('_')
        if len(parts) >= 4:
            seed = parts[1]
            timestamp = parts[2] + '_' + parts[3]
            weight_filename = f"model_{seed}_{timestamp}.pth"
        else:
            # IDs that do not follow the seed/date/time pattern would raise
            # IndexError above; look for a file named after the ID itself so
            # such models are reported as missing instead of aborting the run.
            seed = parts[1] if len(parts) > 1 else None
            weight_filename = f"{model_id}.pth"

        weight_filepath = os.path.join(weights_dir, weight_filename)

        if os.path.exists(weight_filepath):
            found_weights.append({
                'model_id': model_id,
                'seed': seed,
                'weight_file': weight_filename,
                'weight_path': weight_filepath,
                'category': category_name
            })
            print(f"✓ Found: {weight_filename}")
        else:
            missing_weights.append({
                'model_id': model_id,
                'seed': seed,
                'expected_file': weight_filename,
                'category': category_name
            })
            print(f"✗ Missing: {weight_filename}")

    return found_weights, missing_weights

# Find weights for both selections
top_n_weights, top_n_missing = find_weights(top_n_models, f"top{n_top}")
median_weights, median_missing = find_weights([median_model_id], "median")

all_found_weights = top_n_weights + median_weights
all_missing_weights = top_n_missing + median_missing

# The median model can also be one of the top N, in which case it appears in
# both lists above. Count distinct models so the numerator is comparable with
# the de-duplicated all_selected_models (otherwise this could read "4/3").
found_model_ids = {w['model_id'] for w in all_found_weights}
print(f"\nSUMMARY: Found {len(found_model_ids)}/{len(all_selected_models)} weight files")

# Copy weight files into a local folder under descriptive names
output_folder = 'selected_models'
os.makedirs(output_folder, exist_ok=True)

print(f"\nCopying weight files to {output_folder}/")
print("-" * 50)

top_category = f"top{n_top}"
for weight_info in all_found_weights:
    src_path = weight_info['weight_path']

    if weight_info['category'] == top_category:
        # Rank within the top-N list (1 = best selection score)
        model_rank = top_n_models.index(weight_info['model_id']) + 1
        new_filename = f"rank{model_rank}_top{n_top}_{runtyp}_{weight_info['seed']}.pth"
    else:  # median
        new_filename = f"median_rank{median_rank+1}_{runtyp}_{weight_info['seed']}.pth"

    dst_path = os.path.join(output_folder, new_filename)
    shutil.copy2(src_path, dst_path)
    print(f"✓ Copied: {weight_info['weight_file']} -> {new_filename}")
    # Remember the new name so it is written to the detailed-info CSV
    weight_info['renamed_file'] = new_filename

# Write one CSV row per copied weight file: the weight record plus its scores
if all_found_weights:
    score_columns = [
        'train_rmse',
        'val_rmse',
        'test_rmse',
        'generalization_gap',
        'selection_score',
        'overall_rmse',
        'overall_r2',
    ]

    detailed_info = []
    for weight in all_found_weights:
        # Score row of the model this weight file belongs to
        score_info = scores_df.loc[scores_df['model_id'] == weight['model_id']].iloc[0]
        detailed_info.append({
            **weight,
            **{column: score_info[column] for column in score_columns},
        })

    weight_info_path = os.path.join(output_folder, 'selected_models_detailed_info.csv')
    detailed_df = pd.DataFrame(detailed_info)
    detailed_df.to_csv(weight_info_path, index=False)
    print(f"\nDetailed model info saved to: {weight_info_path}")

# Build one wide row (all three dataset splits side by side) per selected
# model whose weight file was found, then write the table to CSV
results_wide = []
models_with_weights = {w['model_id'] for w in all_found_weights}

for model_id in all_selected_models:
    if model_id not in models_with_weights:
        continue

    model_data = combined_df[combined_df['model_id'] == model_id]
    score_data = scores_df[scores_df['model_id'] == model_id].iloc[0]

    # A model that is both top-N and median is reported under its top-N rank
    if model_id in top_n_models:
        category, rank_in_category = f"top{n_top}", top_n_models.index(model_id) + 1
    else:
        category, rank_in_category = "median", median_rank + 1

    dataset_labels = model_data['Dataset']
    train_data = model_data[dataset_labels == 'Training']
    val_data = model_data[dataset_labels == 'Validation']
    test_data = model_data[dataset_labels == 'Test']

    row = dict(
        category=category,
        rank_in_category=rank_in_category,
        model_id=model_id,
        seed=model_data['seed'].iloc[0],
        training_r2=train_data['R2'].iloc[0],
        training_rmse=train_data['RMSE'].iloc[0],
        val_r2=val_data['R2'].iloc[0],
        val_rmse=val_data['RMSE'].iloc[0],
        test_r2=test_data['R2'].iloc[0],
        test_rmse=test_data['RMSE'].iloc[0],
        generalization_gap=score_data['generalization_gap'],
        selection_score=score_data['selection_score'],
        overall_rmse=score_data['overall_rmse'],
        overall_r2=score_data['overall_r2'],
        filename=model_data['filename'].iloc[0],
    )
    results_wide.append(row)

if results_wide:
    results_path = os.path.join(output_folder, 'selected_models_performance.csv')
    results_wide_df = pd.DataFrame(results_wide).sort_values(['category', 'rank_in_category'])
    results_wide_df.to_csv(results_path, index=False)
    print(f"Performance results saved to: {results_path}")

banner = "=" * 80
print("\n" + banner)
print(f"ALL FILES SAVED IN FOLDER: {output_folder}/")
print(banner)

# Final comparison of the selected models against the whole population
print("\nSELECTED MODELS vs ALL MODELS COMPARISON:")
print("-" * 50)

# Population means over every scored model
all_models_selection_mean = scores_df['selection_score'].mean()
all_models_overall_mean = scores_df['overall_rmse'].mean()

# Means over the top-N subset
top_n_avg_selection = top_scores['selection_score'].mean()
top_n_avg_overall = top_scores['overall_rmse'].mean()

median_selection_score = median_model_info['selection_score']

print("\n".join([
    f"All {total_models} models avg selection score: {all_models_selection_mean:.6f}",
    f"All {total_models} models avg overall RMSE: {all_models_overall_mean:.6f}",
    f"Top {n_top} avg selection score: {top_n_avg_selection:.6f}",
    f"Top {n_top} avg overall RMSE: {top_n_avg_overall:.6f}",
    f"Median selection score: {median_selection_score:.6f}",
    f"Median overall RMSE: {median_model_info['overall_rmse']:.6f}",
]))

# Relative differences in percent of the population mean. The top-N figure is
# positive when the top N score lower (better) than average; the median figure
# is signed, positive meaning the median model scores above average.
print("\nPerformance gaps from population mean:")
top_n_selection_gap = ((all_models_selection_mean - top_n_avg_selection) / all_models_selection_mean) * 100
median_selection_gap = ((median_selection_score - all_models_selection_mean) / all_models_selection_mean) * 100

print(f"Top {n_top} selection score improvement: {top_n_selection_gap:.1f}%")
print(f"Median vs population selection score: {median_selection_gap:+.1f}%")

print("\nStrategy comparison:")
print(f"- Top {n_top}: Optimized for validation performance + low overfitting")
print("- Median: Represents typical train+val performance")
print("- Population stats show overall model quality and variability")

Total models analyzed: 60
Selection Strategy:
  - Top 3: Validation RMSE + 1.0 × |Train RMSE - Val RMSE|
  - Median: Overall RMSE = (Train RMSE + Val RMSE) / 2
TOP 3 MODELS (by Selection Score):
1. model_260_20250910_195252 (seed 260):
   Selection Score: 0.142637
   Validation RMSE: 0.130501
   Generalization Gap: 0.012136
   Overall RMSE (train+val): 0.124433
   Test RMSE: 0.110151

2. model_1201_20250910_205611 (seed 1201):
   Selection Score: 0.147257
   Validation RMSE: 0.140671
   Generalization Gap: 0.006585
   Overall RMSE (train+val): 0.137379
   Test RMSE: 0.128202

3. model_212_20250910_193823 (seed 212):
   Selection Score: 0.147458
   Validation RMSE: 0.137788
   Generalization Gap: 0.009670
   Overall RMSE (train+val): 0.132954
   Test RMSE: 0.105959

MEDIAN MODEL (by Overall RMSE - Rank 31/60):
Model: model_206_20250910_221942 (seed 206)
Overall RMSE (train+val): 0.142169
Overall R² (train+val): 0.892392
Train RMSE: 0.129442
Validation RMSE: 0.154896
Test RMSE: 0.104712
