# Personalized VF Prediction Models - Interactive Analysis

This notebook provides an interactive interface for:
1. Running personalized model experiments
2. Analyzing results
3. Comparing with generalized models
4. Visualizing patient-specific patterns

## Setup

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import pickle
import json
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob

# Configure plotting
# NOTE(review): the 'seaborn-v0_8-*' style names are the ones shipped by
# matplotlib 3.6 and later; older releases name this style 'seaborn-darkgrid'.
# Confirm the installed matplotlib version if this line raises.
plt.style.use('seaborn-v0_8-darkgrid')
# Use seaborn's 'husl' palette as the default colour cycle; several plots
# below pass an explicit color= and are therefore unaffected.
sns.set_palette('husl')
# IPython magic: render figures inline in the notebook output.
%matplotlib inline

## 1. Run Personalized Model Experiment

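**Note:** Running the experiment may take 5-15 minutes depending on your system. Both options in the cell below are commented out: uncomment one of them to run the experiment, or leave the cell as is if result files already exist. Section 2 looks for them in `./personalized_models_output` by default.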

In [None]:
# Option 1: Run from command line (recommended)
# !python exp_personalized_models.py

# Option 2: Import and run (alternative)
# from exp_personalized_models import main
# main()

## 2. Load Results

In [None]:
def load_latest_results(results_dir='./personalized_models_output'):
    """Return the parsed contents of the newest personalized-results JSON file.

    Args:
        results_dir: Directory searched (non-recursively) for files matching
            ``personalized_results_*.json``.

    Returns:
        The object produced by ``json.load`` for the matching file with the
        greatest modification time.

    Raises:
        FileNotFoundError: If no file in ``results_dir`` matches the pattern.
    """
    pattern = os.path.join(results_dir, 'personalized_results_*.json')
    candidates = glob(pattern)

    if len(candidates) == 0:
        raise FileNotFoundError(f"No results found in {results_dir}")

    # "Latest" is decided by file modification time, not by the file name.
    newest_path = max(candidates, key=os.path.getmtime)

    with open(newest_path, 'r') as handle:
        payload = json.load(handle)

    print(f"Loaded: {newest_path}")
    return payload

# Read the newest results file from the default output directory
results = load_latest_results()

# Report how many patients produced a model and how many did not
successful_patients = results['successful_patients']
print(f"\nSuccessful patients: {len(successful_patients)}")
failed_patients = results['failed_patients']
print(f"Failed patients: {len(failed_patients)}")

## 3. Create Results DataFrame

In [None]:
def extract_metrics_df(results):
    """Flatten the per-patient results into one DataFrame row per patient.

    Args:
        results: Mapping with a ``'successful_patients'`` sequence of patient
            ids and a ``'patient_results'`` mapping keyed by those ids. Each
            entry holds a ``'metrics'`` mapping plus the split-size fields
            copied below.

    Returns:
        pandas.DataFrame with ``patient_id``, ``accuracy`` and the six
        split-size columns. The ROC-AUC / precision / recall / F1 columns are
        filled only for patients whose metrics have a truthy
        ``'both_classes'`` entry; they are absent altogether when no patient
        has one.
    """
    # Fields copied verbatim from the patient entry itself.
    split_fields = (
        'train_size', 'test_size',
        'train_onset', 'train_normal',
        'test_onset', 'test_normal',
    )
    # Fields copied from the metrics mapping, only when both classes occurred.
    two_class_fields = (
        'roc_auc',
        'onset_precision', 'onset_recall', 'onset_f1',
        'normal_precision', 'normal_recall', 'normal_f1',
    )

    def build_row(pid):
        entry = results['patient_results'][pid]
        scores = entry['metrics']

        record = {'patient_id': pid, 'accuracy': scores['accuracy']}
        record.update({name: entry[name] for name in split_fields})

        if scores.get('both_classes', False):
            record.update({name: scores[name] for name in two_class_fields})
        return record

    return pd.DataFrame([build_row(pid) for pid in results['successful_patients']])

# Build the per-patient metrics table; `df` is the frame every later cell
# in this notebook reads from.
df = extract_metrics_df(results)
# Last expression of the cell, so the notebook displays the first rows.
df.head()

## 4. Summary Statistics

In [None]:
def _mean_pm_std(series, digits):
    """Format a column as 'mean ± std' with a fixed number of decimals."""
    return f"{series.mean():.{digits}f} ± {series.std():.{digits}f}"


print("PERFORMANCE SUMMARY")
print("=" * 60)

# Accuracy is available for every patient in the table
print("\nAccuracy:")
accuracy = df['accuracy']
print(f"  Mean:   {_mean_pm_std(accuracy, 4)}")
print(f"  Median: {accuracy.median():.4f}")
print(f"  Range:  {accuracy.min():.4f} - {accuracy.max():.4f}")

# These columns exist only when some patient's metrics included them
if 'roc_auc' in df.columns:
    roc_auc = df['roc_auc']
    print("\nROC-AUC (patients with both classes):")
    print(f"  Mean:   {_mean_pm_std(roc_auc, 4)}")
    print(f"  Median: {roc_auc.median():.4f}")

if 'onset_f1' in df.columns:
    onset_f1 = df['onset_f1']
    print("\nOnset Detection F1-Score:")
    print(f"  Mean:   {_mean_pm_std(onset_f1, 4)}")
    print(f"  Median: {onset_f1.median():.4f}")

# Size of the per-patient train/test splits
print("\nData Distribution:")
train_sizes = df['train_size']
test_sizes = df['test_size']
print(f"  Total segments: {train_sizes.sum() + test_sizes.sum()}")
print(f"  Avg train size: {_mean_pm_std(train_sizes, 1)}")
print(f"  Avg test size:  {_mean_pm_std(test_sizes, 1)}")

## 5. Visualization: Performance by Patient

In [None]:
def _bar_panel(ax, frame, column, color, ylabel, title):
    """Draw one bar per patient for `column` on `ax` and mark the column mean."""
    ax.bar(frame['patient_id'], frame[column], color=color, alpha=0.7)
    ax.axhline(frame[column].mean(), color='red', linestyle='--', label='Mean')
    ax.set_xlabel('Patient ID')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.tick_params(axis='x', rotation=45)
    ax.legend()
    ax.grid(axis='y', alpha=0.3)


fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Top-left: accuracy
_bar_panel(axes[0, 0], df, 'accuracy', 'steelblue', 'Accuracy', 'Accuracy by Patient')

# Top-right: ROC-AUC (panel stays empty when the column is absent)
if 'roc_auc' in df.columns:
    _bar_panel(axes[0, 1], df, 'roc_auc', 'coral', 'ROC-AUC', 'ROC-AUC by Patient')

# Bottom-left: train and test sizes as side-by-side bars per patient
x = np.arange(len(df))
width = 0.35
sizes_ax = axes[1, 0]
for offset, column, label in (
    (-width / 2, 'train_size', 'Train'),
    (width / 2, 'test_size', 'Test'),
):
    sizes_ax.bar(x + offset, df[column], width, label=label, alpha=0.7)
sizes_ax.set_xlabel('Patient ID')
sizes_ax.set_ylabel('Number of Segments')
sizes_ax.set_title('Train/Test Split Size by Patient')
sizes_ax.set_xticks(x)
sizes_ax.set_xticklabels(df['patient_id'], rotation=45)
sizes_ax.legend()
sizes_ax.grid(axis='y', alpha=0.3)

# Bottom-right: onset F1 (panel stays empty when the column is absent)
if 'onset_f1' in df.columns:
    _bar_panel(axes[1, 1], df, 'onset_f1', 'lightgreen', 'F1-Score',
               'Onset Detection F1-Score by Patient')

plt.tight_layout()
plt.show()

## 6. Distribution Analysis

In [None]:
def _hist_panel(ax, series, color, xlabel, title, drop_missing=False):
    """Histogram `series` on `ax` with dashed lines at its mean and median.

    With `drop_missing=True` the missing values are removed before binning;
    the mean and median are always taken from the series as passed in.
    """
    binned = series.dropna() if drop_missing else series
    ax.hist(binned, bins=15, color=color, alpha=0.7, edgecolor='black')
    ax.axvline(series.mean(), color='red', linestyle='--', linewidth=2, label='Mean')
    ax.axvline(series.median(), color='orange', linestyle='--', linewidth=2, label='Median')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Number of Patients')
    ax.set_title(title)
    ax.legend()
    ax.grid(axis='y', alpha=0.3)


fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left: accuracy across patients
_hist_panel(axes[0], df['accuracy'], 'steelblue', 'Accuracy',
            'Distribution of Accuracy Across Patients')

# Right: ROC-AUC, skipping patients that have no value for it
if 'roc_auc' in df.columns:
    _hist_panel(axes[1], df['roc_auc'], 'coral', 'ROC-AUC',
                'Distribution of ROC-AUC Across Patients', drop_missing=True)

plt.tight_layout()
plt.show()

## 7. Top and Bottom Performers

In [None]:
# Columns shown in both tables. 'roc_auc' and 'onset_f1' are only created by
# extract_metrics_df when some patient's metrics report both classes, so keep
# just the columns that actually exist instead of raising KeyError on a
# missing one (the other cells guard these columns the same way).
report_columns = [
    column
    for column in ['patient_id', 'accuracy', 'roc_auc', 'onset_f1', 'train_size', 'test_size']
    if column in df.columns
]

print("TOP 5 PERFORMERS (by Accuracy):")
print("=" * 80)
top5 = df.nlargest(5, 'accuracy')[report_columns]
print(top5.to_string(index=False))

print("\n\nBOTTOM 5 PERFORMERS (by Accuracy):")
print("=" * 80)
bottom5 = df.nsmallest(5, 'accuracy')[report_columns]
print(bottom5.to_string(index=False))

## 8. Correlation Analysis

In [None]:
def _scatter_vs_accuracy(ax, x_values, accuracy, xlabel, title):
    """Scatter `x_values` against accuracy on `ax` and annotate their correlation.

    Returns the coefficient from `Series.corr` that is shown in the annotation.
    """
    ax.scatter(x_values, accuracy, alpha=0.6, s=100)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Accuracy')
    ax.set_title(title)
    ax.grid(alpha=0.3)

    coefficient = x_values.corr(accuracy)
    # Annotation sits in the top-left corner, in axes-relative coordinates.
    ax.text(0.05, 0.95, f'Correlation: {coefficient:.3f}',
            transform=ax.transAxes, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    return coefficient


# How do data size and class balance relate to performance?
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left: training-set size vs accuracy
corr = _scatter_vs_accuracy(axes[0], df['train_size'], df['accuracy'],
                            'Training Set Size', 'Training Set Size vs Accuracy')

# Right: share of onset segments in the training set vs accuracy.
# The ratio is stored on `df`, so it is also part of any later export of `df`.
df['onset_ratio'] = df['train_onset'] / df['train_size']
corr2 = _scatter_vs_accuracy(axes[1], df['onset_ratio'], df['accuracy'],
                             'Onset Proportion in Training', 'Onset Proportion vs Accuracy')

plt.tight_layout()
plt.show()

## 9. Export Results Summary

In [None]:
# Write the per-patient table to CSV for use outside the notebook
output_file = 'personalized_models_summary.csv'
df.to_csv(output_file, index=False)
print(f"✓ Summary exported to: {output_file}")

# Collect cohort-level statistics; key order becomes the CSV column order
accuracy = df['accuracy']
summary_stats = {'Total Patients': len(df)}
summary_stats['Mean Accuracy'] = accuracy.mean()
summary_stats['Std Accuracy'] = accuracy.std()
summary_stats['Median Accuracy'] = accuracy.median()
summary_stats['Min Accuracy'] = accuracy.min()
summary_stats['Max Accuracy'] = accuracy.max()

# Two-class metrics are added only when the table has them
if 'roc_auc' in df.columns:
    roc_auc = df['roc_auc']
    summary_stats['Mean ROC-AUC'] = roc_auc.mean()
    summary_stats['Std ROC-AUC'] = roc_auc.std()
    summary_stats['Mean Onset F1'] = df['onset_f1'].mean()

# One-row CSV holding the statistics
summary_file = 'personalized_models_statistics.csv'
summary_df = pd.DataFrame([summary_stats])
summary_df.to_csv(summary_file, index=False)
print(f"✓ Statistics exported to: {summary_file}")

## 10. Next Steps

After reviewing these results, consider:

1. **Compare with generalized models**: Run `compare_personalized_vs_generalized.py`
2. **Analyze failure cases**: Why did some patients fail? Data quality? Class imbalance?
3. **Feature importance**: Which features are most predictive for each patient?
4. **Temporal validation**: Try chronological train/test splits instead of random
5. **Transfer learning**: Pre-train on all patients, then fine-tune on each individual patient

### Key Questions to Answer:

- Do personalized models outperform generalized models?
- Which patients benefit most from personalization?
- Is there sufficient data per patient for reliable models?
- What is the relationship between data size and performance?