In [1]:

# Cell 1 - Setup and Data Loading
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.metrics import precision_recall_curve, average_precision_score, f1_score
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN
import warnings
warnings.filterwarnings('ignore')

print("🤖 XO Project - Model Training & Evaluation Phase")
print("="*65)
print("Objective: Train robust ML models for exoplanet habitability prediction")
print("="*65)

# Load optimized dataset from Phase 4
df_ml = pd.read_csv('../data/processed/ml_optimized_dataset.csv')
print(f"Loaded ML-optimized dataset: {len(df_ml):,} planets, {len(df_ml.columns)} features")

# Display dataset info
print(f"\nDataset overview:")
print(f"Shape: {df_ml.shape}")
print(f"Target variable: 'ml_target'")

# Check class distribution
target_counts = df_ml['ml_target'].value_counts()
print(f"\nClass distribution:")
print(f"Not Habitable (0): {target_counts[0]:,} planets ({target_counts[0]/len(df_ml)*100:.1f}%)")
print(f"Habitable (1):     {target_counts[1]:,} planets ({target_counts[1]/len(df_ml)*100:.1f}%)")
print(f"Class ratio: {target_counts[0]/target_counts[1]:.1f}:1")

🤖 XO Project - Model Training & Evaluation Phase
Objective: Train robust ML models for exoplanet habitability prediction
Loaded ML-optimized dataset: 1,729 planets, 22 features

Dataset overview:
Shape: (1729, 22)
Target variable: 'ml_target'

Class distribution:
Not Habitable (0): 1,319 planets (76.3%)
Habitable (1):     410 planets (23.7%)
Class ratio: 3.2:1


In [2]:
# Cell 2 - Feature Selection and Data Preparation
print("\n🔧 Feature Engineering and Data Preparation")
print("="*50)

# Define feature columns (excluding target and metadata)
feature_columns = [
    'pl_rade', 'pl_bmasse', 'pl_orbsmax', 'st_teff', 'st_mass', 'pl_eqt',
    'stellar_luminosity', 'hz_position', 'in_habitable_zone',
    'esi_radius', 'esi_mass', 'esi_temperature', 'esi_surface',
    'escape_velocity_ratio', 'stellar_flux', 'habitability_score'
]

# Filter to available features
available_features = [col for col in feature_columns if col in df_ml.columns]
print(f"Available features: {len(available_features)}")

# Analyze missing values
print(f"\nMissing value analysis:")
missing_summary = df_ml[available_features].isnull().sum()
missing_features = missing_summary[missing_summary > 0]

if len(missing_features) > 0:
    print("Features with missing values:")
    for feature, count in missing_features.items():
        percentage = (count / len(df_ml)) * 100
        print(f"  {feature:20} | {count:4,} missing ({percentage:5.1f}%)")
else:
    print("✅ No missing values detected")

# Create feature matrix and target vector
X = df_ml[available_features].copy()
y = df_ml['ml_target'].copy()

print(f"\nFeature matrix shape: {X.shape}")
print(f"Target vector shape: {y.shape}")


🔧 Feature Engineering and Data Preparation
Available features: 16

Missing value analysis:
Features with missing values:
  pl_bmasse            | 1,427 missing ( 82.5%)
  pl_eqt               | 1,361 missing ( 78.7%)
  esi_mass             | 1,427 missing ( 82.5%)
  esi_temperature      | 1,361 missing ( 78.7%)
  esi_surface          | 1,361 missing ( 78.7%)
  escape_velocity_ratio | 1,427 missing ( 82.5%)
  stellar_flux         |  265 missing ( 15.3%)

Feature matrix shape: (1729, 16)
Target vector shape: (1729,)


In [3]:
# Cell 3 - Missing Value Imputation Strategy
print("\n🔬 Missing Value Imputation")
print("="*40)

# Strategy: Different imputation for different feature types
def impute_features(X):
    """Smart imputation based on feature characteristics"""
    X_imputed = X.copy()
    
    # Physics-based imputation
    imputation_strategy = {
        # Use median for astronomical measurements (robust to outliers)
        'pl_bmasse': 'median',
        'pl_eqt': 'median', 
        'esi_mass': 'median',
        'esi_temperature': 'median',
        'esi_surface': 'median',
        'escape_velocity_ratio': 'median',
        'stellar_flux': 'median'
    }
    
    for feature, strategy in imputation_strategy.items():
        if feature in X_imputed.columns:
            if X_imputed[feature].isnull().sum() > 0:
                imputer = SimpleImputer(strategy=strategy)
                X_imputed[feature] = imputer.fit_transform(X_imputed[[feature]]).ravel()
                missing_count = X[feature].isnull().sum()
                print(f"✅ Imputed {missing_count:,} values in {feature} using {strategy}")
    
    return X_imputed

# Apply imputation
X_imputed = impute_features(X)

# Verify no missing values remain
remaining_missing = X_imputed.isnull().sum().sum()
print(f"\nRemaining missing values: {remaining_missing}")

if remaining_missing == 0:
    print("✅ All missing values successfully imputed")
else:
    print("⚠️ Some missing values remain - check imputation strategy")


🔬 Missing Value Imputation
✅ Imputed 1,427 values in pl_bmasse using median
✅ Imputed 1,361 values in pl_eqt using median
✅ Imputed 1,427 values in esi_mass using median
✅ Imputed 1,361 values in esi_temperature using median
✅ Imputed 1,361 values in esi_surface using median
✅ Imputed 1,427 values in escape_velocity_ratio using median
✅ Imputed 265 values in stellar_flux using median

Remaining missing values: 0
✅ All missing values successfully imputed


In [4]:
# Cell 4 - Train-Test Split and Scaling
print("\n🎯 Train-Test Split and Feature Scaling")
print("="*45)

# Stratified split to maintain class distribution
X_train, X_test, y_train, y_test = train_test_split(
    X_imputed, y, 
    test_size=0.2, 
    random_state=42, 
    stratify=y
)

print(f"Training set: {X_train.shape[0]:,} samples")
print(f"Test set:     {X_test.shape[0]:,} samples")

# Check class distribution in splits
train_dist = y_train.value_counts()
test_dist = y_test.value_counts()

print(f"\nTraining set distribution:")
print(f"  Not Habitable: {train_dist[0]:,} ({train_dist[0]/len(y_train)*100:.1f}%)")
print(f"  Habitable:     {train_dist[1]:,} ({train_dist[1]/len(y_train)*100:.1f}%)")

print(f"\nTest set distribution:")
print(f"  Not Habitable: {test_dist[0]:,} ({test_dist[0]/len(y_test)*100:.1f}%)")
print(f"  Habitable:     {test_dist[1]:,} ({test_dist[1]/len(y_test)*100:.1f}%)")

# Feature scaling (important for SVM, Neural Networks, Logistic Regression)
scaler = RobustScaler()  # Robust to outliers (better for astronomical data)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"\n✅ Features scaled using RobustScaler")
print(f"   - Training features: {X_train_scaled.shape}")
print(f"   - Test features: {X_test_scaled.shape}")


🎯 Train-Test Split and Feature Scaling
Training set: 1,383 samples
Test set:     346 samples

Training set distribution:
  Not Habitable: 1,055 (76.3%)
  Habitable:     328 (23.7%)

Test set distribution:
  Not Habitable: 264 (76.3%)
  Habitable:     82 (23.7%)

✅ Features scaled using RobustScaler
   - Training features: (1383, 16)
   - Test features: (346, 16)


In [5]:
# Cell 5 - Baseline Models Training
print("\n🎯 Baseline Model Training")
print("="*35)

# Define baseline models with appropriate parameters for imbalanced data
baseline_models = {
    'Logistic Regression': LogisticRegression(
        random_state=42, 
        class_weight='balanced',
        max_iter=1000
    ),
    'Random Forest': RandomForestClassifier(
        n_estimators=100,
        random_state=42,
        class_weight='balanced',
        n_jobs=-1
    ),
    'Gradient Boosting': GradientBoostingClassifier(
        n_estimators=100,
        random_state=42,
        learning_rate=0.1
    ),
    'SVM': SVC(
        random_state=42,
        class_weight='balanced',
        probability=True,
        kernel='rbf'
    )
}

# Train and evaluate baseline models
baseline_results = {}
cv_scores = {}

# Cross-validation setup
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

print("Training baseline models with 5-fold cross-validation...")

for name, model in baseline_models.items():
    print(f"\n🔥 Training {name}...")
    
    # Determine if model needs scaled features
    if name in ['Logistic Regression', 'SVM']:
        X_train_model = X_train_scaled
        X_test_model = X_test_scaled
    else:
        X_train_model = X_train
        X_test_model = X_test
    
    # Cross-validation scores
    cv_f1_scores = cross_val_score(model, X_train_model, y_train, cv=cv, scoring='f1')
    cv_roc_scores = cross_val_score(model, X_train_model, y_train, cv=cv, scoring='roc_auc')
    cv_precision_scores = cross_val_score(model, X_train_model, y_train, cv=cv, scoring='precision')
    cv_recall_scores = cross_val_score(model, X_train_model, y_train, cv=cv, scoring='recall')
    
    # Store CV results
    cv_scores[name] = {
        'F1': cv_f1_scores,
        'ROC-AUC': cv_roc_scores,
        'Precision': cv_precision_scores,
        'Recall': cv_recall_scores
    }
    
    # Train on full training set
    model.fit(X_train_model, y_train)
    
    # Predictions
    y_pred = model.predict(X_test_model)
    y_pred_proba = model.predict_proba(X_test_model)[:, 1]
    
    # Calculate metrics
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_proba)
    avg_precision = average_precision_score(y_test, y_pred_proba)
    
    baseline_results[name] = {
        'model': model,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba,
        'f1_score': f1,
        'roc_auc': roc_auc,
        'avg_precision': avg_precision,
        'cv_f1_mean': cv_f1_scores.mean(),
        'cv_f1_std': cv_f1_scores.std()
    }
    
    print(f"   F1 Score: {f1:.3f}")
    print(f"   ROC-AUC: {roc_auc:.3f}")
    print(f"   CV F1: {cv_f1_scores.mean():.3f} ± {cv_f1_scores.std():.3f}")


🎯 Baseline Model Training
Training baseline models with 5-fold cross-validation...

🔥 Training Logistic Regression...
   F1 Score: 0.777
   ROC-AUC: 0.950
   CV F1: 0.851 ± 0.030

🔥 Training Random Forest...
   F1 Score: 0.975
   ROC-AUC: 0.998
   CV F1: 0.963 ± 0.016

🔥 Training Gradient Boosting...
   F1 Score: 0.969
   ROC-AUC: 0.998
   CV F1: 0.968 ± 0.013

🔥 Training SVM...
   F1 Score: 0.422
   ROC-AUC: 0.770
   CV F1: 0.409 ± 0.012


In [6]:
# Cell 6 - Baseline Results Analysis
print("\n📊 Baseline Model Results Summary")
print("="*40)

# Create results DataFrame for easy comparison
results_df = pd.DataFrame({
    'Model': list(baseline_results.keys()),
    'F1_Score': [results['f1_score'] for results in baseline_results.values()],
    'ROC_AUC': [results['roc_auc'] for results in baseline_results.values()],
    'Avg_Precision': [results['avg_precision'] for results in baseline_results.values()],
    'CV_F1_Mean': [results['cv_f1_mean'] for results in baseline_results.values()],
    'CV_F1_Std': [results['cv_f1_std'] for results in baseline_results.values()]
})

# Sort by F1 score
results_df = results_df.sort_values('F1_Score', ascending=False)

print("Baseline Model Performance:")
print(results_df.round(3))

# Identify best performing model
best_model_name = results_df.iloc[0]['Model']
best_f1 = results_df.iloc[0]['F1_Score']

print(f"\n🏆 Best baseline model: {best_model_name}")
print(f"   F1 Score: {best_f1:.3f}")


📊 Baseline Model Results Summary
Baseline Model Performance:
                 Model  F1_Score  ROC_AUC  Avg_Precision  CV_F1_Mean  \
1        Random Forest     0.975    0.998          0.994       0.963   
2    Gradient Boosting     0.969    0.998          0.995       0.968   
0  Logistic Regression     0.777    0.950          0.795       0.851   
3                  SVM     0.422    0.770          0.471       0.409   

   CV_F1_Std  
1      0.016  
2      0.013  
0      0.030  
3      0.012  

🏆 Best baseline model: Random Forest
   F1 Score: 0.975


In [7]:
# Cell 7 - Advanced Sampling Techniques
print("\n⚖️ Advanced Sampling for Class Imbalance")
print("="*45)

# Test different sampling strategies
sampling_strategies = {
    'SMOTE': SMOTE(random_state=42),
    'ADASYN': ADASYN(random_state=42),
    'SMOTEENN': SMOTEENN(random_state=42),
    'Random_Undersample': RandomUnderSampler(random_state=42)
}

sampling_results = {}

print("Testing sampling strategies with Random Forest (best tree-based model)...")

for strategy_name, sampler in sampling_strategies.items():
    print(f"\n🔄 Testing {strategy_name}...")
    
    try:
        # Apply sampling
        X_resampled, y_resampled = sampler.fit_resample(X_train, y_train)
        
        print(f"   Original: {len(X_train):,} samples")
        print(f"   Resampled: {len(X_resampled):,} samples")
        
        # Check new class distribution
        resampled_dist = pd.Series(y_resampled).value_counts()
        print(f"   New ratio: {resampled_dist[0]/resampled_dist[1]:.1f}:1")
        
        # Train Random Forest on resampled data
        rf_sampled = RandomForestClassifier(
            n_estimators=100, 
            random_state=42, 
            n_jobs=-1
        )
        rf_sampled.fit(X_resampled, y_resampled)
        
        # Evaluate on original test set
        y_pred_sampled = rf_sampled.predict(X_test)
        y_pred_proba_sampled = rf_sampled.predict_proba(X_test)[:, 1]
        
        f1_sampled = f1_score(y_test, y_pred_sampled)
        roc_auc_sampled = roc_auc_score(y_test, y_pred_proba_sampled)
        
        sampling_results[strategy_name] = {
            'f1_score': f1_sampled,
            'roc_auc': roc_auc_sampled,
            'model': rf_sampled,
            'y_pred': y_pred_sampled,
            'y_pred_proba': y_pred_proba_sampled
        }
        
        print(f"   F1 Score: {f1_sampled:.3f}")
        print(f"   ROC-AUC: {roc_auc_sampled:.3f}")
        
    except Exception as e:
        print(f"   ❌ Failed: {str(e)}")

# Compare sampling strategies
if sampling_results:
    print(f"\n📈 Sampling Strategy Comparison:")
    for strategy, results in sampling_results.items():
        print(f"{strategy:15} | F1: {results['f1_score']:.3f} | ROC-AUC: {results['roc_auc']:.3f}")


⚖️ Advanced Sampling for Class Imbalance
Testing sampling strategies with Random Forest (best tree-based model)...

🔄 Testing SMOTE...
   Original: 1,383 samples
   Resampled: 2,110 samples
   New ratio: 1.0:1
   F1 Score: 0.970
   ROC-AUC: 0.999

🔄 Testing ADASYN...
   Original: 1,383 samples
   Resampled: 2,126 samples
   New ratio: 1.0:1
   F1 Score: 0.969
   ROC-AUC: 0.998

🔄 Testing SMOTEENN...
   Original: 1,383 samples
   Resampled: 1,078 samples
   New ratio: 0.9:1
   F1 Score: 0.963
   ROC-AUC: 0.998

🔄 Testing Random_Undersample...
   Original: 1,383 samples
   Resampled: 656 samples
   New ratio: 1.0:1
   F1 Score: 0.910
   ROC-AUC: 0.995

📈 Sampling Strategy Comparison:
SMOTE           | F1: 0.970 | ROC-AUC: 0.999
ADASYN          | F1: 0.969 | ROC-AUC: 0.998
SMOTEENN        | F1: 0.963 | ROC-AUC: 0.998
Random_Undersample | F1: 0.910 | ROC-AUC: 0.995


In [8]:
# Cell 8 - Hyperparameter Tuning for Best Models
print("\n🎛️ Hyperparameter Tuning")
print("="*30)

# Focus on top 2 performing baseline models for tuning
top_models = results_df.head(2)['Model'].tolist()

tuning_results = {}

for model_name in top_models:
    print(f"\n🔧 Tuning {model_name}...")
    
    if model_name == 'Random Forest':
        param_grid = {
            'n_estimators': [100, 200, 300],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'class_weight': ['balanced', 'balanced_subsample']
        }
        base_model = RandomForestClassifier(random_state=42, n_jobs=-1)
        X_model = X_train
        
    elif model_name == 'Gradient Boosting':
        param_grid = {
            'n_estimators': [100, 200],
            'learning_rate': [0.05, 0.1, 0.2],
            'max_depth': [3, 5, 7],
            'subsample': [0.8, 0.9, 1.0]
        }
        base_model = GradientBoostingClassifier(random_state=42)
        X_model = X_train
        
    elif model_name == 'Logistic Regression':
        param_grid = {
            'C': [0.1, 1.0, 10.0],
            'penalty': ['l1', 'l2'],
            'solver': ['liblinear', 'saga'],
            'class_weight': ['balanced', None]
        }
        base_model = LogisticRegression(random_state=42, max_iter=1000)
        X_model = X_train_scaled
        
    elif model_name == 'SVM':
        param_grid = {
            'C': [0.1, 1.0, 10.0],
            'kernel': ['rbf', 'poly'],
            'gamma': ['scale', 'auto'],
            'class_weight': ['balanced', None]
        }
        base_model = SVC(random_state=42, probability=True)
        X_model = X_train_scaled
    
    # Grid search with cross-validation
    grid_search = GridSearchCV(
        base_model,
        param_grid,
        cv=cv,
        scoring='f1',
        n_jobs=-1,
        verbose=0
    )
    
    grid_search.fit(X_model, y_train)
    
    # Get best model
    best_model = grid_search.best_estimator_
    
    # Evaluate on test set
    if model_name in ['Logistic Regression', 'SVM']:
        y_pred_tuned = best_model.predict(X_test_scaled)
        y_pred_proba_tuned = best_model.predict_proba(X_test_scaled)[:, 1]
    else:
        y_pred_tuned = best_model.predict(X_test)
        y_pred_proba_tuned = best_model.predict_proba(X_test)[:, 1]
    
    f1_tuned = f1_score(y_test, y_pred_tuned)
    roc_auc_tuned = roc_auc_score(y_test, y_pred_proba_tuned)
    
    tuning_results[model_name] = {
        'best_model': best_model,
        'best_params': grid_search.best_params_,
        'best_cv_score': grid_search.best_score_,
        'f1_score': f1_tuned,
        'roc_auc': roc_auc_tuned,
        'y_pred': y_pred_tuned,
        'y_pred_proba': y_pred_proba_tuned
    }
    
    print(f"   Best CV F1: {grid_search.best_score_:.3f}")
    print(f"   Test F1: {f1_tuned:.3f}")
    print(f"   Test ROC-AUC: {roc_auc_tuned:.3f}")
    print(f"   Best params: {grid_search.best_params_}")


🎛️ Hyperparameter Tuning

🔧 Tuning Random Forest...
   Best CV F1: 0.967
   Test F1: 0.964
   Test ROC-AUC: 0.998
   Best params: {'class_weight': 'balanced', 'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}

🔧 Tuning Gradient Boosting...
   Best CV F1: 0.974
   Test F1: 0.969
   Test ROC-AUC: 0.992
   Best params: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 200, 'subsample': 1.0}


In [9]:
# Cell 9 - Ensemble Model Creation
print("\n🎭 Ensemble Model Creation")
print("="*30)

# Create ensemble of best performing models
ensemble_models = []
ensemble_names = []

# Add tuned models to ensemble
for name, results in tuning_results.items():
    ensemble_models.append((name.lower().replace(' ', '_'), results['best_model']))
    ensemble_names.append(name)

# Create voting classifier
if len(ensemble_models) >= 2:
    voting_classifier = VotingClassifier(
        estimators=ensemble_models,
        voting='soft'  # Use probability averages
    )
    
    print(f"Created ensemble with {len(ensemble_models)} models:")
    for name in ensemble_names:
        print(f"  - {name}")
    
    # Train ensemble
    print(f"\n🔥 Training ensemble model...")
    
    # Need to determine which features to use (scaled vs unscaled)
    # Use original features for tree-based, scaled for others
    voting_classifier.fit(X_train, y_train)
    
    # Evaluate ensemble
    y_pred_ensemble = voting_classifier.predict(X_test)
    y_pred_proba_ensemble = voting_classifier.predict_proba(X_test)[:, 1]
    
    f1_ensemble = f1_score(y_test, y_pred_ensemble)
    roc_auc_ensemble = roc_auc_score(y_test, y_pred_proba_ensemble)
    
    print(f"   Ensemble F1: {f1_ensemble:.3f}")
    print(f"   Ensemble ROC-AUC: {roc_auc_ensemble:.3f}")
    
    # Add ensemble to results
    tuning_results['Ensemble'] = {
        'best_model': voting_classifier,
        'f1_score': f1_ensemble,
        'roc_auc': roc_auc_ensemble,
        'y_pred': y_pred_ensemble,
        'y_pred_proba': y_pred_proba_ensemble
    }

else:
    print("⚠️ Not enough models for ensemble (need at least 2)")

print(f"\n🎯 Model Training Complete!")
print(f"Ready for evaluation and interpretability analysis...")


🎭 Ensemble Model Creation
Created ensemble with 2 models:
  - Random Forest
  - Gradient Boosting

🔥 Training ensemble model...
   Ensemble F1: 0.969
   Ensemble ROC-AUC: 0.998

🎯 Model Training Complete!
Ready for evaluation and interpretability analysis...


In [10]:
# Cell 10 - Model Evaluation Summary
print("\n📊 FINAL MODEL PERFORMANCE SUMMARY")
print("="*50)

# Compile all results for comparison
all_results = {}

# Add baseline results
for name, results in baseline_results.items():
    all_results[f"Baseline_{name}"] = {
        'F1': results['f1_score'],
        'ROC_AUC': results['roc_auc'],
        'Type': 'Baseline'
    }

# Add tuned results
for name, results in tuning_results.items():
    all_results[f"Tuned_{name}"] = {
        'F1': results['f1_score'],
        'ROC_AUC': results['roc_auc'],
        'Type': 'Tuned'
    }

# Add best sampling result if available
if sampling_results:
    best_sampling = max(sampling_results.items(), key=lambda x: x[1]['f1_score'])
    all_results[f"Sampled_{best_sampling[0]}"] = {
        'F1': best_sampling[1]['f1_score'],
        'ROC_AUC': best_sampling[1]['roc_auc'],
        'Type': 'Sampled'
    }

# Create comprehensive results DataFrame
final_results_df = pd.DataFrame.from_dict(all_results, orient='index')
final_results_df = final_results_df.sort_values('F1', ascending=False)

print("🏆 COMPREHENSIVE MODEL RANKING:")
print(final_results_df.round(3))

# Identify overall best model
best_overall = final_results_df.index[0]
best_f1_final = final_results_df.iloc[0]['F1']
best_roc_final = final_results_df.iloc[0]['ROC_AUC']

print(f"\n🥇 CHAMPION MODEL: {best_overall}")
print(f"   F1 Score: {best_f1_final:.3f}")
print(f"   ROC-AUC:  {best_roc_final:.3f}")

# Success criteria check
print(f"\n✅ SUCCESS CRITERIA CHECK:")
success_criteria = {
    'F1 Score > 0.65': best_f1_final > 0.65,
    'ROC-AUC > 0.85': best_roc_final > 0.85,
    'Multiple Models Trained': len(all_results) >= 5
}

for criterion, passed in success_criteria.items():
    status = "✅ PASS" if passed else "❌ NEEDS IMPROVEMENT"
    print(f"   {criterion}: {status}")

print(f"\n🚀 PHASE 5 COMPLETE!")
print("="*30)
print("Next steps:")
print("1. Proceed to 06_model_evaluation.ipynb for detailed analysis")
print("2. SHAP interpretability analysis")
print("3. Feature importance and physics validation")
print("4. Error analysis and model insights")


📊 FINAL MODEL PERFORMANCE SUMMARY
🏆 COMPREHENSIVE MODEL RANKING:
                                 F1  ROC_AUC      Type
Baseline_Random Forest        0.975    0.998  Baseline
Sampled_SMOTE                 0.970    0.999   Sampled
Tuned_Gradient Boosting       0.969    0.992     Tuned
Baseline_Gradient Boosting    0.969    0.998  Baseline
Tuned_Ensemble                0.969    0.998     Tuned
Tuned_Random Forest           0.964    0.998     Tuned
Baseline_Logistic Regression  0.777    0.950  Baseline
Baseline_SVM                  0.422    0.770  Baseline

🥇 CHAMPION MODEL: Baseline_Random Forest
   F1 Score: 0.975
   ROC-AUC:  0.998

✅ SUCCESS CRITERIA CHECK:
   F1 Score > 0.65: ✅ PASS
   ROC-AUC > 0.85: ✅ PASS
   Multiple Models Trained: ✅ PASS

🚀 PHASE 5 COMPLETE!
Next steps:
1. Proceed to 06_model_evaluation.ipynb for detailed analysis
2. SHAP interpretability analysis
3. Feature importance and physics validation
4. Error analysis and model insights
