# XGBOOST EXOPLANET CLASSIFICATION PIPELINE

In [9]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix, 
    f1_score, precision_score, recall_score
)
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime
import json
import os
from itertools import product

warnings.filterwarnings('ignore')

# CONFIGURATION GLOBALE

In [10]:
# Style des graphiques
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Chemins des fichiers
BASE_PATH = 'C:/Users/chaym/Desktop/NasaChallenge'
DATA_PATH = f'{BASE_PATH}/data/processed'
MODEL_PATH = f'{BASE_PATH}/models/xgboost_model'

# Noms des classes
CLASS_NAMES = {
    0: 'False Positive',
    1: 'Candidate',
    2: 'Confirmed'
}

print("=" * 80)
print("XGBOOST - HYPERPARAMETER TUNING + FINAL TRAINING")
print("=" * 80)

XGBOOST - HYPERPARAMETER TUNING + FINAL TRAINING


# VERIFICATION CUDA/GPU

In [11]:
def check_cuda_availability():
    """Vérifie la disponibilité de CUDA pour XGBoost"""
    print("\n" + "=" * 80)
    print("1. VÉRIFICATION CUDA")
    print("=" * 80)
    
    print(f"XGBoost version: {xgb.__version__}")
    build_info = xgb.build_info()
    
    if not build_info.get('USE_CUDA', False):
        print("⚠️  ATTENTION: XGBoost n'a pas été compilé avec CUDA")
        print("📌 Le modèle utilisera le CPU")
        device = 'cpu'
    else:
        print("✅ Support GPU: ACTIVÉ")
        device = 'cuda'
    
    return device

device = check_cuda_availability()


1. VÉRIFICATION CUDA
XGBoost version: 2.1.4
✅ Support GPU: ACTIVÉ


# Data Load

In [12]:
def load_datasets():
    """Charge les datasets train, validation et test"""
    print("\n" + "=" * 80)
    print("2. CHARGEMENT DES DONNÉES")
    print("=" * 80)
    
    # Chargement
    X_train = pd.read_csv(f'{DATA_PATH}/step6_X_train.csv')
    y_train = pd.read_csv(f'{DATA_PATH}/step6_y_train.csv').squeeze()
    
    X_validate = pd.read_csv(f'{DATA_PATH}/step6_X_val.csv')
    y_validate = pd.read_csv(f'{DATA_PATH}/step6_y_val.csv').squeeze()
    
    X_test = pd.read_csv(f'{DATA_PATH}/step6_X_test.csv')
    y_test = pd.read_csv(f'{DATA_PATH}/step6_y_test.csv').squeeze()
    
    # Affichage des dimensions
    print(f"\n📊 Dimensions des datasets:")
    print(f"   Train:      {X_train.shape[0]:>6} samples, {X_train.shape[1]:>3} features")
    print(f"   Validation: {X_validate.shape[0]:>6} samples, {X_validate.shape[1]:>3} features")
    print(f"   Test:       {X_test.shape[0]:>6} samples, {X_test.shape[1]:>3} features")
    
    # Distribution des classes
    print(f"\n📈 Distribution des classes (Train):")
    for cls in [0, 1, 2]:
        count = (y_train == cls).sum()
        pct = count / len(y_train) * 100
        print(f"   {CLASS_NAMES[cls]:20s}: {count:>5} ({pct:>5.2f}%)")
    
    return X_train, y_train, X_validate, y_validate, X_test, y_test

X_train, y_train, X_validate, y_validate, X_test, y_test = load_datasets()



2. CHARGEMENT DES DONNÉES

📊 Dimensions des datasets:
   Train:        6694 samples,  13 features
   Validation:    956 samples,  13 features
   Test:          957 samples,  13 features

📈 Distribution des classes (Train):
   False Positive      :  3387 (50.60%)
   Candidate           :  1385 (20.69%)
   Confirmed           :  1922 (28.71%)


# Features Verification


In [13]:
def verify_required_features(X):
    """Vérifie la présence des features critiques"""
    print("\n" + "=" * 80)
    print("3. VÉRIFICATION DES FEATURES CRITIQUES")
    print("=" * 80)
    
    required_features = [
        'koi_duration',
        'koi_duration_err1',
        'koi_depth',
        'koi_depth_err1',
        'koi_model_snr'
    ]
    
    print("\n🔍 Features requises:")
    all_present = True
    for feat in required_features:
        if feat in X.columns:
            print(f"   ✅ {feat}")
        else:
            print(f"   ❌ {feat} - MANQUANTE!")
            all_present = False
    
    if not all_present:
        raise ValueError("❌ Features critiques manquantes!")
    
    print("\n✅ Toutes les features requises sont présentes")

verify_required_features(X_train)


3. VÉRIFICATION DES FEATURES CRITIQUES

🔍 Features requises:
   ✅ koi_duration
   ✅ koi_duration_err1
   ✅ koi_depth
   ✅ koi_depth_err1
   ✅ koi_model_snr

✅ Toutes les features requises sont présentes


# FEATURE ENGINEERING

In [14]:
def engineer_transit_features(X):
    """
    Crée des features supplémentaires basées sur les transits planétaires
    
    Features créées:
    1. transit_depth_duration_ratio: Ratio profondeur/durée du transit
    2. snr_log: Log du SNR pour normalisation
    3. snr_squared: SNR au carré pour capturer les effets non-linéaires
    4. transit_detectability: Index de détectabilité combinant profondeur, SNR et durée
    """
    print("\n" + "=" * 80)
    print("4. FEATURE ENGINEERING")
    print("=" * 80)
    
    X_enhanced = X.copy()
    features_created = []
    
    # Feature 1: Ratio Profondeur/Durée
    if 'koi_depth' in X.columns and 'koi_duration' in X.columns:
        X_enhanced['transit_depth_duration_ratio'] = (
            X['koi_depth'] / (X['koi_duration'] + 1e-6)
        )
        features_created.append('transit_depth_duration_ratio')
        print("   ✅ transit_depth_duration_ratio")
    
    # Features 2-3: Transformations du SNR
    if 'koi_model_snr' in X.columns:
        X_enhanced['snr_log'] = np.log1p(X['koi_model_snr'])
        X_enhanced['snr_squared'] = X['koi_model_snr'] ** 2
        features_created.extend(['snr_log', 'snr_squared'])
        print("   ✅ snr_log")
        print("   ✅ snr_squared")
    
    # Feature 4: Transit Detectability Index
    if all(f in X.columns for f in ['koi_duration', 'koi_depth', 'koi_model_snr']):
        X_enhanced['transit_detectability'] = (
            X['koi_depth'] * X['koi_model_snr'] / (X['koi_duration'] + 1)
        )
        features_created.append('transit_detectability')
        print("   ✅ transit_detectability")
    
    print(f"\n📊 Features créées: {len(features_created)}")
    print(f"📊 Features totales: {X_enhanced.shape[1]} (original: {X.shape[1]})")
    
    return X_enhanced

# Application du feature engineering
X_train_enh = engineer_transit_features(X_train)
X_val_enh = engineer_transit_features(X_validate)
X_test_enh = engineer_transit_features(X_test)


4. FEATURE ENGINEERING
   ✅ transit_depth_duration_ratio
   ✅ snr_log
   ✅ snr_squared
   ✅ transit_detectability

📊 Features créées: 4
📊 Features totales: 17 (original: 13)

4. FEATURE ENGINEERING
   ✅ transit_depth_duration_ratio
   ✅ snr_log
   ✅ snr_squared
   ✅ transit_detectability

📊 Features créées: 4
📊 Features totales: 17 (original: 13)

4. FEATURE ENGINEERING
   ✅ transit_depth_duration_ratio
   ✅ snr_log
   ✅ snr_squared
   ✅ transit_detectability

📊 Features créées: 4
📊 Features totales: 17 (original: 13)


# CALCULATION OF CLASS WEIGHTS

In [15]:
def compute_balanced_class_weights(y):
    """
    Calcule les poids de classes pour gérer le déséquilibre
    Boost spécial pour la classe 1 (Candidate)
    """
    print("\n" + "=" * 80)
    print("5. CALCUL DES POIDS DE CLASSES")
    print("=" * 80)
    
    all_classes = np.array([0, 1, 2])
    auto_weights = compute_class_weight(
        class_weight='balanced',
        classes=all_classes,
        y=y
    )
    
    # Ajustement manuel des poids
    class_weights = {
        0: auto_weights[0] * 0.8,   # False Positive: légère réduction
        1: auto_weights[1] * 1.5,   # Candidate: BOOST +50%
        2: auto_weights[2] * 1.0    # Confirmed: poids standard
    }
    
    print("\n⚖️  Poids calculés:")
    for cls, weight in class_weights.items():
        boost = " [BOOST +50%]" if cls == 1 else ""
        print(f"   {CLASS_NAMES[cls]:20s}: {weight:.4f}x{boost}")
    
    return class_weights

class_weights = compute_balanced_class_weights(y_train)
sample_weights_train = np.array([class_weights[cls] for cls in y_train])


5. CALCUL DES POIDS DE CLASSES

⚖️  Poids calculés:
   False Positive      : 0.5270x
   Candidate           : 2.4166x [BOOST +50%]
   Confirmed           : 1.1609x


# HYPERPARAMETER TUNING (Train/Val)

In [16]:
def perform_hyperparameter_tuning(X_train, y_train, X_val, y_val, 
                                   sample_weights, device):
    """
    Effectue un Grid Search sur les hyperparamètres
    Utilise Train pour l'entraînement et Val pour la validation
    """
    print("\n" + "=" * 80)
    print("PHASE 1: HYPERPARAMETER TUNING")
    print("=" * 80)
    
    # Grille d'hyperparamètres
    param_grid = {
        'learning_rate': [0.01, 0.03, 0.05],
        'max_depth': [8, 10, 12],
        'min_child_weight': [20, 30, 40],
        'subsample': [0.7, 0.8, 0.9],
        'colsample_bytree': [0.7, 0.8, 0.9]
    }
    
    total_combinations = np.prod([len(v) for v in param_grid.values()])
    print(f"\n🔬 Nombre total de combinaisons: {total_combinations}")
    print("⏳ Test en cours...\n")
    
    # Préparation des datasets
    dtrain = xgb.DMatrix(X_train, label=y_train, weight=sample_weights)
    dval = xgb.DMatrix(X_val, label=y_val)
    
    best_score = float('inf')
    best_params = None
    results = []
    
    # Grid Search
    for lr, md, mcw, sub, col in product(
        param_grid['learning_rate'],
        param_grid['max_depth'],
        param_grid['min_child_weight'],
        param_grid['subsample'],
        param_grid['colsample_bytree']
    ):
        params = {
            'objective': 'multi:softprob',
            'num_class': 3,
            'eval_metric': 'mlogloss',
            'tree_method': 'hist' if device == 'cpu' else 'gpu_hist',
            'device': device,
            'learning_rate': lr,
            'max_depth': md,
            'min_child_weight': mcw,
            'subsample': sub,
            'colsample_bytree': col,
            'alpha': 0.1,
            'lambda': 0.1,
            'gamma': 0.001,
            'random_state': 42,
            'verbosity': 0
        }
        
        # Entraînement
        evals = [(dtrain, 'train'), (dval, 'valid')]
        model_temp = xgb.train(
            params,
            dtrain,
            num_boost_round=1000,
            evals=evals,
            early_stopping_rounds=50,
            verbose_eval=False
        )
        
        val_score = model_temp.best_score
        
        results.append({
            'learning_rate': lr,
            'max_depth': md,
            'min_child_weight': mcw,
            'subsample': sub,
            'colsample_bytree': col,
            'val_score': val_score,
            'best_iteration': model_temp.best_iteration
        })
        
        if val_score < best_score:
            best_score = val_score
            best_params = params.copy()
        
        print(f"lr={lr:.2f}, md={md}, mcw={mcw}, sub={sub:.1f}, "
              f"col={col:.1f} → val_loss={val_score:.6f}")
    
    # Affichage des meilleurs résultats
    print("\n" + "=" * 80)
    print("🏆 MEILLEURS HYPERPARAMÈTRES TROUVÉS")
    print("=" * 80)
    print(f"\n🎯 Meilleur score validation: {best_score:.6f}")
    print("\n📋 Hyperparamètres optimaux:")
    print(f"   learning_rate:     {best_params['learning_rate']}")
    print(f"   max_depth:         {best_params['max_depth']}")
    print(f"   min_child_weight:  {best_params['min_child_weight']}")
    print(f"   subsample:         {best_params['subsample']}")
    print(f"   colsample_bytree:  {best_params['colsample_bytree']}")
    
    # Sauvegarde des résultats
    results_df = pd.DataFrame(results).sort_values('val_score')
    os.makedirs(MODEL_PATH, exist_ok=True)
    results_df.to_csv(f'{MODEL_PATH}/hyperparameter_tuning_results.csv', 
                      index=False)
    print(f"\n💾 Résultats sauvegardés: hyperparameter_tuning_results.csv")
    
    return best_params, best_score

best_params, best_score = perform_hyperparameter_tuning(
    X_train_enh, y_train, X_val_enh, y_validate,
    sample_weights_train, device
)



PHASE 1: HYPERPARAMETER TUNING

🔬 Nombre total de combinaisons: 243
⏳ Test en cours...

lr=0.01, md=8, mcw=20, sub=0.7, col=0.7 → val_loss=0.523210
lr=0.01, md=8, mcw=20, sub=0.7, col=0.8 → val_loss=0.521443
lr=0.01, md=8, mcw=20, sub=0.7, col=0.9 → val_loss=0.520415
lr=0.01, md=8, mcw=20, sub=0.8, col=0.7 → val_loss=0.519092
lr=0.01, md=8, mcw=20, sub=0.8, col=0.8 → val_loss=0.518899
lr=0.01, md=8, mcw=20, sub=0.8, col=0.9 → val_loss=0.519555
lr=0.01, md=8, mcw=20, sub=0.9, col=0.7 → val_loss=0.518344
lr=0.01, md=8, mcw=20, sub=0.9, col=0.8 → val_loss=0.516290
lr=0.01, md=8, mcw=20, sub=0.9, col=0.9 → val_loss=0.517444
lr=0.01, md=8, mcw=30, sub=0.7, col=0.7 → val_loss=0.534345
lr=0.01, md=8, mcw=30, sub=0.7, col=0.8 → val_loss=0.533988
lr=0.01, md=8, mcw=30, sub=0.7, col=0.9 → val_loss=0.533704
lr=0.01, md=8, mcw=30, sub=0.8, col=0.7 → val_loss=0.530052
lr=0.01, md=8, mcw=30, sub=0.8, col=0.8 → val_loss=0.530225
lr=0.01, md=8, mcw=30, sub=0.8, col=0.9 → val_loss=0.530042
lr=0.01, md

# FINAL TRAINING (Train + Val)

In [17]:
def train_final_model(X_train, y_train, X_val, y_val, X_test, y_test,
                      best_params, class_weights, device):
    """
    Entraîne le modèle final en combinant Train et Val
    Évalue sur le Test Set
    """
    print("\n" + "=" * 80)
    print("PHASE 2: ENTRAÎNEMENT FINAL")
    print("=" * 80)
    
    # Combiner Train et Val
    X_train_val = pd.concat([X_train, X_val], axis=0)
    y_train_val = pd.concat([y_train, y_val], axis=0)
    
    # Recalculer les poids
    sample_weights_combined = np.array([class_weights[cls] for cls in y_train_val])
    
    print(f"\n📊 Dataset combiné (Train+Val): {X_train_val.shape[0]} samples")
    
    # Préparation des datasets
    dtrain_final = xgb.DMatrix(X_train_val, label=y_train_val, 
                                weight=sample_weights_combined)
    dtest_final = xgb.DMatrix(X_test, label=y_test)
    
    # Entraînement final
    print("\n⏳ Entraînement final en cours...\n")
    
    evals_final = [(dtrain_final, 'train'), (dtest_final, 'test')]
    
    model_final = xgb.train(
        best_params,
        dtrain_final,
        num_boost_round=3000,
        evals=evals_final,
        early_stopping_rounds=100,
        verbose_eval=100
    )
    
    print(f"\n✅ Entraînement terminé!")
    print(f"   Best Iteration: {model_final.best_iteration}")
    print(f"   Best Test Loss: {model_final.best_score:.6f}")
    
    return model_final, X_train_val, y_train_val

model_final, X_train_val, y_train_val = train_final_model(
    X_train_enh, y_train, X_val_enh, y_validate, X_test_enh, y_test,
    best_params, class_weights, device
)


PHASE 2: ENTRAÎNEMENT FINAL

📊 Dataset combiné (Train+Val): 7650 samples

⏳ Entraînement final en cours...

[0]	train-mlogloss:1.07581	test-mlogloss:1.07869
[100]	train-mlogloss:0.45049	test-mlogloss:0.61102
[200]	train-mlogloss:0.35567	test-mlogloss:0.57265
[300]	train-mlogloss:0.30371	test-mlogloss:0.55954
[400]	train-mlogloss:0.26578	test-mlogloss:0.55399
[500]	train-mlogloss:0.23610	test-mlogloss:0.55258
[589]	train-mlogloss:0.21410	test-mlogloss:0.55353

✅ Entraînement terminé!
   Best Iteration: 490
   Best Test Loss: 0.551759


# FINAL EVALUATION ON TEST SET

In [18]:
def evaluate_final_model(model, X_test, y_test):
    """Évalue le modèle final sur le Test Set"""
    print("\n" + "=" * 80)
    print("ÉVALUATION FINALE SUR TEST SET")
    print("=" * 80)
    
    # Préparation
    dtest = xgb.DMatrix(X_test, label=y_test)
    
    # Prédictions
    y_pred_proba = model.predict(dtest, 
                                  iteration_range=(0, model.best_iteration))
    y_pred = y_pred_proba.argmax(axis=1)
    
    # Métriques globales
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', 
                                zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', 
                         zero_division=0)
    f1_macro = f1_score(y_test, y_pred, average='macro', zero_division=0)
    f1_weighted = f1_score(y_test, y_pred, average='weighted', 
                          zero_division=0)
    
    print("\n📊 Performance Metrics:")
    print(f"   Accuracy:           {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"   Precision (Avg):    {precision:.4f}")
    print(f"   Recall (Avg):       {recall:.4f}")
    print(f"   F1-Score (Macro):   {f1_macro:.4f}")
    print(f"   F1-Score (Weighted):{f1_weighted:.4f}")
    
    print("\n📋 Classification Report:")
    print(classification_report(
        y_test, y_pred,
        target_names=['False Positive', 'Candidate', 'Confirmed'],
        digits=4,
        zero_division=0
    ))
    
    # Performance classe 1 (Candidate)
    class1_mask = y_test == 1
    if class1_mask.sum() > 0:
        class1_acc = accuracy_score(y_test[class1_mask], y_pred[class1_mask])
        print(f"\n🎯 PERFORMANCE CLASSE 1 (Candidate): "
              f"{class1_acc:.4f} ({class1_acc*100:.2f}%)")
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_macro': f1_macro,
        'f1_weighted': f1_weighted,
        'y_pred': y_pred
    }

metrics = evaluate_final_model(model_final, X_test_enh, y_test)


ÉVALUATION FINALE SUR TEST SET

📊 Performance Metrics:
   Accuracy:           0.7513 (75.13%)
   Precision (Avg):    0.7879
   Recall (Avg):       0.7513
   F1-Score (Macro):   0.7358
   F1-Score (Weighted):0.7610

📋 Classification Report:
                precision    recall  f1-score   support

False Positive     0.8878    0.7190    0.7945       484
     Candidate     0.4930    0.7121    0.5826       198
     Confirmed     0.8244    0.8364    0.8303       275

      accuracy                         0.7513       957
     macro avg     0.7350    0.7558    0.7358       957
  weighted avg     0.7879    0.7513    0.7610       957


🎯 PERFORMANCE CLASSE 1 (Candidate): 0.7121 (71.21%)


# VISUALISATIONS

In [19]:
def create_visualizations(model, y_test, y_pred):
    """Crée les visualisations du modèle"""
    print("\n" + "=" * 80)
    print("9. VISUALISATIONS")
    print("=" * 80)
    
    # 1. Matrice de confusion
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['False Positive', 'Candidate', 'Confirmed'],
                yticklabels=['False Positive', 'Candidate', 'Confirmed'])
    plt.title('Confusion Matrix - Test Set', fontsize=16, fontweight='bold')
    plt.ylabel('True Class')
    plt.xlabel('Predicted Class')
    plt.tight_layout()
    plt.savefig(f'{MODEL_PATH}/confusion_matrix_final.png', dpi=300)
    print("   ✅ Matrice de confusion: confusion_matrix_final.png")
    plt.close()
    
    # 2. Feature importance
    importance_dict = model.get_score(importance_type='gain')
    feature_importance = pd.DataFrame({
        'feature': list(importance_dict.keys()),
        'importance': list(importance_dict.values())
    }).sort_values('importance', ascending=False)
    
    print("\n🏆 TOP 15 FEATURES:")
    print(feature_importance.head(15).to_string(index=False))
    
    plt.figure(figsize=(12, 8))
    top_20 = feature_importance.head(20)
    plt.barh(top_20['feature'], top_20['importance'], color='steelblue')
    plt.xlabel('Importance (Gain)')
    plt.title('Top 20 Features', fontsize=16, fontweight='bold')
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.savefig(f'{MODEL_PATH}/feature_importance_final.png', dpi=300)
    print("   ✅ Feature importance: feature_importance_final.png")
    plt.close()

create_visualizations(model_final, y_test, metrics['y_pred'])


9. VISUALISATIONS
   ✅ Matrice de confusion: confusion_matrix_final.png

🏆 TOP 15 FEATURES:
              feature  importance
        koi_model_snr    7.934336
              snr_log    7.769337
             koi_prad    6.345869
    koi_duration_err1    4.268429
          snr_squared    4.063711
        koi_prad_err1    3.747423
           koi_period    3.699139
      koi_period_err1    2.630286
        koi_prad_err2    2.568449
         koi_duration    2.511777
        koi_srad_err1    2.163721
       koi_depth_err1    2.036059
            koi_depth    1.847937
transit_detectability    1.789605
             koi_srad    1.777681
   ✅ Feature importance: feature_importance_final.png


# SAVE

In [20]:
def save_model_and_metadata(model, best_params, metrics, class_weights, 
                            X_train_val, X_test):
    """Sauvegarde le modèle et ses métadonnées"""
    print("\n" + "=" * 80)
    print("10. SAUVEGARDE")
    print("=" * 80)
    
    # Sauvegarde du modèle
    model.save_model(f'{MODEL_PATH}/exoplanet_xgboost_final.json')
    print("   ✅ Modèle: exoplanet_xgboost_final.json")
    
    # Métadonnées
    metadata = {
        'timestamp': datetime.now().isoformat(),
        'xgboost_version': xgb.__version__,
        'device': device,
        'training_strategy': 'Train+Val combined for final model',
        'best_hyperparameters': {
            'learning_rate': best_params['learning_rate'],
            'max_depth': best_params['max_depth'],
            'min_child_weight': best_params['min_child_weight'],
            'subsample': best_params['subsample'],
            'colsample_bytree': best_params['colsample_bytree']
        },
        'best_iteration': int(model.best_iteration),
        'best_score': float(model.best_score),
        'test_metrics': {
            'accuracy': float(metrics['accuracy']),
            'precision': float(metrics['precision']),
            'recall': float(metrics['recall']),
            'f1_macro': float(metrics['f1_macro']),
            'f1_weighted': float(metrics['f1_weighted'])
        },
        'class_weights': {str(k): float(v) for k, v in class_weights.items()},
        'features_engineered': [
            'transit_depth_duration_ratio',
            'snr_log',
            'snr_squared',
            'transit_detectability'
        ],
        'training_samples': int(X_train_val.shape[0]),
        'test_samples': int(X_test.shape[0])
    }
    
    with open(f'{MODEL_PATH}/model_metadata_final.json', 'w') as f:
        json.dump(metadata, f, indent=4)
    print("   ✅ Métadonnées: model_metadata_final.json")

save_model_and_metadata(model_final, best_params, metrics, class_weights,
                        X_train_val, X_test_enh)


10. SAUVEGARDE
   ✅ Modèle: exoplanet_xgboost_final.json
   ✅ Métadonnées: model_metadata_final.json


In [22]:
import pickle
# Save model
model_path = f'./xgboost_model_v1.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(model, f)
print(f"✅ Model saved: {model_path}")

✅ Model saved: ./xgboost_model_v1.pkl
