In [1]:
import pandas as pd
import numpy as np
import glob
import pytz
import os
from scipy import stats
from sklearn.preprocessing import StandardScaler, RobustScaler, LabelEncoder
from sklearn.model_selection import LeaveOneGroupOut, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, balanced_accuracy_score
from sklearn.utils.class_weight import compute_class_weight
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, RandomOverSampler
from imblearn.under_sampling import TomekLinks
from imblearn.combine import SMOTETomek
import xgboost as xgb
import optuna
import shap
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from collections import Counter
from catboost import CatBoostClassifier

warnings.filterwarnings('ignore', category=UserWarning, module='xgboost')
warnings.filterwarnings('ignore', category=FutureWarning)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load in Data
EVENTS_FOLDER = '../../data/bishkek_csr/03_train_ready/event_exports' 
RESPECK_FOLDER = '../../data/bishkek_csr/03_train_ready/respeck'
NASAL_FOLDER = '../../data/bishkek_csr/03_train_ready/nasal_files'

# --- STRATEGIC FIX: Re-group the rarest classes ---
# We combine Central/Mixed Apnea and RERA into a single "Other Events" class
# This ensures every class has enough samples for robust modeling.
EVENT_GROUP_TO_LABEL = {
    1: ['Obstructive Apnea'],
    2: ['Hypopnea', 'Central Hypopnea', 'Obstructive Hypopnea'],
    3: ['Central Apnea', 'Mixed Apnea'], # Combined rare events
    4: ['Desaturation'] # Note: Relabeled from 5 to 4
}

LABEL_TO_EVENT_GROUP_NAME = {
    0: 'Normal',
    1: 'Obstructive Apnea',
    2: 'Hypopnea Events',
    3: 'Other Apnea', # New combined name
    4: 'Desaturation'
}

# --- Feature Generation Functions (Unchanged) ---
def generate_RRV(sliced_signal):
    sliced_signal = sliced_signal.dropna()
    if sliced_signal.empty: return np.nan
    breathingSignal = sliced_signal.values
    N = len(breathingSignal)
    if N < 2: return 0.0
    yf = np.fft.fft(breathingSignal)
    yff = 2.0 / N * np.abs(yf[:N//2])
    if len(yff) < 2: return 0.0
    dc_component_value = np.amax(yff)
    if dc_component_value == 0: return 0.0
    temp_dc_removed = np.delete(yff, np.argmax(yff))
    h1_value = np.amax(temp_dc_removed)
    rrv = 100 - (h1_value / dc_component_value) * 100
    return rrv

def extract_respeck_features(df):
    resampled_df = pd.DataFrame()
    br_resampler = df["breathingRate"].resample('30s')
    resampled_df["BR_median"], resampled_df["BR_mean"], resampled_df["BR_std"] = br_resampler.median(), br_resampler.mean(), br_resampler.std()
    resampled_df["BR_cov"] = resampled_df["BR_std"] / resampled_df["BR_mean"]
    al_resampler = df["activityLevel"].resample('30s')
    resampled_df["AL_median"], resampled_df["AL_mean"], resampled_df["AL_std"] = al_resampler.median(), al_resampler.mean(), al_resampler.std()
    resampled_df["AL_cov"] = resampled_df["AL_std"] / resampled_df["AL_mean"]
    resampled_df["RRV"] = df["breathingSignal"].resample('30s').apply(generate_RRV)
    resampled_df["RRV"] = resampled_df["RRV"].replace(0, np.nan).ffill().bfill()
    resampled_df["RRV3ANN"] = resampled_df["RRV"].rolling(window=3, center=True).mean()
    resampled_df["RRV3ANN"] = resampled_df["RRV3ANN"] * 0.65
    resampled_df['Label'] = df['Label'].resample('30s').apply(lambda x: stats.mode(x)[0] if not x.empty else 0)
    return resampled_df

# --- Data Preparation Loop ---
all_sessions_df_list = []
event_files = glob.glob(os.path.join(EVENTS_FOLDER, '*_event_export.csv'))
for event_file_path in event_files:
    base_name = os.path.basename(event_file_path)
    session_id = base_name.split('_event_export.csv')[0]
    respeck_file_path = os.path.join(RESPECK_FOLDER, f'{session_id}_respeck.csv')
    if not os.path.exists(respeck_file_path): continue
    
    df_events = pd.read_csv(event_file_path, decimal=',')
    df_respeck = pd.read_csv(respeck_file_path)
    
    df_events.rename(columns={'UnixTimestamp': 'timestamp_unix'}, inplace=True)
    df_respeck.rename(columns={'alignedTimestamp': 'timestamp_unix'}, inplace=True)
    for df_ in [df_events, df_respeck]:
        df_['timestamp_unix'] = pd.to_numeric(df_['timestamp_unix'], errors='coerce')
        df_.dropna(subset=['timestamp_unix'], inplace=True)
        df_['timestamp_unix'] = df_['timestamp_unix'].astype('int64')

    df_respeck['timestamp'] = pd.to_datetime(df_respeck['timestamp_unix'], unit='ms').dt.tz_localize('UTC').dt.tz_convert('Asia/Bishkek')
    df_respeck['Label'] = 0
    df_events['Duration_ms'] = (df_events['Duration'] * 1000).astype('int64')
    df_events['end_time_unix'] = df_events['timestamp_unix'] + df_events['Duration_ms']
    
    for label_id, event_names_in_group in EVENT_GROUP_TO_LABEL.items():
        df_filtered_events = df_events[df_events['Event'].isin(event_names_in_group)]
        for _, event in df_filtered_events.iterrows():
            df_respeck.loc[df_respeck['timestamp_unix'].between(event['timestamp_unix'], event['end_time_unix']), 'Label'] = label_id
    
    df_respeck.set_index('timestamp', inplace=True)
    features_df = extract_respeck_features(df_respeck).dropna()
    features_df['SessionID'] = session_id
    all_sessions_df_list.append(features_df)

final_df = pd.concat(all_sessions_df_list).reset_index().rename(columns={'index': 'timestamp'})
print(f"Data loading and feature generation complete. Final shape: {final_df.shape}")
print(f"Final class distribution:\n{final_df['Label'].value_counts(normalize=True)}")

# --- Prepare data for the model ---
FEATURE_COLUMNS = ['BR_median', 'BR_mean', 'BR_std', 'BR_cov', 'AL_median', 'AL_mean', 'AL_std', 'AL_cov', 'RRV', 'RRV3ANN']
X = final_df[FEATURE_COLUMNS].values
y = final_df['Label'].values
groups = final_df['SessionID'].values
X = SimpleImputer(strategy='mean').fit_transform(X)

# --- Use LabelEncoder for robust label handling ---
le = LabelEncoder()
y_encoded = le.fit_transform(y)
N_OUTPUTS = len(le.classes_)
CLASS_NAMES = [LABEL_TO_EVENT_GROUP_NAME[c] for c in le.classes_]

print(f"\nTotal classes for model: {N_OUTPUTS}. Names: {CLASS_NAMES}")

Data loading and feature generation complete. Final shape: (7796, 13)
Final class distribution:
Label
0    0.908415
4    0.061570
2    0.026552
1    0.003335
3    0.000128
Name: proportion, dtype: float64

Total classes for model: 5. Names: ['Normal', 'Obstructive Apnea', 'Hypopnea Events', 'Other Apnea', 'Desaturation']


In [3]:
from catboost import CatBoostClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import StratifiedKFold
from collections import Counter
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Make sure you have these imports if they are not global
from sklearn.preprocessing import RobustScaler, LabelEncoder
from imblearn.over_sampling import SMOTE, RandomOverSampler

class ImprovedCatBoostClassifier:
    def __init__(self, feature_columns, n_outputs, random_state=42):
        self.feature_columns = feature_columns
        self.n_outputs = n_outputs
        self.random_state = random_state
        self.scaler = RobustScaler()
        self.best_params = None
        self.model = None

    def create_advanced_features(self, X_df):
        X_advanced = X_df.copy()
        X_advanced['BR_AL_interaction'] = X_advanced['BR_mean'] * X_advanced['AL_mean']
        X_advanced['BR_RRV_interaction'] = X_advanced['BR_mean'] * X_advanced['RRV']
        X_advanced['BR_median_mean_ratio'] = X_advanced['BR_median'] / (X_advanced['BR_mean'] + 1e-8)
        X_advanced['BR_mean_squared'] = X_advanced['BR_mean'] ** 2
        X_advanced['AL_mean_squared'] = X_advanced['AL_mean'] ** 2
        X_advanced['RRV_squared'] = X_advanced['RRV'] ** 2
        X_advanced['BR_stability'] = 1 / (1 + X_advanced['BR_cov'])
        X_advanced['activity_breathing_score'] = (X_advanced['AL_mean'] * 0.6 + X_advanced['BR_mean'] * 0.4)
        X_advanced = X_advanced.replace([np.inf, -np.inf], np.nan).fillna(X_advanced.median())
        return X_advanced

    def robust_resampling(self, X_train, y_train):
        print(f"  - Original distribution: {Counter(y_train)}")
        class_counts = Counter(y_train)
        min_samples = min(class_counts.values()) if class_counts else 0

        if min_samples >= 6:
            k = min(5, min_samples - 1)
            sampler = SMOTE(random_state=self.random_state, k_neighbors=k)
            print(f"  - Using SMOTE with k_neighbors={k}")
        else:
            sampler = RandomOverSampler(random_state=self.random_state)
            print("  - Using RandomOverSampler due to small class size.")
        
        X_res, y_res = sampler.fit_resample(X_train, y_train)
        print(f"  - After resampling: {Counter(y_res)}")
        return X_res, y_res

    def optimize_hyperparameters(self, X_train_scaled, y_train_encoded, n_trials=50):
        print("  - Optimizing CatBoost hyperparameters with Optuna...")
        X_train_resampled, y_train_resampled = self.robust_resampling(X_train_scaled, y_train_encoded)
        
        optuna_le = LabelEncoder()
        y_train_optuna = optuna_le.fit_transform(y_train_resampled)
        
        def objective(trial):
            params = {
                'objective': 'MultiClass',
                'eval_metric': 'TotalF1',  # FIX: Use a multi-class compatible metric
                'random_seed': self.random_state,
                'verbose': 0,
                'iterations': trial.suggest_int('iterations', 200, 1500, step=100),
                'depth': trial.suggest_int('depth', 4, 10),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
                'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 10.0, log=True),
                'border_count': trial.suggest_int('border_count', 32, 255),
                'subsample': trial.suggest_float('subsample', 0.6, 1.0),
                'bootstrap_type': 'Bernoulli'
            }
            skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=self.random_state)
            scores = []
            for tr_idx, val_idx in skf.split(X_train_resampled, y_train_optuna):
                X_tr, X_val = X_train_resampled[tr_idx], X_train_resampled[val_idx]
                y_tr, y_val = y_train_optuna[tr_idx], y_train_optuna[val_idx]

                model = CatBoostClassifier(**params)
                model.fit(X_tr, y_tr)
                y_pred = model.predict(X_val)
                scores.append(balanced_accuracy_score(y_val, y_pred))
            return np.mean(scores)
        
        study = optuna.create_study(direction='maximize', pruner=optuna.pruners.MedianPruner())
        study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
        
        self.best_params = study.best_params
        print(f"  - Best balanced accuracy from optimization: {study.best_value:.4f}")
        return self.best_params

    def cross_validate(self, X, y_encoded, le, groups, use_optimization=True):
        X_df = pd.DataFrame(X, columns=self.feature_columns)
        X_engineered = self.create_advanced_features(X_df)
        print(f"  - Feature engineering applied: {X.shape[1]} -> {X_engineered.shape[1]} features")

        all_preds_encoded, all_true_encoded, all_importances = [], [], []
        logo = LeaveOneGroupOut()
        n_folds = logo.get_n_splits(X_engineered, y_encoded, groups=groups)
        
        print(f"Starting LONO CV with {n_folds} folds...")
        print("=" * 60)
        
        for fold, (train_idx, test_idx) in enumerate(logo.split(X_engineered, y_encoded, groups)):
            test_night = np.unique(groups[test_idx])[0]
            print(f"\n--- FOLD {fold + 1}/{n_folds} (Testing: {test_night}) ---")

            X_train, X_test = X_engineered.iloc[train_idx].values, X_engineered.iloc[test_idx].values
            y_train, y_test = y_encoded[train_idx], y_encoded[test_idx]

            scaler = RobustScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            
            if use_optimization and self.best_params is None:
                self.best_params = self.optimize_hyperparameters(X_train_scaled, y_train, n_trials=50)

            params = self.best_params or {}
            params.update({
                'objective': 'MultiClass', 'random_seed': self.random_state, 'verbose': 0
            })
            
            X_train_aug, y_train_aug = X_train_scaled, y_train
            missing_classes = set(range(self.n_outputs)) - set(np.unique(y_train_aug))
            if missing_classes:
                print(f"  - Injecting dummy samples for missing classes: {missing_classes}")
                dummy_sample_X = X_train_aug[0:1]
                for mc in missing_classes:
                    X_train_aug = np.vstack([X_train_aug, dummy_sample_X])
                    y_train_aug = np.hstack([y_train_aug, [mc]])

            X_train_resampled, y_train_resampled = self.robust_resampling(X_train_aug, y_train_aug)
            
            model = CatBoostClassifier(**params)
            model.fit(X_train_resampled, y_train_resampled)

            y_pred_encoded = model.predict(X_test_scaled).flatten()
            all_preds_encoded.extend(y_pred_encoded)
            all_true_encoded.extend(y_test)
            
            print(f"  - Fold balanced accuracy: {balanced_accuracy_score(y_test, y_pred_encoded):.4f}")
            
            importance_df = pd.DataFrame({
                'feature': X_engineered.columns,
                'importance': model.get_feature_importance()
            }).sort_values('importance', ascending=False)
            all_importances.append(importance_df)

        self.model = model
        self._display_results(le, all_true_encoded, all_preds_encoded, all_importances)

    def _display_results(self, le, true_encoded, pred_encoded, importances):
        print("\n" + "=" * 60 + "\nAGGREGATED CATBOOST RESULTS\n" + "=" * 60)
        
        # This assumes your global LABEL_TO_EVENT_GROUP_NAME is available
        class_names_ordered = [LABEL_TO_EVENT_GROUP_NAME[c] for c in le.classes_]
        
        print(f"Overall Balanced Accuracy: {balanced_accuracy_score(true_encoded, pred_encoded):.4f}\n")
        print(classification_report(true_encoded, pred_encoded, target_names=class_names_ordered, labels=le.classes_, zero_division=0))
        
        cm = confusion_matrix(true_encoded, pred_encoded, labels=le.classes_)
        cm_norm = np.nan_to_num(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])
        
        plt.figure(figsize=(12, 10))
        sns.heatmap(cm_norm, annot=True, fmt='.2%', cmap='Blues', xticklabels=class_names_ordered, yticklabels=class_names_ordered)
        plt.title('Aggregated Normalized Confusion Matrix (LONO)')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.xticks(rotation=45, ha="right")
        plt.tight_layout()
        plt.show()
        
        self._plot_feature_importance(importances)
        
    def _plot_feature_importance(self, importances):
        all_imp_df = pd.concat(importances)
        mean_imp_df = all_imp_df.groupby('feature')['importance'].mean().sort_values(ascending=False).reset_index()
        
        plt.figure(figsize=(10, 8))
        sns.barplot(x='importance', y='feature', data=mean_imp_df.head(15), palette='viridis')
        plt.xlabel('Mean Feature Importance (PredictionValueChange)')
        plt.ylabel('Feature')
        plt.title('Top 15 Feature Importances (Averaged Across Folds)')
        plt.gca().invert_yaxis()
        plt.tight_layout()
        plt.show()

In [4]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer

# --- Define data for the model ---
df = final_df.copy()

FEATURE_COLUMNS = [
    'BR_median', 'BR_mean', 'BR_std', 'BR_cov', 
    'AL_median', 'AL_mean', 'AL_std', 'AL_cov',
    'RRV', 'RRV3ANN'
]
LABEL_COLUMN = 'Label' 
SESSION_ID_COLUMN = 'SessionID'
RANDOM_STATE = 42

# --- Create final X, y, and groups arrays ---
X = df[FEATURE_COLUMNS].values
y = df[LABEL_COLUMN].values
groups = df[SESSION_ID_COLUMN].values

# --- Impute any remaining NaNs ---
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# --- NEW: Initialize and fit LabelEncoder on the entire dataset's labels ---
# This makes the encoder aware of ALL possible classes (0 through 5)
le = LabelEncoder()
y_encoded = le.fit_transform(y)
# N_OUTPUTS should now be based on the number of classes found by the encoder
N_OUTPUTS = len(le.classes_) 
# Update CLASS_NAMES to match the order found by the encoder
CLASS_NAMES = [LABEL_TO_EVENT_GROUP_NAME[c] for c in le.classes_]


print("\nData preparation complete.")
print(f"Shape of X (features): {X.shape} -> (Num_Windows, Num_Engineered_Features)")
print(f"Shape of y (labels):   {y_encoded.shape}")
print(f"Original class distribution: {Counter(y)}")
print(f"Encoded class distribution: {Counter(y_encoded)}")
print(f"Total number of classes detected: {N_OUTPUTS}")


Data preparation complete.
Shape of X (features): (7796, 10) -> (Num_Windows, Num_Engineered_Features)
Shape of y (labels):   (7796,)
Original class distribution: Counter({np.int64(0): 7082, np.int64(4): 480, np.int64(2): 207, np.int64(1): 26, np.int64(3): 1})
Encoded class distribution: Counter({np.int64(0): 7082, np.int64(4): 480, np.int64(2): 207, np.int64(1): 26, np.int64(3): 1})
Total number of classes detected: 5


In [5]:
print("\nChecking for and imputing missing values (NaNs)...")
for col in df:
    if col in df.columns:
        nan_count = df[col].isnull().sum()
        if nan_count > 0:
            print(f"  - Found {nan_count} NaNs in '{col}'. Applying forward-fill and backward-fill.")
            
            # Step 1: Forward-fill handles all NaNs except leading ones.
            df[col].ffill(inplace=True) 
            
            # Step 2: Backward-fill handles any remaining NaNs at the beginning of the file.
            df[col].bfill(inplace=True) 

# Add a final check to ensure everything is clean
final_nan_count = df[FEATURE_COLUMNS].isnull().sum().sum()
if final_nan_count > 0:
    print(f"\nWARNING: {final_nan_count} NaNs still remain in feature columns after imputation. Please investigate.")
else:
    print("\nImputation complete. No NaNs remain in feature columns.")


Checking for and imputing missing values (NaNs)...

Imputation complete. No NaNs remain in feature columns.


In [6]:
# device = torch.device(
#     "cuda" if torch.cuda.is_available()
#     else ("mps" if torch.backends.mps.is_available() else "cpu")
# )
# print(f"Using device: {device}")

In [7]:
# --- DRIVER SCRIPT to run the improved XGBoost model ---
def run_improved_model(X, y_encoded, le, groups, feature_columns, n_outputs):
    improved_classifier =  ImprovedCatBoostClassifier(
        feature_columns=feature_columns,
        n_outputs=n_outputs,
        random_state=42
    )
    
    # Run the full cross-validation and training pipeline
    # Set use_optimization=False for much faster runs during debugging.
    improved_classifier.cross_validate(
        X=X, 
        y_encoded=y_encoded,
        le=le,
        groups=groups,
        use_optimization=True # Set to False for speed, True for best performance
    )
    
    return improved_classifier

# Execute the pipeline
# The base FEATURE_COLUMNS from the data prep cell is passed here
improved_model = run_improved_model(
    X=X, 
    y_encoded=y_encoded, 
    le=le,
    groups=groups, 
    feature_columns=FEATURE_COLUMNS,
    n_outputs=N_OUTPUTS
)

[I 2025-07-14 23:22:32,222] A new study created in memory with name: no-name-8e7da3d0-adb1-4c5e-ab0f-3a07e4e9df67


  - Feature engineering applied: 10 -> 18 features
Starting LONO CV with 9 folds...

--- FOLD 1/9 (Testing: 04-04-2025) ---
  - Optimizing CatBoost hyperparameters with Optuna...
  - Original distribution: Counter({np.int64(0): 6043, np.int64(4): 414, np.int64(2): 201, np.int64(1): 24, np.int64(3): 1})
  - Using RandomOverSampler due to small class size.
  - After resampling: Counter({np.int64(0): 6043, np.int64(2): 6043, np.int64(4): 6043, np.int64(1): 6043, np.int64(3): 6043})


[I 2025-07-14 23:22:46,581] Trial 0 finished with value: 0.9723976991497892 and parameters: {'iterations': 400, 'depth': 6, 'learning_rate': 0.09809543099062999, 'l2_leaf_reg': 0.8589837300232939, 'border_count': 170, 'subsample': 0.7496568878171402}. Best is trial 0 with value: 0.9723976991497892.
[I 2025-07-14 23:23:04,058] Trial 1 finished with value: 0.9662417338350324 and parameters: {'iterations': 800, 'depth': 5, 'learning_rate': 0.05706347680830313, 'l2_leaf_reg': 3.039128554536146e-08, 'border_count': 69, 'subsample': 0.931144558706718}. Best is trial 0 with value: 0.9723976991497892.
[I 2025-07-14 23:25:41,371] Trial 2 finished with value: 0.9920568099400146 and parameters: {'iterations': 1200, 'depth': 9, 'learning_rate': 0.041183756645177316, 'l2_leaf_reg': 0.07743788103129064, 'border_count': 242, 'subsample': 0.670105231227473}. Best is trial 2 with value: 0.9920568099400146.
[I 2025-07-14 23:27:47,532] Trial 3 finished with value: 0.9869269620514135 and parameters: {'ite

  - Best balanced accuracy from optimization: 0.9969
  - Original distribution: Counter({np.int64(0): 6043, np.int64(4): 414, np.int64(2): 201, np.int64(1): 24, np.int64(3): 1})
  - Using RandomOverSampler due to small class size.
  - After resampling: Counter({np.int64(0): 6043, np.int64(2): 6043, np.int64(4): 6043, np.int64(1): 6043, np.int64(3): 6043})


CatBoostError: catboost/private/libs/options/catboost_options.cpp:794: Error: default bootstrap type (bayesian) doesn't support 'subsample' option

In [None]:
# import xgboost as xgb
# from imblearn.over_sampling import SMOTE, RandomOverSampler
# from sklearn.model_selection import LeaveOneGroupOut
# from sklearn.metrics import classification_report, confusion_matrix
# from collections import Counter
# import seaborn as sns
# import matplotlib.pyplot as plt
# from sklearn.inspection import permutation_importance

# # --- 1. Initialize lists to store results from all folds ---
# all_fold_predictions = []
# all_fold_true_labels = []
# all_fold_importances = []

# # --- 2. Setup Leave-One-Night-Out cross-validator ---
# logo = LeaveOneGroupOut()
# n_folds = logo.get_n_splits(X, y_encoded, groups=groups)
# print(f"Starting XGBoost Leave-One-Night-Out cross-validation with {n_folds} folds...")
# print("----------------------------------------------------\n")

# # --- 3. Loop through each fold ---
# for fold, (train_idx, test_idx) in enumerate(logo.split(X, y_encoded, groups)):
    
#     test_night = np.unique(groups[test_idx])[0]
#     print(f"--- FOLD {fold + 1}/{n_folds} (Testing on Night: {test_night}) ---")

#     X_train, X_test = X[train_idx], X[test_idx]
#     y_train_fold_encoded, y_test_fold = y_encoded[train_idx], y_encoded[test_idx]
    
#     # --- FINAL FIX: Robust Resampling with Dummy Sample Injection ---
#     print(f"  - Original training distribution (encoded): {Counter(y_train_fold_encoded)}")
    
#     # Find classes missing from this fold's training data
#     all_possible_classes = set(range(N_OUTPUTS))
#     present_classes = set(np.unique(y_train_fold_encoded))
#     missing_classes = all_possible_classes - present_classes
    
#     X_train_augmented = X_train
#     y_train_augmented = y_train_fold_encoded
    
#     # If any classes are missing, inject a dummy sample for each
#     if missing_classes:
#         print(f"  - Injecting dummy samples for missing classes: {missing_classes}")
#         dummy_sample_X = X_train[0:1] # Take the first sample as a template
#         for mc in missing_classes:
#             X_train_augmented = np.vstack([X_train_augmented, dummy_sample_X])
#             y_train_augmented = np.hstack([y_train_augmented, [mc]])

#     # Now, use RandomOverSampler which is safe and guaranteed to work
#     ros = RandomOverSampler(random_state=RANDOM_STATE)
#     X_res, y_res = ros.fit_resample(X_train_augmented, y_train_augmented)
    
#     print(f"  - Resampled training distribution: {Counter(y_res)}")

#     # --- Initialize and Train the XGBoost model ---
#     model = xgb.XGBClassifier(
#         objective='multi:softmax',
#         num_class=N_OUTPUTS,
#         n_estimators=500,
#         learning_rate=0.0001,
#         max_depth=4,
#         eval_metric='mlogloss',
#         random_state=RANDOM_STATE,
#         n_jobs=-1
#     )
    
#     model.fit(X_res, y_res)
#     print(f"  - Training complete.")

#     # --- Evaluate and store results ---
#     fold_preds_encoded = model.predict(X_test)
    
#     # Inverse transform predictions and true labels to their original values for reporting
#     fold_preds_original = le.inverse_transform(fold_preds_encoded)
#     fold_true_original = le.inverse_transform(y_test_fold)

#     all_fold_predictions.extend(fold_preds_original)
#     all_fold_true_labels.extend(fold_true_original)
    
#     # --- Calculate and store feature importance ---
#     result = permutation_importance(
#         model, X_test, y_test_fold, n_repeats=10, random_state=RANDOM_STATE, n_jobs=-1, scoring='f1_weighted'
#     )
#     perm_importance = pd.DataFrame({'feature': FEATURE_COLUMNS, 'importance': result.importances_mean})
#     all_fold_importances.append(perm_importance)
    
#     print(f"  - Evaluation complete for fold {fold + 1}.\n")

# # --- FINAL AGGREGATED EVALUATION (after all folds are complete) ---
# # This part of the code remains the same as the previous correct version
# print("\n====================================================")
# print("XGBoost Leave-One-Night-Out Cross-Validation Complete.")
# print("Aggregated Results Across All Folds:")
# print("====================================================\n")

# print('Aggregated Classification Report')
# print('------------------------------')
# report_labels = le.classes_
# print(classification_report(
#     all_fold_true_labels, 
#     all_fold_predictions, 
#     labels=report_labels,
#     target_names=CLASS_NAMES,
#     zero_division=0
# ))

# print('\nAggregated Confusion Matrix')
# print('---------------------------')
# cm = confusion_matrix(all_fold_true_labels, all_fold_predictions, labels=report_labels)
# with np.errstate(divide='ignore', invalid='ignore'):
#     cm_norm = np.where(cm.sum(axis=1)[:, np.newaxis] > 0, cm.astype('float') / cm.sum(axis=1)[:, np.newaxis], 0)

# plt.figure(figsize=(12, 10))
# sns.heatmap(
#     cm_norm, annot=True, fmt='.2%', cmap='Greens',
#     xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES
# )
# plt.title('XGBoost - Aggregated Normalized Confusion Matrix (LONO)')
# plt.ylabel('True Label')
# plt.xlabel('Predicted Label')
# plt.xticks(rotation=45, ha="right")
# plt.show()

# # --- SHAP Value Analysis ---
# print("\n--- SHAP Value Analysis (from last fold) ---")
# explainer = shap.TreeExplainer(model)
# shap_values = explainer.shap_values(X_test)

# for i, class_label in enumerate(le.classes_):
#     class_name = LABEL_TO_EVENT_GROUP_NAME[class_label]
#     print(f"\nSHAP Summary Plot for: {class_name} (Encoded as {i})")
#     try:
#         shap.summary_plot(shap_values[i], X_test, feature_names=FEATURE_COLUMNS, show=False)
#         plt.title(f"SHAP Values for {class_name}")
#         plt.show()
#     except Exception as e:
#         print(f"Could not plot SHAP for class {i}: {e}")

In [None]:
# # --- AGGREGATE AND PLOT FEATURE IMPORTANCES ---
# print("\n====================================================")
# print("Feature Importance Analysis (Averaged Across All Folds)")
# print("====================================================\n")

# # --- FIX: Aggregate Data from All Folds ---
# all_importances_df = pd.concat(all_fold_importances)
# mean_importance = all_importances_df.groupby('feature')['importance'].mean()
# std_importance = all_importances_df.groupby('feature')['importance'].std()

# # --- Prepare the DataFrame for Plotting ---
# final_importance_df = pd.DataFrame({
#     'mean_importance': mean_importance,
#     'std_importance': std_importance
# }).sort_values(by='mean_importance', ascending=False)

# # --- Create the Plot ---
# fig, ax = plt.subplots(figsize=(12, 8))
# plot_data = final_importance_df.head(10) # Plot top 10 features

# ax.barh(
#     y=plot_data.index,
#     width=plot_data['mean_importance'],
#     xerr=plot_data['std_importance'],
#     align='center',
#     ecolor='black',
#     capsize=5
# )

# ax.invert_yaxis() 
# ax.set_xlabel('Mean Permutation Importance (Weighted F1)')
# ax.set_ylabel('Feature')
# ax.set_title('Top 10 Feature Importances (Averaged Across LONO Folds)')
# ax.grid(axis='x', linestyle='--', alpha=0.7)
# plt.tight_layout()
# plt.show()