In [None]:
# Physiological Recording Classification: Healthy vs Impaired
# Simple Random Forest classifier with patient-based splits

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report


import warnings
# NOTE(review): this silences every warning category for the rest of the session,
# including any raised by pandas / scikit-learn in later cells -- consider narrowing it.
warnings.filterwarnings('ignore')

# Single seed reused as the default by the split helpers, the forest, and the bootstrap
RANDOM_STATE = 123
# Seed NumPy's legacy global generator (the np.random.* functions)
np.random.seed(RANDOM_STATE)

In [None]:
# Read the two pre-normalized feature tables (PSD and SNR) from CSV
print("Loading normalized data tables...")

psd_data, snr_data = (
    pd.read_csv(csv_name)
    for csv_name in (
        'detailed_psd_normalized_table.csv',
        'detailed_snrs_normalized_table.csv',
    )
)

# Table dimensions first, then the class balance of the PSD table
print(f"PSD data shape: {psd_data.shape}")
print(f"SNR data shape: {snr_data.shape}")
print(f"\nLabel distribution in PSD data:")
print(psd_data['Arm Type'].value_counts())


In [None]:
# Join the PSD and SNR tables and derive the model inputs
print("Combining datasets...")

# Identifier / label columns shared by both tables; every other column is a feature
key_columns = ['Patient', 'Recording', 'Arm Type']

# Inner join: only rows whose key appears in both tables survive
combined_data = psd_data.merge(snr_data, on=key_columns, how='inner')

print(f"Combined data shape: {combined_data.shape}")
print(f"Combined label distribution:")
print(combined_data['Arm Type'].value_counts())

# Features keep the merged column order; labels are the arm type
feature_cols = [name for name in combined_data.columns if name not in key_columns]

X = combined_data[feature_cols].values
y = combined_data['Arm Type'].values

print(f"\nFeature matrix shape: {X.shape}")
print(f"Number of features: {len(feature_cols)}")
print(f"PSD features: {sum(1 for name in feature_cols if 'PSD' in name)}")
print(f"SNR features: {sum(1 for name in feature_cols if 'SNR' in name)}")


In [None]:
# Patient-based train/test split function
def patient_based_split(combined_data, X, y, test_size=0.15, random_state=RANDOM_STATE,
                        n_attempts=100, overshoot=5):
    """
    Split data ensuring no patient appears in both train and test sets.

    Whole patients are assigned to the test set. The function makes
    ``n_attempts`` randomized greedy passes over the patients and keeps the
    selection whose recording count is closest to ``test_size`` of all rows.

    Parameters
    ----------
    combined_data : pandas.DataFrame
        Must contain a 'Patient' column; its rows must be aligned with X and y.
    X, y : array-like supporting boolean-mask indexing (e.g. numpy arrays)
        Feature matrix and labels, one entry per row of ``combined_data``.
    test_size : float
        Target fraction of recordings to place in the test set.
    random_state : int or None
        Seed for the private random generator used for shuffling.
    n_attempts : int
        Number of randomized greedy passes.
    overshoot : int
        How many recordings above the target a selection may reach.

    Returns
    -------
    X_train, X_test, y_train, y_test, test_patient_ids
        ``test_patient_ids`` holds the patient of every test row, in X_test row order.

    Raises
    ------
    ValueError
        If no patient fits into the test budget (the test set would be empty).
    """
    # Private generator: produces the same stream as np.random.seed(random_state)
    # followed by np.random.shuffle, but leaves NumPy's global RNG untouched.
    rng = np.random.RandomState(random_state)

    # Rows per patient
    recordings_per_patient = combined_data.groupby('Patient').size()

    # Calculate target test size
    total_recordings = len(combined_data)
    target_test_recordings = int(total_recordings * test_size)

    # Initial candidate order: largest patients first (reshuffled before each pass)
    candidates = list(recordings_per_patient.sort_values(ascending=False).index)
    # Plain dict lookup instead of a DataFrame .loc call inside the hot loop
    n_recordings = recordings_per_patient.to_dict()

    best_test_patients = []
    best_gap = float('inf')

    for _ in range(n_attempts):
        rng.shuffle(candidates)
        chosen = []
        chosen_count = 0

        for patient in candidates:
            patient_recordings = n_recordings[patient]
            # Skip patients that would push the selection past the allowed overshoot
            if chosen_count + patient_recordings > target_test_recordings + overshoot:
                continue
            chosen.append(patient)
            chosen_count += patient_recordings

            # Remember every prefix that lands strictly closer to the target
            gap = abs(chosen_count - target_test_recordings)
            if gap < best_gap:
                best_test_patients = chosen.copy()
                best_gap = gap

    if not best_test_patients:
        raise ValueError(
            "patient_based_split could not place any patient in the test set; "
            "increase test_size or overshoot"
        )

    test_patients = best_test_patients
    test_recordings_count = sum(n_recordings[p] for p in test_patients)

    print(f"Selected {len(test_patients)} patients for test set")
    print(f"Test recordings: {test_recordings_count} ({test_recordings_count/total_recordings*100:.1f}%)")

    # Positional boolean masks (plain numpy, independent of the DataFrame index)
    patient_column = combined_data['Patient'].to_numpy()
    test_mask = combined_data['Patient'].isin(test_patients).to_numpy()
    train_mask = ~test_mask

    # Split the data
    X_train = X[train_mask]
    X_test = X[test_mask]
    y_train = y[train_mask]
    y_test = y[test_mask]

    # Patient IDs aligned to X_test row order
    test_patient_ids = patient_column[test_mask]

    # Sanity check: the masks are complementary, so this should never fire
    overlap = set(patient_column[train_mask].tolist()) & set(test_patient_ids.tolist())
    if overlap:
        print(f"WARNING: Patient overlap detected: {overlap}")
    else:
        print("✓ No patient overlap between train and test sets")

    return X_train, X_test, y_train, y_test, test_patient_ids

# Run the patient-level hold-out split and report the resulting partition sizes
print("Performing patient-based train/test split...")
X_train, X_test, y_train, y_test, test_patient_ids = patient_based_split(combined_data, X, y)

print(f"\nTraining set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")

# Class balance of the training partition, one line per label
print(f"Training label distribution:")
unique, counts = np.unique(y_train, return_counts=True)
for position in range(len(unique)):
    label, count = unique[position], counts[position]
    print(f" {label}: {count} ({count/len(y_train)*100:.1f}%)")

In [None]:
# Model creation function
def create_rf_model():
    """Return a new, unfitted RandomForestClassifier configured with the notebook's fixed settings."""
    forest_params = {
        "n_estimators": 100,
        "random_state": RANDOM_STATE,
        "max_depth": 10,
        "min_samples_split": 5,
        "min_samples_leaf": 2,
        "max_features": 'sqrt',
        "class_weight": 'balanced',
    }
    return RandomForestClassifier(**forest_params)

# Standardize the features, fit the forest on the training split, score the test split
print("Scaling features and training Random Forest...")

# Scaling statistics are learned from the training rows and reused for the test rows
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the classifier and train it on the scaled training data
rf_model = create_rf_model()
rf_model.fit(X_train_scaled, y_train)

# Hold-out predictions and headline accuracy
y_pred = rf_model.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"\nTest Accuracy: {test_accuracy:.3f}")

# Per-class precision / recall / F1
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits=3))


In [None]:
# Cross-validation with patient-based splits
def patient_based_cv_split(combined_data, n_splits=5, random_state=RANDOM_STATE):
    """
    Create cross-validation splits ensuring no patient appears in multiple folds.

    Patients are shuffled and cut into ``n_splits`` consecutive groups of
    ``len(patients) // n_splits`` patients each; the last group also takes any
    remainder. Each group serves once as the validation set.

    Parameters
    ----------
    combined_data : pandas.DataFrame
        Must contain a 'Patient' column.
    n_splits : int
        Number of folds (at least 2, at most the number of distinct patients).
    random_state : int or None
        Seed for the private random generator used to shuffle the patients.

    Returns
    -------
    list of (train_indices, val_indices)
        Integer row *positions* into ``combined_data``. They are safe for
        indexing numpy arrays built from it, whatever the DataFrame's index.

    Raises
    ------
    ValueError
        If ``n_splits`` is below 2 or exceeds the number of distinct patients.
    """
    patient_column = combined_data['Patient']
    unique_patients = patient_column.unique()

    if n_splits < 2:
        raise ValueError(f"n_splits must be at least 2, got {n_splits}")
    if len(unique_patients) < n_splits:
        raise ValueError(
            f"Cannot build {n_splits} folds from {len(unique_patients)} patients"
        )

    # Private generator: same permutation as np.random.seed(random_state) followed by
    # np.random.shuffle, without re-seeding NumPy's global RNG.
    rng = np.random.RandomState(random_state)
    rng.shuffle(unique_patients)

    fold_size = len(unique_patients) // n_splits

    cv_splits = []
    for i in range(n_splits):
        start_idx = i * fold_size
        # Last fold gets the remaining patients
        end_idx = len(unique_patients) if i == n_splits - 1 else (i + 1) * fold_size
        val_patients = unique_patients[start_idx:end_idx]

        val_mask = patient_column.isin(val_patients).to_numpy()
        # flatnonzero yields row positions; index labels only coincide with
        # positions when the DataFrame carries a default RangeIndex.
        cv_splits.append((np.flatnonzero(~val_mask), np.flatnonzero(val_mask)))

    return cv_splits

print("Performing cross-validation with patient-based splits...")

# balanced_accuracy_score is not among the notebook's top-level imports
from sklearn.metrics import balanced_accuracy_score

# One (train, validation) index pair per fold
cv_splits = patient_based_cv_split(combined_data, n_splits=5)

# Per-fold plain and balanced accuracy
cv_scores = []
cv_balanced_scores = []
for fold, (train_idx, val_idx) in enumerate(cv_splits):
    y_fold_train, y_fold_val = y[train_idx], y[val_idx]

    # Scaler statistics come from this fold's training rows only
    fold_scaler = StandardScaler()
    X_fold_train_scaled = fold_scaler.fit_transform(X[train_idx])
    X_fold_val_scaled = fold_scaler.transform(X[val_idx])

    # Fresh model per fold
    fold_model = create_rf_model()
    fold_model.fit(X_fold_train_scaled, y_fold_train)
    fold_pred = fold_model.predict(X_fold_val_scaled)

    fold_accuracy = accuracy_score(y_fold_val, fold_pred)
    fold_balanced = balanced_accuracy_score(y_fold_val, fold_pred)
    cv_scores.append(fold_accuracy)
    cv_balanced_scores.append(fold_balanced)

    print(f"Fold {fold + 1}: Accuracy={fold_accuracy:.3f}, BalancedAcc={fold_balanced:.3f}")

# Mean and (population) standard deviation across folds
cv_mean, cv_std = np.mean(cv_scores), np.std(cv_scores)
cv_mean_bal, cv_std_bal = np.mean(cv_balanced_scores), np.std(cv_balanced_scores)

print(f"\nCross-Validation Results:")
print(f"Mean CV Accuracy: {cv_mean:.3f} ± {cv_std:.3f}")
print(f"Mean CV BalancedAccuracy: {cv_mean_bal:.3f} ± {cv_std_bal:.3f}")
print(f"Individual fold scores (Accuracy): {[f'{score:.3f}' for score in cv_scores]}")
print(f"Individual fold scores (BalancedAcc): {[f'{score:.3f}' for score in cv_balanced_scores]}")


In [None]:
# Plot confusion matrix for test set
print("Plotting confusion matrix...")

# Pin the class order explicitly. Without `labels=`, confusion_matrix orders
# (and sizes) the matrix by whatever labels occur in y_test / y_pred, so the
# hard-coded tick labels could end up on the wrong rows/columns.
class_labels = ['Healthy', 'Impaired']
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Explicit figure/axes interface; rows are actual classes, columns are predictions
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels,
            ax=ax)
ax.set_title(f'Confusion Matrix - Random Forest\nTest Accuracy: {test_accuracy:.3f}')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
fig.tight_layout()
plt.show()

# Print summary
print(f"\nSUMMARY:")
print(f"========")
print(f"Test Accuracy: {test_accuracy:.3f}")
print(f"Cross-Validation Accuracy: {cv_mean:.3f} ± {cv_std:.3f}")
print(f"Training samples: {len(y_train)}")
print(f"Test samples: {len(y_test)}")
print(f"Features used: {X.shape[1]} (PSD + SNR normalized features)")


In [None]:
# Supplementary test-set metrics extending the SUMMARY above (balanced accuracy, macro F1, AUROC, AUPRC)
POS_LABEL = "Impaired"


def _pos_index(model, pos_label=POS_LABEL):
    """Return the position of ``pos_label`` within ``model.classes_``.

    Raises ValueError when the label is not one of the model's classes.
    """
    class_labels = list(model.classes_)
    return class_labels.index(pos_label)

# Extra scorers used below (pandas is re-imported here as in the original cell)
from sklearn.metrics import (
    balanced_accuracy_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
)
import pandas as pd

# Predicted probability of the "Impaired" class for every test recording
impaired_column = _pos_index(rf_model, "Impaired")
rf_proba_test = rf_model.predict_proba(X_test_scaled)[:, impaired_column]
rf_pred_test = y_pred  # hard predictions, already computed when the model was evaluated

# 1 where the true label is "Impaired", 0 otherwise -- input for AUROC / AUPRC
y_bin = (pd.Series(y_test) == "Impaired").astype(int).to_numpy()

print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, rf_pred_test):.3f}")
print(f"Macro F1: {f1_score(y_test, rf_pred_test, average='macro'):.3f}")
print(f"AUROC: {roc_auc_score(y_bin, rf_proba_test):.3f}")
print(f"AUPRC: {average_precision_score(y_bin, rf_proba_test):.3f}")

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import (
 accuracy_score,
 balanced_accuracy_score,
 f1_score,
 roc_auc_score,
 average_precision_score,
)

# Module-level generator; bootstrap_patient_ci builds its own from its `seed` argument
rng = np.random.default_rng(RANDOM_STATE)

# Build a dataframe to keep things aligned
POS_LABEL = "Impaired"
y_true = pd.Series(y_test)
# Always take the predictions produced together with rf_proba_test. The previous
# `"rf_pred" in globals()` lookup made the result depend on whatever stale
# variable happened to be left in the kernel (rf_pred is never defined here).
y_hat = pd.Series(rf_pred_test)
y_prob = pd.Series(rf_proba_test)
pid = pd.Series(test_patient_ids)

# One row per test recording: true label, predicted label, P(Impaired), patient id
test_df = pd.DataFrame(
    {
        "y_true": y_true.values,
        "y_hat": y_hat.values,
        "y_prob": y_prob.values,
        "pid": pid.values,
    }
)


# Helper: compute all metrics on a slice of test_df
def _compute_metrics(df):
    """Compute all evaluation metrics on a slice of the test dataframe.

    ``df`` needs the columns ``y_true``, ``y_hat`` and ``y_prob``. Returns a dict
    with Accuracy, BalancedAccuracy, MacroF1, AUROC and AUPRC. A ranking metric
    whose computation raises is reported as NaN.
    """
    truth, predicted = df["y_true"], df["y_hat"]

    # 1 where the true label equals POS_LABEL, 0 otherwise (for the threshold-free metrics)
    is_positive = (truth == POS_LABEL).astype(int).to_numpy()

    out = {
        "Accuracy": accuracy_score(truth, predicted),
        "BalancedAccuracy": balanced_accuracy_score(truth, predicted),
        "MacroF1": f1_score(truth, predicted, average="macro"),
    }

    # AUROC / AUPRC may be undefined if a resample contains a single class
    for metric_name, scorer in (("AUROC", roc_auc_score), ("AUPRC", average_precision_score)):
        try:
            out[metric_name] = scorer(is_positive, df["y_prob"].to_numpy())
        except Exception:
            out[metric_name] = np.nan
    return out


# Point estimates on the full, un-resampled test set; printed next to the bootstrap CIs below
pt = _compute_metrics(test_df)


# Clustered bootstrap over patients
def bootstrap_patient_ci(test_df, n_boot=5000, alpha=0.05, seed=RANDOM_STATE):
    """Patient-clustered percentile bootstrap confidence intervals.

    Each resample draws ``len(patients)`` patient ids with replacement, stacks
    all rows of the drawn patients, and evaluates ``_compute_metrics`` on them.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Columns ``y_true``, ``y_hat``, ``y_prob`` and ``pid`` (patient id per row).
    n_boot : int
        Number of bootstrap resamples.
    alpha : float
        Two-sided error level; the interval spans the alpha/2 and 1 - alpha/2 quantiles.
    seed : int or None
        Seed for the resampling generator.

    Returns
    -------
    ci : dict
        metric name -> (lower, upper, n_used). ``n_used`` is the number of
        resamples with a non-NaN value; the bounds are NaN when it is 0.
    pt : dict
        Point estimates computed on ``test_df`` itself.

    Raises
    ------
    ValueError
        If ``test_df`` has no rows.
    """
    metric_names = ["Accuracy", "BalancedAccuracy", "MacroF1", "AUROC", "AUPRC"]

    if len(test_df) == 0:
        raise ValueError("test_df is empty; nothing to bootstrap")

    rng = np.random.default_rng(seed)
    pid_values = test_df["pid"].to_numpy()
    pids = test_df["pid"].unique()

    # Locate each patient's rows once instead of re-scanning the frame per draw
    rows_of = {p: np.flatnonzero(pid_values == p) for p in pids}

    samples = {k: [] for k in metric_names}
    for _ in range(n_boot):
        # sample patients with replacement
        boot_pids = rng.choice(pids, size=len(pids), replace=True)
        take = np.concatenate([rows_of[p] for p in boot_pids])
        m = _compute_metrics(test_df.iloc[take])
        for k in metric_names:
            samples[k].append(m[k])

    # Percentile CIs, computed once after all resamples (drop NaNs for AUROC/AUPRC)
    ci = {}
    for k in metric_names:
        arr = np.asarray(samples[k], dtype=float)
        arr = arr[~np.isnan(arr)]
        if arr.size:
            lo, hi = np.quantile(arr, [alpha / 2, 1 - alpha / 2])
        else:
            # No usable resample for this metric: report an undefined interval
            lo = hi = np.nan
        ci[k] = (lo, hi, arr.size)

    # Point estimates come from the frame that was passed in, not from a notebook global
    return ci, _compute_metrics(test_df)


# Run the patient-level bootstrap; `pt` is rebound to the point estimates it returns
ci, pt = bootstrap_patient_ci(test_df, n_boot=5000, alpha=0.05, seed=RANDOM_STATE)


# Pretty print
def _fmt(v):
    """Render a number as fixed-point text with three decimals."""
    return format(v, ".3f")


# One row per metric: point estimate, CI bounds, and the number of usable resamples
print("Patient-clustered 95% bootstrap CIs (percentile):")
reported_metrics = ("Accuracy", "BalancedAccuracy", "MacroF1", "AUROC", "AUPRC")
for k, (lo, hi, n_used) in ((name, ci[name]) for name in reported_metrics):
    print(f" {k:17s} { _fmt(pt[k]) } [{_fmt(lo)}, {_fmt(hi)}] (boot n={n_used})")

In [None]:
# === LOPO (leave-one-patient-out) — recording-level evaluation (arms) ===
# Assumes: combined_data, feature_cols, create_rf_model, RANDOM_STATE exist.

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
 accuracy_score,
 balanced_accuracy_score,
 f1_score,
 classification_report,
)

POS_LABEL = "Impaired"  # arm label considered positive

X_all = combined_data[feature_cols].to_numpy()
y_all = combined_data["Arm Type"].to_numpy()  # Healthy / Impaired (arm-level)
pids = combined_data["Patient"].to_numpy()

unique_pids = np.array(sorted(pd.unique(pids)))

# Storage for pooled (micro) metrics across all LOPO folds
pooled_y_true = []
pooled_y_pred = []

# Per-patient metrics table
rows = []

# Loop variables carry a `lopo_` prefix so this cell does not rebind names owned by
# earlier cells: `scaler` (paired with rf_model) and `y_hat` / `pid` (bootstrap cell).
for lopo_pid in unique_pids:
    lopo_test_mask = pids == lopo_pid
    lopo_train_mask = ~lopo_test_mask

    # Fit scaler on train-patients only (no leakage)
    lopo_scaler = StandardScaler()
    X_tr = lopo_scaler.fit_transform(X_all[lopo_train_mask])
    X_te = lopo_scaler.transform(X_all[lopo_test_mask])
    y_tr = y_all[lopo_train_mask]
    y_te = y_all[lopo_test_mask]

    # Train one model per patient (train on others, test on this one)
    lopo_model = create_rf_model()
    lopo_model.fit(X_tr, y_tr)

    lopo_pred = lopo_model.predict(X_te)

    # Collect pooled (micro) predictions
    pooled_y_true.extend(y_te.tolist())
    pooled_y_pred.extend(lopo_pred.tolist())

    # Per-patient metrics (recording-level within that patient)
    rows.append(
        {
            "Patient": lopo_pid,
            "n_recordings": int(lopo_test_mask.sum()),
            "n_Healthy": int(np.sum(y_te == "Healthy")),
            "n_Impaired": int(np.sum(y_te == POS_LABEL)),
            "Acc": accuracy_score(y_te, lopo_pred),
            "BalancedAcc": balanced_accuracy_score(y_te, lopo_pred),
            "MacroF1": f1_score(y_te, lopo_pred, average="macro"),
        }
    )

# ---- Aggregate results ----
# One row per held-out patient, ordered by patient id
per_patient_df = pd.DataFrame(rows).sort_values("Patient").reset_index(drop=True)

# Macro over patients: every patient counts once, regardless of recording count
macro_over_patients = {
    metric: per_patient_df[metric].mean() for metric in ("Acc", "BalancedAcc", "MacroF1")
}

# Micro (pooled): score all LOPO predictions together as one set of recordings
pooled_y_true = np.array(pooled_y_true)
pooled_y_pred = np.array(pooled_y_pred)

micro_acc = accuracy_score(pooled_y_true, pooled_y_pred)
micro_bacc = balanced_accuracy_score(pooled_y_true, pooled_y_pred)
micro_f1m = f1_score(pooled_y_true, pooled_y_pred, average="macro")

# Header with cohort size
print("=== LOPO (recording-level, arm labels) ===")
print(f"Patients: {len(unique_pids)} | Total recordings: {len(pooled_y_true)}")

# Pooled metrics
print("\n-- Micro (pooled across all LOPO folds) --")
print(f" Accuracy : {micro_acc:.3f}")
print(f" BalancedAccuracy: {micro_bacc:.3f}")
print(f" Macro F1 : {micro_f1m:.3f}")

# Patient-averaged metrics
print("\n-- Macro over patients (mean of per-patient metrics) (I would deem this more relevant) --")
print(f" Accuracy : {macro_over_patients['Acc']:.3f}")
print(f" BalancedAccuracy: {macro_over_patients['BalancedAcc']:.3f}")
print(f" Macro F1 : {macro_over_patients['MacroF1']:.3f}")

# Full per-patient table
print("\nPer-patient (recording-level) metrics:")
print(per_patient_df.to_string(index=False))

# Class-wise report over the pooled predictions
print("\nPooled classification report (across all LOPO predictions):")
print(classification_report(pooled_y_true, pooled_y_pred, digits=3))