In [1]:
import numpy as np
from pathlib import Path

processed_dir = Path("../data_processed")

# Load all chb01_* feature files
feature_files = sorted(processed_dir.glob("chb01_*_features.npy"))
print("Found feature files:")
for f in feature_files:
    print(" -", f.name)

X_list = [np.load(f) for f in feature_files]
X = np.vstack(X_list)

print("Combined feature matrix shape:", X.shape)

Found feature files:
 - chb01_01_features.npy
 - chb01_02_features.npy
 - chb01_03_features.npy
 - chb01_04_features.npy
 - chb01_05_features.npy
 - chb01_06_features.npy
 - chb01_07_features.npy
 - chb01_08_features.npy
Combined feature matrix shape: (5760, 69)


In [2]:
windows_per_file = 720  # from your preprocessing

seizure_files = {"chb01_03", "chb01_04"}  # files that contain seizures

y_list = []
for f in feature_files:
    base = f.stem.replace("_features", "")  # e.g. "chb01_03"
    label = 1 if base in seizure_files else 0
    y_list.append(np.full(windows_per_file, label, dtype=int))

y = np.concatenate(y_list)

print("Label vector shape:", y.shape)
print("Class balance (mean should be between 0 and 1):", y.mean())

Label vector shape: (5760,)
Class balance (mean should be between 0 and 1): 0.25


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

# Train/test split with stratification to keep class ratio
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

clf = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=1000)
)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
y_prob = clf.predict_proba(X_test)[:, 1]

print(classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_prob))

              precision    recall  f1-score   support

           0       0.75      1.00      0.86       864
           1       0.00      0.00      0.00       288

    accuracy                           0.75      1152
   macro avg       0.38      0.50      0.43      1152
weighted avg       0.56      0.75      0.64      1152

ROC AUC: 0.37673611111111105


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
