In [1]:
#SVM 
import os
import numpy as np
import mne
import pickle 
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Held-out test patients: the ones with the most epochs left after processing.
data_dir = "preprocessed_epochs"
test_patients_sd = [
    "52", "18", "29", "17", "34", "55",
    "10", "22", "68", "19", "42", "63",
]
test_patients_ns = [
    "01", "19", "30", "65", "10", "13",
    "25", "69", "24", "33", "38", "67",
]

# Band name -> (low, high) edges, passed as fmin/fmax to the PSD computation.
frequency_bands = dict(
    theta=(4, 8),
    alpha=(8, 13),
    beta=(13, 30),
)

# Region name -> channel names whose PSD is averaged into that region's feature.
region_channels = dict(
    frontal=["Fp1", "Fp2", "Fz", "F3", "F4"],
    central=["Cz", "C3", "C4"],
    parietal=["Pz", "P3", "P4"],
    occipital=["O1", "O2"],
)

# (patient, session) pairs under test: session "1" for every NS patient first,
# followed by session "2" for every SD patient.
test_sessions = (
    [(ns_id, "1") for ns_id in test_patients_ns]
    + [(sd_id, "2") for sd_id in test_patients_sd]
)


In [None]:
#FEATURE EXTRACTION
# For every "<patient>_<session>" recording, build one feature row per epoch and
# pickle the whole collection to `features_file`.
#
# Columns of each feature matrix, in order:
#   * mean PSD per region for each band in `frequency_bands` (bands x regions)
#   * band-power ratios per region: theta/alpha, theta/beta, alpha/beta
# Sessions with fewer than 10 epochs, or with a region that has no channel in
# the PSD, are skipped.
patient_features_dict = {}
features_file = "patient_features_dict.pkl"

# (numerator, denominator) band pairs; ratio columns are appended in this order.
ratio_pairs = [("theta", "alpha"), ("theta", "beta"), ("alpha", "beta")]

# The epochs path is rebuilt from the parsed ids, so several .fif files sharing
# a sub/ses prefix resolve to the same file; remember what was already handled.
seen_sessions = set()

# sorted(): os.listdir returns entries in arbitrary order, which would make the
# dict order (and the row order of the training matrix built from it) vary.
for listed_name in sorted(os.listdir(data_dir)):
    if not listed_name.endswith(".fif"):
        continue
    patient_id = listed_name.split("_")[0].split("-")[1]
    session = listed_name.split("_")[1].split("-")[1]
    unique_patient_id = f"{patient_id}_{session}"
    if unique_patient_id in seen_sessions:
        continue
    seen_sessions.add(unique_patient_id)

    epochs_name = f"sub-{patient_id}_ses-{session}_retained-epochs.fif"
    file_path = os.path.join(data_dir, epochs_name)
    epochs = mne.read_epochs(file_path, preload=True)

    if len(epochs) < 10:
        continue

    # Band name -> array with one row per epoch and one column per region.
    # Rebuilt for every session so nothing carries over from the previous file.
    band_power = {}
    missing_region = None

    for band_name, (fmin, fmax) in frequency_bands.items():
        psd = epochs.compute_psd(method="welch", fmin=fmin, fmax=fmax, n_fft=512)
        psd_data = psd.get_data()
        # Index against the PSD's own channel list: compute_psd keeps only the
        # picked data channels, so positions in epochs.info["ch_names"] are not
        # guaranteed to line up with the channel axis of psd_data.
        psd_ch_names = list(psd.ch_names)

        region_features = []
        for region, channels in region_channels.items():
            region_indices = [psd_ch_names.index(ch) for ch in channels if ch in psd_ch_names]
            if not region_indices:
                # Averaging over zero channels would silently produce NaN features.
                missing_region = region
                break
            # Average over the region's channels, then over the band's frequency bins.
            region_psd = psd_data[:, region_indices, :].mean(axis=1)
            region_features.append(region_psd.mean(axis=1))

        if missing_region is not None:
            break

        band_power[band_name] = np.column_stack(region_features)

    if missing_region is not None:
        print(f"Skipping {epochs_name}: no channels found for region '{missing_region}'")
        continue

    patient_epoch_features = [band_power[band_name] for band_name in frequency_bands]

    for numerator, denominator in ratio_pairs:
        if numerator in band_power and denominator in band_power:
            # The small constant guards against division by zero.
            patient_epoch_features.append(band_power[numerator] / (band_power[denominator] + 1e-10))

    patient_epoch_features = np.hstack(patient_epoch_features)
    print(patient_epoch_features.shape)

    label = 0 if session == "1" else 1  # 0 for NS, 1 for SD
    patient_features_dict[unique_patient_id] = {"features": patient_epoch_features, "label": label}

with open(features_file, 'wb') as f:
    pickle.dump(patient_features_dict, f)


In [None]:
#TRAIN MODEL AND LEAVE 2 PATIENT OUT FOLD 
# For each SVM configuration, run one fold per (NS patient, SD patient) pair:
# the NS patient's session "1" and the SD patient's session "2" form the test
# set, every other entry of the features dict is used for training.
# Accuracy is reported per epoch and per session (majority vote over a
# session's epochs); per-configuration totals are stored in `results`.
from collections import Counter
features_file = "patient_features_dict.pkl"

# NOTE: pickle.load can execute arbitrary code. Only load a features file that
# was produced by the feature-extraction cell of this notebook.
with open(features_file, 'rb') as f:
    patient_features_dict = pickle.load(f)

pairs = list(zip(test_patients_ns, test_patients_sd))

svm_configs = [
    {"kernel": "linear", "C": 1.0, "class_weight": "balanced"},
    {"kernel": "rbf", "C": 1.0, "gamma": "scale", "class_weight": "balanced"},
    {"kernel": "poly", "C": 1.0, "degree": 3, "class_weight": "balanced"},
    {"kernel": "sigmoid", "C": 1.0, "class_weight": "balanced"}
]

# Kernel name -> overall metrics for that configuration.
results = {}

for config in svm_configs:
    print(f"\nEvaluating SVM with configuration: {config}")
    
    total_correct_sessions = 0
    total_correct_epochs = 0
    # Counted from the sessions actually evaluated, so a test session that is
    # absent from the features dict does not distort the denominator.
    total_sessions = 0
    total_epochs = 0
    for fold, (ns_patient, sd_patient) in enumerate(pairs):
        train_features = []
        train_labels = []
        test_features = []
        test_labels = []
        test_patient_data = []

        # NOTE(review): only the two (patient, session) pairs under test are
        # held out. If the same patient id also has its other session in the
        # dict, that session stays in the training set -- confirm this is the
        # intended meaning of "leave 2 patients out".
        for unique_patient_session, data in patient_features_dict.items():
            patient_id, session = unique_patient_session.split("_")
            num_epochs = data["features"].shape[0]

            if (patient_id == ns_patient and session == "1") or (patient_id == sd_patient and session == "2"):
                test_features.append(data["features"])
                test_labels.extend([data["label"]] * num_epochs)
                test_patient_data.append(data)
                continue

            train_features.append(data["features"])
            train_labels.extend([data["label"]] * num_epochs)

        if not test_features or not train_features:
            # np.vstack raises on an empty list; report the fold instead.
            print(f"Fold {fold + 1}: skipped, no train/test data for NS patient {ns_patient} / SD patient {sd_patient}")
            continue

        X_train = np.vstack(train_features)
        y_train = np.array(train_labels)

        X_test = np.vstack(test_features)
        y_test = np.array(test_labels)

        # Scaler and PCA are fitted on the training fold only and then applied
        # to the test fold.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        pca = PCA(n_components=0.95)  # Retain 95% variance
        X_train_reduced = pca.fit_transform(X_train_scaled)
        X_test_reduced = pca.transform(X_test_scaled)
        
        svm = SVC(**config, random_state=42)
        svm.fit(X_train_reduced, y_train)

        y_pred = svm.predict(X_test_reduced)

        epoch_accuracy = accuracy_score(y_test, y_pred)
        print(f"Fold {fold + 1} Epoch-Level Accuracy: {epoch_accuracy:.2f}")

        epoch_correct = int(np.sum(y_test == y_pred))
        fold_total_epochs = len(y_test)
        total_correct_epochs += epoch_correct
        total_epochs += fold_total_epochs

        # Session-level prediction: majority vote over the session's epochs.
        # y_pred rows follow the order in which test sessions were stacked.
        session_predictions = []
        session_labels = []
        start_idx = 0
        for data in test_patient_data:
            num_epochs = data["features"].shape[0]
            session_pred = y_pred[start_idx:start_idx + num_epochs]
            majority_label = Counter(session_pred).most_common(1)[0][0]
            session_predictions.append(majority_label)
            session_labels.append(data["label"])
            start_idx += num_epochs

        session_accuracy = accuracy_score(session_labels, session_predictions)
        correct_sessions = int(np.sum(np.array(session_labels) == np.array(session_predictions)))
        fold_total_sessions = len(session_labels)
        print(f"Fold {fold + 1} Session-Level Accuracy: {session_accuracy:.2f}")
        total_correct_sessions += correct_sessions
        total_sessions += fold_total_sessions

        print("Session-Level Classification Report:")
        # zero_division=0 reports the same values as the default but without a
        # warning for every fold in which a class is never predicted.
        print(classification_report(session_labels, session_predictions, zero_division=0))

    # Guard against a configuration for which every fold was skipped.
    overall_epoch_accuracy = total_correct_epochs / total_epochs if total_epochs else float("nan")
    overall_session_accuracy = total_correct_sessions / total_sessions if total_sessions else float("nan")

    results[config["kernel"]] = {
        "config": config,
        "epoch_accuracy": overall_epoch_accuracy,
        "session_accuracy": overall_session_accuracy,
        "correct_epochs": total_correct_epochs,
        "total_epochs": total_epochs,
        "correct_sessions": total_correct_sessions,
        "total_sessions": total_sessions,
    }

    print(f"\nOverall Epoch-Level Accuracy: {overall_epoch_accuracy:.2f}")  
    print(f"Total Correct Predictions (Epoch-Level): {total_correct_epochs}/{total_epochs}")
    print(f"\nOverall Session-Level Accuracy: {overall_session_accuracy:.2f}")
    print(f"Total Correct Predictions (Session-Level): {total_correct_sessions}/{total_sessions}")