In [None]:
import re
import sys

import matplotlib.pyplot as plt
import mne
import numpy as np
from scipy.io import *
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, classification_report, log_loss
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

### Loading files

In [None]:
# Directory that holds every epoched EEGLAB .set file used below.
# NOTE(review): the eyes-open files are also read from the "Eyes_closed_marked"
# folder, exactly as in the original paths -- confirm that this is intended.
DATA_DIR = r'E:\ChristianMusaeus\Data\Eyes_closed_marked'

# Single source of truth for the subjects. Both file lists are derived from it,
# so they cannot drift apart. The hand-written eyes-closed list previously
# contained subject 10135 twice and was missing subject 10002.
# NOTE(review): confirm that 10002_p01_epoched_60EpochsMarked.set exists on disk.
SUBJECT_IDS = [
    10002, 10135, 10136, 10138, 10139, 10140, 10142, 10148, 10155, 10158,
    10160, 10161, 10165, 10166, 10169, 10171, 10174, 10175, 10188, 10189,
    10190, 10192, 10193, 10194, 10195, 10203, 10204, 10207, 10209, 10213,
]

# One eyes-open recording per subject, in ascending subject order.
eyes_open_files = [
    f"{DATA_DIR}\\{subject}_p01_epoched_EyesOpen_marked.set"
    for subject in SUBJECT_IDS
]

# One eyes-closed recording per subject; descending order is kept to match the
# ordering of the original hand-written list.
eyes_closed_files = [
    f"{DATA_DIR}\\{subject}_p01_epoched_60EpochsMarked.set"
    for subject in reversed(SUBJECT_IDS)
]

# Guard against accidental duplicates if the subject list is edited later.
assert len(set(SUBJECT_IDS)) == len(SUBJECT_IDS), "duplicate subject ID in SUBJECT_IDS"

set_files = eyes_open_files+eyes_closed_files

### Pre-processing

In [None]:
# Accumulators filled once per .set file.
X_list = []        # per-file feature matrices (flattened PSD)
y_list = []        # per-file class labels (0 = eyes-open file, 1 = eyes-closed file)
subject_ids = []   # one subject ID per retained epoch, used as CV groups

# Set lookup so the class of each file is resolved without scanning the list.
eyes_closed_lookup = set(eyes_closed_files)

# loop through each recording
for file in set_files:
    # Parse the subject ID first so a badly named path fails before any
    # expensive I/O is done.
    match = re.search(r'\\(\d{5})_', file)
    if not match:
        raise ValueError(f"Could not extract subject ID from path: {file}")
    subject_id = int(match.group(1))

    # Load the epoched data with MNE.
    epochs = mne.io.read_epochs_eeglab(file)

    # Read the same .set file as a MATLAB file to get the per-epoch 0/1 flags.
    mat = loadmat(file, struct_as_record=False, squeeze_me=True)
    rejmanual = mat['reject'].rejmanual

    # squeeze_me=True collapses a single-epoch array to a scalar; atleast_1d
    # keeps the flags indexable in that case.
    labels = np.atleast_1d(np.array(rejmanual, dtype=int))

    # Power spectral density for every epoch of this recording.
    psd = epochs.compute_psd()
    psd_data = psd.get_data()  # Shape: (n_epochs, n_channels, n_freqs)

    # The flags are applied as a boolean mask over epochs, so there must be
    # exactly one flag per epoch. Fail with a clear message instead of an
    # opaque boolean-index error.
    if labels.shape[0] != psd_data.shape[0]:
        raise ValueError(
            f"{file}: found {labels.shape[0]} rejmanual flags "
            f"for {psd_data.shape[0]} epochs"
        )

    # Drop epochs whose PSD contains NaN values.
    nan_epochs = np.isnan(psd_data).any(axis=(1, 2))
    psd_data_cleaned = psd_data[~nan_epochs]
    labels_cleaned = labels[~nan_epochs]

    # Keep only the epochs whose rejmanual flag is 0.
    # NOTE(review): this retains the epochs that are NOT flagged in rejmanual;
    # confirm this matches the intended meaning of "marked" for this dataset.
    eyes_marked = labels_cleaned == 0
    psd_data_marked = psd_data_cleaned[eyes_marked]

    # The class label comes from the file type, not from rejmanual.
    n_kept = psd_data_marked.shape[0]
    if file in eyes_closed_lookup:
        final_labels = np.ones(n_kept, dtype=int)
    else:
        final_labels = np.zeros(n_kept, dtype=int)

    # Flatten each epoch to one feature vector for the classifier.
    psd_data_final = psd_data_marked.reshape(n_kept, -1)  # Shape: (n_epochs, n_channels * n_freqs)

    X_list.append(psd_data_final)
    y_list.append(final_labels)
    subject_ids.extend([subject_id] * n_kept)


X_combined = np.vstack(X_list)  # shape: (total_epochs, n_channels * n_freqs)
y_combined = np.hstack(y_list)  # shape: (total_epochs,)
subject_ids = np.array(subject_ids)

print(subject_ids)

### SVM

In [None]:
X = X_combined  # features
y = y_combined  # labels
groups = subject_ids  # group labels for leave-one-group-out cross-validation (one group per subject)

# Outer loop: leave-one-subject-out cross-validation.
logo = LeaveOneGroupOut()
n_folds = logo.get_n_splits(groups=groups)  # derived from the data, not hard-coded

# Upper bound on the number of subject-wise inner folds used for calibration.
N_CALIBRATION_FOLDS = 5

log_losses = []
accuracies = []

for fold_idx, (train_idx, test_idx) in enumerate(logo.split(X, y, groups), 1):
    print(f"Processing fold {fold_idx}/{n_folds}...")

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    groups_train = groups[train_idx]

    # Standardise with statistics from the training subjects only.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Subject-wise inner splits for calibration. The sigmoid calibrator must be
    # fitted on samples the SVM has not been trained on; calibrating a prefit
    # model on its own training data gives over-confident probabilities and a
    # biased log loss. Splitting by subject keeps epochs of one person together.
    n_inner_folds = min(N_CALIBRATION_FOLDS, np.unique(groups_train).size)
    inner_splits = list(
        GroupKFold(n_splits=n_inner_folds).split(X_train_scaled, y_train, groups_train)
    )

    # Unfitted SVM template; CalibratedClassifierCV fits a clone per inner split.
    base_svc = SVC(kernel='rbf', probability=False, random_state=42)

    # Calibrating the SVM using the sigmoid method on the held-out inner folds.
    calibrated_model = CalibratedClassifierCV(base_svc, method='sigmoid', cv=inner_splits)
    calibrated_model.fit(X_train_scaled, y_train)

    # Predicted probability of class 1 and hard labels for the held-out subject.
    prob = calibrated_model.predict_proba(X_test_scaled)[:, 1]
    y_test_pred = calibrated_model.predict(X_test_scaled)

    # labels=[0, 1] keeps log_loss defined even if a test fold has one class.
    test_loss = log_loss(y_test, prob, labels=[0, 1])
    accuracy = accuracy_score(y_test, y_test_pred)

    log_losses.append(test_loss)
    accuracies.append(accuracy)

    print(f"Fold {fold_idx} - Log Loss: {test_loss:.4f}, Accuracy: {accuracy:.4f}")

# summary
print("\nCross-validation complete!")
print(f"Mean Log Loss: {np.mean(log_losses):.4f}")
print(f"Mean Accuracy: {np.mean(accuracies):.4f}")