# RF and SVM

In [None]:
import mne
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import *
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
import sys
import re

## Loading and Preprocessing data

In [None]:
# Folder holding every epoched EEGLAB .set file used in this notebook.
# NOTE(review): the eyes-open recordings are read from the "Eyes_closed_marked"
# folder as well, exactly as in the original hard-coded paths - confirm that
# this is where they really live.
DATA_DIR = r'E:\ChristianMusaeus\Data\Eyes_closed_marked'

# One entry per subject. Both file lists are derived from this single list so
# that they cannot drift apart.
SUBJECT_IDS = [
    10002, 10135, 10136, 10138, 10139, 10140, 10142, 10148, 10155, 10158,
    10160, 10161, 10165, 10166, 10169, 10171, 10174, 10175, 10188, 10189,
    10190, 10192, 10193, 10194, 10195, 10203, 10204, 10207, 10209, 10213,
]

# Eyes-open recordings, in ascending subject order.
eyes_open_files = [
    DATA_DIR + '\\' + f'{sid}_p01_epoched_EyesOpen_marked.set'
    for sid in SUBJECT_IDS
]

# Eyes-closed recordings, in descending subject order (the order of the
# original hand-written list is kept).
# Fix: the hand-written list named subject 10135 twice and never named 10002,
# so one recording was loaded twice and 10002 had no eyes-closed data.
# NOTE(review): assumes 10002_p01_epoched_60EpochsMarked.set exists in
# DATA_DIR - confirm.
eyes_closed_files = [
    DATA_DIR + '\\' + f'{sid}_p01_epoched_60EpochsMarked.set'
    for sid in reversed(SUBJECT_IDS)
]

In [None]:
set_files = eyes_open_files + eyes_closed_files

# Set of eyes-closed paths for a constant-time class lookup inside the loop.
eyes_closed_lookup = set(eyes_closed_files)

# Five-digit subject ID that follows a path separator (e.g. 10136).
# Both "\" and "/" are accepted so the notebook also works with POSIX paths.
subject_id_pattern = re.compile(r'[\\/](\d{5})_')

# Per-file results, concatenated after the loop
X_list = []  # Features (flattened PSD, one row per kept epoch)
y_list = []  # Class labels (1 = eyes-closed file, 0 = eyes-open file)
subject_ids = []  # Subject ID of every row, used later as CV groups


# Loop through each .set file (one eyes-open and one eyes-closed file per subject)
for file in set_files:
    # Extract the numeric subject ID first so a badly named file fails
    # before any expensive loading is done.
    match = subject_id_pattern.search(file)
    if match is None:
        raise ValueError(f"Could not extract subject ID from path: {file}")
    subject_id = int(match.group(1))

    # Load the epoched EEG data for this file
    epochs = mne.io.read_epochs_eeglab(file)

    # Load the same .set file as a MATLAB struct to read the per-epoch
    # manual marking flags (array of 0 and 1).
    mat = loadmat(file, struct_as_record=False, squeeze_me=True)
    rejmanual = mat['reject'].rejmanual

    # Use the flags directly as per-epoch labels. (Original note said
    # "0 = eyes closed"; the code below keeps only epochs flagged 0 and takes
    # the class from the file list - TODO confirm the intended meaning.)
    # atleast_1d: squeeze_me collapses a single-epoch recording to a scalar.
    labels = np.atleast_1d(np.asarray(rejmanual, dtype=int))

    # Compute the PSD for the current file
    psd = epochs.compute_psd()
    psd_data = psd.get_data()  # Shape: (n_epochs, n_channels, n_freqs)

    # The flags are applied as a boolean mask over epochs, so there must be
    # exactly one flag per epoch.
    if labels.shape[0] != psd_data.shape[0]:
        raise ValueError(
            f"{file}: found {labels.shape[0]} rejmanual flags "
            f"for {psd_data.shape[0]} epochs"
        )

    # Drop epochs whose PSD contains missing values
    nan_epochs = np.isnan(psd_data).any(axis=(1, 2))
    psd_data_cleaned = psd_data[~nan_epochs]
    labels_cleaned = labels[~nan_epochs]

    # Keep only the epochs whose flag is 0
    eyes_marked = labels_cleaned == 0
    psd_data_marked = psd_data_cleaned[eyes_marked]
    n_kept = psd_data_marked.shape[0]

    # The class label comes from the file list, not from the flags
    if file in eyes_closed_lookup:
        final_labels = np.ones(n_kept, dtype=int)
    else:
        final_labels = np.zeros(n_kept, dtype=int)

    # Flatten each epoch to one feature row: (n_epochs, n_channels * n_freqs)
    psd_data_final = psd_data_marked.reshape(n_kept, -1)

    X_list.append(psd_data_final)
    y_list.append(final_labels)
    subject_ids.extend([subject_id] * n_kept)


# Concatenate data from all files into one large dataset
X_combined = np.vstack(X_list)  # Shape: (total_epochs, n_channels * n_freqs)
y_combined = np.hstack(y_list)  # Shape: (total_epochs,)
subject_ids = np.array(subject_ids)

print(subject_ids)


## SVM

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix, log_loss
)

# Input
X = X_combined
y = y_combined
groups = subject_ids

# Setup: leave-one-subject-out outer loop
logo = LeaveOneGroupOut()
n_folds = logo.get_n_splits(X, y, groups)

# Upper bound on the number of subject-wise inner folds used for calibration
max_calibration_splits = 5

log_losses = []
accuracies = []

all_probs_svm = []
all_y_true_folds_svm = []
all_y_pred_folds = []

# Cross-validation
for fold_idx, (train_idx, test_idx) in enumerate(logo.split(X, y, groups), 1):
    print(f"Processing fold {fold_idx}/{n_folds}...")

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    train_groups = groups[train_idx]

    # Scaling statistics come from the training subjects only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Fix: the sigmoid calibrator used to be fitted (cv='prefit') on the very
    # data the SVC had been trained on, which gives biased, over-confident
    # probabilities; 'prefit' is also deprecated in recent scikit-learn.
    # The calibrator is now fitted on held-out inner folds, split by subject
    # so that no subject is in both the SVC-training and calibration part.
    # This fits one SVC per inner fold, so each outer fold takes longer.
    n_inner_splits = min(max_calibration_splits, len(np.unique(train_groups)))
    calibration_splits = list(
        GroupKFold(n_splits=n_inner_splits).split(
            X_train_scaled, y_train, train_groups
        )
    )

    base_svc = SVC(kernel='rbf', probability=False, random_state=42)
    calibrated_model = CalibratedClassifierCV(
        base_svc, method='sigmoid', cv=calibration_splits
    )
    calibrated_model.fit(X_train_scaled, y_train)

    # Predictions
    prob = calibrated_model.predict_proba(X_test_scaled)[:, 1]
    y_pred = (prob >= 0.5).astype(int)  # thresholded prediction

    # Store for ensembling
    all_probs_svm.append(prob)
    all_y_true_folds_svm.append(y_test)
    all_y_pred_folds.append(y_pred)

    # Metrics (labels given explicitly in case a held-out subject has only one class)
    acc = accuracy_score(y_test, y_pred)
    loss = log_loss(y_test, prob, labels=[0, 1])

    accuracies.append(acc)
    log_losses.append(loss)

    print(f"Fold {fold_idx} - Log Loss: {loss:.4f}, Accuracy: {acc:.4f}")

# Combine predictions and truths
all_y_true_flat = np.concatenate(all_y_true_folds_svm)
all_y_pred_flat = np.concatenate(all_y_pred_folds)


In [None]:
# SVM summary: fold-averaged scores first, then the pooled per-class report
print("\n Cross-validation complete")

mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy:  {mean_accuracy:.4f}")

mean_log_loss = np.mean(log_losses)
print(f"Mean Log Loss:  {mean_log_loss:.4f}")

print("\n Classification Report:")
svm_report = classification_report(
    all_y_true_flat,
    all_y_pred_flat,
    zero_division=0,
)
print(svm_report)

## Confusion matrix SVM

In [None]:
# Pooled confusion matrix over all held-out epochs, drawn as an annotated heatmap
cm = confusion_matrix(all_y_true_flat, all_y_pred_flat)

predicted_tick_labels = ["Predicted: 0", "Predicted: 1"]
true_tick_labels = ["True: 0", "True: 1"]

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=predicted_tick_labels,
    yticklabels=true_tick_labels,
    ax=ax,
)
ax.set_title("Confusion Matrix - SVM")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Subject IDs in ascending order (np.unique returns its result sorted).
# NOTE(review): assumes the SVM folds were produced in this same order, so
# that accuracies[i] belongs to unique_subjects_ordered[i] - confirm.
unique_subjects_ordered = np.unique(groups)

# One accuracy value is expected per held-out subject
assert len(accuracies) == len(unique_subjects_ordered)

sorted_indices = np.argsort(unique_subjects_ordered)
sorted_subjects = np.take(unique_subjects_ordered, sorted_indices).astype(str)
sorted_accuracies = np.asarray(accuracies)[sorted_indices]


# Bar chart: one bar per held-out subject
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(sorted_subjects, sorted_accuracies, color='#2d4987')
ax.set_title("Accuracy per Subject - SVM")
ax.set_xlabel("Subject ID")
ax.set_ylabel("Accuracy")
ax.set_ylim(0, 1)
ax.tick_params(axis='x', labelrotation=45)
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt 

# Pool the held-out class-1 probabilities of every fold into one vector
all_probs_flat = np.concatenate(all_probs_svm)

# ROC curve and its area, computed on the pooled predictions
fpr, tpr, thresholds = roc_curve(all_y_true_flat, all_probs_flat)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(fpr, tpr, label=f"SVM ROC (AUC = {roc_auc:.2f})", color="#2d4987")
ax.plot([0, 1], [0, 1], "k--")  # diagonal reference line
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve - SVM")
ax.legend(loc="lower right")
ax.grid(True)
fig.tight_layout()
plt.show()

## Random Forest

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, f1_score, recall_score, precision_score
)
from sklearn.preprocessing import StandardScaler

# Preprocessed data
X = X_combined
y = y_combined
groups = subject_ids
unique_subjects = np.unique(groups)

# Storage: pooled values (flat lists) and per-fold arrays
y_true_all = []
y_pred_all = []
y_prob_all = []
per_fold_accuracy = []
all_probs_rf = []
all_y_true_folds_rf = []
all_y_pred_folds_rf = []

# Fix: the status messages contained mis-encoded emoji (mojibake); the
# intended characters are restored here and in the prints below.
print("✅ Running Random Forest with Leave-One-Subject-Out Cross-Validation...")

# One fold per subject: that subject's epochs are the test set, all other
# subjects form the training set.
for subject in unique_subjects:
    test_mask = groups == subject
    train_mask = ~test_mask

    X_train, y_train = X[train_mask], y[train_mask]
    X_test, y_test = X[test_mask], y[test_mask]

    # Standardize features using training-set statistics only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train Random Forest
    clf = RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        random_state=42,
        n_jobs=-1
    )
    clf.fit(X_train_scaled, y_train)

    y_pred = clf.predict(X_test_scaled)
    y_prob = clf.predict_proba(X_test_scaled)[:, 1]  # probability of class 1

    # Pooled values for the overall metrics
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)
    y_prob_all.extend(y_prob)
    # Per-fold arrays
    all_probs_rf.append(y_prob)
    all_y_true_folds_rf.append(y_test)
    all_y_pred_folds_rf.append(y_pred)

    # Per-fold accuracy
    acc = accuracy_score(y_test, y_pred)
    per_fold_accuracy.append(acc)

# --- Final Evaluation ---
print("\n📊 Fold-by-fold Accuracy:")
for fold_number, acc in enumerate(per_fold_accuracy, 1):
    print(f"Fold {fold_number:02}: Accuracy = {acc:.4f}")

print(f"\n✅ Mean Accuracy across folds: {np.mean(per_fold_accuracy):.4f}")

# Stored here and printed by a later cell
random_forest_classification_report = classification_report(y_true_all, y_pred_all, target_names=["Eyes Open", "Eyes Closed"])

cm = confusion_matrix(y_true_all, y_pred_all)
print("Confusion Matrix:\n", cm)

overall_acc = accuracy_score(y_true_all, y_pred_all)
print(f"Overall accuracy: {overall_acc}")

# AUC and ROC Curve on the pooled predictions
fpr, tpr, thresholds = roc_curve(y_true_all, y_prob_all)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}", color="#2d4987")
plt.plot([0, 1], [0, 1], "k--")  # diagonal reference line
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve - Random Forest")
plt.legend(loc="lower right")
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Mean of the per-subject Random Forest accuracies.
# Fix: the check-mark emoji in this message was mis-encoded (mojibake).
print(f"\n✅ Mean Accuracy across folds: {np.mean(per_fold_accuracy):.4f}")

In [None]:
# Print the Random Forest classification report stored in
# `random_forest_classification_report`.
print(random_forest_classification_report)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Fix: this cell used to rebind the globals `subject_ids` (the per-epoch
# array used as CV groups) and `accuracies` (the SVM fold scores). Re-running
# an earlier cell afterwards then broke or plotted the wrong data. Dedicated
# names are used instead.
rf_subject_labels = unique_subjects.astype(str)
rf_accuracies = np.array(per_fold_accuracy)

# One accuracy value is expected per held-out subject
if len(rf_accuracies) != len(rf_subject_labels):
    raise ValueError(
        f"Expected one accuracy per subject, got {len(rf_accuracies)} "
        f"accuracies for {len(rf_subject_labels)} subjects"
    )

# Order the bars by numeric subject ID
sorted_indices = np.argsort(unique_subjects)
sorted_subjects = rf_subject_labels[sorted_indices]
sorted_accuracies = rf_accuracies[sorted_indices]


# Plot accuracies for each fold (one fold per subject)
plt.figure(figsize=(10, 5))
plt.bar(sorted_subjects, sorted_accuracies, color='#2d4987')
plt.title("Accuracy per Subject - Random Forest")
plt.xlabel("Subject ID")
plt.ylabel("Accuracy")
plt.ylim(0, 1)
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
