In [None]:
# === Step 1: Define save directory for extracted features ===
import os

import mne
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv3D, BatchNormalization, Dropout, GlobalAveragePooling3D

# Directory where features will be saved.
# The default is the original location; set the EEG_FEATURES_DIR environment
# variable to redirect the output without editing this cell.
save_dir = os.environ.get(
    "EEG_FEATURES_DIR",
    "/Users/myatpwintphyu/Desktop/eeg_3dcnn_features",
)
os.makedirs(save_dir, exist_ok=True)

In [None]:
# === Step 2: Load participant labels from Excel ===

# Path to Excel file with participant info
xlsx_path = "/Users/myatpwintphyu/Desktop/Monash/Master Thesis/Test_and_do_18_19_20/Data/ds003474-download/participants.xlsx"

# Load the file (the columns 'participant_id', 'age' and 'BDI' are used below)
participants_raw = pd.read_excel(xlsx_path)

# Filter: include only participants under 20 years old who have a BDI score.
# A missing BDI must be excluded explicitly: NaN > 10 evaluates to False, so
# such a participant would otherwise be silently labelled 0 (not depressed).
is_under_20 = participants_raw['age'] < 20
has_bdi = participants_raw['BDI'].notna()

n_without_bdi = int((is_under_20 & ~has_bdi).sum())
if n_without_bdi:
    print(f"⚠️ Excluding {n_without_bdi} participant(s) under 20 with no BDI score")

# .copy() makes df an independent frame, so adding the 'label' column below
# does not write into a slice of the raw frame (SettingWithCopyWarning).
df = participants_raw[is_under_20 & has_bdi].copy()

# Binary label: BDI > 10 → depressed (1), else (0)
df['label'] = (df['BDI'] > 10).astype(int)

# Map subject IDs to binary labels
subject_label_map = dict(zip(df['participant_id'], df['label']))

In [None]:
# === Step 3: Define the 3D CNN model ===

def build_3dcnn_extractor(input_shape):
    """
    Build the 3D CNN used as an EEG feature extractor.

    Args:
        input_shape: shape of one sample (without the batch axis), passed
            straight to Input(shape=...). The documented layout is
            (1, channels, time_points, 1).

    Returns:
        A Keras Model mapping the input to the output of
        GlobalAveragePooling3D applied to the last Conv3D layer (128 filters).
        The model is not compiled or trained here.
    """
    inputs = Input(shape=input_shape)

    # Two convolution stages, each followed by batch normalisation.
    # The (1, 3, 3) kernel leaves the leading singleton axis untouched.
    features = inputs
    for n_filters in (32, 64):
        features = Conv3D(n_filters, kernel_size=(1, 3, 3), activation='relu')(features)
        features = BatchNormalization()(features)

    features = Dropout(0.3)(features)
    features = Conv3D(128, kernel_size=(1, 3, 3), activation='relu')(features)

    pooled = GlobalAveragePooling3D()(features)
    return Model(inputs=inputs, outputs=pooled)

In [None]:
# === Step 4: Extract and save features subject by subject ===
#
# For every subject in subject_label_map: load the EEGLAB recording, band-pass
# filter it, cut it into overlapping fixed-length epochs, run the epochs
# through the 3D CNN and save the resulting features and labels as
# X_feats_<chunk_id>.npy / y_labels_<chunk_id>.npy in save_dir.
#
# NOTE(review): files are indexed by chunk_id only; the subject ID is not
# stored with the features. Confirm whether downstream code needs it (e.g. for
# subject-wise train/test splits, since epochs of one subject overlap).

# EEG root directory
root_dir = "/Users/myatpwintphyu/Desktop/Monash/Master Thesis/Test_and_do_18_19_20/Data/ds003474-download/Data"

# Get list of subject IDs
subject_ids = list(subject_label_map.keys())

# The CNN is never trained in this notebook, so its features depend entirely on
# the random initial weights. A fixed seed keeps them identical across runs;
# otherwise a re-run could leave chunk files from two different networks
# side by side in save_dir.
FEATURE_SEED = 42

chunk_id = 0  # used for naming output files

# The model is built lazily from the first subject that loads successfully.
# It is tracked separately from chunk_id so that a failure after the model was
# built does not silently rebuild it with different weights.
cnn_model = None
input_shape = None

for subject in subject_ids:
    set_path = os.path.join(root_dir, subject, "eeg", f"{subject}_task-ProbabilisticSelection_eeg.set")

    if not os.path.exists(set_path):
        print(f"⚠️ Missing file for: {subject}")
        continue

    try:
        print(f"🔄 Processing subject: {subject}")

        # Load EEG
        raw = mne.io.read_raw_eeglab(set_path, preload=True)
        raw.filter(1., 50.)  # bandpass filter 1–50 Hz

        # Create overlapping epochs (2s duration, 1s overlap)
        epochs = mne.make_fixed_length_epochs(raw, duration=2.0, overlap=1.0, preload=True)
        data = epochs.get_data()  # shape: (n_epochs, n_channels, n_times)

        # Skip if no epochs
        if data.shape[0] == 0:
            print(f"❌ No valid epochs for: {subject}")
            continue

        # Get subject label
        label = subject_label_map[subject]

        # Reshape to fit 3D CNN input: (samples, 1, channels, time, 1)
        data = data[:, np.newaxis, :, :, np.newaxis]

        if cnn_model is None:
            # Build model based on the first usable subject's data shape
            input_shape = data.shape[1:]  # (1, channels, time, 1)
            # Also reseeds Python's and NumPy's global random generators.
            tf.keras.utils.set_random_seed(FEATURE_SEED)
            cnn_model = build_3dcnn_extractor(input_shape)
        elif data.shape[1:] != input_shape:
            # The model has a fixed input shape; a recording with a different
            # channel count or epoch length cannot be fed through it.
            print(f"❌ Shape mismatch for {subject}: got {data.shape[1:]}, expected {input_shape}")
            continue

        # Extract features
        feats = cnn_model.predict(data, batch_size=16, verbose=0)
        labels = np.full(len(feats), label)  # label array for all epochs

        # Normalize features
        # NOTE(review): the scaler is fit on each subject separately, so every
        # subject's features are z-scored on their own and between-subject
        # offsets are removed. Confirm this is intended.
        scaler = StandardScaler()
        feats = scaler.fit_transform(feats)

        # Save to disk
        np.save(os.path.join(save_dir, f"X_feats_{chunk_id}.npy"), feats)
        np.save(os.path.join(save_dir, f"y_labels_{chunk_id}.npy"), labels)

        print(f"✅ Saved chunk {chunk_id}: {feats.shape}")
        chunk_id += 1

    except Exception as e:
        # Best-effort: report the failure and move on to the next subject.
        print(f"❗️ Error processing {subject}: {e}")