In [None]:
# === Settings ===
import os
import numpy as np
import pandas as pd
import mne
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, GlobalAveragePooling1D
from sklearn.preprocessing import StandardScaler

# Directory where the per-subject feature/label arrays are written.
# The original location is kept as the default; set the EEG_FEATURE_DIR
# environment variable to redirect output on another machine without
# editing the notebook. `or` (rather than a .get default) also covers the
# case where the variable is set but empty.
save_dir = (
    os.environ.get("EEG_FEATURE_DIR")
    or "/Users/myatpwintphyu/Desktop/eeg_1dcnn_features"
)
os.makedirs(save_dir, exist_ok=True)  # idempotent: safe to re-run this cell

In [None]:
# === Step 1: Load participant labels from Excel ===

# Path to participant metadata Excel file. The original location is the
# default; set EEG_PARTICIPANTS_XLSX to point at a different copy.
xlsx_path = (
    os.environ.get("EEG_PARTICIPANTS_XLSX")
    or "/Users/myatpwintphyu/Desktop/Monash/Master Thesis/Test_and_do_18_19_20/Data/ds003474-download/participants.xlsx"
)

# Cohort and labelling thresholds, named so they are easy to find and tune.
MAX_AGE_EXCLUSIVE = 20  # keep participants with age < 20
BDI_CUTOFF = 10         # BDI > 10 -> label 1 (depressed), otherwise 0

participants_raw = pd.read_excel(xlsx_path)

# Filter for participants under the age limit. The explicit .copy() makes
# `df` an independent frame, so adding the 'label' column below does not
# write into a slice of the unfiltered sheet (SettingWithCopyWarning).
df = participants_raw[participants_raw['age'] < MAX_AGE_EXCLUSIVE].copy()

# A missing BDI compares as False against the cutoff and would silently be
# labelled 0 (non-depressed). Drop such rows instead of inventing a label.
n_missing_bdi = int(df['BDI'].isna().sum())
if n_missing_bdi:
    print(f"Dropping {n_missing_bdi} participant(s) with missing BDI score")
    df = df.dropna(subset=['BDI'])

# Assign binary labels: 1 if BDI is above the cutoff (depressed), else 0
df['label'] = (df['BDI'] > BDI_CUTOFF).astype(int)

# Create dictionary mapping participant_id to label
subject_label_map = dict(zip(df['participant_id'], df['label']))

In [None]:
# === Step 2: Define the 1D CNN model used for feature extraction ===

def build_1dcnn_extractor(input_shape):
    """
    Build a small 1D CNN that maps one EEG epoch to a 128-dim feature vector.

    Architecture: Permute -> Conv1D(64, k=5, relu) -> Conv1D(128, k=5, relu)
    -> GlobalAveragePooling1D.

    input_shape: (channels, time_points) for a single epoch (no batch axis).
        time_points must be at least 9 so the two unpadded kernel-5
        convolutions leave at least one step to pool over.
    returns: an uncompiled Keras Model whose output has shape (batch, 128).
        The weights are whatever Keras initialises them to; this function
        neither trains nor loads weights.
    """
    # Permute is not in the notebook's top-level import list; importing it
    # here keeps this definition self-contained.
    from tensorflow.keras.layers import Permute

    inp = Input(shape=input_shape)

    # Keras Conv1D defaults to channels_last, i.e. it expects
    # (steps, features) and slides its kernel along axis 1. Fed
    # (channels, time) directly, it would convolve across electrode index
    # and treat time samples as features. Swap the axes so the kernels run
    # along time and the EEG channels act as the feature dimension.
    x = Permute((2, 1))(inp)  # (channels, time) -> (time, channels)

    x = Conv1D(64, kernel_size=5, activation='relu')(x)
    x = Conv1D(128, kernel_size=5, activation='relu')(x)

    # Average over the (shortened) time axis -> one 128-dim vector per epoch.
    x = GlobalAveragePooling1D()(x)

    return Model(inputs=inp, outputs=x)

In [None]:
# === Step 3: Extract features subject-by-subject from EEG ===

# EEG dataset root directory (one sub-folder per participant). The original
# location is the default; set EEG_DATA_ROOT to point at a different copy.
root_dir = (
    os.environ.get("EEG_DATA_ROOT")
    or "/Users/myatpwintphyu/Desktop/Monash/Master Thesis/Test_and_do_18_19_20/Data/ds003474-download/Data"
)

# List of participant IDs to process
subject_ids = list(subject_label_map.keys())

chunk_id = 0        # Used for naming output feature files; advances only after a successful save
cnn_model = None    # Built lazily from the first subject that yields epochs
input_shape = None  # (channels, time) the model was built for
manifest_rows = []  # One (chunk_id, participant_id, label, n_epochs) row per saved chunk

# NOTE(review): the extractor is never trained and no random seed is set in
# this cell, so the saved features depend on this run's weight
# initialisation - confirm whether that is intended.

for subject in subject_ids:
    # Construct file path for EEG .set file
    set_path = os.path.join(root_dir, subject, "eeg", f"{subject}_task-ProbabilisticSelection_eeg.set")

    # Skip if file is missing
    if not os.path.exists(set_path):
        print(f"⚠️ Skipping missing: {subject}")
        continue

    try:
        print(f"🔄 Processing subject: {subject}")

        # Load raw EEG data
        raw = mne.io.read_raw_eeglab(set_path, preload=True)
        raw.filter(1., 50.)  # Bandpass filter between 1–50 Hz

        # Segment into fixed-length overlapping epochs (1s duration, 0.5s overlap)
        epochs = mne.make_fixed_length_epochs(raw, duration=1.0, overlap=0.5, preload=True)
        data = epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)

        label = subject_label_map[subject]

        # Skip subjects with no epochs
        if data.shape[0] == 0:
            print(f"❌ No valid epochs for subject: {subject}")
            continue

        if cnn_model is None:
            # Build the CNN once, from the first usable subject's epoch shape.
            # An explicit sentinel is used instead of `chunk_id == 0` so the
            # model's lifetime does not depend on the file counter.
            input_shape = data.shape[1:]  # (channels, time)
            cnn_model = build_1dcnn_extractor(input_shape)
        elif data.shape[1:] != input_shape:
            # A different channel count or sampling rate cannot be fed to the
            # already-built model; say so explicitly rather than letting
            # predict() fail with a framework error.
            print(f"❌ Shape mismatch for subject {subject}: got {data.shape[1:]}, expected {input_shape}")
            continue

        # Extract features (verbose=0: no per-call progress bar in the cell output)
        feats = cnn_model.predict(data, batch_size=16, verbose=0)
        labels = np.full(len(feats), label)  # Assign label to all feature vectors

        # Normalize features (optional but recommended).
        # NOTE(review): the scaler is fitted separately for every subject, so
        # each subject's features are centred and scaled on their own; any
        # between-subject offset is removed. Confirm this is the intent.
        scaler = StandardScaler()
        feats = scaler.fit_transform(feats)

        # Save features and labels to disk
        np.save(os.path.join(save_dir, f"X_feats_{chunk_id}.npy"), feats)
        np.save(os.path.join(save_dir, f"y_labels_{chunk_id}.npy"), labels)

        # Remember which participant this chunk came from. The .npy names
        # carry only the running index, and overlapping epochs of one
        # subject must stay on the same side of any train/test split.
        manifest_rows.append((chunk_id, subject, int(label), len(feats)))

        print(f"✅ Saved features for chunk {chunk_id}: shape {feats.shape}")
        chunk_id += 1

    except Exception as e:
        # Deliberate best-effort: one unreadable recording should not stop
        # the whole batch. The subject is reported and skipped.
        print(f"❗️Error processing {subject}: {e}")

# Persist the chunk -> participant mapping next to the feature files.
manifest_path = os.path.join(save_dir, "manifest.csv")
pd.DataFrame(
    manifest_rows,
    columns=["chunk_id", "participant_id", "label", "n_epochs"],
).to_csv(manifest_path, index=False)
print(f"Saved manifest for {chunk_id} subject(s): {manifest_path}")