In [None]:
# === Step 1: Import Required Libraries ===
import os
import numpy as np
import pandas as pd
import mne
from scipy.signal import stft
from sklearn.preprocessing import StandardScaler

In [None]:
# === Step 2: Settings ===

# Directory to save extracted STFT features.
# The default is the original author's folder; set the EEG_STFT_SAVE_DIR
# environment variable to redirect the output without editing this cell.
save_dir = os.environ.get(
    "EEG_STFT_SAVE_DIR",
    "/Users/myatpwintphyu/Desktop/eeg_stft_features",
)
# exist_ok=True keeps the cell safe to re-run when the folder already exists.
os.makedirs(save_dir, exist_ok=True)

# --- Signal / STFT configuration ---
# Sampling frequency, in Hz.
fs = 256
# Length of each STFT window, in samples.
win_size = 128
# Number of samples shared by consecutive STFT windows.
overlap = 64

# EEG frequency bands as name -> (low edge, high edge), both in Hz.
# The insertion order below is the order in which the bands are iterated.
bands = dict(
    delta=(1, 4),
    theta=(4, 8),
    alpha=(8, 12),
    beta=(12, 30),
    gamma=(30, 50),
)

In [None]:
# === Step 3: Load Participant Labels ===

# Location of the participants spreadsheet. The default is the original
# author's path; set EEG_PARTICIPANTS_XLSX to point at another copy.
xlsx_path = os.environ.get(
    "EEG_PARTICIPANTS_XLSX",
    "/Users/myatpwintphyu/Desktop/Monash/Master Thesis/Test_and_do_18_19_20/Data/ds003474-download/participants.xlsx",
)

# Read the Excel file (kept under its own name so the raw table stays
# available and this cell gives the same result when re-run).
participants_raw = pd.read_excel(xlsx_path)

# Keep participants younger than 20 years old that have both an id and a BDI
# score. A missing BDI compares as False against `> 10`, which would silently
# label that participant 0, so such rows are dropped instead of being guessed.
# `.copy()` makes `df` an independent frame, so adding the `label` column
# below does not trigger pandas' SettingWithCopyWarning.
df = (
    participants_raw
    .loc[participants_raw['age'] < 20]
    .dropna(subset=['participant_id', 'BDI'])
    .copy()
)

# Assign binary labels based on BDI score: 1 when BDI > 10, otherwise 0.
df['label'] = (df['BDI'] > 10).astype(int)

# Create a dictionary mapping participant_id to label
subject_label_map = dict(zip(df['participant_id'], df['label']))

In [None]:
# === Step 4: STFT-Based Feature Extraction Function ===

def extract_stft_features(epoch_data, fs, nperseg=None, noverlap=None, band_defs=None):
    """
    Extract average band power per channel using STFT.

    Parameters
    ----------
    epoch_data : array-like, shape (n_channels, n_samples)
        One epoch of multi-channel data.
    fs : float
        Sampling frequency of `epoch_data` in Hz.
    nperseg : int, optional
        STFT window length in samples. Defaults to the notebook-level
        `win_size`.
    noverlap : int, optional
        Samples shared by consecutive STFT windows. Defaults to the
        notebook-level `overlap`.
    band_defs : mapping of name -> (low, high), optional
        Band edges in Hz. Defaults to the notebook-level `bands`.

    Returns
    -------
    numpy.ndarray, shape (n_channels * n_bands,)
        Mean STFT power per band, laid out channel by channel
        (all bands of channel 0, then all bands of channel 1, ...).

    Raises
    ------
    ValueError
        If `epoch_data` is not 2-D, or if a band contains no STFT frequency
        bin (the window is too short to resolve it).

    Notes
    -----
    Bands are half-open, ``low <= f < high``, so a frequency bin that sits
    exactly on a shared edge (e.g. 4 Hz between delta and theta) is counted
    in one band only.
    """
    # Fall back to the notebook settings so two-argument calls keep working.
    if nperseg is None:
        nperseg = win_size
    if noverlap is None:
        noverlap = overlap
    if band_defs is None:
        band_defs = bands

    epoch_data = np.asarray(epoch_data, dtype=float)
    if epoch_data.ndim != 2:
        raise ValueError(
            f"epoch_data must be 2-D (n_channels, n_samples), got shape {epoch_data.shape}"
        )
    # Nothing to compute: mirror the empty feature vector of an empty input.
    if epoch_data.shape[0] == 0 or len(band_defs) == 0:
        return np.array([])

    # A single STFT call over the time axis handles every channel at once.
    f, _, Zxx = stft(
        epoch_data, fs=fs, window='hamming',
        nperseg=nperseg, noverlap=noverlap, axis=-1,
    )
    power = np.abs(Zxx) ** 2  # Power spectrogram: (n_channels, n_freqs, n_segments)

    # Compute average power for each band, one value per channel.
    band_columns = []
    for name, (low, high) in band_defs.items():
        band_mask = (f >= low) & (f < high)
        if not band_mask.any():
            # Averaging an empty selection would silently yield NaN features.
            raise ValueError(
                f"Band '{name}' ({low}-{high} Hz) contains no STFT frequency bin "
                f"at a resolution of {fs / nperseg:g} Hz"
            )
        band_columns.append(power[:, band_mask, :].mean(axis=(1, 2)))

    # (n_channels, n_bands) flattened row by row -> channel-major feature vector.
    return np.stack(band_columns, axis=1).ravel()

In [None]:
# === Step 5: Loop Over Subjects and Extract STFT Features ===

# Folder that holds one sub-folder per participant. The default is the
# original author's path; set EEG_DATA_ROOT to point at another copy.
root_dir = os.environ.get(
    "EEG_DATA_ROOT",
    "/Users/myatpwintphyu/Desktop/Monash/Master Thesis/Test_and_do_18_19_20/Data/ds003474-download/Data",
)
subject_ids = list(subject_label_map.keys())
chunk_id = 0  # Index used in the output file names; advances only after a successful save

# Epoching parameters, in seconds.
epoch_duration = 2.0
epoch_overlap = 1.0

# Outcome bookkeeping so the run can be summarised after the loop.
skipped_subjects = []  # no recording on disk, or no epochs produced
failed_subjects = []   # an exception was raised while processing

for subject in subject_ids:
    set_path = os.path.join(root_dir, subject, "eeg", f"{subject}_task-ProbabilisticSelection_eeg.set")

    if not os.path.exists(set_path):
        print(f"⚠️ Missing file for: {subject}")
        skipped_subjects.append(subject)
        continue

    try:
        print(f"🔄 Processing subject: {subject}")

        # Load EEG file, band-pass it to 1-50 Hz, then resample to `fs`
        # so the data matches the sampling rate passed to the STFT.
        raw = mne.io.read_raw_eeglab(set_path, preload=True)
        raw.filter(1., 50.)
        raw.resample(fs)

        # Create overlapping fixed-length epochs (2 s long, 1 s overlap)
        epochs = mne.make_fixed_length_epochs(
            raw, duration=epoch_duration, overlap=epoch_overlap, preload=True
        )
        data = epochs.get_data()  # shape: (n_epochs, n_channels, n_samples)

        # Skip if no epochs found
        if data.shape[0] == 0:
            print(f"❌ No valid epochs for: {subject}")
            skipped_subjects.append(subject)
            continue

        label = subject_label_map[subject]

        # Extract STFT features for each epoch -> (n_epochs, n_features)
        feats = np.array([extract_stft_features(epoch, fs) for epoch in data])
        # Every epoch of a subject carries that subject's label.
        labels = np.full(len(feats), label)

        # Normalize features
        # NOTE(review): the scaler is fit on this subject's epochs only, so each
        # subject's features are centred and scaled independently of the other
        # subjects -- confirm this per-subject normalisation is intended.
        scaler = StandardScaler()
        feats = scaler.fit_transform(feats)

        # Save to disk
        np.save(os.path.join(save_dir, f"X_feats_{chunk_id}.npy"), feats)
        np.save(os.path.join(save_dir, f"y_labels_{chunk_id}.npy"), labels)

        print(f"✅ Saved STFT features for chunk {chunk_id}: shape {feats.shape}")
        chunk_id += 1

    except Exception as e:
        # Best effort: one bad recording must not stop the remaining subjects.
        print(f"❗️ Error processing {subject}: {e}")
        failed_subjects.append(subject)

# Final tally, so skipped or failed subjects are not lost in the log above.
print(
    f"Finished: {chunk_id} subject(s) saved, "
    f"{len(skipped_subjects)} skipped, {len(failed_subjects)} failed."
)
if failed_subjects:
    print(f"Failed subjects: {failed_subjects}")