In [None]:
# === Step 1: Import Required Libraries ===
import os
import numpy as np
import pandas as pd
import mne
from scipy.signal import stft
from sklearn.preprocessing import StandardScaler

In [None]:
# === Step 2: Settings ===
#
# Paths default to the original locations on the author's machine. Each one can
# be overridden through an environment variable so the notebook can run
# elsewhere without editing this cell.

# Output directory to save beta band features (override: EEG_BETA_SAVE_DIR)
save_dir = os.environ.get(
    "EEG_BETA_SAVE_DIR",
    "/Users/myatpwintphyu/Desktop/eeg_stft_beta_features",
)
os.makedirs(save_dir, exist_ok=True)  # created up front so later saves cannot fail on a missing folder

# Path to participant metadata file (override: EEG_PARTICIPANTS_XLSX)
xlsx_path = os.environ.get(
    "EEG_PARTICIPANTS_XLSX",
    "/Users/myatpwintphyu/Desktop/Monash/Master Thesis/Test_and_do_18_19_20/Data/ds003474-download/participants.xlsx",
)

# Sampling parameters
fs = 250           # Sampling frequency (Hz) — NOTE(review): confirm this matches the recordings
win_size = 128     # STFT window size (samples per segment)
overlap = 64       # Overlap between consecutive STFT windows (samples)
beta_range = (12, 30)  # Beta band in Hz, both bounds inclusive

In [None]:
# === Step 3: Load Labels from Participant Excel ===

# Load the Excel file
df = pd.read_excel(xlsx_path)

# Keep participants under 20 years old who have a recorded BDI score.
# - Rows with a missing BDI are excluded: `NaN > 10` evaluates to False, so
#   they would otherwise be silently labelled 0.
# - `.copy()` detaches the filtered frame from the original so the column
#   assignment below does not trigger pandas' SettingWithCopyWarning.
df = df[(df['age'] < 20) & df['BDI'].notna()].copy()

# Create binary label: 1 if BDI > 10, else 0
df['label'] = (df['BDI'] > 10).astype(int)

# Create mapping: participant_id → label
subject_label_map = dict(zip(df['participant_id'], df['label']))

In [None]:
# === Step 4: Define Beta Band Power Extraction Function ===

def extract_beta_power(data_epoch, fs, nperseg=None, noverlap=None, band=None):
    """
    Compute average beta band power per channel from a single EEG epoch.

    The STFT is computed for all channels in one call (along the time axis),
    squared to obtain power, and averaged over every frequency bin inside the
    band and every STFT segment.

    Parameters:
        data_epoch: array-like of shape (n_channels, n_times)
        fs: sampling frequency of `data_epoch` in Hz
        nperseg: STFT segment length in samples; defaults to the notebook-level
            `win_size` setting
        noverlap: overlap between segments in samples; defaults to the
            notebook-level `overlap` setting
        band: (low_hz, high_hz) inclusive frequency band; defaults to the
            notebook-level `beta_range` setting
    Returns:
        beta_power: array of shape (n_channels,)
    Raises:
        ValueError: if `data_epoch` is not 2-D, or if no STFT frequency bin
            falls inside the requested band (which would otherwise yield NaN).
    """
    # Fall back to the notebook settings only for arguments not supplied,
    # so existing two-argument calls behave as before.
    if nperseg is None:
        nperseg = win_size
    if noverlap is None:
        noverlap = overlap
    low_hz, high_hz = beta_range if band is None else band

    data_epoch = np.asarray(data_epoch)
    if data_epoch.ndim != 2:
        raise ValueError(
            f"data_epoch must have shape (n_channels, n_times), got {data_epoch.shape}"
        )
    if data_epoch.shape[0] == 0:
        # No channels: nothing to transform.
        return np.array([])

    # One STFT over the last (time) axis; Zxx is (n_channels, n_freqs, n_segments).
    f, _, Zxx = stft(
        data_epoch, fs=fs, nperseg=nperseg, noverlap=noverlap,
        window='hamming', axis=-1,
    )

    # Select only beta band frequencies
    beta_mask = (f >= low_hz) & (f <= high_hz)
    if not beta_mask.any():
        raise ValueError(
            f"No STFT frequency bins fall inside {low_hz}-{high_hz} Hz "
            f"(fs={fs}, nperseg={nperseg})"
        )

    power = np.abs(Zxx[:, beta_mask, :]) ** 2  # Power spectrogram restricted to the band

    # Mean across band frequencies and across time, per channel
    return power.mean(axis=(1, 2))

In [None]:
# === Step 5: Main Loop to Extract Features per Subject ===
#
# For every labelled subject: load the EEGLAB recording, band-pass filter it,
# cut it into overlapping fixed-length epochs, compute per-channel beta power
# for each epoch, standardise the features within the subject, and save the
# feature matrix and label vector as a numbered chunk in `save_dir`.

data_root = "/Users/myatpwintphyu/Desktop/Monash/Master Thesis/Test_and_do_18_19_20/Data/ds003474-download/Data"
subject_ids = list(subject_label_map.keys())
chunk_id = 0  # Index of the next chunk to write; only advances after a successful save

for subject in subject_ids:
    set_path = os.path.join(data_root, subject, "eeg", f"{subject}_task-ProbabilisticSelection_eeg.set")

    if not os.path.exists(set_path):
        print(f"⚠️ Missing file for: {subject}")
        continue

    try:
        print(f"🔄 Processing subject: {subject}")

        # Load EEG data
        raw = mne.io.read_raw_eeglab(set_path, preload=True)
        raw.filter(1., 50.)  # Bandpass filter

        # Use the recording's own sampling rate for the STFT. Passing a rate
        # that differs from the data's would map the beta band onto the wrong
        # frequency bins.
        sfreq = float(raw.info['sfreq'])
        if sfreq != fs:
            print(f"ℹ️ {subject}: recording is sampled at {sfreq} Hz (configured fs={fs}); using {sfreq} Hz")

        # Create 2-second epochs with 1-second overlap
        epochs = mne.make_fixed_length_epochs(raw, duration=2.0, overlap=1.0, preload=True)
        data = epochs.get_data()  # shape: (n_epochs, n_channels, n_times)

        # Skip if no valid epochs
        if data.shape[0] == 0:
            print(f"❌ No valid epochs for {subject}")
            continue

        label = subject_label_map[subject]

        # Extract features for each epoch -> (n_epochs, n_channels)
        feats = np.array([extract_beta_power(epoch, sfreq) for epoch in data])
        labels = np.full(len(feats), label)  # Every epoch inherits the subject-level label

        # Normalize features (scaler is fitted on this subject's epochs only)
        scaler = StandardScaler()
        feats = scaler.fit_transform(feats)

        # Save to disk
        np.save(os.path.join(save_dir, f"X_feats_{chunk_id}.npy"), feats)
        np.save(os.path.join(save_dir, f"y_labels_{chunk_id}.npy"), labels)

        print(f"✅ Saved chunk {chunk_id}: {feats.shape}")
        chunk_id += 1

    except Exception as e:
        # Best-effort batch run: report the failure and move on to the next subject.
        print(f"❗️ Error processing {subject}: {e}")