In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount("/content/drive")

# Project folder inside the mounted Drive.
# NOTE(review): the next cell assigns PROJECT_ROOT again to a path outside
# /content/drive, so this value is overwritten before it is used — confirm
# which of the two locations is intended.
PROJECT_ROOT = "/content/drive/MyDrive/stress-heart-ml-wesad"


In [None]:
import os

# Base folder for everything this notebook writes.
# NOTE(review): this replaces the Drive-based PROJECT_ROOT assigned in the
# previous cell with a path outside /content/drive — confirm that is intended.
PROJECT_ROOT = "/content/stress-heart-ml-wesad"
DATA_DIR, RESULTS_DIR = (
    f"{PROJECT_ROOT}/data/processed",
    f"{PROJECT_ROOT}/results",
)

# Create both output folders; exist_ok=True keeps re-running this cell harmless.
for _out_dir in (DATA_DIR, RESULTS_DIR):
    os.makedirs(_out_dir, exist_ok=True)

print("Project directories ready ✅")


In [None]:
def build_subject_dataset(pkl_path, subject_id, win=60, step=30,
                          eda_fs=4, bvp_fs=64, label_fs=700):
    """
    Build an ML-ready feature table from one WESAD subject .pkl file.

    The wrist EDA and BVP signals are filtered, cut into overlapping windows,
    and turned into one feature row per window. Each row carries the majority
    label of the *same* window, so windows that are skipped for lack of
    detectable heart beats can no longer shift the labels of later rows.

    Parameters
    ----------
    pkl_path : str
        Path to the subject pickle. It must contain
        ``data["signal"]["wrist"]["EDA"]``, ``data["signal"]["wrist"]["BVP"]``
        and ``data["label"]``.
    subject_id : str
        Identifier used only in the progress message.
    win, step : float
        Window length and hop, in seconds (defaults 60 and 30).
    eda_fs, bvp_fs, label_fs : float
        Sampling rates, in Hz, assumed for the EDA signal, the BVP signal and
        the label track (defaults 4, 64 and 700).

    Returns
    -------
    pandas.DataFrame
        Columns ``mean_HR``, ``RMSSD``, ``SDNN``, ``SCR_count``, ``SCR_amp``
        and an integer ``label`` in {0, 1, 2}. Windows whose majority label is
        not 1, 2 or 3 are dropped. The frame keeps these columns even when no
        window survives.

    Notes
    -----
    Prints a one-line summary with the number of windows kept.
    """

    import pickle
    import numpy as np
    import pandas as pd
    from scipy.signal import butter, filtfilt, find_peaks

    feature_cols = ["mean_HR", "RMSSD", "SDNN", "SCR_count", "SCR_amp"]

    # ------------------
    # Load data
    # ------------------
    # NOTE(security): unpickling can execute arbitrary code — only load
    # subject files that come from a trusted source.
    with open(pkl_path, "rb") as f:
        data = pickle.load(f, encoding="latin1")

    signals = data["signal"]["wrist"]
    labels = np.asarray(data["label"]).ravel()

    eda = np.asarray(signals["EDA"]).squeeze()
    bvp = np.asarray(signals["BVP"]).squeeze()

    # ------------------
    # Filtering (zero-phase Butterworth)
    # ------------------
    def lowpass(x, cutoff=0.5, fs=eda_fs, order=4):
        nyq = 0.5 * fs
        b, a = butter(order, cutoff / nyq, btype="low")
        return filtfilt(b, a, x)

    def bandpass(x, low=0.67, high=3.5, fs=bvp_fs, order=3):
        nyq = 0.5 * fs
        b, a = butter(order, [low / nyq, high / nyq], btype="band")
        return filtfilt(b, a, x)

    eda_f = lowpass(eda)
    bvp_f = bandpass(bvp)

    # ------------------
    # Windowing
    # ------------------
    def sliding_windows(x, fs):
        w = int(win * fs)
        s = int(step * fs)
        # "+ 1" keeps the window that ends exactly on the last sample.
        return [x[i:i + w] for i in range(0, len(x) - w + 1, s)]

    # Window k covers the same time span in all three tracks, so they can be
    # walked in lock-step; zip() stops at the shortest track.
    eda_wins = sliding_windows(eda_f, eda_fs)
    bvp_wins = sliding_windows(bvp_f, bvp_fs)
    label_wins = sliding_windows(labels, label_fs)

    # ------------------
    # Feature extraction
    # ------------------
    rows = []

    for e, b, lbl in zip(eda_wins, bvp_wins, label_wins):
        # HRV features: beats must be at least 0.4 s apart.
        bp, _ = find_peaks(b, distance=max(1, bvp_fs * 0.4), prominence=0.2)
        if len(bp) < 3:
            # Too few beats for RMSSD; the label is skipped together with
            # the window, so the remaining rows stay aligned.
            continue

        rr = np.diff(bp) / bvp_fs

        # EDA features on the min-max normalised window (epsilon avoids 0/0
        # on a flat window).
        eda_n = (e - e.min()) / (e.max() - e.min() + 1e-8)
        peaks, _ = find_peaks(eda_n, height=0.1, distance=5)

        rows.append({
            "mean_HR": np.mean(60 / rr),
            "RMSSD": np.sqrt(np.mean(np.diff(rr) ** 2)),
            "SDNN": np.std(rr),
            "SCR_count": len(peaks),
            "SCR_amp": np.mean(eda_n[peaks]) if len(peaks) else 0,
            # Majority vote over the label samples of this very window.
            "label": np.bincount(lbl).argmax(),
        })

    # Explicit columns keep the schema stable when `rows` is empty.
    df = pd.DataFrame(rows, columns=feature_cols + ["label"])

    # Map labels + clean: keep raw labels 1/2/3 as classes 0/1/2
    # (presumably baseline / stress / amusement — confirm against the WESAD
    # readme); every other label maps to NaN and is dropped.
    label_map = {1: 0, 2: 1, 3: 2}
    df["label"] = df["label"].map(label_map)
    df = df.dropna().reset_index(drop=True)
    # map() yields floats because of the NaNs; restore an integer class column.
    df["label"] = df["label"].astype(int)

    print(f"✅ Subject {subject_id}: {df.shape[0]} windows")

    return df


In [None]:
# Folder that holds one sub-directory per WESAD subject.
# NOTE(review): no download step for this kagglehub cache path is visible in
# this part of the notebook — confirm the dataset is present before running.
WESAD_DIR = "/root/.cache/kagglehub/datasets/mohamedasem318/wesad-full-dataset/versions/2/WESAD"

# Build the feature tables for S2 and S3, in that order.
df_S2_ml, df_S3_ml = (
    build_subject_dataset(f"{WESAD_DIR}/{sid}/{sid}.pkl", sid)
    for sid in ("S2", "S3")
)


In [None]:
# Cross-subject split: S2 is the training subject, S3 the held-out subject.
# "label" is the target; every other column is a feature.
X_train, y_train = df_S2_ml.drop(columns=["label"]), df_S2_ml["label"]

X_test, y_test = df_S3_ml.drop(columns=["label"]), df_S3_ml["label"]


In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply the same
# transform to both splits so no test statistics leak into training.
scaler = StandardScaler().fit(X_train)

X_train_scaled, X_test_scaled = (
    scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Hyper-parameters gathered in one mapping; random_state fixes the seed.
rf_params = {"n_estimators": 200, "max_depth": 10, "random_state": 42}
rf = RandomForestClassifier(**rf_params)

# fit() stays the last expression so the cell still displays the estimator.
rf.fit(X_train_scaled, y_train)


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict the held-out subject (S3) with the model trained on S2.
y_pred = rf.predict(X_test_scaled)

# Overall accuracy first ...
test_accuracy = accuracy_score(y_test, y_pred)
print("Cross-subject Accuracy (S2 → S3):", test_accuracy)

# ... then the per-class precision / recall / F1 table.
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)