In [62]:
import pickle
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [63]:
with open("WESAD/S10/S10.pkl", "rb") as f:
    data = pickle.load(f, encoding="latin1")

In [64]:
data.keys()

dict_keys(['signal', 'label', 'subject'])

In [65]:
signals = data["signal"]
labels = data["label"]

In [66]:
signals.keys()

dict_keys(['chest', 'wrist'])

In [67]:
chest = signals["chest"]
wrist = signals["wrist"]
ecg = chest["ECG"]
eda = wrist["EDA"]
labels = data["label"]

In [68]:
chest.keys(), wrist.keys()

(dict_keys(['ACC', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp']),
 dict_keys(['ACC', 'BVP', 'EDA', 'TEMP']))

In [69]:
ecg.shape, eda.shape, labels.shape

((3847200, 1), (21984, 1), (3847200,))

In [70]:
# 0 = no stress, 1 = mild stress, 2 = high stress
mapped_labels = np.zeros_like(labels)

mapped_labels[labels == 1] = 0          # baseline → no stress
mapped_labels[labels == 2] = 2          # stress → high stress
mapped_labels[(labels == 3) | (labels == 4)] = 1  # amusement/meditation → mild


In [71]:
ECG_WINDOW = 7000


In [72]:
def extract_ecg_features(x):
    return [
        np.mean(x),
        np.std(x),
        np.max(x),
        np.min(x)
    ]


In [73]:
def extract_eda_features(x):
    return [
        np.mean(x),
        np.std(x),
        np.max(x),
        np.min(x)
    ]


In [74]:
X = []
y = []

for i in range(0, len(ecg) - ECG_WINDOW, ECG_WINDOW):
    ecg_chunk = ecg[i:i+ECG_WINDOW].flatten()
    label_chunk = mapped_labels[i:i+ECG_WINDOW]
    
    # majority label in window
    label = np.bincount(label_chunk).argmax()
    
    # approximate EDA alignment
    eda_idx = int(i * len(eda) / len(ecg))
    eda_chunk = eda[eda_idx:eda_idx+10].flatten()
    
    if len(eda_chunk) < 10:
        continue
    
    features = extract_ecg_features(ecg_chunk) + extract_eda_features(eda_chunk)
    
    X.append(features)
    y.append(label)


In [75]:
X = np.array(X)
y = np.array(y)

X.shape, y.shape


((549, 8), (549,))

In [76]:
X.shape, y.shape, np.unique(y, return_counts=True)

((549, 8), (549,), (array([0, 1, 2]), array([359, 117,  73])))

In [77]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [78]:
split = int(0.8 * len(X_scaled))

X_train, X_test = X_scaled[:split], X_scaled[split:]
y_train, y_test = y[:split], y[split:]

In [81]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=300,
    random_state=42,
    class_weight={0:1, 1:3},
    max_depth=10,
    min_samples_leaf=5
)
model.fit(X_train, y_train)

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,5
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [82]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.6454545454545455

Classification Report:
               precision    recall  f1-score   support

           0       0.64      1.00      0.78        70
           1       1.00      0.03      0.05        40

    accuracy                           0.65       110
   macro avg       0.82      0.51      0.42       110
weighted avg       0.77      0.65      0.52       110


Confusion Matrix:
 [[70  0]
 [39  1]]
