<a href="https://colab.research.google.com/github/arshchatrath/SafeSpace/blob/main/SafeSpaceAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json



In [4]:
!kaggle datasets download -d orvile/wesad-wearable-stress-affect-detection-dataset
!unzip wesad-wearable-stress-affect-detection-dataset.zip -d wesad_data


Dataset URL: https://www.kaggle.com/datasets/orvile/wesad-wearable-stress-affect-detection-dataset
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading wesad-wearable-stress-affect-detection-dataset.zip to /content
 99% 2.41G/2.43G [00:28<00:00, 261MB/s]
100% 2.43G/2.43G [00:29<00:00, 90.0MB/s]
Archive:  wesad-wearable-stress-affect-detection-dataset.zip
  inflating: wesad_data/WESAD/S10/S10.pkl  
  inflating: wesad_data/WESAD/S10/S10_E4_Data/ACC.csv  
  inflating: wesad_data/WESAD/S10/S10_E4_Data/BVP.csv  
  inflating: wesad_data/WESAD/S10/S10_E4_Data/EDA.csv  
  inflating: wesad_data/WESAD/S10/S10_E4_Data/HR.csv  
  inflating: wesad_data/WESAD/S10/S10_E4_Data/IBI.csv  
  inflating: wesad_data/WESAD/S10/S10_E4_Data/TEMP.csv  
  inflating: wesad_data/WESAD/S10/S10_E4_Data/info.txt  
  inflating: wesad_data/WESAD/S10/S10_E4_Data/tags.csv  
  inflating: wesad_data/WESAD/S10/S10_quest.csv  
  inflating: wesad_data/WESAD/S10/S10_readme.txt  
  inflating: wesad_data/WESAD/S10/S

In [6]:
import os

for root, dirs, files in os.walk('wesad_data'):
    for name in files:
        print(os.path.join(root, name))


wesad_data/WESAD/wesad_readme.pdf
wesad_data/WESAD/S7/S7.pkl
wesad_data/WESAD/S7/S7_respiban.txt
wesad_data/WESAD/S7/S7_quest.csv
wesad_data/WESAD/S7/S7_readme.txt
wesad_data/WESAD/S7/S7_E4_Data/tags.csv
wesad_data/WESAD/S7/S7_E4_Data/EDA.csv
wesad_data/WESAD/S7/S7_E4_Data/HR.csv
wesad_data/WESAD/S7/S7_E4_Data/ACC.csv
wesad_data/WESAD/S7/S7_E4_Data/info.txt
wesad_data/WESAD/S7/S7_E4_Data/IBI.csv
wesad_data/WESAD/S7/S7_E4_Data/TEMP.csv
wesad_data/WESAD/S7/S7_E4_Data/BVP.csv
wesad_data/WESAD/S14/S14.pkl
wesad_data/WESAD/S14/S14_readme.txt
wesad_data/WESAD/S14/S14_respiban.txt
wesad_data/WESAD/S14/S14_quest.csv
wesad_data/WESAD/S14/S14_E4_Data/tags.csv
wesad_data/WESAD/S14/S14_E4_Data/EDA.csv
wesad_data/WESAD/S14/S14_E4_Data/HR.csv
wesad_data/WESAD/S14/S14_E4_Data/ACC.csv
wesad_data/WESAD/S14/S14_E4_Data/info.txt
wesad_data/WESAD/S14/S14_E4_Data/IBI.csv
wesad_data/WESAD/S14/S14_E4_Data/TEMP.csv
wesad_data/WESAD/S14/S14_E4_Data/BVP.csv
wesad_data/WESAD/S15/S15_respiban.txt
wesad_data/WESAD

In [7]:
import pickle

with open('wesad_data/WESAD/S2/S2.pkl', 'rb') as file:
    data = pickle.load(file, encoding='latin1')

# Explore what's inside
print(data.keys())


dict_keys(['signal', 'label', 'subject'])


In [10]:
!pip install heartpy



In [9]:
# ------------------- Imports -------------------
import pickle
import numpy as np
import pandas as pd
import heartpy as hp
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from collections import Counter
import os

# ------------------- Constants -------------------
sample_rate = 700  # ECG sampling rate in WESAD chest device
window_size = 30 * sample_rate  # 30s windows
step_size = 15 * sample_rate    # 50% overlap
base_path = 'wesad_data/WESAD'  # Kaggle unzipped path in Colab

# ------------------- Label Mapping -------------------
def map_to_stress_level(label):
    """Map WESAD labels to Low-Medium (0) and Medium-High (1)"""
    if label in [0, 1, 3, 4]:  # baseline, amusement, meditation, neutral
        return 0  # Low-Medium
    elif label in [2, 5, 6, 7]:  # stress conditions
        return 1  # Medium-High
    else:
        return -1  # unknown/ignore

# ------------------- Feature Extraction -------------------
def extract_features_windowed(subject_path):
    with open(subject_path, 'rb') as file:
        data = pickle.load(file, encoding='latin1')

    signals = data['signal']['chest']
    ecg = signals['ECG'].flatten()
    emg = signals['EMG']
    eda = signals['EDA']
    temp = signals['Temp']
    acc = signals['ACC']
    labels = data['label']

    X, y = [], []

    for start in range(0, len(ecg) - window_size, step_size):
        end = start + window_size

        # Windowed signals
        ecg_win = ecg[start:end]
        emg_win = emg[start:end]
        eda_win = eda[start:end]
        temp_win = temp[start:end]
        acc_win = acc[start:end]
        label_win = labels[start:end]

        # Map and filter labels
        mapped_labels = [map_to_stress_level(l) for l in label_win]
        valid_labels = [l for l in mapped_labels if l != -1]
        if len(valid_labels) / len(label_win) < 0.8:
            continue
        y_label = np.argmax(np.bincount(valid_labels))  # majority label

        # HRV features
        try:
            _, hrv = hp.process(ecg_win, sample_rate)
            hrv_features = [
                hrv['rmssd'], hrv['sdnn'],
                hrv.get('lf', 0), hrv.get('hf', 0),
                hrv.get('lf/hf', 0)
            ]
        except:
            hrv_features = [0, 0, 0, 0, 0]

        # Time-domain features
        features = [
            np.mean(emg_win), np.std(emg_win),
            np.mean(eda_win), np.std(eda_win),
            np.mean(temp_win), np.std(temp_win),
            np.mean(acc_win[:, 0]), np.std(acc_win[:, 0]),
            np.mean(acc_win[:, 1]), np.std(acc_win[:, 1]),
            np.mean(acc_win[:, 2]), np.std(acc_win[:, 2]),
            *hrv_features
        ]

        X.append(features)
        y.append(y_label)

    return np.array(X), np.array(y)

# ------------------- Dataset Aggregation -------------------
subjects = ['S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10',
            'S11', 'S13', 'S14', 'S15', 'S16', 'S17']

X_all, y_all = [], []

for subject in subjects:
    try:
        print(f"Processing {subject}...")
        subject_file = os.path.join(base_path, subject, f"{subject}.pkl")
        X_sub, y_sub = extract_features_windowed(subject_file)
        X_all.append(X_sub)
        y_all.append(y_sub)
        print(f"  {len(y_sub)} windows | Class distribution: {Counter(y_sub)}")
    except Exception as e:
        print(f"Error processing {subject}: {e}")

X = np.vstack(X_all)
y = np.hstack(y_all)

print("\nFinal dataset:")
print(f"Total samples: {len(y)}")
print(f"Class distribution: {Counter(y)}")

# ------------------- Train-Test Split -------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ------------------- Train & Evaluate -------------------
print("\nTraining model...")
clf = RandomForestClassifier(
    n_estimators=300,
    class_weight='balanced',
    random_state=42
)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("\nClassification Report:")
print(classification_report(
    y_test, y_pred,
    target_names=['Low-Medium', 'Medium-High']
))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# ------------------- Feature Importance -------------------
print("\nTop 10 most important features:")
feature_names = [
    'EMG_mean', 'EMG_std',
    'EDA_mean', 'EDA_std',
    'Temp_mean', 'Temp_std',
    'ACC_X_mean', 'ACC_X_std',
    'ACC_Y_mean', 'ACC_Y_std',
    'ACC_Z_mean', 'ACC_Z_std',
    'HRV_RMSSD', 'HRV_SDNN',
    'HRV_LF', 'HRV_HF',
    'HRV_LF/HF'
]

importances = clf.feature_importances_
top_indices = np.argsort(importances)[-10:][::-1]
for idx in top_indices:
    print(f"{feature_names[idx]}: {importances[idx]:.4f}")


Processing S2...


The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
  interp = UnivariateSpline(x, rrlist, k=3)
  result = super().mean(axis=axis, dtype=dtype, **kwargs)[()]
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


  404 windows | Class distribution: Counter({np.int64(0): 354, np.int64(1): 50})
Processing S3...
  431 windows | Class distribution: Counter({np.int64(0): 373, np.int64(1): 58})
Processing S4...
  427 windows | Class distribution: Counter({np.int64(0): 375, np.int64(1): 52})
Processing S5...
  416 windows | Class distribution: Counter({np.int64(0): 363, np.int64(1): 53})
Processing S6...
  470 windows | Class distribution: Counter({np.int64(0): 416, np.int64(1): 54})
Processing S7...
  348 windows | Class distribution: Counter({np.int64(0): 295, np.int64(1): 53})
Processing S8...
  363 windows | Class distribution: Counter({np.int64(0): 308, np.int64(1): 55})
Processing S9...
  347 windows | Class distribution: Counter({np.int64(0): 292, np.int64(1): 55})
Processing S10...
  365 windows | Class distribution: Counter({np.int64(0): 306, np.int64(1): 59})
Processing S11...
  347 windows | Class distribution: Counter({np.int64(0): 292, np.int64(1): 55})
Processing S13...
  368 windows | C