# Welcome to the Modular Multimodal Data Fusion ML Pipeline for Stress Detection on the WESAD Dataset

## Table of contents






## Getting Started:
First, install the required packages. If you are using a virtual environment (e.g. Conda or venv), activate it before running the cell below.

In [None]:
# Install the packages listed in requirements.txt via the %pip magic.
%pip install -r requirements.txt

## Data Installation
If you are on Linux, run this cell to download and extract the WESAD dataset automatically. Otherwise, download it manually [here](https://uni-siegen.sciebo.de/s/HGdUkoNlW1Ub0Gx/download) and unzip the `WESAD` folder into the `src/wesad` directory, i.e. `src/wesad/WESAD/`.

In [None]:
%cd src/wesad && bash download_database.sh
%cd ../..

## Data Preprocessing

This will automatically extract the biosensor data from the WESAD directory into several merged files in `.pkl` format.

This will take around 20 minutes depending on the machine.

In [None]:
from src.wesad.data_preprocessing.data_preprocessor import WESADDataPreprocessor

# Instantiate the preprocessor with no arguments and run it; per the section
# text above, this is the step that produces the merged `.pkl` files.
preprocessor = WESADDataPreprocessor()
preprocessor.preprocess()

## Signal Preprocessing Steps
Each signal is preprocessed with its own set of steps:

### Chest Signals

#### ECG
- **Smoothing**: Savitzky–Golay filter with window size 11 and order 3.
- **Filtering**: Butterworth band-pass filter of order 3 with cutoff frequencies 0.7 Hz and 3.7 Hz.

#### EMG
- **Smoothing**: Savitzky–Golay filter with window size 11 and order 3.
- **Filtering**: Butterworth lowpass filter of order 3 with cutoff frequency 0.5 Hz.

#### EDA
- **Smoothing**: Savitzky–Golay filter with window size 11 and order 3.
- **Filtering**: Butterworth lowpass filter of order 2 with cutoff frequency 5 Hz.

#### TEMP
- **Smoothing**: Savitzky–Golay filter with window size 11 and order 3.

#### RESP
- **Smoothing**: Savitzky–Golay filter with window size 11 and order 3.
- **Filtering**: Butterworth band-pass filter of order 3 with cutoff frequencies 0.1 Hz and 0.35 Hz.

#### ACC
- **Smoothing**: Savitzky–Golay filter with window size 31 and order 5.

### Wrist Signals

#### BVP
- **Filtering**: Butterworth band-pass filter of order 3 with cutoff frequencies 0.7 Hz and 3.7 Hz.

#### TEMP
- **Smoothing**: Savitzky–Golay filter with window size 11 and order 3.

#### ACC
- **Filtering**: Finite Impulse Response (FIR) filter with a length of 64 and a cut-off frequency of 0.4 Hz.


In [None]:
from src.ml_pipeline.preprocessing import SignalPreprocessor

# Both runs read the same configuration file.
config_path = 'src/wesad/wesad_configuration.json'

# Chest: raw merged recordings in, cleaned signals out.
signal_preprocessor = SignalPreprocessor(
    'src/wesad/WESAD/raw/merged_chest.pkl',
    'src/wesad/WESAD/cleaned/chest_preprocessed.pkl',
    config_path,
)
signal_preprocessor.preprocess_signals()

# Wrist: same flow, with the `wrist` flag switched on.
signal_preprocessor = SignalPreprocessor(
    'src/wesad/WESAD/raw/merged_wrist.pkl',
    'src/wesad/WESAD/cleaned/wrist_preprocessed.pkl',
    config_path,
    wrist=True,
)
signal_preprocessor.preprocess_signals()

## Traditional Machine Learning: Manual Feature Extraction

During feature extraction, the data is segmented into overlapping 60-second windows that slide forward in 5-second steps, which also serves as data augmentation.

The manual feature extraction derives the following features:

In [1]:
import warnings

from src.ml_pipeline.dataloader.dataloader import DataLoader
from src.ml_pipeline.feature_extraction.manual.manual_fe import ManualFE

# Sampling rate handed to both the loader and the feature extractor.
wrist_sampling_rate = 64

# Segment the cleaned wrist signals into batches for feature extraction.
wrist_dataloader = DataLoader('src/wesad/WESAD/cleaned/wrist_preprocessed.pkl', sampling_rate=wrist_sampling_rate)
batches = wrist_dataloader.segment_data()

# Silence warnings only while the features are being extracted. The filter
# state is restored when the `with` block exits, unlike overwriting
# `warnings.warn` / `warnings.warn_explicit`, which disables warnings for the
# rest of the kernel session and misses callers that bound `warn` at import.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    manual_fe = ManualFE(batches, 'src/wesad/WESAD/manual_fe/wrist_manual_fe.pkl', sampling_rate=wrist_sampling_rate)
    manual_fe.extract_features()

[]

In [1]:
import warnings

from src.ml_pipeline.dataloader.dataloader import DataLoader
from src.ml_pipeline.feature_extraction.manual.manual_fe import ManualFE

# Segment the cleaned chest signals into batches for feature extraction.
chest_dataloader = DataLoader('src/wesad/WESAD/cleaned/chest_preprocessed.pkl', 'src/wesad/wesad_configuration.json')
batches = chest_dataloader.segment_data()

# Silence warnings only while the features are being extracted. The filter
# state is restored when the `with` block exits, unlike overwriting
# `warnings.warn` / `warnings.warn_explicit`, which disables warnings for the
# rest of the kernel session and misses callers that bound `warn` at import.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    manual_fe = ManualFE(batches, 'src/wesad/WESAD/manual_fe/chest_manual_fe.pkl', 'src/wesad/wesad_configuration.json', wrist=False)
    manual_fe.extract_features()

KeyboardInterrupt: 

## Traditional Machine Learning: Automatic Feature Extraction

The automatic feature extraction uses autoencoders and takes the features from their latent space.

Now we wrap the preprocessed `.pkl` files in a dataloader.

In [None]:
# NOTE(review): the other cells import project code through the `src.` prefix
# (e.g. `src.wesad...`); confirm this un-prefixed `wesad...` path still resolves.
from wesad.data_preprocessing.dataloader import WesadDataset, WesadPreprocessor, perform_loocv
# NOTE(review): this rebinds the name `DataLoader`, which the manual
# feature-extraction cells import from `src.ml_pipeline.dataloader.dataloader`.
# Neither it nor `WesadDataset` is used in this cell.
from torch.utils.data import DataLoader

# Usage example
preprocessor = WesadPreprocessor()
preprocessed_filepath = 'src/wesad/WESAD/preprocessed_wesad_data.pkl'

# First run: process the data (use_preprocessed=False), passing the pickle path above.
perform_loocv(preprocessor, use_preprocessed=False, preprocessed_filepath=preprocessed_filepath)

# Subsequent runs: reuse the saved pickle instead of reprocessing (kept disabled here).
# results = perform_loocv(preprocessor, use_preprocessed=True, preprocessed_filepath=preprocessed_filepath)

In [2]:
import pandas as pd

# Load the raw merged chest recordings for inspection.
df = pd.read_pickle('src/wesad/WESAD/raw/merged_chest.pkl')

# A bare expression on the last line makes the notebook render the frame.
df

Unnamed: 0,sid,acc1,acc2,acc3,ecg,emg,eda,temp,resp,label
214583,2.0,0.8914,-0.1102,-0.2576,0.030945,-0.003708,5.710983,29.083618,1.191711,1.0
214584,2.0,0.8926,-0.1086,-0.2544,0.033646,-0.014145,5.719376,29.122437,1.139832,1.0
214585,2.0,0.8930,-0.1094,-0.2580,0.033005,0.010208,5.706406,29.115234,1.141357,1.0
214586,2.0,0.8934,-0.1082,-0.2538,0.031815,0.012634,5.712509,29.126709,1.155090,1.0
214587,2.0,0.8930,-0.1096,-0.2570,0.030350,0.002060,5.727005,29.100861,1.133728,1.0
...,...,...,...,...,...,...,...,...,...,...
59628259,17.0,0.6494,0.0898,-0.6788,0.175003,0.033646,7.341385,34.592560,-2.023315,2.0
59628260,17.0,0.6486,0.0894,-0.6774,0.188828,-0.001099,7.352066,34.603302,-2.000427,2.0
59628261,17.0,0.6508,0.0894,-0.6760,0.201965,-0.007874,7.345200,34.584900,-1.994324,2.0
59628262,17.0,0.6500,0.0862,-0.6762,0.212631,-0.011856,7.351303,34.543488,-2.003479,2.0


In [3]:
import pandas as pd

# Load the raw merged wrist recordings for inspection.
df = pd.read_pickle('src/wesad/WESAD/raw/merged_wrist.pkl')

# A bare expression on the last line makes the notebook render the frame.
df

Unnamed: 0,sid,w_acc_x,w_acc_y,w_acc_z,bvp,w_eda,w_temp,label
0,2.0,62.0,-21.0,107.0,-59.37,1.138257,35.41,0.0
1,2.0,66.0,13.0,53.0,-53.42,1.125444,35.41,0.0
2,2.0,41.0,9.0,15.0,-44.40,1.011405,35.41,0.0
3,2.0,52.0,16.0,24.0,-33.17,1.033188,35.41,0.0
4,2.0,54.0,15.0,34.0,-20.79,0.935807,35.41,0.0
...,...,...,...,...,...,...,...,...
347467,17.0,-55.0,-34.0,-7.0,8.95,0.792287,31.81,1.0
347468,17.0,-55.0,-34.0,-7.0,14.60,0.787170,31.81,1.0
347469,17.0,-55.0,-34.0,-7.0,20.31,0.787170,31.81,1.0
347470,17.0,-55.0,-33.0,-7.0,25.72,0.784611,31.81,1.0


In [None]:
import pandas as pd

# Load the chest signals written by the signal-preprocessing cell.
df = pd.read_pickle('src/wesad/WESAD/cleaned/chest_preprocessed.pkl')

# A bare expression on the last line makes the notebook render the object.
df

In [None]:
import pandas as pd

# Alternative input, kept disabled: the cleaned chest signals.
# df = pd.read_pickle('src/wesad/WESAD/cleaned/chest_preprocessed.pkl')

# Load the augmented chest pickle.
df = pd.read_pickle('src/wesad/WESAD/augmented/chest_augmented.pkl')

# Show which Python type the pickle deserialises to.
type(df)

In [None]:
import pandas as pd

# Load the raw merged wrist recordings.
df = pd.read_pickle('src/wesad/WESAD/raw/merged_wrist.pkl')

# Preview only the first rows.
df.head()

In [None]:
import pandas as pd

# Directory of the raw wrist pickles. Rooted at `src/` like every other cell in
# this notebook, which reads `merged_wrist.pkl` from this same location.
raw_dir = 'src/wesad/WESAD/raw'

# Define file paths
pkl_path1 = f'{raw_dir}/subj_merged_acc_w.pkl'
pkl_path2 = f'{raw_dir}/subj_merged_eda_temp_w.pkl'
pkl_path3 = f'{raw_dir}/subj_merged_bvp_w.pkl'

# Load the data from pickle files
df1 = pd.read_pickle(pkl_path1)
df2 = pd.read_pickle(pkl_path2)
df3 = pd.read_pickle(pkl_path3)

# Inner-join the three frames on their index, so only index values present in
# all three survive. Adjust the merge method and key columns as needed.
merged_df = df1.merge(df2, left_index=True, right_index=True, how='inner')
merged_df = merged_df.merge(df3, left_index=True, right_index=True, how='inner')

# Save the merged dataframe where the other cells expect the wrist data.
merged_df.to_pickle(f'{raw_dir}/merged_wrist.pkl')
