# Importing Libraries

In [1]:
!pip install neurokit2

Collecting neurokit2
  Downloading neurokit2-0.2.10-py2.py3-none-any.whl.metadata (37 kB)
Downloading neurokit2-0.2.10-py2.py3-none-any.whl (693 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m693.1/693.1 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neurokit2
Successfully installed neurokit2-0.2.10


In [2]:
import pickle 
from IPython.display import display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as signal
import neurokit2 as nk

# Importing Pickle file of Subject 2

In [3]:
file_path = "/kaggle/input/wesad-full-dataset/WESAD/S2/S2.pkl"

with open(file_path, "rb") as file:
    s2 = pickle.load(file, encoding="latin1")

print(s2)


{'signal': {'chest': {'ACC': array([[ 0.95539999, -0.222     , -0.55799997],
       [ 0.92579997, -0.2216    , -0.55379999],
       [ 0.90820003, -0.21960002, -0.53920001],
       ...,
       [ 0.87179995, -0.12379998, -0.30419999],
       [ 0.87300003, -0.12339997, -0.30260003],
       [ 0.87020004, -0.12199998, -0.30220002]]), 'ECG': array([[ 0.02142334],
       [ 0.02032471],
       [ 0.01652527],
       ...,
       [-0.00544739],
       [ 0.00013733],
       [ 0.0040741 ]]), 'EMG': array([[-0.00444031],
       [ 0.00434875],
       [ 0.00517273],
       ...,
       [-0.01716614],
       [-0.02897644],
       [-0.02357483]]), 'EDA': array([[5.25054932],
       [5.26733398],
       [5.24330139],
       ...,
       [0.36048889],
       [0.36582947],
       [0.365448  ]]), 'Temp': array([[30.120758],
       [30.129517],
       [30.138214],
       ...,
       [31.459229],
       [31.484283],
       [31.456268]], dtype=float32), 'Resp': array([[-1.14898682],
       [-1.12457275],
       

## Preprocessing
We will focus on the data from RespiBAN (chest device).
Heart rate (HR), EDA, and Respiration are the key features related to stress. We will extract the HR from the ECG data and then synchronize it with the EDA, respiration, and Labels. Finally, we'll organize everything into a data frame to prepare it for input into the model.

In [4]:
s2_ecg_signal = s2["signal"]["chest"]["ECG"]
s2_eda_signal = s2["signal"]["chest"]["EDA"]
s2_resp_signal = s2["signal"]["chest"]["Resp"]
s2_labels = s2["label"]

# Flatten the data (make it 1D array)
s2_ecg_signal = s2_ecg_signal.flatten()
s2_eda_signal = s2_eda_signal.flatten()
s2_resp_signal = s2_resp_signal.flatten()

# Sampling Rates (All signals were sampled at 700 Hz)
frequency = 700  

# Noise Reduction
s2_ecg_cleaned = nk.ecg_clean(s2_ecg_signal, sampling_rate=frequency)

# Finding R-Peaks
s2_r_peaks, _ = signal.find_peaks(s2_ecg_cleaned, distance=frequency*0.6, height=np.mean(s2_ecg_cleaned))

# Calculate HR in Beats Per Minute (BPM)
s2_rr_intervals = np.diff(s2_r_peaks) / frequency 
s2_hr_bpm = 60 / s2_rr_intervals  

# Upsampling HR to be the same number of values of EDA
s2_hr_bpm_resampled = np.interp(np.linspace(0, len(s2_hr_bpm), len(s2_eda_signal)),
                             np.linspace(0, len(s2_hr_bpm), len(s2_hr_bpm)),
                             s2_hr_bpm)

# Synchronizing features and labels
s2_data_dict = {
    "Subject_ID": 's2',
    "HR": s2_hr_bpm_resampled,         # معدل ضربات القلب (HR) متزامن مع EDA
    "EDA": s2_eda_signal,
    "Respiration": s2_resp_signal,
    "Label": s2_labels
}

s2_df = pd.DataFrame(s2_data_dict)

In [5]:
s2_df

Unnamed: 0,Subject_ID,HR,EDA,Respiration,Label
0,s2,77.777778,5.250549,-1.148987,0
1,s2,77.785815,5.267334,-1.124573,0
2,s2,77.793852,5.243301,-1.152039,0
3,s2,77.801889,5.249405,-1.158142,0
4,s2,77.809927,5.286407,-1.161194,0
...,...,...,...,...,...
4255295,s2,59.647906,0.400162,-1.063538,0
4255296,s2,59.650702,0.355911,-1.106262,0
4255297,s2,59.653498,0.360489,-1.103210,0
4255298,s2,59.656295,0.365829,-1.086426,0


# Doint The Same Preprocessing Steps For All Subjects

## Subject 3

### Importing Pickle file

In [6]:
with open('/kaggle/input/wesad-full-dataset/WESAD/S3/S3.pkl', "rb") as file:
    s3 = pickle.load(file, encoding="latin1")

### Preprocessing

In [7]:
s3_ecg_signal = s3["signal"]["chest"]["ECG"]
s3_eda_signal = s3["signal"]["chest"]["EDA"]
s3_resp_signal = s3["signal"]["chest"]["Resp"]
s3_labels = s3["label"]

# Flatten the data (make it 1D array)
s3_ecg_signal = s3_ecg_signal.flatten()
s3_eda_signal = s3_eda_signal.flatten()
s3_resp_signal = s3_resp_signal.flatten()

# Noise Reduction
s3_ecg_cleaned = nk.ecg_clean(s3_ecg_signal, sampling_rate=frequency)

# Finding R-Peaks
s3_r_peaks, _ = signal.find_peaks(s3_ecg_cleaned, distance=frequency*0.6, height=np.mean(s3_ecg_cleaned))

# Calculate HR in Beats Per Minute (BPM)
s3_rr_intervals = np.diff(s3_r_peaks) / frequency 
s3_hr_bpm = 60 / s3_rr_intervals  

# Upsampling HR to be the same number of values of EDA
s3_hr_bpm_resampled = np.interp(np.linspace(0, len(s3_hr_bpm), len(s3_eda_signal)),
                             np.linspace(0, len(s3_hr_bpm), len(s3_hr_bpm)),
                             s3_hr_bpm)

# Synchronizing features and labels
s3_data_dict = {
    "Subject_ID": 's3',
    "HR": s3_hr_bpm_resampled,         # معدل ضربات القلب (HR) متزامن مع EDA
    "EDA": s3_eda_signal,
    "Respiration": s3_resp_signal,
    "Label": s3_labels
}

s3_df = pd.DataFrame(s3_data_dict)


In [8]:
s3_df

Unnamed: 0,Subject_ID,HR,EDA,Respiration,Label
0,s3,63.540091,7.988358,2.204895,0
1,s3,63.548125,7.982254,2.180481,0
2,s3,63.556159,7.983398,2.166748,0
3,s3,63.564193,7.981873,2.191162,0
4,s3,63.572228,7.978821,2.183533,0
...,...,...,...,...,...
4545095,s3,49.055975,7.535934,1.371765,0
4545096,s3,49.058337,7.533646,1.356506,0
4545097,s3,49.060698,7.532120,1.344299,0
4545098,s3,49.063059,7.530594,1.341248,0


## Subject 4

### Importing Pickle file

In [9]:
with open('/kaggle/input/wesad-full-dataset/WESAD/S4/S4.pkl', "rb") as file:
    s4 = pickle.load(file, encoding="latin1")

### Preprocessing

In [10]:
s4_ecg_signal = s4["signal"]["chest"]["ECG"]
s4_eda_signal = s4["signal"]["chest"]["EDA"]
s4_resp_signal = s4["signal"]["chest"]["Resp"]
s4_labels = s4["label"]

# Flatten the data (make it 1D array)
s4_ecg_signal = s4_ecg_signal.flatten()
s4_eda_signal = s4_eda_signal.flatten()
s4_resp_signal = s4_resp_signal.flatten()

# Noise Reduction
s4_ecg_cleaned = nk.ecg_clean(s4_ecg_signal, sampling_rate=frequency)

# Finding R-Peaks
s4_r_peaks, _ = signal.find_peaks(s4_ecg_cleaned, distance=frequency*0.6, height=np.mean(s4_ecg_cleaned))

# Calculate HR in Beats Per Minute (BPM)
s4_rr_intervals = np.diff(s4_r_peaks) / frequency 
s4_hr_bpm = 60 / s4_rr_intervals  

# Upsampling HR to be the same number of values of EDA
s4_hr_bpm_resampled = np.interp(np.linspace(0, len(s4_hr_bpm), len(s4_eda_signal)),
                             np.linspace(0, len(s4_hr_bpm), len(s4_hr_bpm)),
                             s4_hr_bpm)

# Synchronizing features and labels
s4_data_dict = {
    "Subject_ID": 's4',
    "HR": s4_hr_bpm_resampled,         # معدل ضربات القلب (HR) متزامن مع EDA
    "EDA": s4_eda_signal,
    "Respiration": s4_resp_signal,
    "Label": s4_labels
}

s4_df = pd.DataFrame(s4_data_dict)


In [11]:
s4_df

Unnamed: 0,Subject_ID,HR,EDA,Respiration,Label
0,s4,60.085837,1.096344,2.836609,0
1,s4,60.089848,1.094437,2.839661,0
2,s4,60.093860,1.094055,2.838135,0
3,s4,60.097871,1.094818,2.839661,0
4,s4,60.101883,1.092911,2.882385,0
...,...,...,...,...,...
4496095,s4,63.169356,2.483749,2.578735,0
4496096,s4,63.166490,2.480698,1.963806,0
4496097,s4,63.163625,2.482605,2.581787,0
4496098,s4,63.160760,2.494431,2.571106,0


## Subject 5

### Importing Pickle file

In [12]:
with open('/kaggle/input/wesad-full-dataset/WESAD/S5/S5.pkl', "rb") as file:
    s5 = pickle.load(file, encoding="latin1")

### Preprocessing

In [13]:
s5_ecg_signal = s5["signal"]["chest"]["ECG"]
s5_eda_signal = s5["signal"]["chest"]["EDA"]
s5_resp_signal = s5["signal"]["chest"]["Resp"]
s5_labels = s5["label"]

# Flatten the data (make it 1D array)
s5_ecg_signal = s5_ecg_signal.flatten()
s5_eda_signal = s5_eda_signal.flatten()
s5_resp_signal = s5_resp_signal.flatten()

# Noise Reduction
s5_ecg_cleaned = nk.ecg_clean(s5_ecg_signal, sampling_rate=frequency)

# Finding R-Peaks
s5_r_peaks, _ = signal.find_peaks(s5_ecg_cleaned, distance=frequency*0.6, height=np.mean(s5_ecg_cleaned))

# Calculate HR in Beats Per Minute (BPM)
s5_rr_intervals = np.diff(s5_r_peaks) / frequency 
s5_hr_bpm = 60 / s5_rr_intervals  

# Upsampling HR to be the same number of values of EDA
s5_hr_bpm_resampled = np.interp(np.linspace(0, len(s5_hr_bpm), len(s5_eda_signal)),
                             np.linspace(0, len(s5_hr_bpm), len(s5_hr_bpm)),
                             s5_hr_bpm)

# Synchronizing features and labels
s5_data_dict = {
    "Subject_ID": 's5',
    "HR": s5_hr_bpm_resampled,         # معدل ضربات القلب (HR) متزامن مع EDA
    "EDA": s5_eda_signal,
    "Respiration": s5_resp_signal,
    "Label": s5_labels
}

s5_df = pd.DataFrame(s5_data_dict)

In [14]:
s5_df

Unnamed: 0,Subject_ID,HR,EDA,Respiration,Label
0,s5,93.541203,3.888321,0.044250,0
1,s5,93.496044,3.913116,0.032043,0
2,s5,93.450884,3.873062,0.015259,0
3,s5,93.405725,3.883362,-0.062561,0
4,s5,93.360566,3.886795,0.016785,0
...,...,...,...,...,...
4380595,s5,74.732221,10.231400,7.070923,0
4380596,s5,74.732440,10.231018,7.044983,0
4380597,s5,74.732659,10.231018,7.057190,0
4380598,s5,74.732877,10.230637,7.090759,0
