# WESAD Validation Notebook for FLIRT


In [1]:
# Import Packages
import pandas as pd
import numpy as np

import matplotlib; matplotlib.use('agg')
import matplotlib.pyplot as plt

import multiprocessing
from joblib import Parallel, delayed
from tqdm.autonotebook import trange

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, classification_report, accuracy_score


from datetime import datetime, timedelta
from typing import List
import lightgbm as lgb
import glob2
import os 

from sklearn import utils, model_selection, metrics, preprocessing
from sklearn.ensemble import RandomForestClassifier

import flirt.simple

  # Remove the CWD from sys.path while we load stuff.
This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


The following function retrieves all HRV, EDA and ACC features per subject using the FLIRT pipeline


In [9]:
time = "1.3"

minutes, seconds = map(int,time.split(".", 1))

#timedelta(minutes = )

AttributeError: 'float' object has no attribute 'split'

In [8]:
def get_features_per_subject(path, window_length):
    features = flirt.simple.get_features_for_empatica_archive(zip_file_path = path,
                                      window_length = window_length,
                                      window_step_size = 1,
                                      hrv_features = True,
                                      eda_features = True,
                                      acc_features = True,
                                      #bvp_features = False,
                                      #temp_features = False,
                                      debug = True)
    return features

The following function determines the time offsets of the start and end of each relevant analysis period (baseline, stress, amusement). These offsets are combined with the timestamp stating the start of recording, to determine the absolute timestamps of the sections of interest for each subject. 

In [14]:
def find_label_timestamps(csv_path, StartingTime):

    ID = csv_path.split('/', 3)[1]
    df_timestamp = pd.read_csv(glob2.glob('WESAD/' + ID + '/*quest.csv')[0], delimiter = ';', header = 1).iloc[:2, :].dropna(axis = 1)
    print('===================================')
    print('Printing the timestamp for {0}'.format(ID))
    print('===================================')
    print(df_timestamp.head())
    
    # Start/End of experiment periods
    print('\nStart of the baseline: ' + str(df_timestamp['Base'][0]))
    print('End of the baseline: ' + str(df_timestamp['Base'][1]))
    print('Start of the fun: ' + str(df_timestamp['Fun'][0]))
    print('End of the fun: ' + str(df_timestamp['Fun'][1]))
    print('Start of the stress: ' + str(df_timestamp['TSST'][0]))
    print('End of the stress: ' + str(df_timestamp['TSST'][1]))
    
    # Get start and end time and assign label into a dict
    lab_dict = {'Base':0, 'TSST':1, 'Fun':2}
    labels_times_dict = {}
    for mode in df_timestamp.columns.tolist():
        print('mode', mode)
        if mode=='Base' or mode=='Fun' or mode=='TSST':
            labels_times_dict[mode] = [StartingTime + timedelta(minutes = int(str(df_timestamp[mode][0]).split(".")[0]))+ timedelta(seconds = int(str(df_timestamp[mode][0]).split(".")[1])), 
                                  StartingTime + timedelta(minutes = int(str(df_timestamp[mode][1]).split(".")[0])) + timedelta(seconds = int(str(df_timestamp[mode][1]).split(".")[1])), lab_dict[mode]]
        
    return labels_times_dict

In [15]:
def main():
    #os.chdir('/home/fefespinola/ETHZ_Fall_2020/') #local directory where the script is
    df_all = pd.DataFrame(None)
    #relevant_features = pd.DataFrame(None)
    File_Path = glob2.glob('WESAD/**/*_readme.txt', recursive=True)
    window_length = 60 # in seconds
    window_shift = 0.25 # in seconds
    for subject_path in File_Path:
        print(subject_path)
        print(subject_path.split('/', 3)[1])
        ID = subject_path.split('/', 3)[1]
        zip_path = glob2.glob('WESAD/' + ID + '/*_Data.zip')[0]
        print(zip_path)
        features = get_features_per_subject(zip_path, window_length)
        features.index.name = 'timedata'
        StartingTime = features.index[0]
        print(features)
        labels_times = find_label_timestamps(subject_path, StartingTime)
        relevant_features = features.loc[
            ((features.index >= labels_times['Base'][0]) & (features.index <= labels_times['Base'][1])) 
            | ((features.index >= labels_times['Fun'][0]) & (features.index <= labels_times['Fun'][1])) 
            | ((features.index >= labels_times['TSST'][0]) & (features.index <= labels_times['TSST'][1]))]

        relevant_features.insert(0, 'ID', ID)
        relevant_features['label'] = np.zeros(len(relevant_features))
        relevant_features.loc[(relevant_features.index>=labels_times['Fun'][0]) &
                                (relevant_features.index<=labels_times['Fun'][1]), 'label'] = labels_times['Fun'][2]
        relevant_features.loc[(relevant_features.index>=labels_times['TSST'][0]) & 
                            (relevant_features.index<=labels_times['TSST'][1]), 'label'] = labels_times['TSST'][2]

        # concatenate all subjects and add IDs
        df_all = pd.concat((df_all, relevant_features))
    
    print(df_all)

    return df_all

Run the evaluation script to retrieve the labeled data and train classifier to output f1-score

In [16]:
df_all = main()
df_all.to_csv('hrv_eda_acc.csv')

WESAD/S5/S5_readme.txt
S5
WESAD/S5/S5_E4_Data.zip
Reading files
Calculating HRV features


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=1166.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=1166.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=1166.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=7551.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=7551.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-06-13 12:23:13+00:00           NaN             NaN            NaN   
2017-06-13 12:23:14+00:00           NaN             NaN            NaN   
2017-06-13 12:23:15+00:00           NaN             NaN            NaN   
2017-06-13 12:23:16+00:00           NaN             NaN            NaN   
2017-06-13 12:23:17+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-06-13 14:28:59+00:00    976.729585      975.424511        187.508   
2017-06-13 14:29:00+00:00    976.729585      975.424511        187.508   
2017-06-13 14:29:01+00:00    976.729585      975.424511        187.508   
2017-06-13 14:29:02+00:00    976.729585      975.424511        187.508   
2017-06-13 14:29:03+00:00    976.729585      975.424511        187.508   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=3314.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=3314.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=3314.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=7874.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=7875.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-05-22 07:16:25+00:00           NaN             NaN            NaN   
2017-05-22 07:16:26+00:00           NaN             NaN            NaN   
2017-05-22 07:16:27+00:00           NaN             NaN            NaN   
2017-05-22 07:16:28+00:00           NaN             NaN            NaN   
2017-05-22 07:16:29+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-05-22 09:27:35+00:00    944.800256         937.543        125.005   
2017-05-22 09:27:36+00:00    944.800256         937.543        125.005   
2017-05-22 09:27:37+00:00    944.800256         937.543        125.005   
2017-05-22 09:27:38+00:00    944.800256         937.543        125.005   
2017-05-22 09:27:39+00:00    944.800256         937.543        125.005   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=1751.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=1751.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=1751.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=7724.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=7724.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-05-24 11:09:48+00:00           NaN             NaN            NaN   
2017-05-24 11:09:49+00:00           NaN             NaN            NaN   
2017-05-24 11:09:50+00:00           NaN             NaN            NaN   
2017-05-24 11:09:51+00:00           NaN             NaN            NaN   
2017-05-24 11:09:52+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-05-24 13:18:27+00:00   1095.538466        1062.549         437.52   
2017-05-24 13:18:28+00:00   1095.538466        1062.549         437.52   
2017-05-24 13:18:29+00:00   1095.538466        1062.549         437.52   
2017-05-24 13:18:30+00:00   1095.538466        1062.549         437.52   
2017-05-24 13:18:31+00:00   1095.538466        1062.549         437.52   

                             hrv_sds

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=3123.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=3123.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=3123.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=8000.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=8000.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-06-13 08:34:40+00:00           NaN             NaN            NaN   
2017-06-13 08:34:41+00:00           NaN             NaN            NaN   
2017-06-13 08:34:42+00:00           NaN             NaN            NaN   
2017-06-13 08:34:43+00:00           NaN             NaN            NaN   
2017-06-13 08:34:44+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-06-13 10:47:55+00:00   1046.172052        1031.297        281.263   
2017-06-13 10:47:56+00:00   1046.172052        1031.297        281.263   
2017-06-13 10:47:57+00:00   1046.172052        1031.297        281.263   
2017-06-13 10:47:58+00:00   1046.172052        1031.297        281.263   
2017-06-13 10:47:59+00:00   1046.172052        1031.297        281.263   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=1951.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=1951.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=1951.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=7232.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=7232.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-08-11 07:20:22+00:00           NaN             NaN            NaN   
2017-08-11 07:20:23+00:00           NaN             NaN            NaN   
2017-08-11 07:20:24+00:00           NaN             NaN            NaN   
2017-08-11 07:20:25+00:00           NaN             NaN            NaN   
2017-08-11 07:20:26+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-08-11 09:20:49+00:00   1093.131531     1137.829163     186.634091   
2017-08-11 09:20:50+00:00   1093.131531     1137.829163     186.634091   
2017-08-11 09:20:51+00:00   1093.131531     1137.829163     186.634091   
2017-08-11 09:20:52+00:00   1093.131531     1137.829163     186.634091   
2017-08-11 09:20:53+00:00   1093.131531     1137.829163     186.634091   

                           hrv_sdsd 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=5028.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=5028.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=5028.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=6822.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=6823.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-07-25 07:06:08+00:00           NaN             NaN            NaN   
2017-07-25 07:06:09+00:00           NaN             NaN            NaN   
2017-07-25 07:06:10+00:00           NaN             NaN            NaN   
2017-07-25 07:06:11+00:00           NaN             NaN            NaN   
2017-07-25 07:06:12+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-07-25 08:59:46+00:00    644.733637         640.654         93.754   
2017-07-25 08:59:47+00:00    644.733637         640.654         93.754   
2017-07-25 08:59:48+00:00    644.733637         640.654         93.754   
2017-07-25 08:59:49+00:00    644.733637         640.654         93.754   
2017-07-25 08:59:50+00:00    644.733637         640.654         93.754   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=3118.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=3118.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=3118.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=6461.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=6461.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-07-25 11:15:19+00:00           NaN             NaN            NaN   
2017-07-25 11:15:20+00:00           NaN             NaN            NaN   
2017-07-25 11:15:21+00:00           NaN             NaN            NaN   
2017-07-25 11:15:22+00:00           NaN             NaN            NaN   
2017-07-25 11:15:23+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-07-25 13:02:55+00:00    739.721124         734.409        343.766   
2017-07-25 13:02:56+00:00    739.721124         734.409        343.766   
2017-07-25 13:02:57+00:00    739.721124         734.409        343.766   
2017-07-25 13:02:58+00:00    739.721124         734.409        343.766   
2017-07-25 13:02:59+00:00    739.721124         734.409        343.766   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=3743.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=3743.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=3743.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=7107.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=7108.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-08-10 12:00:25+00:00           NaN             NaN            NaN   
2017-08-10 12:00:26+00:00           NaN             NaN            NaN   
2017-08-10 12:00:27+00:00           NaN             NaN            NaN   
2017-08-10 12:00:28+00:00           NaN             NaN            NaN   
2017-08-10 12:00:29+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-08-10 13:58:48+00:00    685.210185         671.906        156.257   
2017-08-10 13:58:49+00:00    685.210185         671.906        156.257   
2017-08-10 13:58:50+00:00    685.210185         671.906        156.257   
2017-08-10 13:58:51+00:00    685.210185         671.906        156.257   
2017-08-10 13:58:52+00:00    685.210185         671.906        156.257   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=2254.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=2254.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=2254.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=6638.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=6639.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-07-10 11:11:40+00:00           NaN             NaN            NaN   
2017-07-10 11:11:41+00:00           NaN             NaN            NaN   
2017-07-10 11:11:42+00:00           NaN             NaN            NaN   
2017-07-10 11:11:43+00:00           NaN             NaN            NaN   
2017-07-10 11:11:44+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-07-10 13:02:14+00:00    773.956643      750.122701         437.52   
2017-07-10 13:02:15+00:00    773.956643      750.122701         437.52   
2017-07-10 13:02:16+00:00    773.956643      750.122701         437.52   
2017-07-10 13:02:17+00:00    773.956643      750.122701         437.52   
2017-07-10 13:02:18+00:00    773.956643      750.122701         437.52   

                             hrv_sds

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=2670.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=2670.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=2670.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=8324.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=8324.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-06-14 11:21:38+00:00           NaN             NaN            NaN   
2017-06-14 11:21:39+00:00           NaN             NaN            NaN   
2017-06-14 11:21:40+00:00           NaN             NaN            NaN   
2017-06-14 11:21:41+00:00           NaN             NaN            NaN   
2017-06-14 11:21:42+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-06-14 13:40:17+00:00    950.066614      976.770717     233.076284   
2017-06-14 13:40:18+00:00    950.066614      976.770717     233.076284   
2017-06-14 13:40:19+00:00    950.066614      976.770717     233.076284   
2017-06-14 13:40:20+00:00    950.066614      976.770717     233.076284   
2017-06-14 13:40:21+00:00    950.066614      976.770717     233.076284   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=2815.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=2815.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=2815.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=6494.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=6495.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-07-06 11:12:04+00:00           NaN             NaN            NaN   
2017-07-06 11:12:05+00:00           NaN             NaN            NaN   
2017-07-06 11:12:06+00:00           NaN             NaN            NaN   
2017-07-06 11:12:07+00:00           NaN             NaN            NaN   
2017-07-06 11:12:08+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-07-06 13:00:14+00:00    754.000713          765.66        140.631   
2017-07-06 13:00:15+00:00    754.000713          765.66        140.631   
2017-07-06 13:00:16+00:00    754.000713          765.66        140.631   
2017-07-06 13:00:17+00:00    754.000713          765.66        140.631   
2017-07-06 13:00:18+00:00    754.000713          765.66        140.631   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=3212.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=3212.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=3212.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=6228.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=6229.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-07-11 11:11:41+00:00           NaN             NaN            NaN   
2017-07-11 11:11:42+00:00           NaN             NaN            NaN   
2017-07-11 11:11:43+00:00           NaN             NaN            NaN   
2017-07-11 11:11:44+00:00           NaN             NaN            NaN   
2017-07-11 11:11:45+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-07-11 12:55:25+00:00    833.490192         859.414        281.263   
2017-07-11 12:55:26+00:00    833.490192         859.414        281.263   
2017-07-11 12:55:27+00:00    833.490192         859.414        281.263   
2017-07-11 12:55:28+00:00    833.490192         859.414        281.263   
2017-07-11 12:55:29+00:00    833.490192         859.414        281.263   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=2732.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=2732.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=2732.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=6863.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=6863.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-08-08 11:14:07+00:00           NaN             NaN            NaN   
2017-08-08 11:14:08+00:00           NaN             NaN            NaN   
2017-08-08 11:14:09+00:00           NaN             NaN            NaN   
2017-08-08 11:14:10+00:00           NaN             NaN            NaN   
2017-08-08 11:14:11+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-08-08 13:08:25+00:00    746.713735         750.034     116.723813   
2017-08-08 13:08:26+00:00    746.713735         750.034     116.723813   
2017-08-08 13:08:27+00:00    746.713735         750.034     116.723813   
2017-08-08 13:08:28+00:00    746.713735         750.034     116.723813   
2017-08-08 13:08:29+00:00    746.713735         750.034     116.723813   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=4820.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=4820.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=4820.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=6986.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=6987.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-08-09 07:10:31+00:00           NaN             NaN            NaN   
2017-08-09 07:10:32+00:00           NaN             NaN            NaN   
2017-08-09 07:10:33+00:00           NaN             NaN            NaN   
2017-08-09 07:10:34+00:00           NaN             NaN            NaN   
2017-08-09 07:10:35+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-08-09 09:06:53+00:00    861.886237      853.699715        187.509   
2017-08-09 09:06:54+00:00    861.886237      853.699715        187.509   
2017-08-09 09:06:55+00:00    861.886237      853.699715        187.509   
2017-08-09 09:06:56+00:00    861.886237      853.699715        187.509   
2017-08-09 09:06:57+00:00    861.886237      853.699715        187.509   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


HBox(children=(HTML(value='HRV Time Domain features '), FloatProgress(value=0.0, max=3308.0), HTML(value='')))




HBox(children=(HTML(value='HRV Frequency Domain features '), FloatProgress(value=0.0, max=3308.0), HTML(value=…




HBox(children=(HTML(value='HRV Statistical features '), FloatProgress(value=0.0, max=3308.0), HTML(value='')))


Calculating EDA features


HBox(children=(HTML(value='EDA features'), FloatProgress(value=0.0, max=6642.0), HTML(value='')))


Calculating ACC features


  # Remove the CWD from sys.path while we load stuff.


HBox(children=(HTML(value='ACC features'), FloatProgress(value=0.0, max=6643.0), HTML(value='')))


                           hrv_mean_nni  hrv_median_nni  hrv_range_nni  \
timedata                                                                 
2017-08-10 07:11:56+00:00           NaN             NaN            NaN   
2017-08-10 07:11:57+00:00           NaN             NaN            NaN   
2017-08-10 07:11:58+00:00           NaN             NaN            NaN   
2017-08-10 07:11:59+00:00           NaN             NaN            NaN   
2017-08-10 07:12:00+00:00           NaN             NaN            NaN   
...                                 ...             ...            ...   
2017-08-10 09:02:34+00:00      762.8862      764.919717         218.76   
2017-08-10 09:02:35+00:00      762.8862      764.919717         218.76   
2017-08-10 09:02:36+00:00      762.8862      764.919717         218.76   
2017-08-10 09:02:37+00:00      762.8862      764.919717         218.76   
2017-08-10 09:02:38+00:00      762.8862      764.919717         218.76   

                            hrv_sdsd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


In [21]:
data = pd.read_csv('hrv_eda_acc.csv')
data.set_index('timedata', inplace=True)
label=data.label
ID=data.ID
print(ID.unique())

['S5' 'S2' 'S3' 'S4' 'S17' 'S10' 'S11' 'S16' 'S8' 'S6' 'S7' 'S9' 'S13'
 'S14' 'S15']


In [18]:
data=data.filter(regex='^eda',axis=1)
data['label']=label
data['ID']=ID

In [22]:
data.head()

Unnamed: 0_level_0,ID,hrv_mean_nni,hrv_median_nni,hrv_range_nni,hrv_sdsd,hrv_rmssd,hrv_nni_50,hrv_pnni_50,hrv_nni_20,hrv_pnni_20,...,acc_l2_n_below_mean,acc_l2_n_sign_changes,acc_l2_iqr,acc_l2_iqr_5_95,acc_l2_pct_5,acc_l2_pct_95,acc_l2_entropy,acc_l2_perm_entropy,acc_l2_svd_entropy,label
timedata,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-06-13 12:28:50+00:00,S5,871.536234,880.50649,260.945038,80.505696,80.86061,6.0,41.704339,9.0,62.556509,...,1274,0,0.397128,6.270711,60.744959,67.01567,7.558265,0.94853,0.217955,0.0
2017-06-13 12:28:51+00:00,S5,871.539446,880.609628,261.151318,80.564772,80.920609,6.0,41.700484,9.0,62.550726,...,1270,0,0.397128,7.407377,60.435085,67.842462,7.557919,0.94853,0.240854,0.0
2017-06-13 12:28:52+00:00,S5,871.542658,880.712767,261.357598,80.623848,80.980608,6.0,41.696629,9.0,62.544944,...,1264,0,0.397128,8.112951,60.041652,68.154604,7.557738,0.948938,0.248568,0.0
2017-06-13 12:28:53+00:00,S5,871.54587,880.815905,261.563877,80.682924,81.040607,6.0,41.692774,9.0,62.539162,...,1258,0,0.397128,8.547502,59.838531,68.386033,7.557592,0.94853,0.25721,0.0
2017-06-13 12:28:54+00:00,S5,871.549082,880.919043,261.770157,80.742,81.100606,6.0,41.68892,9.0,62.533379,...,1244,0,0.397128,8.646181,59.739852,68.386033,7.55758,0.951355,0.257665,0.0


# Prediction (LightGBM)


In [23]:
df = data.replace([np.inf, -np.inf], np.nan) # np.inf leads to problems with some techniques

# Clean columns that contain a lot of nan values 
print(len(df), len(df.columns))
df = df.dropna(axis=1, thresh=int(len(df)*0.99))
print(len(df), len(df.columns))
print('Columns dropped: ', data.drop(df.columns, axis=1).columns.values)


stats = []

cv = model_selection.LeaveOneGroupOut()

X = df.drop(columns=['label', 'ID'])
y = df['label'].astype('int')
groups = df['ID']
print("running %d-fold CV..." % (cv.get_n_splits(X, y, groups)))

for train_index, test_index in cv.split(X, y, groups):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    params = {'objective': 'binary', 'is_unbalance': True}
    model = lgb.LGBMClassifier(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    stats.append({
        'f1': f1_score(y_test, y_pred, average="macro"),
        'accuracy': accuracy_score(y_test, y_pred)
    })
        
    
    print(metrics.classification_report(y_test, y_pred))

stats = pd.DataFrame(stats)
print(stats.f1.mean())

33917 179
33917 174
Columns dropped:  ['eda_tonic_entropy' 'eda_phasic_entropy' 'acc_acc_x_entropy'
 'acc_acc_y_entropy' 'acc_acc_z_entropy']
running 15-fold CV...
              precision    recall  f1-score   support

           0       0.87      0.94      0.90      1201
           1       0.89      0.80      0.85       773
           2       0.47      0.44      0.45       393

    accuracy                           0.81      2367
   macro avg       0.74      0.73      0.73      2367
weighted avg       0.81      0.81      0.81      2367

              precision    recall  f1-score   support

           0       0.64      0.99      0.78      1201
           1       0.97      0.39      0.56       701
           2       0.41      0.15      0.22       389

    accuracy                           0.67      2291
   macro avg       0.67      0.51      0.52      2291
weighted avg       0.70      0.67      0.62      2291

              precision    recall  f1-score   support

           0       

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.94      0.89      0.91      1161
           1       0.61      0.99      0.76       661
           2       0.00      0.00      0.00       351

    accuracy                           0.77      2173
   macro avg       0.52      0.63      0.56      2173
weighted avg       0.69      0.77      0.72      2173

              precision    recall  f1-score   support

           0       1.00      0.15      0.26      1152
           1       0.33      1.00      0.50       674
           2       0.00      0.00      0.00       393

    accuracy                           0.38      2219
   macro avg       0.44      0.38      0.25      2219
weighted avg       0.62      0.38      0.29      2219

              precision    recall  f1-score   support

           0       0.95      0.85      0.90      1219
           1       0.54      0.87      0.67       666
           2       0.32      0.11      0.17       395

    accuracy        

In [17]:
print(stats.accuracy.mean())

0.6686837095164828
