In [7]:
import pandas as pd
import numpy as np
import flirt
import os
import jupyter
import ipywidgets

# 1. Load the WESAD and nurses datasets

In [None]:
# Read the two input datasets from their parquet files.
df_nurses = pd.read_parquet('data-input/dataset_nurses.parquet')
df_wesad = pd.read_parquet('data-input/dataset_wesad.parquet')

# Stack WESAD on top of the nurses data. No ignore_index is passed, so every
# row keeps the index value it had in its source file.
df = pd.concat(objs=[df_wesad, df_nurses])

# 2. Function for getting features from FLIRT

In [None]:
def get_acc_features(subject, label, data, window_length, window_step_size, data_frequency=32):
    """Compute FLIRT accelerometer features for one contiguous block of samples.

    Parameters
    ----------
    subject
        Value written to the ``subject`` column of every returned row.
    label
        Value written to the ``label`` column of every returned row.
    data : pandas.DataFrame
        Consecutive samples recorded at ``data_frequency`` Hz. The existing
        index is discarded and replaced by a synthetic, evenly spaced datetime
        index. NOTE(review): presumably the columns are the accelerometer axes
        FLIRT expects -- confirm against the input files.
    window_length, window_step_size
        Forwarded unchanged to ``flirt.get_acc_features`` (presumably seconds
        -- confirm against the FLIRT documentation).
    data_frequency : int, default 32
        Sampling rate in Hz. It determines both the spacing of the synthetic
        datetime index and the ``data_frequency`` argument passed to FLIRT.

    Returns
    -------
    pandas.DataFrame
        The FLIRT feature table with the ``datetime`` index removed and the
        ``subject`` and ``label`` columns appended.
    """
    # FLIRT needs a correct datetime index, otherwise it creates a wrong time
    # index. At 32 Hz one sample lasts 1/32 s = 31,250,000 ns. A Timedelta is
    # used instead of the 'N' frequency alias, which newer pandas deprecates.
    sample_period = pd.Timedelta(nanoseconds=round(1e9 / data_frequency))
    time_index = pd.date_range(start=pd.Timestamp(0), periods=len(data), freq=sample_period)
    data = data.set_index(time_index)

    # calculate features
    acc_features = flirt.get_acc_features(data,
                                          window_length=window_length,
                                          window_step_size=window_step_size,
                                          data_frequency=data_frequency)

    # The synthetic timestamps carry no real-world meaning, so move the index
    # into a column and drop it.
    acc_features = acc_features.reset_index().drop(columns=['datetime'])

    # add subject and label column
    acc_features['subject'] = subject
    acc_features['label'] = label

    return acc_features

# 3. Calculate features for the whole dataset

In [None]:
# Sanity check: (rows, columns) of the combined dataset.
df.shape

In [None]:
# Summary statistics of the combined dataset, to eyeball the value ranges
# before extracting features.
df.describe()

In [None]:
%%time

result_dfs = []

# Sliding-window settings forwarded to get_acc_features; they are also used
# further down to build the output file name.
window_length = 60
window_step_size = 1

# loop over subject and label groups
for (subject, label), df_chunk in df.groupby(['subject', 'label']):
    df_chunk = df_chunk.drop(columns=['subject', 'label'])

    # Each subject-label combination might consist of several separate
    # recordings. Consecutive index values mean consecutive measurements
    # (1/32 second apart), so a new segment starts wherever the index jumps by
    # anything other than 1.
    segment_starts = np.flatnonzero(np.diff(df_chunk.index.to_numpy()) != 1) + 1
    bounds = [0, *segment_starts, len(df_chunk)]

    # Slice with iloc rather than np.split: np.split on a DataFrame goes
    # through DataFrame.swapaxes, which is deprecated in recent pandas.
    for start, stop in zip(bounds[:-1], bounds[1:]):
        df_chunk_inner = df_chunk.iloc[start:stop]
        result_dfs.append(
            get_acc_features(subject, label, df_chunk_inner, window_length, window_step_size)
        )

# Every per-segment result keeps its own 0..k index, so index values repeat
# in the combined frame.
res = pd.concat(result_dfs)

In [None]:
# Discard every feature row that contains at least one missing value.
res = res.dropna(axis='index', how='any')

In [None]:
# Persist the feature table as parquet; the file name encodes the window
# settings used to compute it.

# Make sure the target directory exists before writing.
os.makedirs('data-input', exist_ok=True)

res.to_parquet(f'data-input/flirt-{window_length}-{window_step_size}.parquet')

In [None]:
# Display the final feature table as the cell output.
res