# Extracting arm swing features
The triaxial accelerometer, triaxial gyroscope, and (boolean) predictions of gait are used as input for extracting features relating to arm swing.

## Modules

In [1]:
# Automatically reload modules
%load_ext autoreload
%autoreload 2

import numpy as np
import os
import pandas as pd
import tsdf

from dbpd import DataColumns
from dbpd.extracting_features import *

## Constants

In [2]:
# paths and files
path_to_data = '../../../tests/data/'
input_path_imu = os.path.join(path_to_data, '2.preprocessed_data')
output_path = os.path.join(path_to_data, '3.extracted_features')

# general
sensor = 'IMU'
units = 'degrees'
sampling_frequency = 100

time_colname = 'time'
pred_gait_colname = 'pred_gait'
angle_smooth_colname = 'angle_smooth'
angle_colname = 'angle'
velocity_colname = 'velocity'
segment_nr_colname = 'segment_nr'

# windowing
window_type = 'hann'
window_length_s = 3       # seconds
window_overlap_s = 3*0.75    
window_step_size_s = window_length_s - window_overlap_s

l_data_point_level_cols = [
    DataColumns.ACCELERATION_X,
    DataColumns.ACCELERATION_Y,
    DataColumns.ACCELERATION_Z,
    DataColumns.ROTATION_X,
    DataColumns.ROTATION_Y,
    DataColumns.ROTATION_Z,
    f'grav_{DataColumns.ACCELERATION_X}',
    f'grav_{DataColumns.ACCELERATION_Y}',
    f'grav_{DataColumns.ACCELERATION_Z}',
    angle_smooth_colname, 
    velocity_colname
]

# computing power
power_band_low_frequency = 0.3
power_band_high_frequency = 3
power_total_low_frequency = 0
power_total_high_frequency = int(sampling_frequency / 2)

d_frequency_bandwidths = {
    'power_below_gait': [0.3, 0.7],
    'power_gait': [0.7, 3.5],
    'power_tremor': [3.5, 8],
    'power_above_tremor': [8, sampling_frequency]
}

# cepstral coefficients
cc_low_frequency = 0
cc_high_frequency = int(sampling_frequency / 2) 
filter_length = 16
n_dct_filters = 16

## Load data

In [17]:
# load accelerometer and gyroscope data
l_dfs = []
for sensor in ['acceleration', 'rotation']:
    meta_filename = f'{sensor}_meta.json'
    values_filename = f'{sensor}_samples.bin'
    time_filename = f'{sensor}_time.bin'

    metadata_dict = tsdf.load_metadata_from_path(os.path.join(input_path_imu, meta_filename))
    metadata_time = metadata_dict[time_filename]
    metadata_samples = metadata_dict[values_filename]
    l_dfs.append(tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns))

df = pd.merge(l_dfs[0], l_dfs[1], on='time')

df.sample(2)

Unnamed: 0,time,acceleration_x,grav_acceleration_x,acceleration_y,grav_acceleration_y,acceleration_z,grav_acceleration_z,rotation_x,rotation_y,rotation_z
51622,516.22,-1.476612,-0.653204,6.919076,0.164059,-4.887333,1.01662,-18.915667,59.970635,-15.670974
32208,322.08,1.41107,-0.013903,5.649538,0.100235,-7.101374,0.045233,4.804845,-1.295598,-0.123681


## Prepare data

In [18]:
# temporary add "random" predictions
df[pred_gait_colname] = np.concatenate([np.repeat([1], df.shape[0]//3), np.repeat([0], df.shape[0]//3), np.repeat([1], df.shape[0] + 1 - 2*df.shape[0]//3)], axis=0)

## Process data

In [19]:
df[velocity_colname] = pca_transform_gyroscope(
    df=df, 
    y_gyro_colname=DataColumns.ROTATION_Y,
    z_gyro_colname=DataColumns.ROTATION_Z,
    pred_gait_colname=pred_gait_colname
)

df[angle_colname] = compute_angle(
    velocity_col=df[velocity_colname],
    time_col=df[time_colname]
)

df[angle_smooth_colname] = remove_moving_average_angle(
    angle_col=df[angle_colname],
    sampling_frequency=sampling_frequency
)

df_segments = create_segments(
    df=df,
    pred_gait_colname=pred_gait_colname,
    time_colname=time_colname,
    sampling_frequency=sampling_frequency,
    window_length_s=window_length_s
)

l_dfs = []
for segment_nr in df_segments[segment_nr_colname].unique():
    df_single_segment = df_segments.loc[df_segments[segment_nr_colname]==segment_nr].copy().reset_index(drop=True)
    l_dfs.append(tabulate_windows(
        df=df_single_segment,
        data_point_level_cols=l_data_point_level_cols,
        window_length_s=window_length_s,
        window_step_size_s=window_step_size_s,
        sampling_frequency=sampling_frequency
        )
    )
df_windowed = pd.concat(l_dfs).reset_index(drop=True)

del df, df_segments

In [20]:
df_windowed['angle_freqs'], df_windowed['angle_fft'] = signal_to_ffts(
    sensor_col=df_windowed[angle_smooth_colname],
    window_type=window_type,
    sampling_frequency=sampling_frequency)

df_windowed['rotation_dominant_frequency'] = df_windowed.apply(
    lambda x: get_dominant_frequency(signal_ffts=x['angle_fft'],
                                     signal_freqs=x['angle_freqs'],
                                     fmin=power_band_low_frequency,
                                     fmax=power_band_high_frequency
                                     ), axis=1
)

df_windowed = df_windowed.drop(columns=['angle_fft', 'angle_freqs'])

df_windowed['angle_perc_power'] = df_windowed[angle_smooth_colname].apply(
    lambda x: compute_perc_power(
        sensor_col=x,
        fmin_band=power_band_low_frequency,
        fmax_band=power_band_high_frequency,
        fmin_total=power_total_low_frequency,
        fmax_total=power_total_high_frequency,
        sampling_frequency=sampling_frequency,
        window_type=window_type
        )
)

# note to eScience: why are the columns 'angle_new_minima', 'angle_new_maxima', 
# 'angle_minima_deleted' and 'angle_maxima deleted' created here? Should a copy
# of 'df_windowed' be created inside 'extract_angle_extremes' to prevent this from
# happening?
extract_angle_extremes(
    df=df_windowed,
    smooth_angle_colname=angle_smooth_colname,
    dominant_frequency_colname='rotation_dominant_frequency',
    sampling_frequency=sampling_frequency
)

df_windowed = df_windowed.drop(columns=[angle_smooth_colname])

df_windowed['angle_amplitudes'] = extract_range_of_motion(
    angle_extrema_values_col=df_windowed['angle_extrema_values']
)

df_windowed = df_windowed.drop(columns=['angle_extrema_values'])

df_windowed['range_of_motion'] = df_windowed['angle_amplitudes'].apply(lambda x: np.mean(x) if len(x) > 0 else 0).replace(np.nan, 0)

df_windowed = df_windowed.drop(columns=['angle_amplitudes'])

extract_peak_angular_velocity(
    df=df_windowed,
    velocity_colname=velocity_colname,
    angle_minima_colname='angle_minima',
    angle_maxima_colname='angle_maxima'
)

df_windowed = df_windowed.drop(columns=['angle_minima','angle_maxima', 'angle_new_minima',
                                        'angle_new_maxima', velocity_colname])

for dir in ['forward', 'backward']:
    df_windowed[f'{dir}_peak_ang_vel_mean'] = df_windowed[f'{dir}_peak_ang_vel'].apply(lambda x: np.mean(x) if len(x) > 0 else 0)
    df_windowed[f'{dir}_peak_ang_vel_std'] = df_windowed[f'{dir}_peak_ang_vel'].apply(lambda x: np.std(x) if len(x) > 0 else 0)

    df_windowed = df_windowed.drop(columns=[f'{dir}_peak_ang_vel'])

In [21]:
df_windowed['std_norm_acc'] = generate_std_norm(
    df=df_windowed,
    cols=[DataColumns.ACCELERATION_X, DataColumns.ACCELERATION_Y, DataColumns.ACCELERATION_Z]
)

for col in [x for x in df_windowed.columns if '_grav' in x]:
    for stat in ['mean', 'std']:
        df_windowed[f'{col}_{stat}'] = generate_statistics(
            sensor_col=df_windowed[col],
            statistic=stat
        )

for col in [DataColumns.ACCELERATION_X, DataColumns.ACCELERATION_Y, DataColumns.ACCELERATION_Z,
            DataColumns.ROTATION_X, DataColumns.ROTATION_Y, DataColumns.ROTATION_Z]:
    df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
        sensor_col=df_windowed[col],
        window_type=window_type,
        sampling_frequency=sampling_frequency
    )

    for bandwidth, frequencies in d_frequency_bandwidths.items():
        df_windowed[col+'_'+bandwidth] = df_windowed[col].apply(
            lambda x: compute_power_in_bandwidth(
                sensor_col=x,
                fmin=frequencies[0],
                fmax=frequencies[1],
                sampling_frequency=sampling_frequency,
                window_type=window_type,
                )
            )

    # dominant frequency
    df_windowed[col+'_dominant_frequency'] = df_windowed.apply(
        lambda x: get_dominant_frequency(
            signal_ffts=x[col+'_fft'], 
            signal_freqs=x[col+'_freqs'],
            fmin=power_total_low_frequency,
            fmax=power_total_high_frequency
        ), axis=1
    )

# cepstral coefficients
for sensor in ['acceleration', 'rotation']:
    if sensor == 'acceleration':
        fft_cols = [f'{col}_fft' for col in [DataColumns.ACCELERATION_X, DataColumns.ACCELERATION_Y, DataColumns.ACCELERATION_Z]]
    else:
        fft_cols = [f'{col}_fft' for col in [DataColumns.ROTATION_X, DataColumns.ROTATION_Y, DataColumns.ROTATION_Z]]

    df_windowed['total_power'] = compute_power(
        df=df_windowed,
        fft_cols=fft_cols
    )

    cc_cols = generate_cepstral_coefficients(
        total_power_col=df_windowed['total_power'],
        window_length_s=window_length_s,
        sampling_frequency=sampling_frequency,
        low_frequency=power_total_low_frequency,
        high_frequency=power_total_high_frequency,
        filter_length=filter_length,
        n_dct_filters=n_dct_filters
    )

    df_windowed = pd.concat([df_windowed, cc_cols], axis=1)

    for i in range(n_dct_filters):
        df_windowed = df_windowed.rename(columns={f'cc_{i+1}': f'cc_{i+1}_{sensor}'})

In [22]:
l_drop_cols = [DataColumns.ACCELERATION_X, DataColumns.ACCELERATION_Y, DataColumns.ACCELERATION_Z,
               DataColumns.ROTATION_X, DataColumns.ROTATION_Y, DataColumns.ROTATION_Z,
              f'grav_{DataColumns.ACCELERATION_X}', f'grav_{DataColumns.ACCELERATION_Y}', f'grav_{DataColumns.ACCELERATION_Z}',
              f'{DataColumns.ACCELERATION_X}_fft', f'{DataColumns.ACCELERATION_Y}_fft', f'{DataColumns.ACCELERATION_Z}_fft',
              f'{DataColumns.ROTATION_X}_fft', f'{DataColumns.ROTATION_Y}_fft', f'{DataColumns.ROTATION_Z}_fft',
              f'{DataColumns.ACCELERATION_X}_freqs', f'{DataColumns.ACCELERATION_Y}_freqs', f'{DataColumns.ACCELERATION_Z}_freqs',
              f'{DataColumns.ROTATION_X}_freqs', f'{DataColumns.ROTATION_Y}_freqs', f'{DataColumns.ROTATION_Z}_freqs',
              f'{DataColumns.ACCELERATION_X}_fft_power', f'{DataColumns.ACCELERATION_Y}_fft_power', f'{DataColumns.ACCELERATION_Z}_fft_power',
                f'{DataColumns.ROTATION_X}_fft_power', f'{DataColumns.ROTATION_Y}_fft_power', f'{DataColumns.ROTATION_Z}_fft_power',
              'total_power', 'rotation_dominant_frequency', 'window_nr', 'window_end']

df_windowed = df_windowed.drop(columns=l_drop_cols).rename(columns={'window_start': 'time'})

## Store data

In [23]:
df_windowed

Unnamed: 0,time,angle_perc_power,range_of_motion,forward_peak_ang_vel_mean,forward_peak_ang_vel_std,backward_peak_ang_vel_mean,backward_peak_ang_vel_std,std_norm_acc,acceleration_x_power_below_gait,acceleration_x_power_gait,...,cc_7_rotation,cc_8_rotation,cc_9_rotation,cc_10_rotation,cc_11_rotation,cc_12_rotation,cc_13_rotation,cc_14_rotation,cc_15_rotation,cc_16_rotation
0,0.00,0.937852,2.875672,8.193249,3.292503,4.938885,0.000000,3.726680,-0.187134,-1.189321,...,0.914803,3.981584,2.594423,2.683692,1.498986,0.654959,-1.101154,0.551201,-0.356018,0.606395
1,0.75,0.992215,4.295439,3.288568,1.612178,4.401343,0.000000,2.777449,-0.348174,-1.449448,...,2.565799,4.660158,1.949670,2.904717,1.364006,-0.064123,-2.094619,0.494000,1.354037,1.760634
2,1.50,0.998080,5.816519,6.290015,4.613625,4.731876,0.330533,1.260102,-0.971159,-2.321490,...,0.235606,2.441842,-0.615496,0.256531,-0.636605,-0.584744,-1.123548,0.788872,1.805123,2.431365
3,2.25,0.995942,5.633405,8.871174,5.245486,29.831658,24.769250,0.855139,-1.412012,-2.455470,...,1.108246,2.516428,-0.244773,-0.303749,-1.228367,0.683459,0.583634,0.778679,-0.961984,-0.471968
4,3.00,0.989044,6.189380,12.468567,1.564927,29.990825,20.225260,0.832562,-1.171041,-2.072287,...,2.785089,2.388961,0.133439,1.060614,-0.328700,0.840093,0.580146,0.398457,-2.219431,-1.344242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637,237.00,1.030471,0.000000,0.000000,0.000000,0.000000,0.000000,0.001182,-2.684251,-3.676117,...,1.191237,1.187859,-0.263366,-0.753408,-2.391607,0.750026,0.247475,-0.256464,-0.610907,-1.153230
638,237.75,1.060099,0.000000,0.000000,0.000000,0.000000,0.000000,0.001715,-2.930411,-3.821838,...,0.954391,1.218720,-0.449553,-0.823881,0.441474,0.067401,0.211490,-1.218516,-1.168574,-2.053878
639,238.50,1.044924,0.000000,0.000000,0.000000,0.000000,0.000000,0.002464,-2.979956,-3.907675,...,0.679442,1.953856,2.460654,-2.605725,0.935960,0.805159,0.440133,-0.649235,-1.186132,-1.798486
640,239.25,1.036011,0.000000,0.000000,0.000000,0.000000,0.000000,0.001779,-3.201211,-4.160213,...,1.652074,1.733244,2.969968,-2.553594,-0.073225,2.213804,0.141743,-0.575044,-0.916464,-0.553480
