# Extracting arm swing features
Triaxial accelerometer data, triaxial gyroscope data, and (boolean) gait predictions are used as input for extracting features related to arm swing.

## Modules

In [19]:
# Automatically reload modules
%load_ext autoreload
%autoreload 2

import numpy as np
import os
import pandas as pd
import tsdf

from dbpd import DataColumns
from dbpd.extracting_features import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Constants

In [20]:
# paths and files (relative to the location of this notebook)
path_to_data = '../../../tests/data/'
input_path_imu = os.path.join(path_to_data, '2.preprocessed_data', 'gait')
output_path = os.path.join(path_to_data, '3.extracted_features', 'gait')

# general
sensor = 'IMU'
units = 'degrees'
sampling_frequency = 100

# column names
time_colname = 'time'
pred_gait_colname = 'pred_gait'
angle_smooth_colname = 'angle_smooth'
angle_colname = 'angle'
velocity_colname = 'velocity'
segment_nr_colname = 'segment_nr'

# windowing
window_type = 'hann'
window_length_s = 3       # seconds
# 75% overlap, derived from the window length so the two cannot drift apart
window_overlap_s = window_length_s * 0.75
window_step_size_s = window_length_s - window_overlap_s

# columns whose per-sample values are passed to `tabulate_windows`
l_data_point_level_cols = [
    DataColumns.ACCELERATION_X,
    DataColumns.ACCELERATION_Y,
    DataColumns.ACCELERATION_Z,
    DataColumns.ROTATION_X,
    DataColumns.ROTATION_Y,
    DataColumns.ROTATION_Z,
    f'grav_{DataColumns.ACCELERATION_X}',
    f'grav_{DataColumns.ACCELERATION_Y}',
    f'grav_{DataColumns.ACCELERATION_Z}',
    angle_smooth_colname,
    velocity_colname
]

# computing power
power_band_low_frequency = 0.3
power_band_high_frequency = 3
power_total_low_frequency = 0
power_total_high_frequency = int(sampling_frequency / 2)  # half the sampling frequency

# feature-name suffix -> [fmin, fmax] of the frequency band
# NOTE(review): the upper bound of 'power_above_tremor' is the full sampling
# frequency, i.e. twice `power_total_high_frequency` -- confirm whether
# sampling_frequency / 2 was intended.
d_frequency_bandwidths = {
    'power_below_gait': [0.3, 0.7],
    'power_gait': [0.7, 3.5],
    'power_tremor': [3.5, 8],
    'power_above_tremor': [8, sampling_frequency]
}

# cepstral coefficients
cc_low_frequency = 0
cc_high_frequency = int(sampling_frequency / 2)
filter_length = 16
n_dct_filters = 16

# Channel name -> unit of every feature written to the output file.
# Insertion order matters: keys and values are used, in order, as the channel
# and unit lists of the output metadata.
d_channels_values = {
    'angle_perc_power': 'proportion',
    'range_of_motion': 'deg',
    'forward_peak_ang_vel_mean': 'deg/s',
    'forward_peak_ang_vel_std': 'deg/s',
    'backward_peak_ang_vel_mean': 'deg/s',
    'backward_peak_ang_vel_std': 'deg/s',
    'std_norm_acc': 'g',
    'grav_acceleration_x_mean': 'g',
    'grav_acceleration_x_std': 'g',
    'grav_acceleration_y_mean': 'g',
    'grav_acceleration_y_std': 'g',
    'grav_acceleration_z_mean': 'g',
    'grav_acceleration_z_std': 'g',
}

# accelerometer spectral features, per axis: one power per frequency band
# (same order as `d_frequency_bandwidths`) followed by the dominant frequency
for axis in ['x', 'y', 'z']:
    for bandwidth in d_frequency_bandwidths:
        d_channels_values[f'acceleration_{axis}_{bandwidth}'] = 'X'
    d_channels_values[f'acceleration_{axis}_dominant_frequency'] = 'Hz'

# gyroscope: only the dominant frequency per axis is stored
for axis in ['x', 'y', 'z']:
    d_channels_values[f'rotation_{axis}_dominant_frequency'] = 'Hz'

# cepstral coefficients cc_1 .. cc_<n_dct_filters> for each sensor type, so the
# channel list always matches the number of coefficients that is generated
for sensor_type in ['acceleration', 'rotation']:
    for i in range(1, n_dct_filters + 1):
        d_channels_values[f'cc_{i}_{sensor_type}'] = 'X'

## Load data

In [21]:
# Load the accelerometer and gyroscope data (TSDF metadata + binaries) and
# combine both sensors into a single dataframe on the shared time column.
l_dfs = []
# The loop variable is deliberately not called `sensor`, so the `sensor`
# constant defined in the "Constants" section is not overwritten.
for sensor_type in ['acceleration', 'rotation']:
    meta_filename = f'{sensor_type}_meta.json'
    values_filename = f'{sensor_type}_samples.bin'
    time_filename = f'{sensor_type}_time.bin'

    metadata_dict = tsdf.load_metadata_from_path(os.path.join(input_path_imu, meta_filename))
    metadata_time = metadata_dict[time_filename]
    metadata_samples = metadata_dict[values_filename]
    l_dfs.append(tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns))

# NOTE: `metadata_time` and `metadata_samples` of the last sensor loaded
# ('rotation') stay in scope on purpose; the "Store data" section reuses them
# as the basis for the output metadata.
df = pd.merge(l_dfs[0], l_dfs[1], on=time_colname)

df.head(2)

Unnamed: 0,time,acceleration_x,grav_acceleration_x,acceleration_y,grav_acceleration_y,acceleration_z,grav_acceleration_z,rotation_x,rotation_y,rotation_z
0,0.0,-0.53732,-4.24003e-09,0.560195,4.420535e-09,0.267026,2.107122e-09,-115.670732,-32.012195,26.097561
1,0.01,-0.496498,-3.783814e-08,0.580556,3.994549e-08,0.235247,1.871333e-08,-110.636301,-34.62471,24.701537


## Prepare data

In [22]:
# Temporarily add placeholder ("random") gait predictions:
# first third gait (1), second third non-gait (0), remainder gait (1).
n_samples = df.shape[0]
n_third = n_samples // 3
df[pred_gait_colname] = np.concatenate([
    np.repeat([1], n_third),
    np.repeat([0], n_third),
    # The remainder makes the three parts add up to exactly n_samples. The
    # previous expression `n + 1 - 2*n//3` was evaluated as `n + 1 - (2*n)//3`
    # and only produced the right length when n % 3 == 2.
    np.repeat([1], n_samples - 2 * n_third),
], axis=0)

## Process data

In [23]:
# Derive a single velocity signal from the y and z gyroscope axes
# (see `pca_transform_gyroscope` for the exact method).
df[velocity_colname] = pca_transform_gyroscope(
    df=df,
    y_gyro_colname=DataColumns.ROTATION_Y,
    z_gyro_colname=DataColumns.ROTATION_Z,
    pred_gait_colname=pred_gait_colname
)

# Angle computed from the velocity and time columns.
df[angle_colname] = compute_angle(
    velocity_col=df[velocity_colname],
    time_col=df[time_colname]
)

# Angle with its moving average removed.
df[angle_smooth_colname] = remove_moving_average_angle(
    angle_col=df[angle_colname],
    sampling_frequency=sampling_frequency
)

# Keep only the samples predicted as gait.
# NOTE: `df` is overwritten here and deleted at the end of this cell, so the
# cell cannot be re-run without re-running the cells above it first.
df = df.loc[df[pred_gait_colname]==1].reset_index(drop=True)

# Assign segment numbers, then discard short segments; the shared
# `segment_nr_colname` constant is used so all steps agree on the column name.
df_segments = create_segments(
    df=df,
    time_colname=time_colname,
    segment_nr_colname=segment_nr_colname,
    minimum_gap_s=3
)

df_segments = discard_segments(
    df=df_segments,
    time_colname=time_colname,
    segment_nr_colname=segment_nr_colname,
    minimum_segment_length_s=3
)

# Window every remaining segment separately.
l_dfs = []
for segment_nr in df_segments[segment_nr_colname].unique():
    df_single_segment = df_segments.loc[df_segments[segment_nr_colname]==segment_nr].copy().reset_index(drop=True)
    l_dfs.append(tabulate_windows(
        df=df_single_segment,
        time_column_name=time_colname,
        segment_nr_colname=segment_nr_colname,
        data_point_level_cols=l_data_point_level_cols,
        window_length_s=window_length_s,
        window_step_size_s=window_step_size_s,
        segment_nr=segment_nr,
        sampling_frequency=sampling_frequency,
        )
    )

# pd.concat raises an uninformative "No objects to concatenate" on an empty
# list; fail with a message that points at the actual cause instead.
if not l_dfs:
    raise ValueError('No gait segments left after discarding short segments; cannot create windows.')

df_windowed = pd.concat(l_dfs).reset_index(drop=True)

# free memory: only the windowed data is needed from here on
del df, df_segments

In [24]:
# FFT of the smoothed angle per window; only needed to find the dominant
# frequency and dropped again directly afterwards.
df_windowed['angle_freqs'], df_windowed['angle_fft'] = signal_to_ffts(
    sensor_col=df_windowed[angle_smooth_colname],
    window_type=window_type,
    sampling_frequency=sampling_frequency)

df_windowed['rotation_dominant_frequency'] = df_windowed.apply(
    lambda x: get_dominant_frequency(signal_ffts=x['angle_fft'],
                                     signal_freqs=x['angle_freqs'],
                                     fmin=power_band_low_frequency,
                                     fmax=power_band_high_frequency
                                     ), axis=1
)

df_windowed = df_windowed.drop(columns=['angle_fft', 'angle_freqs'])

# power of the smoothed angle in the [power_band_low, power_band_high] band
# relative to the [power_total_low, power_total_high] band
df_windowed['angle_perc_power'] = df_windowed[angle_smooth_colname].apply(
    lambda x: compute_perc_power(
        sensor_col=x,
        fmin_band=power_band_low_frequency,
        fmax_band=power_band_high_frequency,
        fmin_total=power_total_low_frequency,
        fmax_total=power_total_high_frequency,
        sampling_frequency=sampling_frequency,
        window_type=window_type
        )
)

# note to eScience: why are the columns 'angle_new_minima', 'angle_new_maxima',
# 'angle_minima_deleted' and 'angle_maxima deleted' created here? Should a copy
# of 'df_windowed' be created inside 'extract_angle_extremes' to prevent this from
# happening?
# The call below modifies `df_windowed` in place (its return value is not used).
extract_angle_extremes(
    df=df_windowed,
    smooth_angle_colname=angle_smooth_colname,
    dominant_frequency_colname='rotation_dominant_frequency',
    sampling_frequency=sampling_frequency
)

df_windowed = df_windowed.drop(columns=[angle_smooth_colname])

df_windowed['angle_amplitudes'] = extract_range_of_motion(
    angle_extrema_values_col=df_windowed['angle_extrema_values']
)

df_windowed = df_windowed.drop(columns=['angle_extrema_values'])

# mean amplitude per window; windows without amplitudes (or with NaN) get 0
df_windowed['range_of_motion'] = df_windowed['angle_amplitudes'].apply(lambda x: np.mean(x) if len(x) > 0 else 0).replace(np.nan, 0)

df_windowed = df_windowed.drop(columns=['angle_amplitudes'])

# Also modifies `df_windowed` in place; the '<direction>_peak_ang_vel' columns
# read below are expected to be created by this call.
extract_peak_angular_velocity(
    df=df_windowed,
    velocity_colname=velocity_colname,
    angle_minima_colname='angle_minima',
    angle_maxima_colname='angle_maxima'
)

df_windowed = df_windowed.drop(columns=['angle_minima','angle_maxima', 'angle_new_minima',
                                        'angle_new_maxima', velocity_colname])

# `direction` instead of `dir`, so the builtin `dir` is not shadowed for the
# rest of the kernel session
for direction in ['forward', 'backward']:
    # summarise the peak velocities per window; empty windows get 0
    df_windowed[f'{direction}_peak_ang_vel_mean'] = df_windowed[f'{direction}_peak_ang_vel'].apply(lambda x: np.mean(x) if len(x) > 0 else 0)
    df_windowed[f'{direction}_peak_ang_vel_std'] = df_windowed[f'{direction}_peak_ang_vel'].apply(lambda x: np.std(x) if len(x) > 0 else 0)

    df_windowed = df_windowed.drop(columns=[f'{direction}_peak_ang_vel'])

In [25]:
# feature computed from the three accelerometer axes by `generate_std_norm`
df_windowed['std_norm_acc'] = generate_std_norm(
    df=df_windowed,
    cols=[DataColumns.ACCELERATION_X, DataColumns.ACCELERATION_Y, DataColumns.ACCELERATION_Z]
)

# mean and std of every column that has 'grav' in its name
for col in [x for x in df_windowed.columns if 'grav' in x]:
    for stat in ['mean', 'std']:
        df_windowed[f'{col}_{stat}'] = generate_statistics(
            sensor_col=df_windowed[col],
            statistic=stat
        )

# spectral features per accelerometer / gyroscope axis
for col in [DataColumns.ACCELERATION_X, DataColumns.ACCELERATION_Y, DataColumns.ACCELERATION_Z,
            DataColumns.ROTATION_X, DataColumns.ROTATION_Y, DataColumns.ROTATION_Z]:
    df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
        sensor_col=df_windowed[col],
        window_type=window_type,
        sampling_frequency=sampling_frequency
    )

    # power in each of the configured frequency bands
    for bandwidth, frequencies in d_frequency_bandwidths.items():
        df_windowed[col+'_'+bandwidth] = df_windowed[col].apply(
            lambda x: compute_power_in_bandwidth(
                sensor_col=x,
                fmin=frequencies[0],
                fmax=frequencies[1],
                sampling_frequency=sampling_frequency,
                window_type=window_type,
                )
            )

    # dominant frequency
    df_windowed[col+'_dominant_frequency'] = df_windowed.apply(
        lambda x: get_dominant_frequency(
            signal_ffts=x[col+'_fft'],
            signal_freqs=x[col+'_freqs'],
            fmin=power_total_low_frequency,
            fmax=power_total_high_frequency
        ), axis=1
    )

# cepstral coefficients
# The loop variable is deliberately not called `sensor`, so the `sensor`
# constant defined in the "Constants" section is not overwritten.
for sensor_type in ['acceleration', 'rotation']:
    if sensor_type == 'acceleration':
        fft_cols = [f'{col}_fft' for col in [DataColumns.ACCELERATION_X, DataColumns.ACCELERATION_Y, DataColumns.ACCELERATION_Z]]
    else:
        fft_cols = [f'{col}_fft' for col in [DataColumns.ROTATION_X, DataColumns.ROTATION_Y, DataColumns.ROTATION_Z]]

    # 'total_power' is a scratch column: overwritten for each sensor type
    df_windowed['total_power'] = compute_power(
        df=df_windowed,
        fft_cols=fft_cols
    )

    # use the dedicated cepstral-coefficient frequency limits from the
    # "Constants" section (currently equal to the power_total_* limits)
    cc_cols = generate_cepstral_coefficients(
        total_power_col=df_windowed['total_power'],
        window_length_s=window_length_s,
        sampling_frequency=sampling_frequency,
        low_frequency=cc_low_frequency,
        high_frequency=cc_high_frequency,
        filter_length=filter_length,
        n_dct_filters=n_dct_filters
    )

    df_windowed = pd.concat([df_windowed, cc_cols], axis=1)

    # suffix the generic 'cc_<i>' names with the sensor type in a single rename,
    # so the next iteration can add its own 'cc_<i>' columns without clashing
    df_windowed = df_windowed.rename(
        columns={f'cc_{i}': f'cc_{i}_{sensor_type}' for i in range(1, n_dct_filters + 1)}
    )

In [26]:
# Intermediate columns that are removed before the features are stored.
l_accel_cols = [DataColumns.ACCELERATION_X, DataColumns.ACCELERATION_Y, DataColumns.ACCELERATION_Z]
l_gyro_cols = [DataColumns.ROTATION_X, DataColumns.ROTATION_Y, DataColumns.ROTATION_Z]
l_raw_cols = l_accel_cols + l_gyro_cols

l_drop_cols = (
    # raw per-axis signals
    l_raw_cols
    # gravity components of the accelerometer axes
    + [f'grav_{col}' for col in l_accel_cols]
    # spectral helper columns of every raw axis
    + [f'{col}{suffix}' for suffix in ['_fft', '_freqs', '_fft_power'] for col in l_raw_cols]
    # remaining scratch and bookkeeping columns
    + ['total_power', 'rotation_dominant_frequency', 'window_nr', 'window_end']
)

df_windowed = df_windowed.drop(columns=l_drop_cols)
# the start of each window serves as the time stamp of its features
df_windowed = df_windowed.rename(columns={'window_start': 'time'})

## Store data

In [27]:
from dateutil import parser
import datetime

# End time of the output: recording start + (start of the last window + one
# window length), with the offset truncated to whole seconds.
last_window_start = df_windowed['time'].iloc[-1]
end_datetime = parser.parse(metadata_samples.start_iso8601) + datetime.timedelta(
    seconds=int(last_window_start + window_length_s)
)
end_iso8601 = end_datetime.strftime('%d-%b-%Y %H:%M:%S') + ' UTC'

# The metadata objects loaded in the "Load data" section are modified in place
# and reused as the metadata of the output files.
metadata_samples.end_iso8601 = end_iso8601
metadata_samples.file_name = 'arm_swing_values.bin'
metadata_samples.file_dir_path = output_path
metadata_time.end_iso8601 = end_iso8601
metadata_time.file_name = 'arm_swing_time.bin'
metadata_time.file_dir_path = output_path

# channel and unit lists follow the insertion order of `d_channels_values`
metadata_samples.channels = list(d_channels_values.keys())
metadata_samples.units = list(d_channels_values.values())
metadata_samples.data_type = np.float32
metadata_samples.bits = 32

# NOTE(review): the 'time' column is treated as seconds when computing
# `end_iso8601` above, while the time channel is declared here as
# 'relative_time_ms' with an int32 data type -- confirm which unit is intended.
metadata_time.channels = ['time']
metadata_time.units = ['relative_time_ms']
metadata_time.data_type = np.int32
metadata_time.bits = 32

In [28]:
# Create the output directory if it does not exist yet; `exist_ok=True`
# replaces the separate existence check and avoids the check-then-create race.
os.makedirs(output_path, exist_ok=True)

# store binaries and metadata
tsdf.write_dataframe_to_binaries(output_path, df_windowed, [metadata_time, metadata_samples])
tsdf.write_metadata([metadata_time, metadata_samples], 'arm_swing_meta.json')