# Extracting arm swing features
Triaxial accelerometer data, triaxial gyroscope data, and (boolean) gait predictions are used as input for extracting features related to arm swing.

## Modules

In [1]:
# Automatically reload modules
%load_ext autoreload
%autoreload 2

import numpy as np
import os
import pandas as pd
import tsdf

from dbpd import DataColumns
from dbpd.feature_extraction import *
from dbpd.windowing import *
from dbpd.gait_analysis_config import ArmSwingFeatureExtractionConfig
from dbpd.util import get_end_iso8601, write_data

## Constants

In [2]:
# Cell has the tag 'parameters'
# NOTE(review): presumably the tag lets a notebook runner (e.g. papermill) override
# these values - confirm; keep the assignments simple for that reason.

# paths and files
# Root folder of the test data, relative to the notebook's working directory.
path_to_data = '../../../tests/data/'
# Folder the preprocessed accelerometer/gyroscope files are read from.
input_path = os.path.join(path_to_data, '2.preprocessed_data', 'gait')
# Folder the extracted arm swing features are written to.
output_path = os.path.join(path_to_data, '3.extracted_features', 'gait')

In [3]:
# Configuration object that supplies the column names, sampling frequency, window
# settings, frequency bands and output channel definitions read by the cells below.
config = ArmSwingFeatureExtractionConfig()

## Load data

In [4]:
# Read the preprocessed accelerometer and gyroscope recordings (time + samples per
# sensor) and join them on their shared 'time' column.
# Note: after the loop, metadata_time and metadata_samples hold the metadata of the
# last sensor (gyroscope); they are reused further down when storing the features.
l_dfs = []
for sensor in ['accelerometer', 'gyroscope']:
    meta_filename, values_filename, time_filename = (
        f'{sensor}_meta.json',
        f'{sensor}_samples.bin',
        f'{sensor}_time.bin',
    )

    metadata_dict = tsdf.load_metadata_from_path(os.path.join(input_path, meta_filename))
    metadata_time, metadata_samples = metadata_dict[time_filename], metadata_dict[values_filename]
    l_dfs.append(
        tsdf.load_dataframe_from_binaries(
            [metadata_time, metadata_samples],
            tsdf.constants.ConcatenationType.columns,
        )
    )

# l_dfs[0] is the accelerometer frame, l_dfs[1] the gyroscope frame
df = l_dfs[0].merge(l_dfs[1], on='time')

df.head(2)

Unnamed: 0,time,accelerometer_x,grav_accelerometer_x,accelerometer_y,grav_accelerometer_y,accelerometer_z,grav_accelerometer_z,gyroscope_x,gyroscope_y,gyroscope_z
0,0.0,-0.53732,-4.24003e-09,0.560195,4.420535e-09,0.267026,2.107122e-09,-115.670732,-32.012195,26.097561
1,0.01,-0.496498,-3.783814e-08,0.580556,3.994549e-08,0.235247,1.871333e-08,-110.636301,-34.62471,24.701537


## Prepare data

In [5]:
# Temporary placeholder for the gait predictions: the first third of the samples is
# marked as gait (1), the second third as non-gait (0) and all remaining samples as
# gait (1) again.
# The last run is sized as "whatever is left", so the array always has exactly one
# entry per row of df. (The previous expression only matched the number of rows
# when that number modulo 3 equalled 2, and was one entry too long otherwise.)
n_samples = df.shape[0]
n_third = n_samples // 3
df[config.pred_gait_colname] = np.concatenate([
    np.repeat([1], n_third),
    np.repeat([0], n_third),
    np.repeat([1], n_samples - 2 * n_third),
], axis=0)

## Process data

In [6]:
# Step 1: combine the gyroscope y and z channels into a single velocity column
# via pca_transform_gyroscope, which also receives the gait prediction column.
df[config.velocity_colname] = pca_transform_gyroscope(
    df=df,
    pred_gait_colname=config.pred_gait_colname,
    y_gyro_colname=DataColumns.GYROSCOPE_Y,
    z_gyro_colname=DataColumns.GYROSCOPE_Z,
)

# Step 2: derive the angle column from the velocity and time columns.
df[config.angle_colname] = compute_angle(
    time_col=df[config.time_colname],
    velocity_col=df[config.velocity_colname],
)

# Step 3: store a version of the angle with its moving average removed.
df[config.angle_smooth_colname] = remove_moving_average_angle(
    sampling_frequency=config.sampling_frequency,
    angle_col=df[config.angle_colname],
)

# Keep only the samples that were predicted as gait and renumber the rows.
df = df.loc[df[config.pred_gait_colname]==1].reset_index(drop=True)

# Group the remaining samples into segments and discard unwanted segments.
# The segment number column name is taken from the config (instead of the literal
# 'segment_nr') because the windowing code that follows looks the column up via
# config.segment_nr_colname; both places now always agree.
df_segments = create_segments(
    df=df,
    time_colname=config.time_colname,
    segment_nr_colname=config.segment_nr_colname,
    minimum_gap_s=3
)

df_segments = discard_segments(
    df=df_segments,
    time_colname=config.time_colname,
    segment_nr_colname=config.segment_nr_colname,
    minimum_segment_length_s=3
)

# Window each segment on its own copy (index reset to start at 0) and stack the
# resulting per-segment tables into a single frame with a fresh index.
l_dfs = [
    tabulate_windows(
        df=(
            df_segments
            .loc[df_segments[config.segment_nr_colname] == segment_nr]
            .copy()
            .reset_index(drop=True)
        ),
        time_column_name=config.time_colname,
        segment_nr_colname=config.segment_nr_colname,
        data_point_level_cols=config.l_data_point_level_cols,
        window_length_s=config.window_length_s,
        window_step_size_s=config.window_step_size_s,
        segment_nr=segment_nr,
        sampling_frequency=config.sampling_frequency,
    )
    for segment_nr in df_segments[config.segment_nr_colname].unique()
]
df_windowed = pd.concat(l_dfs).reset_index(drop=True)

# The sample-level frames are not used after this point.
del df, df_segments

In [7]:
# Frequencies and FFT values of the smoothed angle, one entry per row of df_windowed.
# Both columns are helpers for the dominant-frequency lookup and are dropped again
# at the end of this step.
df_windowed['angle_freqs'], df_windowed['angle_fft'] = signal_to_ffts(
    sensor_col=df_windowed[config.angle_smooth_colname],
    sampling_frequency=config.sampling_frequency,
    window_type=config.window_type,
)

# Row-wise lookup of the dominant frequency between the low and high band limits.
df_windowed['gyroscope_dominant_frequency'] = df_windowed.apply(
    lambda window: get_dominant_frequency(
        signal_ffts=window['angle_fft'],
        signal_freqs=window['angle_freqs'],
        fmin=config.power_band_low_frequency,
        fmax=config.power_band_high_frequency,
    ),
    axis=1,
)

df_windowed = df_windowed.drop(columns=['angle_fft', 'angle_freqs'])

# 'angle_perc_power': compute_perc_power applied to the smoothed angle of every row,
# using the band limits (power_band_*) and the total limits (power_total_*) from
# the config.
df_windowed['angle_perc_power'] = df_windowed[config.angle_smooth_colname].apply(
    lambda angle_values: compute_perc_power(
        sensor_col=angle_values,
        fmin_band=config.power_band_low_frequency,
        fmax_band=config.power_band_high_frequency,
        fmin_total=config.power_total_low_frequency,
        fmax_total=config.power_total_high_frequency,
        sampling_frequency=config.sampling_frequency,
        window_type=config.window_type,
    )
)

# Locate the extremes of the smoothed angle.
# The call is made for its side effects only: its return value is discarded, and
# the code further down reads columns such as 'angle_extrema_values',
# 'angle_minima', 'angle_maxima', 'angle_new_minima' and 'angle_new_maxima' that
# no visible line creates - presumably they are added to df_windowed in place by
# extract_angle_extremes (TODO confirm).
#
# Note to eScience: why are the columns 'angle_new_minima', 'angle_new_maxima',
# 'angle_minima_deleted' and 'angle_maxima deleted' created here? Should a copy
# of 'df_windowed' be created inside 'extract_angle_extremes' to prevent this from
# happening?
# NOTE(review): 'angle_maxima deleted' in the note above may be meant as
# 'angle_maxima_deleted' - verify against extract_angle_extremes.
extract_angle_extremes(
    df=df_windowed,
    angle_colname=config.angle_smooth_colname,
    dominant_frequency_colname='gyroscope_dominant_frequency',
    sampling_frequency=config.sampling_frequency
)

# The smoothed angle itself is not used after this point.
df_windowed = df_windowed.drop(columns=[config.angle_smooth_colname])

# Derive 'angle_amplitudes' from the extrema values via extract_range_of_motion.
df_windowed['angle_amplitudes'] = extract_range_of_motion(
    angle_extrema_values_col=df_windowed['angle_extrema_values']
)

# 'range_of_motion' is the mean amplitude per row; rows without amplitudes, and
# rows whose mean evaluates to NaN, get 0.
df_windowed['range_of_motion'] = (
    df_windowed['angle_amplitudes']
    .apply(lambda amplitudes: 0 if len(amplitudes) == 0 else np.mean(amplitudes))
    .replace(np.nan, 0)
)

# Both helper columns have served their purpose.
df_windowed = df_windowed.drop(columns=['angle_extrema_values', 'angle_amplitudes'])

# Peak angular velocity features.
# extract_peak_angular_velocity is called for its side effects only (its return
# value is discarded); the loop below reads the columns 'forward_peak_ang_vel' and
# 'backward_peak_ang_vel', which no visible line creates - presumably they are
# added to df_windowed in place by this call (TODO confirm).
extract_peak_angular_velocity(
    df=df_windowed,
    velocity_colname=config.velocity_colname,
    angle_minima_colname='angle_minima',
    angle_maxima_colname='angle_maxima'
)

# The extrema and velocity columns are not used after this point.
df_windowed = df_windowed.drop(columns=['angle_minima','angle_maxima', 'angle_new_minima',
                                        'angle_new_maxima', config.velocity_colname])

# Summarise the peak velocities per row as mean and standard deviation (0 when a
# row has no peaks), then drop the raw peak column.
# The loop variable is named 'direction' rather than 'dir' so that the builtin
# dir() is not shadowed in the notebook's global namespace.
for direction in ['forward', 'backward']:
    peak_colname = f'{direction}_peak_ang_vel'
    df_windowed[f'{direction}_peak_ang_vel_mean'] = df_windowed[peak_colname].apply(lambda x: np.mean(x) if len(x) > 0 else 0)
    df_windowed[f'{direction}_peak_ang_vel_std'] = df_windowed[peak_colname].apply(lambda x: np.std(x) if len(x) > 0 else 0)

    df_windowed = df_windowed.drop(columns=[peak_colname])

In [8]:
# 'std_norm_acc': generate_std_norm over the three accelerometer axes.
df_windowed['std_norm_acc'] = generate_std_norm(
    cols=[DataColumns.ACCELEROMETER_X, DataColumns.ACCELEROMETER_Y, DataColumns.ACCELEROMETER_Z],
    df=df_windowed,
)

# Mean and standard deviation for every column whose name contains 'grav'.
# The list of matching columns is built once, before the loop starts, so the
# '<col>_mean' / '<col>_std' columns added inside the loop are not used as inputs.
for col in [name for name in df_windowed.columns if 'grav' in name]:
    for stat in ('mean', 'std'):
        df_windowed[f'{col}_{stat}'] = generate_statistics(
            statistic=stat,
            sensor_col=df_windowed[col],
        )

# Frequency-domain features for each accelerometer and gyroscope axis.
for col in [
    DataColumns.ACCELEROMETER_X, DataColumns.ACCELEROMETER_Y, DataColumns.ACCELEROMETER_Z,
    DataColumns.GYROSCOPE_X, DataColumns.GYROSCOPE_Y, DataColumns.GYROSCOPE_Z,
]:
    # '<col>_freqs' and '<col>_fft' are read by the dominant-frequency lookup below
    # and by the cepstral coefficient step later on.
    df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
        sensor_col=df_windowed[col],
        sampling_frequency=config.sampling_frequency,
        window_type=config.window_type,
    )

    # one '<col>_<bandwidth>' column per entry of config.d_frequency_bandwidths;
    # each entry provides the lower limit at index 0 and the upper limit at index 1
    for bandwidth, frequencies in config.d_frequency_bandwidths.items():
        df_windowed[col+'_'+bandwidth] = df_windowed[col].apply(
            lambda axis_values: compute_power_in_bandwidth(
                sensor_col=axis_values,
                fmin=frequencies[0],
                fmax=frequencies[1],
                sampling_frequency=config.sampling_frequency,
                window_type=config.window_type,
            )
        )

    # dominant frequency, looked up row by row between the total-power limits
    df_windowed[col+'_dominant_frequency'] = df_windowed.apply(
        lambda window: get_dominant_frequency(
            signal_ffts=window[col+'_fft'],
            signal_freqs=window[col+'_freqs'],
            fmin=config.power_total_low_frequency,
            fmax=config.power_total_high_frequency,
        ),
        axis=1,
    )

# Cepstral coefficients, computed separately for the accelerometer and the gyroscope.
for sensor in ['accelerometer', 'gyroscope']:
    # FFT columns of the three axes that belong to the current sensor
    fft_cols = [
        f'{col}_fft'
        for col in (
            [DataColumns.ACCELEROMETER_X, DataColumns.ACCELEROMETER_Y, DataColumns.ACCELEROMETER_Z]
            if sensor == 'accelerometer'
            else [DataColumns.GYROSCOPE_X, DataColumns.GYROSCOPE_Y, DataColumns.GYROSCOPE_Z]
        )
    ]

    # 'total_power' is overwritten on every pass, so it always belongs to the
    # sensor that is currently being processed
    df_windowed['total_power'] = compute_power(
        df=df_windowed,
        fft_cols=fft_cols
    )

    cc_cols = generate_cepstral_coefficients(
        total_power_col=df_windowed['total_power'],
        window_length_s=config.window_length_s,
        sampling_frequency=config.sampling_frequency,
        low_frequency=config.power_total_low_frequency,
        high_frequency=config.power_total_high_frequency,
        filter_length=config.filter_length,
        n_dct_filters=config.n_dct_filters
    )

    df_windowed = pd.concat([df_windowed, cc_cols], axis=1)

    # Suffix the freshly added 'cc_<k>' columns with the sensor name, so the next
    # pass can add its own 'cc_<k>' columns without a name clash.
    df_windowed = df_windowed.rename(
        columns={
            f'cc_{i+1}': f'cc_{i+1}_{sensor}'
            for i in range(config.n_dct_filters)
        }
    )

In [9]:
# Columns that only served as input or intermediate result and are not part of the
# stored feature set.
l_sensor_cols = [
    DataColumns.ACCELEROMETER_X, DataColumns.ACCELEROMETER_Y, DataColumns.ACCELEROMETER_Z,
    DataColumns.GYROSCOPE_X, DataColumns.GYROSCOPE_Y, DataColumns.GYROSCOPE_Z,
]

l_drop_cols = (
    # raw axis values
    l_sensor_cols
    # gravity component of the accelerometer axes
    + [f'grav_{col}' for col in l_sensor_cols[:3]]
    # per-axis spectral helper columns
    + [f'{col}_{suffix}' for suffix in ('fft', 'freqs', 'fft_power') for col in l_sensor_cols]
    # remaining helper columns
    + ['total_power', 'gyroscope_dominant_frequency', 'window_nr', 'window_end']
)

# The start of each window becomes the 'time' column of the output.
df_windowed = df_windowed.drop(columns=l_drop_cols).rename(columns={'window_start': 'time'})

## Store data

In [10]:
from dateutil import parser
import datetime

# End timestamp of the output: computed by get_end_iso8601 from the start timestamp
# of the loaded metadata and the last window's 'time' value plus the window length.
end_iso8601 = get_end_iso8601(
    metadata_samples.start_iso8601,
    df_windowed['time'].iloc[-1] + config.window_length_s,
)

# The metadata objects of the last loaded sensor are reused as templates for the
# output files; only the fields below are overwritten.
metadata_samples.end_iso8601 = end_iso8601
metadata_samples.file_name = 'arm_swing_values.bin'
metadata_samples.file_dir_path = output_path
metadata_time.end_iso8601 = end_iso8601
metadata_time.file_name = 'arm_swing_time.bin'
metadata_time.file_dir_path = output_path

# Feature values: channel names and units come from config.d_channels_values
# (keys are the channels, values the units).
metadata_samples.channels = list(config.d_channels_values.keys())
metadata_samples.units = list(config.d_channels_values.values())
metadata_samples.data_type = np.float32
metadata_samples.bits = 32

# Time channel.
# NOTE(review): the loaded 'time' column holds values such as 0.0 and 0.01, while
# the unit declared here is 'relative_time_ms' stored as int32 - confirm that
# write_data performs the matching conversion.
metadata_time.channels = ['time']
metadata_time.units = ['relative_time_ms']
metadata_time.data_type = np.int32
metadata_time.bits = 32

In [11]:
# Hand the windowed features to write_data together with the time metadata, the
# samples metadata, the output folder and the name of the metadata file.
write_data(metadata_time, metadata_samples, output_path, 'arm_swing_meta.json', df_windowed)