# Extracting gait features
Here we use the preprocessed accelerometer data as input, create time windows and extract relevant features for the subsequent task of gait detection. 

## Modules

In [33]:
import math
import os
import pandas as pd
import tsdf

## Constants

In [36]:
# tsdf files
# Identifiers that together select one recording segment on disk.
subject = '0A0B82C94960D6DCABC1F597EC0BA657F4B0EDC320702BCEE3B6955CE924DE05'
sensor = 'IMU'
week_nr = '104'
segment_nr = '0001'

# NOTE(review): absolute, machine-specific Windows path; adapt before running elsewhere.
input_path = os.path.join(r'C:\Users\erik_\Documents\PhD\data\ppp\preprocessed\test_data\1.input', subject, sensor)

# All file names of a segment share one base name and differ only in suffix.
base_filename = f'WatchData.{sensor}.Week{week_nr}.raw_segment{segment_nr}'
meta_filename = f'{base_filename}_meta.json'
values_filename = f'{base_filename}_samples.bin'
time_filename = f'{base_filename}_time.bin'
quality_filename = f'{base_filename}_quality.bin'

# windowing and feature engineering
# Not used in the cells shown here; presumably consumed by later feature-extraction cells.
window_type = 'hann'
verbose = 0

downsampled_frequency = 100  # Hz
window_length = 6 * downsampled_frequency  # number of samples in a 6-second window
window_step_size = 1 * downsampled_frequency  # number of samples in a 1-second step between window starts

# cepstral coefficients
low_frequency = 0  # Hz
high_frequency = int(downsampled_frequency / 2)  # Hz, half the sampling frequency
filter_length = high_frequency - 1  # here we assume a 1 Hz gait common frequency with equally-spaced harmonics
n_dct_filters = 16  # number of cepstral coefficients
# NOTE(review): `filter_lenth` is spelled differently from `filter_length` above, so this
# assignment creates a second variable and does not override the value set two lines up.
# Confirm which of the two values (high_frequency - 1 or 16) is the intended filter length.
filter_lenth = 16  # length of the filter

# Frequency bands as [lower, upper] bounds in Hz, keyed by the name of the band.
d_frequency_bandwidths = {
    'power_below_gait': [0.3, 0.7],
    'power_gait': [0.7, 3.5],
    'power_tremor': [3.5, 8],
    # NOTE(review): the upper bound is the sampling frequency (100 Hz), which lies above
    # high_frequency (50 Hz) -- confirm this is intended.
    'power_above_tremor': [8, downsampled_frequency]
}

# Columns holding one value per window; the last three match the columns produced by
# tabulate_windows, 'id' is presumably added elsewhere -- TODO confirm.
l_window_level_cols = ['id', 'window_nr', 'window_start', 'window_end']

## Functions

In [37]:
def create_window(df: pd.DataFrame,
                  window_nr: int,
                  lower_index: int,
                  upper_index: int,
                  data_point_level_cols: list
                  ):
    """Flatten one window of a dataframe into a single row.

    Parameters
    ----------
    df: pd.DataFrame
        The dataframe from which the window is taken
    window_nr: int
        Zero-based number of the window; the returned row stores it one-based
    lower_index: int
        Index label of the first sample of the window
    upper_index: int
        Index label of the last sample of the window (included in the window)
    data_point_level_cols: list
        The columns of df whose individual samples are kept, one list per column

    Returns
    -------
    list
        ``[window_nr + 1, lower_index, upper_index]`` followed by one list of
        samples for every column in data_point_level_cols
    """
    # .loc slices by label and includes both end points
    window_samples = df.loc[lower_index:upper_index, data_point_level_cols].copy()

    # Transpose so that every column becomes a single list of samples
    samples_per_column = window_samples.values.T.tolist()
    window_info = [window_nr + 1, lower_index, upper_index]

    return window_info + samples_per_column


def tabulate_windows(df: pd.DataFrame,
                     window_length: int,
                     window_step_size: int,
                     data_point_level_cols: list,
                    ):
    """Compiles multiple windows into a single dataframe

    Parameters
    ----------
    df: pd.DataFrame
        The original dataframe to be windowed
    window_length: int
        The number of samples a window constitutes
    window_step_size: int
        The number of samples between the start of the previous and the start of the next window
    data_point_level_cols: list
        The columns in df that are to be kept as individual datapoints in a list instead of aggregates

    Returns
    -------
    df_windows: pd.DataFrame or None
        Dataframe with each row corresponding to an individual window, with columns
        'window_nr', 'window_start', 'window_end' followed by data_point_level_cols.
        None is returned when df holds fewer samples than a single window.

    Raises
    ------
    ValueError
        If window_step_size is not larger than 0.
    """
    # Validate first, so that invalid input does not cost a copy of the dataframe
    if window_step_size <= 0:
        raise ValueError("Step size should be larger than 0.")

    n_samples = df.shape[0]
    if window_length > n_samples:
        # Not enough samples for even one window
        return None

    # Renumber the rows from 0 so that index labels equal sample positions,
    # which is what the window bounds computed below refer to
    df = df.reset_index(drop=True)

    # Only complete windows are kept; trailing samples that do not fill a window are dropped
    n_windows = math.floor((n_samples - window_length) / window_step_size) + 1

    l_windows = [
        create_window(
            df,
            window_nr,
            window_nr * window_step_size,                      # first sample of the window
            window_nr * window_step_size + window_length - 1,  # last sample, inclusive
            data_point_level_cols,
        )
        for window_nr in range(n_windows)
    ]

    # A dataframe built from a list of rows already has a fresh 0..n-1 index
    return pd.DataFrame(
        l_windows,
        columns=['window_nr', 'window_start', 'window_end'] + data_point_level_cols,
    )

## Load data

In [38]:
# Read the tsdf metadata file of the selected segment
_metadata_path = os.path.join(input_path, meta_filename)
metadata_dict = tsdf.load_metadata_from_path(_metadata_path)

# The metadata is looked up per binary file name: one entry for time, one for samples
metadata_time = metadata_dict[time_filename]
metadata_samples = metadata_dict[values_filename]

# Load both binaries into a single dataframe
df = tsdf.load_dataframe_from_binaries(
    [metadata_time, metadata_samples],
    tsdf.constants.ConcatenationType.columns,
)

# Display two randomly drawn rows as a quick sanity check
df.sample(2)

Unnamed: 0,time,acceleration_x,acceleration_y,acceleration_z,rotation_x,rotation_y,rotation_z
32759,10.0,297,1189,-1531,1,-27,-19
71801,10.040039,-509,-1052,-1698,-6,-12,-26


## Preprocess accelerometer data