# Gait detection
This notebook shows how to predict gait probability using extracted features from windowed accelerometry data as input.

## Modules

In [39]:
import numpy as np
import os
import pandas as pd
import tsdf

from dbpd import DataColumns

## Constants

In [40]:
# Cell has the tag 'parameters'
# (presumably so a notebook runner can override these values per run - TODO confirm)

# Root folder of the data; every other path below is derived from it.
path_to_data_folder = '../../../tests/data'
# Folder holding the pickled classifier and its thresholds file.
path_to_classifier_input = os.path.join(path_to_data_folder, '0.classifiers')
# Folder holding the extracted-feature files that are loaded as classifier input.
path_to_input_data = os.path.join(path_to_data_folder, '3.extracted_features')
# Folder the predictions and their metadata are written to.
path_output = os.path.join(path_to_data_folder, '4.predictions')

# Files read from path_to_classifier_input.
classifier_file_name = 'gd_classifier.pkl'
thresholds_file_name = 'gd_thresholds.txt'

# Metadata file read from path_to_input_data; the time and values file names
# are the keys used to look up the individual entries in the loaded metadata.
meta_filename = 'gait_meta.json'
time_filename = 'gait_time.bin'
values_filename = 'gait_values.bin'

# Accelerometer axis column names; used as prefixes when building the feature names.
l_accel_cols = [DataColumns.ACCELERATION_X, DataColumns.ACCELERATION_Y, DataColumns.ACCELERATION_Z]

## Load data

#### Features

In [41]:
# Read the metadata file, then pick out the entries describing the time and
# values binaries (looked up by their file names).
metadata_dict = tsdf.load_metadata_from_path(
    os.path.join(path_to_input_data, meta_filename)
)
metadata_time, metadata_samples = (
    metadata_dict[file_name] for file_name in (time_filename, values_filename)
)

# Load both binaries into one dataframe, concatenated along the columns.
df = tsdf.load_dataframe_from_binaries(
    [metadata_time, metadata_samples],
    tsdf.constants.ConcatenationType.columns,
)

df.head(2)

Unnamed: 0,time,grav_acceleration_x_mean,grav_acceleration_y_mean,grav_acceleration_z_mean,grav_acceleration_x_std,grav_acceleration_y_std,grav_acceleration_z_std,acceleration_x_power_below_gait,acceleration_y_power_below_gait,acceleration_z_power_below_gait,...,cc_7_acc,cc_8_acc,cc_9_acc,cc_10_acc,cc_11_acc,cc_12_acc,cc_13_acc,cc_14_acc,cc_15_acc,cc_16_acc
0,0.0,-0.425107,0.374834,0.134392,0.223998,0.21767,0.39587,-2.687014,-1.57629,-1.272932,...,5.88732,7.57728,3.188775,2.399377,2.6762,1.820231,1.488211,3.372005,3.030807,2.305803
1,1.0,-0.488027,0.466546,0.228711,0.139895,0.162045,0.419399,-2.754152,-1.540268,-1.353925,...,8.533753,5.603583,1.312436,0.109606,2.583266,3.1,3.168906,3.084722,1.585254,2.040236


#### Classifier

In [42]:
# NOTE(security): pd.read_pickle unpickles the file, which can execute arbitrary
# code. Only load classifier files that come from a trusted source.
clf = pd.read_pickle(os.path.join(path_to_classifier_input, classifier_file_name))

with open(os.path.join(path_to_classifier_input, thresholds_file_name), 'r') as f:
    thresholds_str = f.read()

# Split on any run of whitespace: splitting on a single space yields empty or
# whitespace-only tokens for repeated/trailing spaces and keeps newline-separated
# values glued together, all of which make float() raise.
thresholds = [float(x) for x in thresholds_str.split()]
if not thresholds:
    raise ValueError(f'No thresholds found in {thresholds_file_name}')

# The decision threshold is the mean of all thresholds listed in the file.
threshold = np.mean(thresholds)

## Prepare data

In [43]:
# Feature names in the column order handed to the classifier: per-axis band
# powers (one band at a time, all axes within a band), the norm standard
# deviation, 16 cepstral-style 'cc' features, per-axis gravity mean/std and the
# per-axis dominant frequency.
clf.feature_names_in_ = (
    [
        f'{axis}_{band}'
        for band in ('power_below_gait', 'power_gait', 'power_tremor', 'power_above_tremor')
        for axis in l_accel_cols
    ]
    + ['std_norm_acc']
    + [f'cc_{idx}_acc' for idx in range(1, 17)]
    + [f'grav_{axis}_{stat}' for axis in l_accel_cols for stat in ('mean', 'std')]
    + [f'{axis}_dominant_frequency' for axis in l_accel_cols]
)

# Select the feature columns from the dataframe in exactly that order.
X = df[clf.feature_names_in_]

## Predict gait

In [44]:
# Keep the second column of the predicted class probabilities
# (presumably the gait class - TODO confirm against clf.classes_).
df['pred_gait_proba'] = clf.predict_proba(X)[:, 1]
# A row is labelled as gait when its probability is strictly above the threshold.
df['pred_gait'] = df['pred_gait_proba'].gt(threshold)

## Store predictions

In [50]:
# Point both metadata objects at the output folder. The file names are taken
# from the parameters cell instead of being repeated as literals, so that the
# names cannot drift apart; with the default parameters the values are unchanged.
metadata_samples.file_name = values_filename
metadata_samples.file_dir_path = path_output
metadata_time.file_name = time_filename
metadata_time.file_dir_path = path_output

# Only the predicted probability is declared as a channel of the values file.
metadata_samples.channels = ['pred_gait_proba']
metadata_samples.units = ['probability']

metadata_time.channels = ['time']
metadata_time.units = ['s']
# NOTE(review): the time column is displayed as floats (0.0, 1.0) in the loaded
# dataframe, while int64 is declared here - confirm this is intended.
metadata_time.data_type = np.int64

In [51]:
# Create the output folder if it does not exist yet; exist_ok avoids the
# separate existence check (and the race between checking and creating).
os.makedirs(path_output, exist_ok=True)

# store binaries and metadata
tsdf.write_dataframe_to_binaries(path_output, df, [metadata_time, metadata_samples])
# Use the metadata file name from the parameters cell rather than a repeated literal.
tsdf.write_metadata([metadata_time, metadata_samples], meta_filename)