### Data Processing

#### The purpose of this notebook is to read in the initial run data, investigate its features, and perform preprocessing for modeling.

In [1]:
from pathlib import Path

import polars as pl

# Raw run export; the path is relative, so it resolves against the kernel's working directory.
raw_data_path = "../data-raw/run_data_1-16-26.csv"
# Load the CSV into a polars DataFrame (dtypes are inferred by polars).
raw_data = pl.read_csv(raw_data_path)
# Preview the first rows to sanity-check the columns and dtypes.
raw_data.head()

timestamp,accel_x,accel_y,accel_z,speed_mps
f64,f64,f64,f64,f64
1768600000.0,0.202988,-0.571716,-0.608765,0.0
1768600000.0,0.259293,-0.597717,-0.709015,0.0
1768600000.0,0.290527,-0.555283,-0.788055,0.0
1768600000.0,0.174026,-0.39856,-0.739792,0.0
1768600000.0,0.099594,-0.373596,-0.715012,0.0


In [3]:
from gait_analytics.preprocess import process_gait_data
from gait_analytics.visualization import plot_data

# Run the project's preprocessing on the raw frame.
# NOTE(review): judging by the df_scaled preview further down, this presumably adds
# accel_magnitude, is_step, cadence_steps_per_minute, accel_magnitude_smoothed and the
# target_speed_mps* columns -- confirm against gait_analytics.preprocess.
df_preprocessed = process_gait_data(raw_data)
# Visual check of the preprocessed frame; what exactly is drawn is decided by plot_data.
plot_data(df_preprocessed)

In [6]:
# Normalize features (z-score): each listed column is replaced in place by
# (value - column mean) / column standard deviation. Columns not listed here
# (timestamp, speed_mps, accel_magnitude, is_step, target_*) are left untouched.
#
# NOTE(review): the mean/std are computed over the whole of df_preprocessed and
# are not stored anywhere. If a train/test split happens downstream, the scaling
# statistics include the test rows, and new data cannot be scaled consistently
# without recomputing them -- confirm this is intended.

features_to_scale = ["accel_x", "accel_y", "accel_z", "accel_magnitude_smoothed", "cadence_steps_per_minute"]

df_scaled = df_preprocessed.with_columns([
    # Guard the division: a zero-variance column would otherwise become NaN
    # (0 / 0). In that case the column is only centered, so it becomes all
    # zeros while any nulls are preserved.
    pl.when(pl.col(c).std() > 0)
    .then((pl.col(c) - pl.col(c).mean()) / pl.col(c).std())
    .otherwise(pl.col(c) - pl.col(c).mean())
    .alias(c)
    for c in features_to_scale
])

In [7]:
# Preview the scaled frame: only the columns in features_to_scale were rewritten,
# every other column keeps the values it had in df_preprocessed.
df_scaled.head()

timestamp,accel_x,accel_y,accel_z,speed_mps,accel_magnitude,is_step,cadence_steps_per_minute,accel_magnitude_smoothed,target_speed_mps,target_speed_mps_smoothed
f64,f64,f64,f64,f64,f64,bool,f64,f64,f64,f64
1768600000.0,-0.089778,0.07349,-0.005192,2.04,1.06134,False,-0.853403,-0.617078,1.86,1.8302
1768600000.0,0.27938,-0.343743,-0.315802,2.04,1.367709,False,-0.853403,-0.207575,1.86,1.82904
1768600000.0,0.287324,-0.758407,-0.496813,2.04,1.744429,False,-0.853403,0.243512,1.86,1.82788
1768600000.0,-0.386125,-0.848167,-0.864288,2.04,2.225552,False,-0.853403,0.556343,1.86,1.82672
1768600000.0,-1.03806,-0.545426,-1.229081,2.04,2.626112,True,-0.557265,0.667087,1.86,1.82556


In [37]:
# Persist the scaled frame for the modeling step.
processed_path = Path("../data-processed/run_data_1-16-26_processed.parquet")
# Create the output directory if it is missing (e.g. on a fresh checkout);
# write_parquet does not create parent directories by default, so the write would fail.
processed_path.parent.mkdir(parents=True, exist_ok=True)
df_scaled.write_parquet(processed_path)