### Modeling

#### The purpose of this notebook is to experiment with predictive modeling on pre-processed run data.

In [2]:
import polars as pl

# Parquet file holding the pre-processed run data.
features_path = "../data-processed/run_data_1-16-26_processed.parquet"

# Columns used as model inputs.
features = [
    "accel_x",
    "accel_y",
    "accel_z",
    "cadence_steps_per_minute",
    "accel_magnitude_smooth",
]

# Column the model is asked to predict.
target = "target_speed_meters_per_sec_smooth"

# Load the file and keep only the input columns followed by the target column.
df = pl.read_parquet(features_path).select([*features, target])
df.head()

accel_x,accel_y,accel_z,cadence_steps_per_minute,accel_magnitude_smooth,target_speed_meters_per_sec_smooth
f64,f64,f64,f64,f64,f64
-0.089778,0.07349,-0.005192,-0.853403,-0.617078,1.8302
0.27938,-0.343743,-0.315802,-0.853403,-0.207575,1.82904
0.287324,-0.758407,-0.496813,-0.853403,0.243512,1.82788
-0.386125,-0.848167,-0.864288,-0.853403,0.556343,1.82672
-1.03806,-0.545426,-1.229081,-0.557265,0.667087,1.82556


In [None]:
import polars as pl
import numpy as np

def create_cnn_windows(
    df: "pl.DataFrame",
    features: list,
    target: str,
    window_size: int = 100,
    stride: int = 10,
):
    """
    Converts a flat Polars DataFrame into 3D windows for 1D-CNNs.
    Output Shape: (N_Samples, Num_Features, Window_Size)

    Rows are treated as consecutive time-steps in the order they appear in
    ``df``; no sorting or gap detection is performed here.

    Args:
        df: The processed and scaled DataFrame
        features: List of column names to use as inputs
        target: The name of the target column (speed)
        window_size: Number of time-steps per sequence (e.g., 100 for 2s @ 50Hz)
        stride: How many rows to move the window (1 = heavily overlapping)

    Returns:
        Tuple ``(X, y)``:
            X: array of shape (N_Samples, Num_Features, Window_Size). It is an
               independent, writable, C-contiguous copy, so in-place edits to
               one window never leak into overlapping windows.
            y: array of shape (N_Samples, 1) holding the target value at the
               last row of each window.
        N_Samples is ``(n_rows - window_size) // stride + 1``, or 0 when the
        frame has fewer than ``window_size`` rows.

    Raises:
        ValueError: If ``window_size`` or ``stride`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    feature_array = df.select(features).to_numpy()
    target_array = df.select(target).to_numpy()

    # Not enough rows for even one window: return correctly shaped empties
    # instead of asking NumPy for a negative-length dimension.
    if len(feature_array) < window_size:
        empty_X = np.empty(
            (0, feature_array.shape[1], window_size), dtype=feature_array.dtype
        )
        return empty_X, target_array[:0]

    # sliding_window_view appends the window axis last, giving
    # (n_rows - window_size + 1, Num_Features, Window_Size) directly. That is
    # already the (N, C, L) layout PyTorch Conv1d expects, so no transpose is
    # needed. Taking every `stride`-th window applies the hop size.
    windows = np.lib.stride_tricks.sliding_window_view(
        feature_array, window_size, axis=0
    )[::stride]

    # The view is read-only and its windows share memory; materialise it so
    # callers get a normal array they can safely modify or hand to a tensor.
    X = windows.copy()

    # Target corresponds to the end of each window
    y = target_array[window_size - 1 :: stride]

    return X, y

In [16]:
# Build the windowed dataset using the function's default window_size and stride.
X, y = create_cnn_windows(
    df=df,
    features=features,
    target=target,
)

# Report the resulting array shapes.
print(f"Feature data shape: {X.shape}")
print(f"Target data shape: {y.shape}")

Feature data shape: (1830, 5, 100)
Target data shape: (1830, 1)
