# Modules

In [1]:
from pdathome.constants import global_constants as gc
from pdathome.preprocessing import preprocess_filtering_gait

# Process data

In [3]:
# Run the gait filtering preprocessing for every participant: PD ids first, then HC ids.
# The loop variable keeps the name `subject`; it stays defined after the loop and is read by a later cell.
for subject in [*gc.participant_ids.L_PD_IDS, *gc.participant_ids.L_HC_IDS]:
    preprocess_filtering_gait(subject)

Time 2024-11-29 11:33:05.401901 - hbv002 - Starting preprocessing filtering gait ...
Time 2024-11-29 11:33:12.937718 - hbv002 - Finished preprocessing filtering gait.
Time 2024-11-29 11:33:13.125996 - hbv012 - Starting preprocessing filtering gait ...
Time 2024-11-29 11:33:21.328181 - hbv012 - Finished preprocessing filtering gait.
Time 2024-11-29 11:33:21.528124 - hbv014 - Starting preprocessing filtering gait ...
Time 2024-11-29 11:33:28.924726 - hbv014 - Finished preprocessing filtering gait.
Time 2024-11-29 11:33:29.101190 - hbv015 - Starting preprocessing filtering gait ...
Time 2024-11-29 11:33:37.111695 - hbv015 - Finished preprocessing filtering gait.
Time 2024-11-29 11:33:37.303838 - hbv016 - Starting preprocessing filtering gait ...
Time 2024-11-29 11:33:44.344427 - hbv016 - Finished preprocessing filtering gait.
Time 2024-11-29 11:33:44.519984 - hbv017 - Starting preprocessing filtering gait ...
Time 2024-11-29 11:33:51.933952 - hbv017 - Finished preprocessing filtering gait

In [None]:
import datetime
import json
import numpy as np
import os
import pandas as pd

from collections import Counter
from scipy.interpolate import CubicSpline

from paradigma.gait.feature_extraction import extract_temporal_domain_features, extract_spectral_domain_features, \
    pca_transform_gyroscope, compute_angle, remove_moving_average_angle, extract_angle_features
from paradigma.gait.gait_analysis_config import GaitFeatureExtractionConfig, ArmActivityFeatureExtractionConfig
from paradigma.imu_preprocessing import butterworth_filter
from paradigma.preprocessing_config import IMUPreprocessingConfig
from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, categorize_segments

from pdathome.constants import global_constants as gc, mappings as mp
from pdathome.load import load_stage_start_end, load_sensor_data, load_video_annotations
from pdathome.utils import save_to_pickle


def compute_mode(data):
    """Return the most frequent value of 1D data together with its number of occurrences.

    np.unique returns the distinct values in sorted order and np.argmax picks the
    first maximum, so ties are resolved in favour of the smallest value.
    """
    unique_values, frequencies = np.unique(data, return_counts=True)
    idx_most_frequent = frequencies.argmax()
    return unique_values[idx_most_frequent], frequencies[idx_most_frequent]

def is_majority(data, target="Walking"):
    """Tell whether `target` makes up strictly more than half of the entries of 1D data."""
    labels, frequencies = np.unique(data, return_counts=True)

    # A target that never occurs cannot be the majority
    if target not in labels:
        return 0 > (len(data) / 2)

    n_target = frequencies[labels == target].sum()
    half_length = len(data) / 2
    return n_target > half_length

# Extract arm activity features from predicted gait, once per wrist side, and store them as a pickle.
#
# NOTE(review): this cell reads `subject` without defining it. It relies on the loop variable left
# behind by the preprocessing cell above, so it only runs after that cell and then processes only the
# last participant of that loop.
for side in [gc.descriptives.MOST_AFFECTED_SIDE, gc.descriptives.LEAST_AFFECTED_SIDE]:

    # Load timestamps; times are rounded to 2 decimals so they can be used as a merge key below
    df_ts = pd.read_pickle(os.path.join(gc.paths.PATH_PREPARED_DATA, f'{subject}_{side}.pkl'))
    df_ts['time'] = df_ts['time'].round(2)

    # Load gait features
    df_features = pd.read_pickle(os.path.join(gc.paths.PATH_GAIT_FEATURES, f'{subject}_{side}.pkl'))

    # Load gait predictions
    df_pred = pd.read_pickle(os.path.join(gc.paths.PATH_GAIT_PREDICTIONS, gc.classifiers.GAIT_DETECTION_CLASSIFIER_SELECTED, f'{subject}_{side}.pkl'))

    # Load classification threshold
    with open(os.path.join(gc.paths.PATH_THRESHOLDS, 'gait', f'{gc.classifiers.GAIT_DETECTION_CLASSIFIER_SELECTED}.txt'), 'r') as f:
        threshold = float(f.read())

    # Determine gait prediction per timestamp: pair each window start time with its prediction
    # (column-wise concat, so this assumes both frames share the same row index — TODO confirm)
    l_cols_features = ['time']
    df_predictions = pd.concat([df_features[l_cols_features], df_pred], axis=1)

    imu_config = IMUPreprocessingConfig()
    gait_config = GaitFeatureExtractionConfig()
    arm_activity_config = ArmActivityFeatureExtractionConfig()

    # Step 1: Expand each window into individual timestamps, each carrying the window's probability
    expanded_data = []
    for _, row in df_predictions.iterrows():
        start_time = row['time']
        proba = row['pred_gait_proba']
        timestamps = np.arange(start_time, start_time + gait_config.window_length_s, 1/gc.parameters.DOWNSAMPLED_FREQUENCY)
        expanded_data.extend(zip(timestamps, [proba] * len(timestamps)))

    # Create a new DataFrame with expanded timestamps
    expanded_df = pd.DataFrame(expanded_data, columns=['time', 'pred_gait_proba'])

    # Step 2: Round timestamps to avoid floating-point inaccuracies
    expanded_df['time'] = expanded_df['time'].round(2)

    # Step 3: Aggregate by unique timestamps and calculate the mean probability
    # (a timestamp covered by several windows gets the average of their probabilities)
    expanded_df = expanded_df.groupby('time', as_index=False)['pred_gait_proba'].mean()

    # Timestamps not covered by any window keep a NaN probability after this left merge
    df_ts = pd.merge(left=df_ts, right=expanded_df, how='left', on='time')

    imu_config.acceleration_units = 'g'
    arm_activity_config.list_value_cols += [gc.columns.TIME, gc.columns.FREE_LIVING_LABEL]

    # Extract relevant columns for accelerometer data
    accel_cols = imu_config.l_accelerometer_cols

    # Change to correct units [g]; with the units set to 'g' above this leaves the data unchanged
    df_ts[accel_cols] = df_ts[accel_cols] / 9.81 if imu_config.acceleration_units == 'm/s^2' else df_ts[accel_cols]

    # Extract accelerometer data once, so that both filters below operate on the unfiltered signal
    accel_data = df_ts[imu_config.l_accelerometer_cols].values

    # "hp" overwrites the accelerometer columns, "lp" is written to new '<col>_grav' columns.
    # The "replace_original" flag is not read in this cell; the target columns alone decide this.
    filter_configs = {
        "hp": {"result_columns": imu_config.l_accelerometer_cols, "replace_original": True},
        "lp": {"result_columns": [f'{col}_grav' for col in imu_config.l_accelerometer_cols], "replace_original": False},
    }

    # Apply filters in a loop
    for passband, filter_config in filter_configs.items():
        filtered_data = butterworth_filter(
            data=accel_data,
            order=imu_config.filter_order,
            cutoff_frequency=imu_config.lower_cutoff_frequency,
            passband=passband,
            sampling_frequency=imu_config.sampling_frequency,
        )

        # Replace or add new columns based on configuration
        df_ts[filter_config["result_columns"]] = filtered_data

    # Remove rows without gyroscope data; take an explicit copy so that the column
    # assignments below operate on an independent frame
    df_ts = df_ts.dropna(subset=gc.columns.L_GYROSCOPE).copy()

    # Apply threshold; rows with a NaN probability compare as False and are labelled 0
    df_ts[gc.columns.PRED_GAIT] = (df_ts[gc.columns.PRED_GAIT_PROBA] >= threshold).astype(int)

    # Perform principal component analysis on the gyroscope signals to obtain the angular velocity in the
    # direction of the swing of the arm
    df_ts[gc.columns.VELOCITY] = pca_transform_gyroscope(
        config=arm_activity_config,
        df=df_ts,
    )

    # Integrate the angular velocity to obtain an estimation of the angle
    df_ts[gc.columns.ANGLE] = compute_angle(
        config=arm_activity_config,
        df=df_ts,
    )

    # Remove the moving average from the angle to account for possible drift caused by the integration
    # of noise in the angular velocity
    df_ts[gc.columns.ANGLE] = remove_moving_average_angle(
        config=arm_activity_config,
        df=df_ts,
    )

    # Filter unobserved data (arm labels are only used for PD participants)
    if subject in gc.participant_ids.L_PD_IDS:
        df_ts = df_ts[df_ts[gc.columns.ARM_LABEL] != 'Cannot assess']

    # Use only predicted gait for the subsequent steps
    df_ts = df_ts[df_ts[gc.columns.PRED_GAIT] == 1].reset_index(drop=True)

    # Group consecutive timestamps into segments with new segments starting after a pre-specified gap
    df_ts[gc.columns.SEGMENT_NR] = create_segments(
        config=arm_activity_config,
        df=df_ts
    )

    # Remove any segments that do not adhere to predetermined criteria
    df_ts = discard_segments(
        config=arm_activity_config,
        df=df_ts
    )

    # Create windows of fixed length and step size from the time series
    windowed_data = []

    # The position of a column in this list is its index along the last axis of the windowed array
    l_windowed_cols = [
        gc.columns.TIME, gc.columns.FREE_LIVING_LABEL, gc.columns.ANGLE, gc.columns.VELOCITY
        ] + arm_activity_config.l_accelerometer_cols + arm_activity_config.l_gravity_cols + arm_activity_config.l_gyroscope_cols

    if subject in gc.participant_ids.L_PD_IDS:
        l_windowed_cols += [gc.columns.ARM_LABEL]

    # Window each segment separately so that no window spans two segments
    df_grouped = df_ts.groupby(gc.columns.SEGMENT_NR, sort=False)

    for _, group in df_grouped:
        windows = tabulate_windows(
            config=arm_activity_config,
            df=group,
            columns=l_windowed_cols
        )
        if len(windows) > 0:  # Skip if no windows are created
            windowed_data.append(windows)

    if len(windowed_data) > 0:
        windowed_data = np.concatenate(windowed_data, axis=0)
    else:
        raise ValueError("No windows were created from the given data.")

    df_features = pd.DataFrame()

    # Window start time = time of the first sample of each window.
    # NOTE(review): only the time column is sorted here; every other feature column follows the window
    # order of `windowed_data`. This assumes the windows are already in chronological order — confirm.
    df_features[gc.columns.TIME] = sorted(windowed_data[:, 0, l_windowed_cols.index(gc.columns.TIME)])

    if subject in gc.participant_ids.L_PD_IDS:
        df_features = pd.merge(left=df_features, right=df_ts[[gc.columns.TIME, gc.columns.PRE_OR_POST]], how='left', on=gc.columns.TIME)

    # Calculate the mode of the labels.
    # The mode is computed window by window instead of through np.apply_along_axis: that function sizes
    # its output buffer from the first result, which silently truncates longer label strings.
    windowed_labels = windowed_data[:, :, l_windowed_cols.index(gc.columns.FREE_LIVING_LABEL)]

    df_features[gc.columns.ACTIVITY_LABEL_MAJORITY_VOTING] = [compute_mode(window)[0] for window in windowed_labels]
    df_features[gc.columns.GAIT_MAJORITY_VOTING] = [is_majority(window) for window in windowed_labels]

    if subject in gc.participant_ids.L_PD_IDS:
        windowed_labels = windowed_data[:, :, l_windowed_cols.index(gc.columns.ARM_LABEL)]

        df_features[gc.columns.ARM_LABEL_MAJORITY_VOTING] = [compute_mode(window)[0] for window in windowed_labels]
        df_features[gc.columns.NO_OTHER_ARM_ACTIVITY_MAJORITY_VOTING] = [is_majority(window, target="Gait without other behaviours or other positions") for window in windowed_labels]

    # Locate the sensor columns within the windowed array
    accel_indices = [l_windowed_cols.index(x) for x in gait_config.l_accelerometer_cols]
    grav_indices = [l_windowed_cols.index(x) for x in gait_config.l_gravity_cols]
    gyro_indices = [l_windowed_cols.index(x) for x in gait_config.l_gyroscope_cols]
    idx_angle = l_windowed_cols.index(gc.columns.ANGLE)
    idx_velocity = l_windowed_cols.index(gc.columns.VELOCITY)

    # The min:max+1 slices rely on each sensor's columns being adjacent in l_windowed_cols;
    # the cast to float converts the sliced values to a numeric array
    accel_windowed = np.asarray(windowed_data[:, :, np.min(accel_indices):np.max(accel_indices) + 1], dtype=float)
    grav_windowed = np.asarray(windowed_data[:, :, np.min(grav_indices):np.max(grav_indices) + 1], dtype=float)
    gyro_windowed = np.asarray(windowed_data[:, :, np.min(gyro_indices):np.max(gyro_indices) + 1], dtype=float)
    angle_windowed = np.asarray(windowed_data[:, :, idx_angle], dtype=float)
    velocity_windowed = np.asarray(windowed_data[:, :, idx_velocity], dtype=float)

    # Angle features
    df_features_angle = extract_angle_features(arm_activity_config, angle_windowed, velocity_windowed)
    df_features = pd.concat([df_features, df_features_angle], axis=1)

    # Compute statistics of the temporal domain accelerometer signals
    df_temporal_features = extract_temporal_domain_features(arm_activity_config, accel_windowed, grav_windowed, l_grav_stats=['mean', 'std'])
    df_features = pd.concat([df_features, df_temporal_features], axis=1)

    # Transform the accelerometer and gyroscope signals from the temporal domain to the spectral domain
    # using the fast fourier transform and extract spectral features
    for sensor_name, windowed_sensor in zip(['accelerometer', 'gyroscope'], [accel_windowed, gyro_windowed]):
        df_spectral_features = extract_spectral_domain_features(arm_activity_config, sensor_name, windowed_sensor)
        df_features = pd.concat([df_features, df_spectral_features], axis=1)

    # Store the arm activity features for this participant and side
    file_path = os.path.join(gc.paths.PATH_ARM_ACTIVITY_FEATURES, f'{subject}_{side}.pkl')
    df_features.to_pickle(file_path)