# Gait Data Analysis Pipeline  
Load gait-parameter CSVs and reference data, merge and compare them, compute RMSE, and save the results to an Excel file.


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import scipy.stats as stats
import plotly.express as px
import plotly.graph_objects as go

from ipywidgets import widgets, Output
from IPython.display import display

# --- Settings ---
# Trial folder names, one per letter: "TEST-A" through "TEST-L".
trials = ["TEST-" + chr(code) for code in range(ord("A"), ord("L") + 1)]
# Root folder; each trial lives in its own sub-folder below it.
base_path = "/path/to/trials"
# Destination workbook, placed directly inside the root folder.
output_file = os.path.join(base_path, "gait_analysis_results.xlsx")

## 1. Load Algorithm CSVs

In [None]:
# Step 1: Read per-trial CSVs into algo_data dict
# Each file is looked up at <base_path>/<trial>/Results/Figures/<trial>_parameters.csv
# and parsed with a two-row column header.
algo_data = {}
for trial in trials:
    csv_path = os.path.join(
        base_path, trial, "Results", "Figures", f"{trial}_parameters.csv"
    )
    try:
        df = pd.read_csv(csv_path, header=[0, 1])
    except FileNotFoundError:
        # A missing file is reported and the trial is left out of algo_data.
        print(f"CSV missing for {trial}")
    else:
        # clean, re-index...
        algo_data[trial] = df


## 2. Load Reference (Awinda) Excel Summaries

In [None]:
# Step 2: Read per-trial Excel into ref_data dict
# Each workbook is looked up at <base_path>/<trial>/Awinda Summarized.xlsx and parsed
# with rows 1-3 (0-based) as a three-level column header and column 0 as the index.
ref_data = {}
for trial in trials:
    xls_path = os.path.join(base_path, trial, "Awinda Summarized.xlsx")
    try:
        df = pd.read_excel(xls_path, header=[1, 2, 3], index_col=0)
    except FileNotFoundError:
        # A missing workbook is reported and the trial is left out of ref_data.
        print(f"Excel missing for {trial}")
    else:
        # clean, stack, filter mean...
        ref_data[trial] = df


## 3. Merge Algorithm Data and Reference Data


In [None]:
# Step 3: For each trial, merge algo_data[trial] with ref_data[trial]
# merged_list collects one (trial, algo frame, reference frame, merged frame) tuple
# per trial that was loaded from both sources.
merged_list = []
for trial in trials:
    # Trials missing from either source cannot be compared and are skipped.
    if trial not in algo_data or trial not in ref_data:
        continue
    df_a = algo_data[trial]  # reset_index, filter bilateral
    df_r = ref_data[trial]
    # Reference on the left, algorithm on the right; no `how` is given, so pandas'
    # default join applies. Overlapping column names get the _ref / _algo suffixes.
    df_m = df_r.merge(
        df_a,
        on=['Trial', 'Task', 'Foot'],
        suffixes=('_ref', '_algo'),
    )
    merged_list.append((trial, df_a, df_r, df_m))


## 4. Compute RMSE Summary

In [None]:
# Step 4: Concatenate all merges and compute task-level RMSE
# Fail early with an actionable message: the loading steps only print a notice for
# missing files, so merged_list can be empty, and pd.concat would then raise an
# opaque "No objects to concatenate" ValueError.
if not merged_list:
    raise ValueError(
        "No trials were merged; check that the algorithm CSVs and the reference "
        "workbooks exist under base_path."
    )

all_merged = pd.concat([m for _, _, _, m in merged_list], ignore_index=True)
# Only rows whose Foot value is 'bilateral' enter the RMSE summary.
df_bi = all_merged[all_merged['Foot'] == 'bilateral']
gait_params = ['cadence', 'stance_time', 'swing_time', 'stride_length', 'stride_width']
# One row per task, one column per gait parameter.
rmse_df = pd.DataFrame(index=df_bi['Task'].unique(), columns=gait_params)

task_key = df_bi['Task']  # same grouping key for every parameter, so look it up once
for p in gait_params:
    x = df_bi[f"{p}_mean_ref"]
    y = df_bi[f"{p}_mean_algo"]
    # Root of the per-task mean squared difference; the assignment aligns the
    # grouped result with rmse_df on the task labels.
    rmse_df[p] = np.sqrt(((x - y) ** 2).groupby(task_key).mean())
# Number of bilateral rows that contributed to each task.
rmse_df['n_trials'] = df_bi.groupby('Task').size()


## 5. Export to Excel Workbook

In [None]:
# Step 5: Save each sheet into a single .xlsx
# Per trial: "<trial>_algo" (written with its index), "<trial>_ref" and
# "<trial>_merged" (both written without index); then the two summary sheets.
with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
    for trial, df_a, df_r, df_m in merged_list:
        per_trial_sheets = (
            ("algo", df_a, True),
            ("ref", df_r, False),
            ("merged", df_m, False),
        )
        for suffix, frame, keep_index in per_trial_sheets:
            frame.to_excel(writer, sheet_name=f"{trial}_{suffix}", index=keep_index)
    all_merged.to_excel(writer, sheet_name="All_Merged", index=False)
    rmse_df.to_excel(writer, sheet_name="RMSE_Summary")
print("Workbook saved:", output_file)


## 6. Visualization Helper Functions

In [None]:
# Step 6: Visualization Helper Functions


from ipywidgets import Output
from scipy.stats import linregress

# Prepare bilateral-only DataFrame
# Taken as an independent copy so the plotting helpers never touch all_merged.
is_bilateral = all_merged['Foot'].eq('bilateral')
df_bilateral = all_merged[is_bilateral].copy()

def show_scatter(parameter, tasks=None):
    """
    Scatter plot of Awinda vs. Algorithm for `parameter`.

    Reads the module-level `df_bilateral` frame, plots the
    `<parameter>_mean_ref` column (x) against `<parameter>_mean_algo` (y),
    coloured by task, and overlays the identity line and a least-squares fit.
    The figure is rendered inside a freshly displayed `Output` widget.

    Parameters
    ----------
    parameter : str
        Base parameter name, without the `_mean_ref` / `_mean_algo` suffix.
    tasks : sequence of task labels, optional
        Tasks to include. `None` or an empty sequence selects every task
        present in `df_bilateral`.

    Returns
    -------
    None
    """
    out = Output()
    display(out)
    # An empty selection (e.g. nothing picked in a multi-select widget) is treated
    # like None; filtering on it would leave no rows to plot or fit.
    if tasks is None or len(tasks) == 0:
        tasks = df_bilateral['Task'].unique().tolist()
    dfp = df_bilateral[df_bilateral['Task'].isin(tasks)]
    xcol = f"{parameter}_mean_ref"
    ycol = f"{parameter}_mean_algo"
    # linregress has no NaN handling, so fit only on rows where both values exist.
    paired = dfp[[xcol, ycol]].dropna()

    with out:
        fig = px.scatter(
            dfp, x=xcol, y=ycol,
            color='Task', hover_data=['Trial']
        )
        # Identity line y=x, spanning the observed reference range
        lims = [dfp[xcol].min(), dfp[xcol].max()]
        fig.add_trace(go.Scatter(
            x=lims, y=lims, mode='lines',
            line=dict(dash='dash', color='gray'),
            name='y = x'
        ))
        # Best-fit regression; needs at least two distinct x values, otherwise
        # the slope is undefined and the fit line is omitted.
        if paired[xcol].nunique() >= 2:
            fit = linregress(paired[xcol], paired[ycol])
            fit_y = [fit.slope * v + fit.intercept for v in lims]
            fig.add_trace(go.Scatter(
                x=lims, y=fit_y, mode='lines',
                line=dict(color='blue'), name='Best fit'
            ))
        fig.update_layout(
            title=f"Awinda vs Algo: {parameter}",
            xaxis_title=f"Awinda ({parameter})",
            yaxis_title=f"Algorithm ({parameter})"
        )
        fig.show()

def show_bland_altman(parameter, tasks=None):
    """
    Bland–Altman plot for `parameter`.

    Reads the module-level `df_bilateral` frame and plots, per row, the mean of
    the two systems (x) against the difference algorithm − reference (y), with
    horizontal lines at the mean bias and at mean bias ± 1.96 SD. The figure is
    rendered inside a freshly displayed `Output` widget.

    Parameters
    ----------
    parameter : str
        Base parameter name, without the `_mean_ref` / `_mean_algo` suffix.
    tasks : sequence of task labels, optional
        Tasks to include. `None` or an empty sequence selects every task
        present in `df_bilateral`.

    Returns
    -------
    None
    """
    out = Output()
    display(out)
    # An empty selection is treated like None; filtering on it would leave no rows.
    if tasks is None or len(tasks) == 0:
        tasks = df_bilateral['Task'].unique().tolist()
    dfp = df_bilateral[df_bilateral['Task'].isin(tasks)]
    xcol = f"{parameter}_mean_ref"
    ycol = f"{parameter}_mean_algo"

    mean_vals = (dfp[xcol] + dfp[ycol]) / 2
    bias = dfp[ycol] - dfp[xcol]
    mbias = bias.mean()
    sd = bias.std()
    upper = mbias + 1.96 * sd
    lower = mbias - 1.96 * sd

    with out:
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=mean_vals, y=bias, mode='markers',
            marker=dict(size=8), name='Data'
        ))
        # Annotation options must carry the `annotation_` prefix; other keyword
        # arguments to add_hline are applied to the line shape itself, which has
        # no `showarrow` property.
        # mean bias line (undefined when there is no usable row)
        if pd.notna(mbias):
            fig.add_hline(y=mbias, line_dash='dash', line_color='gray',
                          annotation_text=f"Mean bias: {mbias:.2f}",
                          annotation_showarrow=False)
        # limits of agreement (SD is undefined with fewer than two usable rows)
        if pd.notna(sd):
            fig.add_hline(y=upper, line_dash='dash', line_color='red',
                          annotation_text=f"+1.96 SD: {upper:.2f}",
                          annotation_showarrow=False)
            fig.add_hline(y=lower, line_dash='dash', line_color='red',
                          annotation_text=f"-1.96 SD: {lower:.2f}",
                          annotation_showarrow=False)
        fig.update_layout(
            title=f"Bland–Altman: {parameter}",
            xaxis_title="Mean of Two Systems",
            yaxis_title="Bias"
        )
        fig.show()


## 7. Interactive Widgets

In [None]:
# Step 7: Interactive Widgets Which Output a Scatter Plot and a Bland-Altman Plot

# clear_output is needed by the callback below; imported here so this cell does not
# depend on an import that the notebook never performs elsewhere.
from IPython.display import clear_output

# Dropdown for choosing parameter (one entry per "<name>_mean_ref" column)
param_dropdown = widgets.Dropdown(
    options=[p for p in df_bilateral.columns if p.endswith('_mean_ref')],
    description='Parameter:',
    layout=widgets.Layout(width='50%')
)

# Multi-select for tasks
task_selector = widgets.SelectMultiple(
    options=df_bilateral['Task'].unique().tolist(),
    description='Tasks:',
    layout=widgets.Layout(width='50%', height='150px')
)

# Display controls
display(param_dropdown, task_selector)

# Update both plots when controls change
def on_controls_change(change):
    """
    Redraw the controls and both plots for the current widget selection.

    Parameters
    ----------
    change : dict or None
        Change notification passed by the widget `observe` machinery; it is not
        inspected. `None` is passed for the initial draw.
    """
    # clear previous outputs
    clear_output(wait=True)
    display(param_dropdown, task_selector)
    selected = param_dropdown.value
    if selected is None:
        # The dropdown has no options (no *_mean_ref columns), so there is
        # nothing to plot.
        return
    # extract base param name without suffix ("<name>_mean_ref" -> "<name>")
    param = selected.rsplit('_', 2)[0]
    # Nothing selected means "all tasks": pass None so the helpers apply their
    # default instead of filtering down to an empty frame.
    chosen_tasks = list(task_selector.value) or None
    show_scatter(param, chosen_tasks)
    show_bland_altman(param, chosen_tasks)

param_dropdown.observe(on_controls_change, names='value')
task_selector.observe(on_controls_change, names='value')

# Initial draw
on_controls_change(None)
