# Chapter 3, Analyze

Ebook: A Hands-On Guide to Biomechanics Data Analysis with Python and AI  
Author: Dr. Hossein Mokhtarzadeh  
Powered by PoseIQ

Goal: detect gait events and compute basic metrics from the parsed data.  
Click Runtime > Run all to execute every cell.


## Install packages

In [None]:
!pip -q install ezc3d pandas scipy matplotlib

## Download and parse minimal data, same source as Chapter 2

In [None]:
import os, urllib.request, zipfile, numpy as np, pandas as pd, ezc3d

def safe_download(url, out_path):
    """Fetch ``url`` into ``out_path`` and report whether it worked.

    Any exception raised by ``urllib.request.urlretrieve`` is caught and
    printed with a "Download failed:" prefix instead of propagating, so the
    caller only has to look at the boolean result.

    Args:
        url: Address to retrieve.
        out_path: Local path the retrieved content is written to.

    Returns:
        ``True`` when the retrieval finished without raising, ``False``
        otherwise.
    """
    try:
        urllib.request.urlretrieve(url, out_path)
    except Exception as err:
        print("Download failed:", err)
        return False
    else:
        return True

# Extraction folder and local name of the downloaded archive.
os.makedirs("sample_data", exist_ok=True)
zpath = "sample_data.zip"

# Candidate archives are tried in this order; any() stops at the first
# download that succeeds, so later URLs are only contacted as fallbacks.
candidate_urls = (
    "https://c3d.org/data/Sample00.zip",
    "https://www.c3d.org/data/Sample00.zip",
    "https://c3d.org/data/Sample01.zip",
    "https://www.c3d.org/data/Sample01.zip",
)
ok = any(safe_download(candidate, zpath) for candidate in candidate_urls)

if not ok:
    raise RuntimeError("Could not download sample data from c3d.org. Upload a .c3d file to proceed.")

# Unpack the whole archive into the extraction folder.
with zipfile.ZipFile(zpath, mode="r") as archive:
    archive.extractall("sample_data")

# Find C3D
# Walk the extraction folder and keep every path whose file name ends in
# ".c3d" (case-insensitive), in the order os.walk reports them.
c3d_files = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk("sample_data")
    for name in names
    if name.lower().endswith(".c3d")
]
if not c3d_files:
    raise FileNotFoundError("No .c3d files found after extraction.")

# The first hit is the file analysed in the rest of the notebook.
# NOTE(review): os.walk order is filesystem-dependent, so "first" may differ
# between machines - confirm this matches the file used in Chapter 2.
c3d_path = c3d_files[0]
c3d = ezc3d.c3d(c3d_path)

# Points and time
points = c3d["data"]["points"]

# Use the POINT:RATE parameter when the file provides it; otherwise fall back
# to the frame rate stored in the header.
params = c3d["parameters"]
if "POINT" in params and "RATE" in params["POINT"]:
    point_rate = float(params["POINT"]["RATE"]["value"][0])
else:
    point_rate = float(c3d["header"]["points"]["frame_rate"])

# Frames are counted along the third axis of the points array; the time
# vector starts at zero and advances by 1 / point_rate per frame.
n_frames = points.shape[2]
time = np.arange(n_frames) / point_rate

# One marker, filtered
from scipy.signal import butter, filtfilt
def butter_lowpass(data, cutoff_hz, fs_hz, order=4, axis=-1):
    """Low-pass ``data`` with a Butterworth filter run forward and backward.

    Args:
        data: Array-like signal to filter.
        cutoff_hz: Cutoff frequency, in the same unit as ``fs_hz``.
        fs_hz: Sampling frequency of ``data``.
        order: Order passed to ``scipy.signal.butter``.
        axis: Axis of ``data`` along which ``filtfilt`` is applied.

    Returns:
        The output of ``scipy.signal.filtfilt`` for the designed filter.
    """
    # butter() takes the cutoff as a fraction of the Nyquist frequency.
    nyquist_hz = 0.5 * fs_hz
    normalised_cutoff = cutoff_hz / nyquist_hz
    numerator, denominator = butter(order, normalised_cutoff, btype="low")
    return filtfilt(numerator, denominator, data, axis=axis)

# Rows 0-2 of point index 0 across every frame, transposed so that frames
# run along axis 0, then divided by 1000.
# NOTE(review): the /1000 presumably converts mm to m - confirm against the
# file's POINT:UNITS before relying on the scale.
first_marker = points[:3, 0, :]
xyz = first_marker.T / 1000.0
# 4th-order low-pass at 6 Hz, applied along the frame axis.
xyz_f = butter_lowpass(xyz, cutoff_hz=6, fs_hz=point_rate, order=4, axis=0)

# Analogs
analogs = c3d["data"]["analogs"]
# Analog data counts as present only when it is a non-empty NumPy array.
has_analogs = isinstance(analogs, np.ndarray) and analogs.size > 0

# Defaults used when the file carries no ANALOG parameter group.
analog_rate = None
analog_labels = []
if "ANALOG" in c3d["parameters"]:
    parA = c3d["parameters"]["ANALOG"]
    if "RATE" in parA:
        analog_rate = float(parA["RATE"]["value"][0])
    # Every parameter whose name starts with "LABELS" contributes its values,
    # concatenated in the order the group lists them.
    label_keys = [key for key in parA if key.startswith("LABELS")]
    for key in label_keys:
        analog_labels.extend(parA[key]["value"])

# Build df
# One row per point frame: the time stamp and the filtered third coordinate
# of the selected marker.
df = pd.DataFrame({"Time": time})
df["Marker_Z"] = xyz_f[:, 2]

# Attach one analog channel sampled to marker rate for convenience
if has_analogs and analog_rate:
    n_sub, n_analogs, n_frames_a = analogs.shape
    # pick first channel
    ch = 0
    n_rows = len(df)
    rate_ratio = int(round(analog_rate / point_rate))
    if rate_ratio == n_sub:
        # The rate ratio equals the first dimension: use its index 0 directly.
        channel = analogs[0, ch, :]
    else:
        # Fallback: flatten each channel into one series and keep every
        # step-th sample so the result spans the marker frames.
        an2 = analogs.transpose(1, 2, 0).reshape(n_analogs, -1)
        step = max(1, an2.shape[1] // n_rows)
        channel = an2[ch, ::step]
    # Use the file's label for the channel when one exists.
    analog_name = analog_labels[ch] if analog_labels else "Analog"
    df["Analog"] = channel[:n_rows]
    df.rename(columns={"Analog": analog_name}, inplace=True)

print(df.head())


## Detect gait events from vertical force using an adaptive threshold

In [None]:
import numpy as np

# First choice: the first column whose lower-cased name starts with "fz" or
# contains "force".
force_col = next(
    (
        name
        for name in df.columns
        if name.lower().startswith("fz") or "force" in name.lower()
    ),
    None,
)

# Second choice: the first column that is neither the time stamp nor the
# marker trace (covers an analog channel stored under a generic name).
if force_col is None:
    force_col = next(
        (name for name in df.columns if name not in ("Time", "Marker_Z")),
        None,
    )

if force_col is None:
    raise RuntimeError("No candidate force column found.")

y = df[force_col].values.astype(float)

# Adaptive threshold at 5 percent of the largest absolute excursion, which
# avoids any assumption about the channel's units.
thr = 0.05 * np.nanmax(np.abs(y))

# The threshold is built from |y|, so the comparison has to be sign-aware as
# well: when the largest excursion is negative (the channel goes negative
# under load) the signal is flipped before thresholding. y itself is left
# untouched because later cells plot and summarise the raw channel.
loading_is_negative = -np.nanmin(y) > np.nanmax(y)
load = -y if loading_is_negative else y
contact = load > thr

# Rising edges of the contact mask mark contact onsets, falling edges mark
# contact ends; +1 converts the edge position to the index of the first
# sample after the transition.
onsets = np.where((contact[1:] & ~contact[:-1]))[0] + 1
offsets = np.where((~contact[1:] & contact[:-1]))[0] + 1

# Event times are read from the Time column at those sample indices.
heel_strikes = df["Time"].values[onsets]
toe_offs = df["Time"].values[offsets]

print("Detected heel strikes, s:", heel_strikes[:10])
print("Detected toe offs, s:", toe_offs[:10])


## Compute metrics: relative peak force and stride time

In [None]:
import numpy as np

# Relative peak: largest absolute excursion of the channel divided by the mean
# absolute level of the samples that exceed the detection threshold. Dividing
# the peak by itself would always print 1.00, so the mean loaded level is used
# as the reference; the result is unitless and sign-independent.
peak_abs = np.nanmax(np.abs(y))
loaded = np.abs(y) > thr
reference = np.mean(np.abs(y[loaded])) if loaded.any() else 0.0
# A zero reference (flat channel, nothing above threshold) divides by 1.0.
peak_rel = peak_abs / (reference if reference != 0 else 1.0)
print(f"Relative peak of {force_col}: {peak_rel:.2f}")

# Stride time is the spacing between consecutive detected heel strikes, so at
# least two of them are required.
if heel_strikes.size > 1:
    stride_times = np.diff(heel_strikes)
    print(f"Average stride time, s: {np.mean(stride_times):.3f}")
else:
    print("Not enough heel strikes to compute stride time")


## Savitzky–Golay filter for marker smoothing, and Butterworth filter for force

In [None]:
from scipy.signal import savgol_filter, butter, filtfilt
import matplotlib.pyplot as plt

# Savitzky-Golay smoothing of the marker trace (11-sample window, cubic fit).
# savgol_filter's default mode needs window_length <= signal length, so a very
# short recording keeps the unsmoothed trace instead of raising.
marker_z = df["Marker_Z"].values
sg_window = 11
if marker_z.size >= sg_window:
    df["Marker_Z_sg"] = savgol_filter(marker_z, window_length=sg_window, polyorder=3)
else:
    print(f"Skipping Savitzky-Golay smoothing: fewer than {sg_window} samples.")
    df["Marker_Z_sg"] = marker_z

# Butterworth on force - only apply if force data exists
if 'y' in locals() and y is not None:
    force_cutoff_hz = 20.0
    nyquist_hz = point_rate / 2.0
    filt_col = f"{force_col}_filt"

    # butter() only accepts a cutoff strictly between 0 and the Nyquist
    # frequency; the force channel was resampled to the point rate, so that
    # rate defines Nyquist here.
    b = a = None
    if 0 < force_cutoff_hz < nyquist_hz:
        b, a = butter(4, force_cutoff_hz / nyquist_hz, btype="low")

    # filtfilt pads both ends with 3 * max(len(a), len(b)) samples by default
    # and needs a signal longer than that padding.
    if b is not None and y.size > 3 * max(len(a), len(b)):
        df[filt_col] = filtfilt(b, a, y)
    else:
        print("Skipping force low-pass: sampling rate or signal length too small; using raw values.")
        df[filt_col] = y

    plt.figure(figsize=(10,4))
    plt.plot(df["Time"], y, label=f"{force_col} raw")
    plt.plot(df["Time"], df[filt_col], label=f"{force_col} filt")
    plt.xlabel("Time s")
    plt.ylabel("Force")
    plt.title("Vertical force channel")
    plt.legend()
    plt.grid(True)
    plt.show()
else:
    print("Skipping force data filtering and plotting: No force data available.")


# Overlay the marker trace and its Savitzky-Golay version on one figure.
plt.figure(figsize=(10, 4))
marker_traces = (
    ("Marker_Z", "Marker_Z raw"),
    ("Marker_Z_sg", "Marker_Z sg"),
)
for column, legend_text in marker_traces:
    plt.plot(df["Time"], df[column], label=legend_text)
plt.xlabel("Time s")
plt.ylabel("Marker Z m")
plt.title("Marker vertical position")
plt.legend()
plt.grid(True)
plt.show()

## Segment cycles between heel strikes and compute average cycle for the force

In [None]:
import numpy as np

def normalise_cycle(sig, t, t0, t1, n=100):
    """Resample the part of ``sig`` recorded in ``[t0, t1)`` to ``n`` points.

    Args:
        sig: Signal samples, indexable by a boolean mask built from ``t``.
        t: Time stamp of each sample in ``sig``.
        t0: Start of the window (inclusive).
        t1: End of the window (exclusive).
        n: Number of evenly spaced points in the returned cycle.

    Returns:
        An array of ``n`` linearly interpolated values, or ``None`` when fewer
        than two samples fall inside the window.
    """
    in_window = np.logical_and(t >= t0, t < t1)
    if np.count_nonzero(in_window) < 2:
        return None
    segment = sig[in_window]
    # Both axes run over 0..1, so the segment is stretched or compressed onto
    # a common normalised cycle axis regardless of its duration.
    source_axis = np.linspace(0, 1, segment.size)
    target_axis = np.linspace(0, 1, n)
    return np.interp(target_axis, source_axis, segment)

# Look the two columns up once; every pair of consecutive heel strikes then
# defines one cycle, resampled to 101 points.
force_filtered = df[f"{force_col}_filt"].values
time_stamps = df["Time"].values
resampled = (
    normalise_cycle(force_filtered, time_stamps, start, stop, n=101)
    for start, stop in zip(heel_strikes[:-1], heel_strikes[1:])
)
# normalise_cycle returns None for windows with fewer than two samples; those
# are dropped.
cycles = [cycle for cycle in resampled if cycle is not None]

import matplotlib.pyplot as plt
if not cycles:
    print("No cycles available to plot. Check event detection.")
else:
    # Stack the cycles row-wise and average across them, ignoring NaNs.
    arr = np.vstack(cycles)
    mean_cycle = np.nanmean(arr, axis=0)
    percent_axis = np.linspace(0, 100, arr.shape[1])

    plt.figure(figsize=(8, 4))
    # At most the first ten individual cycles are drawn, faded, behind the
    # thicker mean curve.
    for single_cycle in arr[:10]:
        plt.plot(percent_axis, single_cycle, alpha=0.4)
    plt.plot(percent_axis, mean_cycle, linewidth=2)
    plt.xlabel("Gait cycle percent")
    plt.ylabel("Force")
    plt.title("Normalised force cycles and mean")
    plt.grid(True)
    plt.show()


## Summary

You detected events with an adaptive threshold, computed basic metrics, smoothed signals, and built mean cycles.  
Next, we will visualise more signals and build reports.
