# C3D quick extractor for Colab
Markers, Analogs, Events, Metadata

This notebook will:

* install ezc3d

* let you upload a C3D file

* print a clean summary of metadata

* list marker names, analog channel names, and event labels

* export three CSVs: markers_wide.csv, analogs.csv, events.csv, plus metadata.json





In [None]:
# ================================
# C3D quick extractor for Colab
# Markers, Analogs, Events, Metadata
# ================================

# 1) Setup
!pip -q install ezc3d

import json
import numpy as np
import pandas as pd
import io, os, math
from google.colab import files
import ezc3d

# 2) Upload a C3D file
# files.upload() opens the Colab upload widget; the returned mapping is keyed
# by the uploaded file names, which are then used as paths below.
print("Upload your C3D file")
uploaded = files.upload()
if not uploaded:
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`.
    raise ValueError("No file uploaded")
# Only the first uploaded file is processed; any additional files are ignored.
c3d_path = next(iter(uploaded))
print("Loaded:", c3d_path)

# 3) Load with ezc3d
# `c3d` is read as a module-level global by get_param and the sections below.
c3d = ezc3d.c3d(c3d_path)

# 4) Helpers
def get_param(group, name, default=None):
    """Look up the ``value`` entry of a C3D parameter in the loaded file.

    Reads ``c3d["parameters"][group][name]["value"]`` from the module-level
    ``c3d`` object.

    Args:
        group: Parameter group key, e.g. ``"POINT"``.
        name: Parameter key inside the group, e.g. ``"LABELS"``.
        default: Value returned when any key on the lookup path is missing.

    Returns:
        The stored value, or ``default`` when the lookup raises ``KeyError``.
    """
    try:
        value = c3d["parameters"][group][name]["value"]
    except KeyError:
        value = default
    return value

def merge_labels(group):
    """Collect the labels of a parameter group from LABELS and LABELS2.

    Both parameters are fetched through ``get_param``; a value that is not a
    list, tuple or ndarray is ignored. Every entry is converted with ``str``.

    Args:
        group: Parameter group key, e.g. ``"POINT"`` or ``"ANALOG"``.

    Returns:
        A list of label strings in first-seen order, with empty strings and
        repeated labels removed.
    """
    candidates = []
    for key in ("LABELS", "LABELS2"):
        values = get_param(group, key, [])
        if isinstance(values, (list, tuple, np.ndarray)):
            candidates += map(str, values)
    # dict.fromkeys keeps the first occurrence of each label, in order.
    return [label for label in dict.fromkeys(candidates) if label]

# 5) Basic metadata
# Header values are cast to plain Python numbers so the metadata dict below
# can be written out with json.dump.
point_header = c3d["header"]["points"]
analog_header = c3d["header"]["analogs"]

first_frame = int(point_header["first_frame"])
last_frame = int(point_header["last_frame"])
n_frames = last_frame - first_frame + 1  # both bounds are inclusive
point_rate = float(point_header["frame_rate"])
n_points = int(point_header["size"])

analog_rate = float(analog_header["frame_rate"])
n_analogs = int(analog_header["size"])

# POINT:UNITS may come back as a sequence or as a bare value. Use len() rather
# than truthiness: `bool(ndarray)` raises for arrays with more than one element.
point_units = get_param("POINT", "UNITS", ["mm"])
if isinstance(point_units, (list, tuple, np.ndarray)):
    # An empty sequence means "unit not stored"; report it as an empty string
    # rather than the literal text "[]".
    point_units = str(point_units[0]) if len(point_units) else ""
else:
    point_units = str(point_units)

analog_units = get_param("ANALOG", "UNITS", [])
# analog units can be one per channel or a single string; keep as is,
# except that an ndarray is turned into a list so it stays JSON-serialisable.
if isinstance(analog_units, np.ndarray):
    analog_units = analog_units.tolist()

metadata = {
    "point_frame_rate": point_rate,
    "analog_frame_rate": analog_rate,
    "first_frame": first_frame,
    "last_frame": last_frame,
    "n_frames": n_frames,
    "n_points": n_points,
    "n_analogs": n_analogs,
    "point_units": point_units,
    "analog_units": analog_units,
}

# 6) Labels
# Label lists for both groups come from merge_labels (LABELS + LABELS2).
marker_labels = merge_labels("POINT")
analog_labels = merge_labels("ANALOG")

# Console overview of the header values and the label counts, one item per line.
print(
    "\n=== Summary ===",
    f"Point rate: {point_rate} Hz",
    f"Analog rate: {analog_rate} Hz",
    f"Frames: {first_frame}..{last_frame}  total {n_frames}",
    f"Markers: {len(marker_labels)}",
    f"Analogs: {len(analog_labels)}",
    f"Point units: {point_units}",
    sep="\n",
)

# 7) Events
# Builds `events`, a list of dicts with keys time_sec, frame, label, context.
events = []
if "EVENT" in c3d["parameters"]:
    # USED is normally a one-element array; ravel() also copes with a scalar,
    # and the size check avoids an IndexError on an empty value.
    ev_used_values = np.ravel(get_param("EVENT", "USED", [0]))
    ev_used = int(ev_used_values[0]) if ev_used_values.size else 0
    ev_labels = get_param("EVENT", "LABELS", [])
    ev_contexts = get_param("EVENT", "CONTEXTS", [])
    ev_times = get_param("EVENT", "TIMES", [])

    # The C3D format stores each event time as a (minutes, seconds) pair, so
    # TIMES is a 2 x N array: row 0 holds minutes, row 1 holds seconds.
    # The time in seconds is therefore minutes * 60 + seconds.
    if isinstance(ev_times, np.ndarray) and ev_times.ndim == 2:
        if ev_times.shape[0] >= 2:
            times_sec = (ev_times[0, :] * 60.0 + ev_times[1, :]).tolist()
        elif ev_times.shape[0] == 1:
            # Single row: nothing to combine, take the values as seconds.
            times_sec = ev_times[0, :].tolist()
        else:
            times_sec = []
    else:
        # fallback for a flat sequence of times
        times_sec = list(ev_times) if hasattr(ev_times, "__len__") else []

    # Only emit events for which a time and a label are both available.
    n_labels = len(ev_labels) if hasattr(ev_labels, "__len__") else 0
    n_events = min(ev_used, len(times_sec), n_labels)
    for i in range(n_events):
        t = float(times_sec[i])
        # CONTEXTS may be shorter than LABELS; missing entries become "".
        context = str(ev_contexts[i]) if i < len(ev_contexts) else ""
        # approximate event frame in point domain
        # NOTE(review): assumes event times are measured from the first frame
        # of this file - confirm against the exporting software.
        frame_index = first_frame + int(round(t * point_rate))
        events.append({
            "time_sec": t,
            "frame": frame_index,
            "label": str(ev_labels[i]),
            "context": context,
        })

print(f"Events: {len(events)}")
if events:
    print("Event labels:", sorted({e['label'] for e in events}))

# 8) Build markers wide DataFrame
# ezc3d exposes points as (4, n_points, n_frames): rows 0..2 are X, Y, Z and
# row 3 is the homogeneous coordinate. Residuals are stored separately in
# data["meta_points"]["residuals"].
pts = c3d["data"]["points"]  # ndarray
if pts.shape[0] < 3:
    raise ValueError("Point data does not have X Y Z rows")

# Take the sizes from the data itself so the frame column always matches the
# number of rows, even if the header frame range disagrees with the data.
n_markers, n_rows = pts.shape[1], pts.shape[2]

try:
    resid = np.asarray(c3d["data"]["meta_points"]["residuals"], dtype=float)
except (KeyError, TypeError, ValueError):
    resid = None
if resid is not None and resid.size == n_markers * n_rows:
    # Drop the leading singleton axis: (1, n_points, n_frames) -> (n_points, n_frames).
    resid = resid.reshape(n_markers, n_rows)
elif pts.shape[0] > 3:
    # Fallback kept from the original script for objects without meta_points.
    resid = pts[3]
else:
    resid = np.full((n_markers, n_rows), np.nan)

# If labels missing or inconsistent with the data, create generic names
if len(marker_labels) != n_markers:
    marker_labels = [f"Marker_{i+1}" for i in range(n_markers)]

# One X/Y/Z/Res column group per marker, in marker order.
columns = [
    f"{name}_{suffix}"
    for name in marker_labels
    for suffix in ("X", "Y", "Z", "Res")
]

# (4, markers, frames) -> (frames, markers, 4) -> (frames, markers * 4); the
# C-order reshape yields X, Y, Z, Res for marker 0, then marker 1, and so on.
# Unlike np.column_stack on a list of columns, this also works with 0 markers.
stacked = np.concatenate([pts[:3], resid[np.newaxis, :, :]], axis=0)
wide = stacked.transpose(2, 1, 0).reshape(n_rows, 4 * n_markers)

markers_df = pd.DataFrame(wide, columns=columns)
markers_df.insert(0, "frame", np.arange(first_frame, first_frame + n_rows, dtype=int))
# Time is measured from the first frame of the file.
markers_df.insert(1, "time_sec", (markers_df["frame"] - first_frame) / point_rate)

# 9) Build analog DataFrame
# ezc3d exposes analogs as (1, n_channels, n_samples), where n_samples is
# n_frames * analog samples per frame. The result is one row per analog sample
# and one column per channel.
an = c3d["data"]["analogs"]
if an.ndim == 3 and an.shape[0] == 1:
    # Drop the singleton axis and put samples on the rows.
    an_full = an[0].T  # shape (samples, channels)
    n_an = an_full.shape[1]
    analog_rate_calc = analog_rate
elif an.ndim == 3:
    # Layout assumed by the original script: (channels, subframes, frames).
    # NOTE(review): not a layout ezc3d is known to produce - kept as best effort.
    n_an, n_sub, n_fr = an.shape
    an_full = an.transpose(2, 1, 0).reshape(n_fr * n_sub, n_an)  # shape (samples, channels)
    analog_rate_calc = point_rate * n_sub if point_rate > 0 else analog_rate
else:
    # Fallback, treat as (channels, samples)
    n_an = an.shape[0]
    an_full = an.T
    analog_rate_calc = analog_rate

# Fix analog labels length
if not analog_labels or len(analog_labels) != n_an:
    analog_labels = [f"Analog_{i+1}" for i in range(n_an)]

analogs_df = pd.DataFrame(an_full, columns=analog_labels)
analogs_df.insert(0, "sample", np.arange(len(analogs_df), dtype=int))
# Without a positive sampling rate no time axis can be derived; use NaN
# instead of dividing by zero.
if analog_rate_calc > 0:
    analog_time = analogs_df["sample"] / float(analog_rate_calc)
else:
    analog_time = np.full(len(analogs_df), np.nan)
analogs_df.insert(1, "time_sec", analog_time)

# 10) Events DataFrame
# With no events, still produce an empty table that carries the expected columns.
if events:
    events_df = pd.DataFrame(events)
else:
    events_df = pd.DataFrame(columns=["time_sec", "frame", "label", "context"])

# 11) Save outputs
# Output file names; they are reused later for the summary and the downloads.
markers_csv, analogs_csv, events_csv = "markers_wide.csv", "analogs.csv", "events.csv"
meta_json = "metadata.json"

# Write each table without the pandas index column.
for _table, _target in (
    (markers_df, markers_csv),
    (analogs_df, analogs_csv),
    (events_df, events_csv),
):
    _table.to_csv(_target, index=False)

with open(meta_json, "w") as f:
    json.dump(metadata, f, indent=2)

print("\nSaved files:")
print(markers_csv, analogs_csv, events_csv, meta_json)

# 12) Also print concise lists
# Marker and analog names are printed as comma-separated lines under a heading.
for _heading, _names in (
    ("\nMarker names:", marker_labels),
    ("\nAnalog channel names:", analog_labels),
):
    print(_heading)
    print(", ".join(_names))

# Event labels are listed once each, sorted; missing labels are skipped.
if events_df.empty:
    print("\nNo events found")
else:
    print("\nEvent labels:")
    unique_event_labels = events_df["label"].dropna().unique()
    print(", ".join(sorted(unique_event_labels)))

# 13) Offer downloads
# Hand each saved file to the Colab download helper, in the order written.
for _saved in (markers_csv, analogs_csv, events_csv, meta_json):
    files.download(_saved)
