In [None]:
from __future__ import annotations

from pathlib import Path
import numpy as np
import pandas as pd

# ----------------------------
# INPUTS (your paths)
# ----------------------------
# Per-frame timestamp logs of the two streams. The result CSVs are written to
# the folder that contains beh_path.
beh_path = r"C:\Users\psych-aalab\Desktop\zenon_frametest\20251028\beh-cam_frame-id_0.csv"
neu_path = r"C:\Users\psych-aalab\Desktop\zenon_frametest\20251028\miniscope_frame-id_0.csv"

# Column names (change if your CSV uses different names)
BEH_TS_COL = "Timestamp"
NEU_TS_COL = "Timestamp"
# NOTE(review): the two *_IDX_COL settings below are not referenced by any code
# visible in this notebook; the per-stream "frame" column is generated from row
# order instead. Confirm whether they are still needed.
BEH_IDX_COL = "recorded_idx"   # set to None if not present
NEU_IDX_COL = "recorded_idx"   # set to None if not present

# Rate (frames per second) of the global timeline both streams are aligned to.
FPS = 30.0
# Maximum |stream timestamp - global timestamp| for a frame to count as a match.
TOLERANCE_MS = 20.0  # widen to 35-50ms if you see too many NaNs

# ----------------------------
# Helpers
# ----------------------------
def read_and_prep(csv_path: str, ts_col: str, name: str) -> pd.DataFrame:
    """Load one stream's frame log and return it time-sorted with a frame index.

    Args:
        csv_path: Location of the CSV file (anything ``pandas.read_csv`` accepts).
        ts_col: Name of the column holding the frame timestamps.
        name: Human-readable stream label; only used in the error message.

    Returns:
        A DataFrame sorted ascending by ``ts_col`` (parsed to timezone-aware
        UTC datetimes), containing one row per distinct timestamp, plus a
        ``"frame"`` column numbering the remaining rows ``0..n-1`` in that
        order. An existing ``"frame"`` column in the CSV is overwritten.

    Raises:
        ValueError: If ``ts_col`` is not a column of the CSV. Timestamps that
            cannot be parsed also raise, because parsing uses
            ``errors="raise"``.
    """
    df = pd.read_csv(csv_path)
    if ts_col not in df.columns:
        raise ValueError(f"{name}: missing timestamp column {ts_col!r}. Columns: {list(df.columns)}")

    df[ts_col] = pd.to_datetime(df[ts_col], utc=True, errors="raise")

    # The sort must be stable: rows sharing a timestamp then stay in file
    # order, so keep="first" always retains the row that appeared first in the
    # CSV. The default quicksort gives no such guarantee for ties.
    df = (
        df.sort_values(ts_col, kind="mergesort")
        .drop_duplicates(subset=[ts_col], keep="first")
        .reset_index(drop=True)
    )

    # Per-stream frame index: position in the de-duplicated, time-sorted log.
    df["frame"] = np.arange(len(df), dtype=np.int64)

    return df


def build_global_timeline(
    beh: pd.DataFrame, neu: pd.DataFrame, beh_ts_col: str, neu_ts_col: str, fps: float
) -> pd.DataFrame:
    dt = pd.to_timedelta(1 / fps, unit="s")  # 33.333...ms

    start = min(beh[beh_ts_col].iloc[0], neu[neu_ts_col].iloc[0])
    end = max(beh[beh_ts_col].iloc[-1], neu[neu_ts_col].iloc[-1])

    n = int(np.floor((end - start) / dt)) + 1
    # Build timestamps by stepping dt in integer nanoseconds (stable)
    step_ns = int(dt / pd.Timedelta("1ns"))
    global_ts = start + pd.to_timedelta(np.arange(n, dtype=np.int64) * step_ns, unit="ns")

    return pd.DataFrame({"global_idx": np.arange(n, dtype=np.int64), "global_ts": global_ts})


def merge_onto_timeline(
    timeline: pd.DataFrame,
    stream: pd.DataFrame,
    tl_ts_col: str,
    stream_ts_col: str,
    prefix: str,
    tolerance: pd.Timedelta,
) -> pd.DataFrame:
    """Attach to every timeline row the nearest-in-time row of ``stream``.

    Every stream column is renamed to ``<prefix>_<column>``, except the
    timestamp column, which becomes ``<prefix>_ts``. The match is a
    ``pandas.merge_asof`` with ``direction="nearest"``: a timeline row whose
    nearest stream timestamp is farther away than ``tolerance`` gets missing
    values in the stream columns. ``stream`` itself is not modified.

    Args:
        timeline: Left table; keeps all of its rows.
        stream: Right table holding the frames to attach.
        tl_ts_col: Timestamp column of ``timeline``.
        stream_ts_col: Timestamp column of ``stream``.
        prefix: Prefix given to the stream's columns in the result.
        tolerance: Largest accepted time distance for a match.

    Returns:
        ``timeline`` sorted by ``tl_ts_col`` with the prefixed stream columns appended.
    """
    stream_key = f"{prefix}_ts"

    # Prefix the payload columns first, then give the timestamp its fixed name.
    prefixed = stream.rename(
        columns=lambda col: col if col == stream_ts_col else f"{prefix}_{col}"
    )
    prefixed = prefixed.rename(columns={stream_ts_col: stream_key})

    # merge_asof requires both sides to be sorted on their keys.
    left = timeline.sort_values(tl_ts_col)
    right = prefixed.sort_values(stream_key)

    return pd.merge_asof(
        left,
        right,
        left_on=tl_ts_col,
        right_on=stream_key,
        tolerance=tolerance,
        direction="nearest",
    )


# ----------------------------
# Main
# ----------------------------
# Load both frame logs (timestamps parsed, sorted, de-duplicated, frame-indexed).
beh = read_and_prep(csv_path=beh_path, ts_col=BEH_TS_COL, name="behavior")
neu = read_and_prep(csv_path=neu_path, ts_col=NEU_TS_COL, name="neural")

# Fixed-rate timeline covering the full span of both streams.
timeline = build_global_timeline(
    beh=beh, neu=neu, beh_ts_col=BEH_TS_COL, neu_ts_col=NEU_TS_COL, fps=FPS
)

tol = pd.Timedelta(milliseconds=TOLERANCE_MS)

# Attach behavior first, then neural, so the beh_* columns precede the neu_* ones.
aligned = merge_onto_timeline(
    timeline, beh, tl_ts_col="global_ts", stream_ts_col=BEH_TS_COL, prefix="beh", tolerance=tol
)
aligned = merge_onto_timeline(
    aligned, neu, tl_ts_col="global_ts", stream_ts_col=NEU_TS_COL, prefix="neu", tolerance=tol
)

# Diagnostics: signed time error in ms (stream timestamp minus global timestamp)
# and a flag per stream telling whether the global slot received a frame.
aligned = aligned.assign(
    beh_time_error_ms=(aligned["beh_ts"] - aligned["global_ts"]).dt.total_seconds().mul(1000),
    neu_time_error_ms=(aligned["neu_ts"] - aligned["global_ts"]).dt.total_seconds().mul(1000),
    beh_matched=aligned["beh_ts"].notna(),
    neu_matched=aligned["neu_ts"].notna(),
)

def summarize_unmatched_runs(df: pd.DataFrame, matched_col: str, label: str, max_show: int = 20) -> None:
    """Print the contiguous stretches of rows whose ``matched_col`` is false.

    Rows with a missing flag are treated as unmatched. When at least one row is
    unmatched, a header line with the totals is printed, followed by a table of
    runs (first/last ``global_idx``, first/last ``global_ts``, length ``n``)
    ordered longest first, then by start index; at most ``max_show`` runs are shown.

    Args:
        df: Table containing ``global_idx``, ``global_ts`` and ``matched_col``.
        matched_col: Name of the boolean match-flag column.
        label: Text printed in front of the report.
        max_show: Maximum number of runs to print.
    """
    unmatched = ~df[matched_col].fillna(False)
    n_unmatched = int(unmatched.sum())
    if n_unmatched == 0:
        print(f"{label}: no unmatched frames")
        return

    # A new run id starts wherever the unmatched flag flips; the row before the
    # first one is treated as matched.
    flips = unmatched.ne(unmatched.shift(fill_value=False))
    run_labels = flips.cumsum()

    gaps = df.loc[unmatched, ["global_idx", "global_ts"]].assign(
        run_id=run_labels[unmatched].to_numpy()
    )
    per_run = gaps.groupby("run_id").agg(
        start_idx=("global_idx", "min"),
        end_idx=("global_idx", "max"),
        start_ts=("global_ts", "min"),
        end_ts=("global_ts", "max"),
        n=("global_idx", "size"),
    )
    runs = per_run.sort_values(by=["n", "start_idx"], ascending=[False, True])

    print(f"\n{label}: {n_unmatched} unmatched frames across {len(runs)} runs")
    print(runs.head(max_show).to_string())

# Report where each stream has gaps on the global timeline.
summarize_unmatched_runs(aligned, "beh_matched", "BEHAVIOR")
summarize_unmatched_runs(aligned, "neu_matched", "NEURAL")

# Summary: share of global slots that received a frame from each stream.
total = len(aligned)
beh_rate = aligned["beh_matched"].mean() * 100
neu_rate = aligned["neu_matched"].mean() * 100

print(f"Global frames: {total}")
print(f"Behavior matched: {beh_rate:.2f}% | Neural matched: {neu_rate:.2f}%")

# Signed time error (stream timestamp minus global timestamp), matched slots only.
if aligned["beh_matched"].any():
    m = aligned.loc[aligned["beh_matched"], "beh_time_error_ms"]
    print(f"Behavior time error (ms): median={m.median():.3f} p95={m.quantile(0.95):.3f} p99={m.quantile(0.99):.3f}")
if aligned["neu_matched"].any():
    m = aligned.loc[aligned["neu_matched"], "neu_time_error_ms"]
    print(f"Neural time error (ms):   median={m.median():.3f} p95={m.quantile(0.95):.3f} p99={m.quantile(0.99):.3f}")

# Save output next to the input files. The rate in the file name follows FPS
# (FPS = 30.0 gives "aligned_global_30fps.csv") so it cannot go stale when the
# timeline rate is changed.
out_dir = Path(beh_path).parent
out_csv = out_dir / f"aligned_global_{FPS:g}fps.csv"
aligned.to_csv(out_csv, index=False)
print(f"Wrote: {out_csv}")

# ----------------------------
# Frame map: global_ts -> (neu_frame, beh_frame)
# ----------------------------
frame_map = aligned[[
    "global_ts",
    "neu_frame",
    "beh_frame",
]].copy()

# Unmatched slots hold NaN after the merge, which turns the integer frame
# numbers into floats (written as e.g. "12.0"). The nullable Int64 dtype keeps
# them integral and writes unmatched slots as empty cells.
frame_map = frame_map.astype({"neu_frame": "Int64", "beh_frame": "Int64"})

out_map = out_dir / "global_to_neu_beh_frame_map.csv"
frame_map.to_csv(out_map, index=False)
print(f"Wrote: {out_map}")

print(frame_map.head(12))
print(frame_map.isna().sum())

Behavior dropped frames (est): 8
Neural dropped frames (est): 12
    global_idx  recorded_idx_neu                       Timestamp_neu  \
0            0               0.0    2025-10-28 21:03:43.801088+00:00   
1            1               1.0 2025-10-28 21:03:43.828211200+00:00   
2            2               2.0 2025-10-28 21:03:43.860979200+00:00   
3            3               3.0    2025-10-28 21:03:43.894400+00:00   
4            4               4.0 2025-10-28 21:03:43.927180800+00:00   
5            5               5.0 2025-10-28 21:03:43.960332800+00:00   
6            6               6.0 2025-10-28 21:03:43.993779200+00:00   
7            7               7.0 2025-10-28 21:03:44.026483200+00:00   
8            8               8.0 2025-10-28 21:03:44.059724800+00:00   
9            9               9.0 2025-10-28 21:03:44.092838400+00:00   
10          10              10.0 2025-10-28 21:03:44.125798400+00:00   
11          11              11.0 2025-10-28 21:03:44.158310400+00:00   

In [None]:
import matplotlib.pyplot as plt

# `aligned` carries no is_dropped_* columns; a global slot counts as dropped for
# a stream when no frame of that stream was matched to it (its *_matched flag
# is False).
neu_dropped = (~aligned["neu_matched"].astype(bool)).astype(int)
beh_dropped = (~aligned["beh_matched"].astype(bool)).astype(int)

plt.figure(figsize=(12, 4))
plt.plot(aligned["global_idx"], neu_dropped, label='Neural dropped', alpha=0.7)
plt.plot(aligned["global_idx"], beh_dropped, label='Behavior dropped', alpha=0.7)
plt.xlabel('Global frame index')
plt.ylabel('Dropped (1) / Recorded (0)')
plt.title('Dropped frames over time')
plt.legend()
plt.show()