In [None]:
import pandas as pd
import numpy as np

# ------------------------------------------------------------
# Drop detection utilities
# ------------------------------------------------------------

def compute_global_idx(ts: pd.Series, t0: pd.Timestamp, fps: float) -> pd.Series:
    """
    Map each timestamp to an integer frame index counted from ``t0``.

    The elapsed time ``ts - t0`` is expressed in seconds, scaled by ``fps``
    and rounded down, so a timestamp anywhere inside a frame period maps to
    the index of that period.

    Parameters
    ----------
    ts : pd.Series
        Datetime-like series (the ``.dt`` accessor is used on ``ts - t0``).
    t0 : pd.Timestamp
        Reference instant that corresponds to index 0.
    fps : float
        Frame rate used to convert seconds into frame counts.

    Returns
    -------
    pd.Series
        Integer frame indices, aligned with ``ts``.
    """
    elapsed_s = (ts - t0).dt.total_seconds()
    frame_position = elapsed_s * fps
    return np.floor(frame_position).astype(int)


def detect_missing_indices(
    df: pd.DataFrame,
    fps: float,
    tolerance_s: float,
    ts_col="Timestamp"
) -> list[int]:
    """
    Estimate which frames were dropped from gaps between consecutive timestamps.

    Timestamps are parsed as UTC and sorted. Whenever the interval to the
    previous recorded frame exceeds 1.5 nominal frame periods, the gap is
    assumed to hide ``round(interval / period - 1)`` dropped frames.

    Parameters
    ----------
    df : pd.DataFrame
        Recorded frames; must contain ``ts_col``. The input is not modified.
    fps : float
        Nominal frame rate; the expected period is ``1 / fps`` seconds.
    tolerance_s : float
        Accepted for interface compatibility. The detection rule below does
        not use it.
    ts_col : str
        Name of the timestamp column.

    Returns
    -------
    list[int]
        Positions of the dropped frames in the gap-filled sequence, i.e. the
        sequence obtained once every dropped frame is put back between the
        recorded ones. Positions are unique and increasing, so ``len()`` of
        the result is the estimated number of dropped frames and the list can
        be passed directly to ``insert_dropped_rows``.
    """
    expected_dt = 1.0 / fps
    stamps = (
        pd.to_datetime(df[ts_col], utc=True)
        .sort_values()
        .reset_index(drop=True)
    )

    # dt[i] is the interval *ending* at sorted row i, so a long interval means
    # frames are missing between row i-1 and row i.
    dt = stamps.diff().dt.total_seconds()

    # The first interval is NaN; the comparison is False there, so 0 is chosen
    # and the integer cast never sees a NaN.
    n_dropped_est = np.where(
        dt > expected_dt * 1.5,
        np.round(dt / expected_dt - 1),
        0,
    ).astype(int)

    missing: list[int] = []
    dropped_so_far = 0
    for row in np.flatnonzero(n_dropped_est >= 1):
        k = int(n_dropped_est[row])
        # Recorded row `row` is preceded by `row` recorded frames plus every
        # frame dropped earlier, so the dropped frames of this gap start here.
        first = int(row) + dropped_so_far
        missing.extend(range(first, first + k))
        dropped_so_far += k

    return missing


def insert_dropped_rows(
    df: pd.DataFrame,
    fps: float,
    missing_idx: list[int],
    t0: pd.Timestamp,
    ts_col="Timestamp",
):
    """
    Rebuild the full frame sequence by adding placeholder rows for dropped frames.

    Parameters
    ----------
    df : pd.DataFrame
        Recorded frames. ``df[ts_col]`` must support adding a ``Timedelta``
        (it is not parsed here). The input is not modified.
    fps : float
        Nominal frame rate; placeholders are spaced ``1 / fps`` seconds apart.
    missing_idx : list[int]
        Positions of dropped frames in the gap-filled sequence, as returned by
        ``detect_missing_indices``. Positions that fall after the last
        recorded frame are ignored.
    t0 : pd.Timestamp
        Accepted for interface compatibility. It is not used: ``global_idx``
        is the row position within this stream, not an offset from ``t0``.
    ts_col : str
        Name of the timestamp column, also used for the output column.

    Returns
    -------
    pd.DataFrame
        One row per frame (recorded or placeholder), ordered by timestamp,
        with columns ``ts_col``, ``recorded_idx`` (NaN for placeholders),
        ``is_dropped``, ``global_idx`` and every other column of ``df``
        (filled only on recorded rows).
    """
    df = df.sort_values(ts_col).reset_index(drop=True)
    df["recorded_idx"] = np.arange(len(df), dtype=int)

    expected_dt = pd.to_timedelta(1.0 / fps, unit="s")
    missing_set = set(missing_idx)
    stamps = df[ts_col].tolist()

    rows = []
    pos = 0  # position in the gap-filled sequence

    for i, ts in enumerate(stamps):
        # Count the dropped frames that sit directly before recorded row i.
        gap = 0
        while pos in missing_set:
            gap += 1
            pos += 1

        # synthetic dropped frames
        for j in range(gap):
            if i > 0:
                # step forward from the last recorded frame before the gap
                synthetic_ts = stamps[i - 1] + (j + 1) * expected_dt
            else:
                # nothing recorded before the gap: step back from the first frame
                synthetic_ts = ts - (gap - j) * expected_dt
            rows.append({
                ts_col: synthetic_ts,
                "recorded_idx": np.nan,
                "is_dropped": True,
            })

        # real frame
        rows.append({
            ts_col: ts,
            "recorded_idx": i,
            "is_dropped": False,
        })
        pos += 1

    filled = pd.DataFrame(rows, columns=[ts_col, "recorded_idx", "is_dropped"])

    # Global time ordering. The rows are already in order when missing_idx
    # comes from detect_missing_indices; the stable sort only matters for
    # hand-built index lists whose placeholders overshoot the next real frame.
    filled = filled.sort_values(ts_col, kind="stable").reset_index(drop=True)
    filled["global_idx"] = np.arange(len(filled), dtype=int)

    # copy original data into non-dropped rows
    recorded = ~filled["is_dropped"].astype(bool)
    source_rows = filled.loc[recorded, "recorded_idx"].astype(int).values

    for col in df.columns:
        if col == ts_col or col == "recorded_idx":
            continue  # already present in `filled`
        filled.loc[recorded, col] = df.loc[source_rows, col].values

    return filled
# ------------------------------------------------------------
# Paths / parameters
# ------------------------------------------------------------

# Frame-id logs of the two cameras (behaviour camera and miniscope).
beh_path = r"C:\Users\psych-aalab\Desktop\zenon_frametest\20251028\beh-cam_frame-id_0.csv"
neu_path = r"C:\Users\psych-aalab\Desktop\zenon_frametest\20251028\miniscope_frame-id_0.csv"

fps = 30             # nominal frame rate passed to both helpers
tolerance_s = 0.002  # forwarded to detect_missing_indices


# ------------------------------------------------------------
# Load data
# ------------------------------------------------------------

# Parse the timestamps as timezone-aware UTC right after reading each file.
beh = pd.read_csv(beh_path).assign(
    Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"], utc=True)
)
neu = pd.read_csv(neu_path).assign(
    Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"], utc=True)
)

# Earliest timestamp seen in either stream.
t0 = min(stream["Timestamp"].min() for stream in (beh, neu))
print("Global t0:", t0)

# Per stream: estimate the dropped frames, then rebuild the full sequence.
beh_missing = detect_missing_indices(beh, fps, tolerance_s)
beh_filled = insert_dropped_rows(beh, fps, beh_missing, t0)

neu_missing = detect_missing_indices(neu, fps, tolerance_s)
neu_filled = insert_dropped_rows(neu, fps, neu_missing, t0)


# ------------------------------------------------------------
# Align streams
# ------------------------------------------------------------

# Outer join on global_idx keeps frames that exist in only one stream;
# columns shared by both streams get the _neu / _beh suffixes.
merged = neu_filled.merge(
    beh_filled,
    on="global_idx",
    how="outer",
    suffixes=("_neu", "_beh"),
)
aligned = merged.sort_values("global_idx").reset_index(drop=True)

print("Behavior dropped frames (est):", len(beh_missing))
print("Neural dropped frames (est):", len(neu_missing))
print(aligned.head(20))

aligned.to_csv("aligned_frames.csv", index=False)

Behavior dropped frames (est): 8
Neural dropped frames (est): 12
    global_idx  recorded_idx_neu                       Timestamp_neu  \
0            0               0.0    2025-10-28 21:03:43.801088+00:00   
1            1               1.0 2025-10-28 21:03:43.828211200+00:00   
2            2               2.0 2025-10-28 21:03:43.860979200+00:00   
3            3               3.0    2025-10-28 21:03:43.894400+00:00   
4            4               4.0 2025-10-28 21:03:43.927180800+00:00   
5            5               5.0 2025-10-28 21:03:43.960332800+00:00   
6            6               6.0 2025-10-28 21:03:43.993779200+00:00   
7            7               7.0 2025-10-28 21:03:44.026483200+00:00   
8            8               8.0 2025-10-28 21:03:44.059724800+00:00   
9            9               9.0 2025-10-28 21:03:44.092838400+00:00   
10          10              10.0 2025-10-28 21:03:44.125798400+00:00   
11          11              11.0 2025-10-28 21:03:44.158310400+00:00   

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(12, 4))

# One trace per stream. Rows with no flag for a stream (NaN after the outer
# merge) are drawn as 0, i.e. "recorded".
for flag_col, trace_label in (
    ("is_dropped_neu", "Neural dropped"),
    ("is_dropped_beh", "Behavior dropped"),
):
    drop_flags = aligned[flag_col].fillna(0).astype(int)
    ax.plot(aligned.index, drop_flags, label=trace_label, alpha=0.7)

ax.set_xlabel('Global frame index')
ax.set_ylabel('Dropped (1) / Recorded (0)')
ax.set_title('Dropped frames over time')
ax.legend()
plt.show()