In [14]:
import pandas as pd
import numpy as np

# ------------------------------------------------------------
# Drop detection utilities
# ------------------------------------------------------------

def detect_missing_indices(df: pd.DataFrame, fps: float, tolerance_s: float, ts_col="Timestamp") -> list[int]:
    expected_dt = 1.0 / fps
    df = df.copy()
    df[ts_col] = pd.to_datetime(df[ts_col], utc=True)
    df = df.sort_values(ts_col).reset_index(drop=True)

    dt = df[ts_col].diff().dt.total_seconds()
    n_dropped_est = np.round(dt / expected_dt - 1).clip(lower=0).astype("Int64")

    drop_events = df[n_dropped_est >= 1]
    missing = []
    for i, k in zip(
        drop_events.index.to_list(),
        n_dropped_est.loc[drop_events.index].astype(int).to_list()
    ):
        missing.extend(range(i + 1, i + 1 + k))

    return missing


def insert_dropped_rows(
    df: pd.DataFrame,
    fps: float,
    missing_idx: list[int],
    ts_col="Timestamp",
):
    df = df.copy()
    df = df.sort_values(ts_col).reset_index(drop=True)
    df["recorded_idx"] = np.arange(len(df), dtype=int)

    expected_dt = pd.to_timedelta(1.0 / fps, unit="s")
    missing_set = set(missing_idx)

    rows = []
    for i in range(len(df)):
        rows.append({
            "global_idx": len(rows),
            "recorded_idx": i,
            ts_col: df.loc[i, ts_col],
            "is_dropped": False,
        })

        if (i + 1) in missing_set:
            k = 0
            j = i + 1
            while j in missing_set:
                k += 1
                j += 1

            last_ts = df.loc[i, ts_col]
            for kk in range(k):
                rows.append({
                    "global_idx": len(rows),
                    "recorded_idx": np.nan,
                    ts_col: last_ts + (kk + 1) * expected_dt,
                    "is_dropped": True,
                })

    filled = pd.DataFrame(rows)

    non_drop = filled["is_dropped"] == False
    filled.loc[non_drop, df.columns.difference([ts_col], sort=False)] = df.loc[
        filled.loc[non_drop, "recorded_idx"].astype(int).values,
        df.columns.difference([ts_col], sort=False)
    ].to_numpy()

    return filled


# ------------------------------------------------------------
# Paths / parameters
# ------------------------------------------------------------

beh_path = r"C:\Users\psych-aalab\Desktop\zenon_frametest\20251028\beh-cam_frame-id_0.csv"
neu_path = r"C:\Users\psych-aalab\Desktop\zenon_frametest\20251028\miniscope_frame-id_0.csv"

fps = 30
tolerance_s = 0.002


# ------------------------------------------------------------
# Load behavior + detect drops
# ------------------------------------------------------------

beh = pd.read_csv(beh_path)
beh["Timestamp"] = pd.to_datetime(beh["Timestamp"], utc=True)

beh_missing = detect_missing_indices(
    beh,
    fps=fps,
    tolerance_s=tolerance_s,
    ts_col="Timestamp",
)

beh_filled = insert_dropped_rows(
    beh,
    fps=fps,
    missing_idx=beh_missing,
    ts_col="Timestamp",
)


# ------------------------------------------------------------
# Load neural + detect drops
# ------------------------------------------------------------

neu = pd.read_csv(neu_path)
neu["Timestamp"] = pd.to_datetime(neu["Timestamp"], utc=True)

neu_missing = detect_missing_indices(
    neu,
    fps=fps,
    tolerance_s=tolerance_s,
    ts_col="Timestamp",
)

neu_filled = insert_dropped_rows(
    neu,
    fps=fps,
    missing_idx=neu_missing,
    ts_col="Timestamp",
)


# ------------------------------------------------------------
# Align by global_idx (side-by-side timestamps + NaNs)
# ------------------------------------------------------------

aligned = pd.merge_asof(
    neu_filled.sort_values("Timestamp").rename(columns={
        "Timestamp": "neu_ts",
        "recorded_idx": "neu_recorded_idx",
        "is_dropped": "neu_dropped",
    }),
    beh_filled.sort_values("Timestamp")[["Timestamp", "recorded_idx", "is_dropped"]].rename(columns={
        "Timestamp": "beh_ts",
        "recorded_idx": "beh_recorded_idx",
        "is_dropped": "beh_dropped",
    }),
    left_on="neu_ts",
    right_on="beh_ts",
    direction="nearest",
    tolerance=pd.Timedelta(milliseconds=20),
)


print("Behavior dropped frames (est):", len(beh_missing))
print("Neural dropped frames (est):", len(neu_missing))
print(aligned.head(20))

aligned.to_csv("aligned_frames.csv", index=False)


Behavior dropped frames (est): 8
Neural dropped frames (est): 12
    global_idx  neu_recorded_idx                              neu_ts  \
0            0               0.0    2025-10-28 21:03:43.801088+00:00   
1            1               1.0 2025-10-28 21:03:43.828211200+00:00   
2            2               2.0 2025-10-28 21:03:43.860979200+00:00   
3            3               3.0    2025-10-28 21:03:43.894400+00:00   
4            4               4.0 2025-10-28 21:03:43.927180800+00:00   
5            5               5.0 2025-10-28 21:03:43.960332800+00:00   
6            6               6.0 2025-10-28 21:03:43.993779200+00:00   
7            7               7.0 2025-10-28 21:03:44.026483200+00:00   
8            8               8.0 2025-10-28 21:03:44.059724800+00:00   
9            9               9.0 2025-10-28 21:03:44.092838400+00:00   
10          10              10.0 2025-10-28 21:03:44.125798400+00:00   
11          11              11.0 2025-10-28 21:03:44.158310400+00:00   

In [15]:
import os
print(os.getcwd())

c:\Users\psych-aalab\Documents\GitHub\1p_pipeline


In [16]:
git rev-parse --show-toplevel

SyntaxError: invalid syntax (2282355244.py, line 1)