In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Resolve repo root (works when running from malca/notebooks).
# HERE is the current working directory as an absolute, symlink-resolved
# path; it is the starting point for the upward repo-root search below.
HERE = Path.cwd().resolve()

def find_repo_root(start: Path) -> Path:
    """Locate the repository root at or above ``start``.

    The search visits ``start`` first and then each of its parents, nearest
    first. A directory qualifies when it contains both a ``pyproject.toml``
    entry and a ``malca`` sub-directory.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first qualifying directory, or ``start`` itself when no
        directory on the way up qualifies.
    """
    candidates = [start]
    candidates.extend(start.parents)
    matches = (
        folder
        for folder in candidates
        if (folder / "pyproject.toml").exists() and (folder / "malca").is_dir()
    )
    # Fall back to the starting directory when nothing matched.
    return next(matches, start)

# Summarise the sampling gaps of every light-curve CSV under
# <repo root>/input/skypatrol2: one row per file holding the file label, the
# number of rows read, and a high percentile of the gaps between consecutive
# (sorted) JD values.
REPO_ROOT = find_repo_root(HERE)
directory = REPO_ROOT / "input" / "skypatrol2"

if not directory.exists():
    raise FileNotFoundError(f"Missing directory: {directory}")

files = sorted(directory.glob("*.csv"))

results = []
for filepath in files:
    df = pd.read_csv(filepath, comment="#")  # Skip comment lines starting with #

    # Sort by JD and calculate gaps. Missing JD values are dropped first:
    # a NaN would otherwise sort last, put a NaN into the gaps, and turn the
    # whole percentile into NaN.
    jd_sorted = df["JD"].dropna().sort_values().values
    gaps = np.diff(jd_sorted)

    # NOTE: despite the "p99" name, this is the 99.73rd percentile (the
    # 3-sigma coverage of a normal distribution -- presumably intentional;
    # confirm). The output key is kept as-is for downstream compatibility.
    # Fewer than two valid JD values leave no gaps, so report NaN instead of
    # calling np.percentile on an empty array.
    p99 = np.percentile(gaps, 99.73) if gaps.size else np.nan
    results.append({
        "filename": filepath.stem.replace("-light-curves", ""),
        "n_points": len(df),  # rows read, including any with a missing JD
        "p99_gap_days": p99,  # presumably days, assuming JD is a Julian Date
    })

# Explicit columns keep the schema stable even when no CSV files were found.
results_df = pd.DataFrame(
    results,
    columns=["filename", "n_points", "p99_gap_days"],
)
results_df
