In [3]:
import pandas as pd
import numpy as np
from pathlib import Path

# Absolute path of the current working directory; the upward search for the
# repo root starts here (e.g. when the notebook runs from malca/notebooks).
HERE = Path.cwd().resolve()


def find_repo_root(start: Path) -> Path:
    """Return the closest directory at or above *start* that looks like the repo root.

    A directory qualifies when it contains a ``pyproject.toml`` entry and a
    ``malca`` sub-directory. Candidates are checked from *start* upwards; if
    none qualifies, *start* itself is returned unchanged.
    """
    candidates = [start]
    candidates.extend(start.parents)
    matches = (
        folder
        for folder in candidates
        if (folder / "pyproject.toml").exists() and (folder / "malca").is_dir()
    )
    # The generator is lazy, so the first match wins, as in a loop with early return.
    return next(matches, start)

REPO_ROOT = find_repo_root(HERE)
directory = REPO_ROOT / "input" / "skypatrol2"

# is_dir() also rejects a regular file that happens to sit at this path.
if not directory.is_dir():
    raise FileNotFoundError(f"Missing directory: {directory}")

files = sorted(directory.glob("*.csv"))

# Percentile of the gap distribution reported for each light curve.
# NOTE(review): 99.73 looks like the two-sided 3-sigma coverage of a normal
# distribution rather than a plain 99th percentile -- confirm intent. The
# "p99_gap_days" column name is kept so consumers of the table are unaffected.
GAP_PERCENTILE = 99.73

RESULT_COLUMNS = ["filename", "n_points", "p99_gap_days"]


def _gap_percentile(jd, q=GAP_PERCENTILE):
    """Return the q-th percentile of the gaps between consecutive sorted JD values.

    Parameters
    ----------
    jd : pandas.Series
        Observation timestamps (the ``JD`` column of one light curve).
    q : float
        Percentile in the range [0, 100].

    Returns
    -------
    float
        The requested percentile of the gaps, or NaN when fewer than two
        non-missing timestamps are available (no gap can be formed).
    """
    # Drop missing timestamps first: one NaN would otherwise propagate
    # through np.diff and turn the whole percentile into NaN.
    jd_sorted = jd.dropna().sort_values().values
    if jd_sorted.size < 2:
        # np.percentile on an empty array is not meaningful (it raises or
        # warns depending on the NumPy version), so report NaN explicitly.
        return np.nan
    return np.percentile(np.diff(jd_sorted), q)


results = []
for filepath in files:
    df = pd.read_csv(filepath, comment="#")  # Skip comment lines starting with #

    results.append({
        "filename": filepath.stem.replace("-light-curves", ""),
        "n_points": len(df),
        "p99_gap_days": _gap_percentile(df["JD"]),
    })

# Passing the columns explicitly keeps the table schema even when no CSV
# files were found (an empty list would otherwise give a column-less frame).
results_df = pd.DataFrame(results, columns=RESULT_COLUMNS)
results_df


Unnamed: 0,filename,n_points,p99_gap_days
0,120259184943,1970,89.344004
1,223339338105,1755,91.050459
2,231929175915,1380,109.119571
3,25770019815,1562,97.111992
4,266288137752,1826,98.412852
5,317827964025,1182,114.415481
6,326417831663,1819,106.957149
7,335007754417,4032,56.722017
8,352187470767,2659,50.932327
9,360777377116,2658,70.734517
