In [None]:
from pathlib import Path
import pandas as pd

# Convert selected daily Parquet partitions into one CSV file per partition.

# Base directory of the partitioned dataset. It is resolved relative to the
# current working directory (two levels up), so the result depends on where
# the notebook is launched from.
PROC_DIR = Path.cwd().parents[1] / "data" / "01_processed" / "elset_history_aodr"

# Fail fast with an actionable message when the cwd-relative path is wrong.
# Without this check the first failure would be OUT_DIR.mkdir() complaining
# about a missing parent, before any partition is even looked at.
if not PROC_DIR.is_dir():
    raise FileNotFoundError(
        f"Processed data directory not found: {PROC_DIR} "
        f"(resolved from the current working directory {Path.cwd()})"
    )

# First day of each month, January through August 2025.
days = [f"2025-{m:02d}-01" for m in range(1, 9)]
# One partition directory per day, named "epoch_date=<YYYY-MM-DD>".
paths = [PROC_DIR / f"epoch_date={d}" for d in days]

# All CSV files are written next to the partitions, in a "csv.out" folder.
OUT_DIR = PROC_DIR / "csv.out"
OUT_DIR.mkdir(exist_ok=True)

for p in paths:
    # A missing partition is reported and skipped rather than aborting the run.
    if not p.exists():
        print("Skip (missing):", p)
        continue

    # .glob only sees files directly inside the partition directory;
    # switch to .rglob("*.parquet") if the files are nested deeper.
    # Sorting keeps the row order of the combined frame deterministic.
    files = sorted(p.glob("*.parquet"))
    if not files:
        print("No parquet files in", p)
        continue

    # Stack every file of the partition into a single frame with a fresh index.
    df = pd.concat((pd.read_parquet(fp) for fp in files), ignore_index=True)

    # The date lives only in the directory name ("epoch_date=<date>"), so it is
    # written into the frame as a column (replacing any existing "epoch_date").
    epoch_date = p.name.split("=", 1)[1]
    df["epoch_date"] = epoch_date
    # Optional de-duplication; adjust the subset to the columns that identify a row:
    # df = df.drop_duplicates(subset=["satNo","epoch"])

    # Output file is named after the partition, e.g. epoch_date=2025-06-01.csv
    out = OUT_DIR / f"{p.name}.csv"
    df.to_csv(out, index=False)
    print("Wrote", out)

Wrote c:\Users\ash\Desktop\wid-datathon\data\01_processed\elset_history_aodr\csv.out\epoch_date=2025-01-01.csv
Wrote c:\Users\ash\Desktop\wid-datathon\data\01_processed\elset_history_aodr\csv.out\epoch_date=2025-02-01.csv
Wrote c:\Users\ash\Desktop\wid-datathon\data\01_processed\elset_history_aodr\csv.out\epoch_date=2025-03-01.csv
Wrote c:\Users\ash\Desktop\wid-datathon\data\01_processed\elset_history_aodr\csv.out\epoch_date=2025-04-01.csv
Wrote c:\Users\ash\Desktop\wid-datathon\data\01_processed\elset_history_aodr\csv.out\epoch_date=2025-05-01.csv
Wrote c:\Users\ash\Desktop\wid-datathon\data\01_processed\elset_history_aodr\csv.out\epoch_date=2025-06-01.csv
Wrote c:\Users\ash\Desktop\wid-datathon\data\01_processed\elset_history_aodr\csv.out\epoch_date=2025-07-01.csv
Wrote c:\Users\ash\Desktop\wid-datathon\data\01_processed\elset_history_aodr\csv.out\epoch_date=2025-08-01.csv
