In [3]:
import json, yaml
from pathlib import Path
import pandas as pd

# Load the run configuration. Path objects are used throughout, so locations
# containing spaces need no quoting or escaping.
CFG = Path("config.yaml")
# Use a context manager so the file handle is closed deterministically, and
# decode as UTF-8 explicitly instead of relying on the platform default.
with CFG.open("r", encoding="utf-8") as fh:
    cfg = yaml.safe_load(fh)

# Input parquet locations and the output directory all come from the config.
TLGS = Path(cfg["paths"]["tlgs"])
BM   = Path(cfg["paths"]["benchmarks"])
OUTD = Path(cfg["output"]["dir"])
# Create the output directory (and any missing parents); no error if it exists.
OUTD.mkdir(parents=True, exist_ok=True)

def _read_parquet_verbose(path):
    """Print the parquet path being loaded, then return it as a DataFrame."""
    print("Reading:", path)
    return pd.read_parquet(path)


# Load the two input tables named in the config, announcing each one first.
tlgs = _read_parquet_verbose(TLGS)
bench = _read_parquet_verbose(BM)

# Basic QC: collect human-readable problems rather than stopping at the first.
errs = []

# Recovery values are checked against the closed interval [0, 1]. A missing
# column is recorded as a QC error (the benchmark checks below also test for
# column presence) so the summary is still produced instead of a KeyError.
if "recovery" not in tlgs:
    errs.append("Missing 'recovery' column in tlgs.")
else:
    recovery = tlgs["recovery"]
    # NaN compares False on both sides, so NaN entries are flagged here too.
    if not (0 <= recovery).all() or not (recovery <= 1).all():
        errs.append("Recovery values outside [0,1].")

# The drug-tolerance column is only checked when the benchmarks provide it.
if "observed_drug_tolerance_ugmL" in bench:
    if not (bench["observed_drug_tolerance_ugmL"] >= 0).all():
        errs.append("Negative observed_drug_tolerance_ugmL in benchmarks.")

if "method" in bench:
    # Drop missing values before sorting: NaN cannot be ordered against
    # strings (sorted() would raise TypeError) and is not a method name.
    methods = sorted(bench["method"].dropna().unique().tolist())
else:
    methods = []

# JSON-serialisable snapshot of both tables plus any QC findings.
qc = {
    "tlgs_rows": int(len(tlgs)),
    "tlgs_cols": tlgs.columns.tolist(),
    "bench_rows": int(len(bench)),
    "bench_cols": bench.columns.tolist(),
    "methods_in_bench": methods,
    "errors": errs,
}

# Persist the QC summary as indented JSON, then report where it went and
# whether any QC errors were collected.
qc_path = OUTD / "qc_summary.json"
qc_path.write_text(json.dumps(qc, indent=2))
print("QC summary ->", qc_path)
if not errs:
    print("QC OK")
else:
    print("QC ERRORS:", errs)

Reading: /Users/cmontefusco/Coding projects/ada-panda-mini/reports/tlgs.parquet
Reading: /Users/cmontefusco/Coding projects/ada-panda-mini/reports/benchmarks.parquet
QC summary -> /Users/cmontefusco/Coding projects/regulatory-style-clinpharm-report/output/qc_summary.json
QC OK
