In [1]:
#!/usr/bin/env python3

import pandas as pd
import glob
import os

# ---- CONFIG ----
GRIST_DIR = "."   # change if needed
OUTPUT_FILE = "grist_summary.csv"
PATTERN = "*.summary.csv"
# ----------------

# Suffix removed from the file name when the sample id has to be taken from
# the path instead of the table.
SUMMARY_SUFFIX = ".summary.csv"

# Output columns, fixed up front so the header is still written (and the sort
# key still exists) when no input file contributes a row.
SUMMARY_COLUMNS = [
    "sample_id",
    "n_matched_genomes",
    "avg_f_covered_bp",
    "avg_effective_coverage",
    "avg_n_mapped_reads",
]


def summarize_grist_file(df, grist_path):
    """Collapse one per-sample summary table into a single summary row.

    Args:
        df: Table read from one summary CSV. Must provide the columns
            ``genome_id``, ``f_covered_bp``, ``effective_coverage`` and
            ``n_mapped_reads``; ``sample_id`` is optional.
        grist_path: Path the table was read from. Its base name, minus the
            ``.summary.csv`` suffix, is used as the sample id when the table
            has no usable ``sample_id`` value in its first row.

    Returns:
        A dict with the keys listed in ``SUMMARY_COLUMNS``: the sample id,
        the number of distinct ``genome_id`` values, and the column means of
        the three metric columns.

    Raises:
        KeyError: If one of the required metric columns is missing.
    """
    # The first row's sample_id is taken as the id for the whole table.
    sample_id = None
    if "sample_id" in df.columns and not df.empty:
        sample_id = df["sample_id"].iloc[0]

    if pd.isna(sample_id):
        # Fall back to the file name; strip only a trailing suffix so a name
        # that happens to contain ".summary.csv" elsewhere is left intact.
        sample_id = os.path.basename(grist_path)
        if sample_id.endswith(SUMMARY_SUFFIX):
            sample_id = sample_id[: -len(SUMMARY_SUFFIX)]

    return {
        "sample_id": sample_id,
        "n_matched_genomes": df["genome_id"].nunique(),
        "avg_f_covered_bp": df["f_covered_bp"].mean(),
        "avg_effective_coverage": df["effective_coverage"].mean(),
        "avg_n_mapped_reads": df["n_mapped_reads"].mean(),
    }


rows = []

for grist_path in sorted(glob.glob(os.path.join(GRIST_DIR, PATTERN))):
    try:
        df = pd.read_csv(grist_path)
    except pd.errors.EmptyDataError:
        # Zero-byte file: nothing to parse, treat like a table with no rows.
        continue

    # Header-only tables have nothing to summarise.
    if df.empty:
        continue

    rows.append(summarize_grist_file(df, grist_path))

# Passing the column list keeps "sample_id" present even when rows is empty,
# so the sort below cannot fail with KeyError on an empty run.
summary_df = pd.DataFrame(rows, columns=SUMMARY_COLUMNS).sort_values("sample_id")
summary_df.to_csv(OUTPUT_FILE, index=False)

print(f"Wrote {OUTPUT_FILE} with {len(summary_df)} samples")


Wrote grist_summary.csv with 83 samples
