In [3]:
#!/usr/bin/env python3

import pandas as pd
import glob
import os

# ---- CONFIG ----
FMG_DIR = "."   # directory searched for per-sample result files; change if needed
OUTPUT_FILE = "fmg_summary.csv"
PATTERN = "*.fmg.g226.k21.csv"
# ----------------

# Column order of the summary table. Passing it explicitly to the DataFrame
# constructor keeps the header (and the "sample_id" sort key) present even
# when no input file yields a row.
SUMMARY_COLUMNS = [
    "sample_id",
    "n_matched_genomes",
    "avg_f_match",
    "avg_median_abund",
    "avg_n_unique_weighted_found",
]


def summarize_fmg_file(fmg_path):
    """Return a one-row summary dict for a single result CSV.

    Args:
        fmg_path: Path to a CSV with the columns ``query_name``,
            ``match_name``, ``f_match``, ``median_abund`` and
            ``n_unique_weighted_found``.

    Returns:
        A dict keyed by ``SUMMARY_COLUMNS``, or ``None`` when the file holds
        no data rows (zero-byte file or header only).

    Raises:
        KeyError: If the file has data rows but lacks a required column.
    """
    try:
        df = pd.read_csv(fmg_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it like any other
        # empty result instead of aborting the whole run.
        return None

    if df.empty:
        return None

    # Infer the sample name (SRRxxxxxx) from the first row; when it is
    # missing or the "-" placeholder, fall back to the file name's leading
    # dot-separated component.
    sample_id = df["query_name"].iloc[0]
    if pd.isna(sample_id) or sample_id == "-":
        sample_id = os.path.basename(fmg_path).split(".")[0]

    return {
        "sample_id": sample_id,
        "n_matched_genomes": df["match_name"].nunique(),
        "avg_f_match": df["f_match"].mean(),
        "avg_median_abund": df["median_abund"].mean(),
        "avg_n_unique_weighted_found": df["n_unique_weighted_found"].mean(),
    }


rows = []

# sorted() makes the processing order deterministic regardless of how the
# filesystem lists the directory.
for fmg_path in sorted(glob.glob(os.path.join(FMG_DIR, PATTERN))):
    summary = summarize_fmg_file(fmg_path)
    if summary is not None:
        rows.append(summary)

# With explicit columns, an empty `rows` still produces a frame that has a
# "sample_id" column, so sort_values() no longer raises KeyError.
summary_df = pd.DataFrame(rows, columns=SUMMARY_COLUMNS).sort_values("sample_id")
summary_df.to_csv(OUTPUT_FILE, index=False)

print(f"Wrote {OUTPUT_FILE} with {len(summary_df)} samples")


Wrote fmg_summary.csv with 1901 samples
