In [9]:
import json
from json import JSONDecodeError
from pathlib import Path
import pandas as pd

In [10]:

# Root folder that is scanned; it is expected to contain one sub-folder per
# analysis name listed in ANALYSES.
ROOT = Path("..") / "experiments" / "output"

# Names of the analysis sub-folders to look for under ROOT (adjust if needed).
ANALYSES = {
    "entailment",
    "geval",
    "ragas",
    "hitl",
}

# Destination of the combined CSV table.
OUTFILE = Path("all_stats.csv")

In [11]:
def load_any(fp: Path) -> list:
    """Return list[dict] from .json, .jsonl or mis-labelled JSON-Lines.

    The file is first parsed as a single JSON document. A top-level list is
    returned as-is; any other top-level value is wrapped in a one-element
    list. If the whole-file parse fails, the text is treated as JSON-Lines and
    every non-blank line is parsed on its own.

    Parameters
    ----------
    fp : Path
        File to read.

    Returns
    -------
    list
        The parsed records; empty for an empty or whitespace-only file.

    Raises
    ------
    json.JSONDecodeError
        If the content is neither one JSON document nor valid JSON-Lines.
    """
    # Decode explicitly instead of relying on the platform's locale encoding.
    # "utf-8-sig" reads plain UTF-8 unchanged and also drops a leading BOM,
    # which json.loads would otherwise reject.
    txt = fp.read_text(encoding="utf-8-sig").strip()
    try:
        data = json.loads(txt)
    except JSONDecodeError:
        # Not a single document: fall back to one JSON value per line.
        return [json.loads(line) for line in txt.splitlines() if line.strip()]
    return data if isinstance(data, list) else [data]


def dataset_from_file(fname: str, model: str) -> str:
    """Derive the dataset name from a stats file name.

    The last extension is dropped first. If what remains ends with
    ``_<model>.stats``, everything before that suffix is the dataset name.
    Otherwise the first two ``_``-separated tokens are joined back together
    and returned.
    """
    base, *_ = fname.rsplit(".", 1)
    suffix = f"_{model}.stats"

    if base.endswith(suffix):
        return base[: len(base) - len(suffix)]

    leading_tokens = base.split("_")[:2]
    return "_".join(leading_tokens)


def flatten(d, parent="", sep="."):
    """Collapse a nested dict into a single-level dict.

    Keys of nested dicts are joined to their ancestors' keys with ``sep``;
    ``parent``, when non-empty, is used as the prefix for the top level.
    Values that are not dicts are copied unchanged.
    """
    def _leaves(node, prefix):
        # Depth-first walk that yields (joined_key, value) for non-dict values.
        for key, value in node.items():
            path = f"{prefix}{sep}{key}" if prefix else key
            if isinstance(value, dict):
                yield from _leaves(value, path)
            else:
                yield path, value

    return dict(_leaves(d, parent))

In [12]:
# Walk ROOT/<analysis>/<model>/*.stats.json[l], turn every stats object into
# one flat row, and write all rows to OUTFILE as a single CSV.
rows = []

# ANALYSES is a set and Path.iterdir() yields entries in arbitrary order, so
# iterating them directly can produce a different row order (and, because
# pandas takes column order from first appearance, a different column order)
# from one kernel session to the next. Sorting every level keeps the CSV
# reproducible.
for analysis in sorted(ANALYSES):
    adir = ROOT / analysis
    # is_dir() also skips a stray *file* named like an analysis, which would
    # otherwise make iterdir() raise.
    if not adir.is_dir():
        continue

    for model_dir in sorted(p for p in adir.iterdir() if p.is_dir()):
        model = model_dir.name

        for fp in sorted(model_dir.iterdir()):
            if not fp.name.endswith((".stats.json", ".stats.jsonl")):
                continue

            dataset = dataset_from_file(fp.name, model)

            for idx, stats_obj in enumerate(load_any(fp)):
                flat = flatten(stats_obj)

                # Remove 'distribution' for HITL
                if analysis == "hitl":
                    flat = {k: v for k, v in flat.items()
                            if not (k == "distribution" or k.startswith("distribution."))}

                # Prefix every metric key with the analysis name
                prefixed = {f"{analysis}_{k}": v for k, v in flat.items()}

                rows.append({
                    "analysis": analysis,
                    "model": model,
                    "dataset": dataset,
                    "row_id": idx,          # position of the object within its file
                    **prefixed,
                })

# -------------------------------------------------------------- save ---------
# Build the frame once from the list of records.
df = pd.DataFrame(rows)
df.to_csv(OUTFILE, index=False)
print(f"✓ wrote {OUTFILE} with {len(df)} rows and {df.shape[1]} columns")


✓ wrote all_stats.csv with 60 rows and 10 columns
