In [13]:
import os
import json
from pathlib import Path
from json import JSONDecodeError

In [17]:
# --- Locations -------------------------------------------------------------
# Input tree; the merge cell scans ROOT/<analysis>/<model>/ for result files.
ROOT = Path("../experiments/output")
# Merged files are written below this folder, one sub-folder per model.
OUT_PARENT = ROOT.joinpath("merged")

# --- Analyses --------------------------------------------------------------
# Names of the analysis sub-directories looked up under ROOT.
ANALYSES = {"entailment", "geval", "ragas"}

# --- Record fields ---------------------------------------------------------
# Fields that must be identical whenever two sources report the same record id.
STRICT_MATCH = {"id", "statement"}

# Fields copied un-prefixed into a merged record (first source seen wins);
# every other key is stored under an "<analysis>_" prefix.
SHARED_FIELDS = {
    "id", "statement", "evidences", "label",
    "model_verdict", "explanation", "confidence",
}

In [15]:
def load_results(fp: Path):
    """Load a results/stats file and always return a list of parsed values.

    The content is first parsed as a single JSON document: a top-level list
    is returned as is, any other top-level value is wrapped in a one-element
    list. If that parse fails, the text is treated as JSON Lines (one JSON
    value per non-blank line), which also covers JSON-Lines content stored
    under a ``.json`` name.

    Args:
        fp: Path of the file to read.

    Returns:
        list: the parsed values; empty for an empty or whitespace-only file.

    Raises:
        JSONDecodeError: if the content is neither valid JSON nor valid
            JSON Lines. The message names the file and the offending line.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform locale,
    # so non-ASCII text loads the same way on every machine.
    raw = fp.read_text(encoding="utf-8")
    try:
        data = json.loads(raw.strip())
    except JSONDecodeError:
        pass  # not a single JSON document -> fall through to JSON Lines
    else:
        return data if isinstance(data, list) else [data]

    records = []
    for lineno, line in enumerate(raw.splitlines(), start=1):
        if not line.strip():
            continue
        try:
            records.append(json.loads(line))
        except JSONDecodeError as exc:
            # Same exception type as before, but say where the bad row is.
            raise JSONDecodeError(
                f"{fp}, line {lineno}: {exc.msg}", exc.doc, exc.pos
            ) from exc
    return records


def dataset_from_file(fname: str, model: str) -> str:
    """Work out the dataset name encoded in a results/stats file name.

    The last extension is dropped first. If what remains ends with
    ``_<model>.results`` or ``_<model>.stats``, everything before that
    suffix is the dataset name. Otherwise the first two underscore-separated
    tokens of the extension-less name are joined and returned.
    """
    # Drop only the final extension; a name without a dot is kept whole.
    base = fname[:fname.rfind(".")] if "." in fname else fname

    suffixes = [f"_{model}.results", f"_{model}.stats"]
    hit = next((s for s in suffixes if base.endswith(s)), None)
    if hit is not None:
        return base[:len(base) - len(hit)]

    # Fallback heuristic: keep at most the first two "_"-separated tokens.
    leading_tokens = base.split("_")[:2]
    return "_".join(leading_tokens)


def ensure_dir(p: Path):
    """Create directory ``p`` and any missing parents; no error if it exists."""
    os.makedirs(p, exist_ok=True)

In [18]:
merged = {}   # model ➜ dataset ➜ {records, dataset_stats}

# Walk ROOT/<analysis>/<model>/ and fold every "*.results.*" / "*.stats.*"
# file into `merged`.
#
# ANALYSES is a set, so its iteration order can change from one kernel to the
# next (string hashing is randomised). Shared fields are first-writer-wins
# (`setdefault` below), so the walk is sorted at every level to make the
# merged output reproducible.
for analysis in sorted(ANALYSES):
    for model_dir in sorted((ROOT / analysis).glob("*")):
        if not model_dir.is_dir():
            continue
        model = model_dir.name

        for file in sorted(model_dir.iterdir()):
            if file.suffix not in {".json", ".jsonl"}:
                continue
            is_results = file.name.endswith(".results" + file.suffix)
            is_stats   = file.name.endswith(".stats"   + file.suffix)
            if not (is_results or is_stats):
                continue

            dataset = dataset_from_file(file.name, model)
            entry = merged.setdefault(model, {}).setdefault(
                dataset, {"records": {}, "dataset_stats": {}}
            )

            if is_results:                         # record-level stuff
                for item in load_results(file):
                    # One merged record per id, shared by all analyses.
                    rec = entry["records"].setdefault(item["id"], {"id": item["id"]})

                    # shared fields: kept un-prefixed, first value seen wins
                    for field in SHARED_FIELDS:
                        if field not in item:
                            continue
                        # STRICT_MATCH fields must agree across sources.
                        if (
                            field in STRICT_MATCH
                            and field in rec
                            and rec[field] != item[field]
                        ):
                            raise ValueError(
                                f"Mismatch in {field} for {item['id']}"
                            )
                        rec.setdefault(field, item[field])

                    # analysis-specific fields → prefixed with the analysis name
                    for k, v in item.items():
                        if k not in SHARED_FIELDS:
                            rec[f"{analysis}_{k}"] = v

            else:                                  # dataset-level stats
                # Stored as the list returned by load_results, keyed by analysis.
                entry["dataset_stats"][analysis] = load_results(file)

# write out: one "<dataset>_<model>_merged.json" per (model, dataset)
for model, datasets in merged.items():
    out_dir = OUT_PARENT / model
    ensure_dir(out_dir)
    for dataset, content in datasets.items():
        path = out_dir / f"{dataset}_{model}_merged.json"
        payload = {
            "model": model,
            "dataset": dataset,
            "records": list(content["records"].values()),
            "dataset_stats": content["dataset_stats"],
        }
        # Context manager so the file is flushed and closed before the
        # success line is printed (the handle was previously never closed).
        with path.open("w", encoding="utf-8") as fh:
            json.dump(payload, fh, indent=2)
        print("✓", path)

✓ ../experiments/output/merged/mistral_7b_cot/politi_hop_mistral_7b_cot_merged.json
✓ ../experiments/output/merged/mistral_7b_cot/hover_train_mistral_7b_cot_merged.json
✓ ../experiments/output/merged/mistral_7b_cot/covid_fact_mistral_7b_cot_merged.json
✓ ../experiments/output/merged/mistral_7b_non_cot/politi_hop_mistral_7b_non_cot_merged.json
✓ ../experiments/output/merged/mistral_7b_non_cot/hover_train_mistral_7b_non_cot_merged.json
✓ ../experiments/output/merged/mistral_7b_non_cot/covid_fact_mistral_7b_non_cot_merged.json
✓ ../experiments/output/merged/gpt4o_cot/covid_fact_gpt4o_cot_merged.json
✓ ../experiments/output/merged/gpt4o_cot/hover_train_gpt4o_cot_merged.json
✓ ../experiments/output/merged/gpt4o_cot/politi_hop_gpt4o_cot_merged.json
✓ ../experiments/output/merged/deepseek_r1_32b_cot/covid_fact_deepseek_r1_32b_cot_merged.json
✓ ../experiments/output/merged/deepseek_r1_32b_cot/hover_train_deepseek_r1_32b_cot_merged.json
✓ ../experiments/output/merged/deepseek_r1_32b_cot/politi_