In [1]:
import os
import json

In [2]:
# Common parent of the three result trees used by this notebook.
_OUTPUT_ROOT = 'experiments/output'

# Tree that is walked for `*.results.json` files; its records supply the `id`s.
ENTAIL_DIR = f'{_OUTPUT_ROOT}/entailment'
# Tree holding the original G-Eval results (read only), looked up by the same
# relative path as the entailment file.
ORIG_GEval_DIR = f'{_OUTPUT_ROOT}/geval'
# Tree where the G-Eval results with `id`s attached are written.
NEW_GEval_DIR = f'{_OUTPUT_ROOT}/geval_updated'

In [3]:

# For every `*.results.json` under ENTAIL_DIR, find the G-Eval file at the same
# relative path under ORIG_GEval_DIR, copy each entailment record's `id` onto
# the G-Eval record at the same position, and write the result to the same
# relative path under NEW_GEval_DIR. The original G-Eval files are not modified.
#
# Files are skipped (with a printed warning) when the G-Eval counterpart is
# missing or the two lists differ in length. A `statement` mismatch at any
# index raises ValueError and aborts the run; files written before that point
# stay on disk.
for root, _, files in os.walk(ENTAIL_DIR):
    for name in files:
        if not name.endswith('.results.json'):
            continue

        # paths: `rel` is the location relative to ENTAIL_DIR, reused to
        # address the same file in the original and updated G-Eval trees
        entail_path = os.path.join(root, name)
        rel         = os.path.relpath(entail_path, ENTAIL_DIR)
        orig_path   = os.path.join(ORIG_GEval_DIR, rel)
        new_path    = os.path.join(NEW_GEval_DIR, rel)

        if not os.path.isfile(orig_path):
            print(f"⚠️  Missing g-eval file for {rel}")
            continue

        # load: context managers close each handle right away instead of
        # leaking one per file; JSON is read as UTF-8 regardless of locale
        with open(entail_path, encoding='utf-8') as f:
            entail = json.load(f)
        with open(orig_path, encoding='utf-8') as f:
            geval = json.load(f)

        # sanity check: records are paired by position, so lengths must agree
        if len(entail) != len(geval):
            print(f"⚠️  Length mismatch in {rel}")
            continue

        for i, (e_e, e_g) in enumerate(zip(entail, geval)):
            # refuse to attach an id to a record describing another statement
            if e_e['statement'] != e_g.get('statement'):
                raise ValueError(f"Statement mismatch at index {i} in {rel}")
            e_g['id'] = e_e['id']

        # ensure target dir exists
        os.makedirs(os.path.dirname(new_path), exist_ok=True)
        # write updated file
        with open(new_path, 'w', encoding='utf-8') as f:
            json.dump(geval, f, indent=2)

        print(f"✅  Wrote {new_path}")

✅  Wrote experiments/output/geval_updated/mistral_7b_cot/politi_hop_mistral_7b_cot.results.json
✅  Wrote experiments/output/geval_updated/mistral_7b_cot/hover_train_mistral_7b_cot.results.json
✅  Wrote experiments/output/geval_updated/mistral_7b_cot/covid_fact_mistral_7b_cot.results.json
✅  Wrote experiments/output/geval_updated/mistral_7b_non_cot/hover_train_mistral_7b_no_cot.results.json
✅  Wrote experiments/output/geval_updated/mistral_7b_non_cot/covid_fact_mistral_7b_no_cot.results.json
✅  Wrote experiments/output/geval_updated/mistral_7b_non_cot/politi_hop_mistral_7b_no_cot.results.json
✅  Wrote experiments/output/geval_updated/gpt4o_cot/covid_fact_gpt4o_cot.results.json
✅  Wrote experiments/output/geval_updated/gpt4o_cot/hover_train_gpt4o_cot.results.json
✅  Wrote experiments/output/geval_updated/gpt4o_cot/politi_hop_gpt4o_cot.results.json
✅  Wrote experiments/output/geval_updated/deepseek_r1_32b_cot/covid_fact_depseek_r1_cot.results.json
✅  Wrote experiments/output/geval_updated