# Combine multiple datasets into one JSON file

In [4]:
import json
from pathlib import Path
from glob import glob
# Build one combined JSON file per test case by concatenating the per-dataset
# sample files. The trailing commented-out lists are alternative settings
# left in place from the original cell.
datasets = ["scienceqa"]#["alpaca", "mmlu", "naturalqa", "triviaqa", "math"]
num_samples = 2000
testcases = ["13b", "7b"]#["13b", "brief_13b", "brief+_13b"]

# The output directory does not depend on the test case, so create it once.
output_dir = f"sample_{num_samples}_science_vs_7b"
Path(output_dir).mkdir(parents=True, exist_ok=True)

for tc in testcases:
    combined = []
    for dataset in datasets:
        # "math" files are named with "all" instead of a sample count.
        dataset_samples = "all" if dataset == "math" else 5000
        file_path = f"../{dataset}/samples_{num_samples}_seed_0/{dataset}_{dataset_samples}_{tc}.json"
        # Resolve the pattern once; the check fails for zero matches as well
        # as for several, so the message reports the actual count.
        matches = glob(file_path)
        assert len(matches) == 1, (
            f"Expected exactly one file matching {file_path!r}, found {len(matches)}"
        )
        path = matches[0]
        with open(path) as f:
            read = json.load(f)
        # Each file is expected to hold a JSON list of samples.
        combined.extend(read)
    with open(f"{output_dir}/combined_{tc}.json", "w") as f:
        json.dump(combined, f, indent=4)

# Analyze the GPT-4 evaluation

In [3]:
import json
# For every summary file, print how many prompts landed on each assistance
# level (0, 1 or 2), broken down by the top-level key of the file.
files = ["summary_2000.json"]
for f_name in files:
    with open(f_name) as handle:
        per_dataset = json.load(handle)
    for dataset_name in per_dataset:
        # Start every level at zero so levels with no entries are still printed.
        tally = dict.fromkeys((0, 1, 2), 0)
        for entry in per_dataset[dataset_name]:
            level = entry["assistance_lvl"]
            tally[level] = tally[level] + 1
        print(f"{f_name} -- {dataset_name}: {tally}")


summary_2000.json -- scienceqa: {0: 482, 1: 690, 2: 828}


# Merge ScienceQA into the previous evaluation

In [3]:
import json

# Merge the ScienceQA evaluation (B) into the earlier vs_7b evaluation (A).
# `summary` selects the merge strategy: "summary" merges the two objects as
# dicts (keys from B win on collision), "details" concatenates them with `+`.
# NOTE(review): both input paths and the output path are hard-coded to the
# details files regardless of `summary` -- confirm before using "summary".
summary = "details"
with open("assessment_results/vs_7b/details_2000.json") as f:
    A = json.load(f)
with open("assessment_results/science_vs_7b/details_2000.json") as f:
    B = json.load(f)

if summary == "summary":
    C = {**A, **B}
elif summary == "details":
    C = A + B
else:
    # Without this branch an unknown mode would leave C undefined (or reuse a
    # stale C from an earlier run) and fail or write the wrong data below.
    raise ValueError(f"Unknown merge mode: {summary!r}")

# WARNING: this overwrites the file A was read from. Re-running the cell
# merges B into the already-merged file a second time.
with open("assessment_results/vs_7b/details_2000.json", "w") as f:
    json.dump(C, f, indent=4)


# Correct some errors in assessment_results

In [1]:
import json

# Rebuild the per-dataset summary for the 13b-vs-7b comparison from the raw
# annotation details. Each annotation contributes one entry recording which
# generator was picked: assistance_lvl 1 for a "_7b" generator, 0 for "_13b".
summary = {}
with open("assessment_results/vs_7b/details_2000.json") as f:
    annotations = json.load(f)
for item in annotations:
    raw_completion = item["raw_completion"]
    # The winner digit is read from character index 8 of the raw completion.
    # Slicing instead of indexing yields "" for a completion shorter than
    # nine characters, so it is treated as invalid rather than raising
    # IndexError.
    winner = raw_completion[8:9]
    if winner not in ("1", "2"):
        # Unparseable verdicts are reported and then counted for generator 1.
        print(f"Invalid winner: {raw_completion}")
        winner = "1"
    dataset = item["dataset_1"]
    winner_key = f"generator_{winner}"
    if "_7b" in item[winner_key]:
        winner_prompt = 1
    elif "_13b" in item[winner_key]:
        winner_prompt = 0
    else:
        raise ValueError(f"Invalid prompt: {item[winner_key]}")
    summary.setdefault(dataset, []).append({
        "instruction": item["instruction"],
        "assistance_lvl": winner_prompt,
    })
with open("assessment_results/vs_7b/summary_2000.json", "w") as f:
    json.dump(summary, f, indent=4)

Invalid winner: r


In [2]:
import json

# Rebuild the per-dataset summary for the three-way ScienceQA comparison from
# the raw annotation details. The winning generator's name determines the
# assistance level: "brief_" -> 1, "brief+_" -> 2, anything else -> 0.
summary = {}
with open("assessment_results/science/details_2000.json") as f:
    annotations = json.load(f)
for item in annotations:
    # The winner digit is read from character index 8 of the raw completion.
    # Slicing instead of indexing yields "" for a completion shorter than
    # nine characters, so it is reported and skipped like any other invalid
    # winner instead of raising IndexError.
    winner = item["raw_completion"][8:9]
    if winner not in ("1", "2", "3"):
        print(f"Invalid winner: {winner}")
        continue
    dataset = item["dataset_1"]
    winner_key = f"generator_{winner}"
    # "brief+_" does not contain the substring "brief_", so the two checks
    # cannot shadow each other.
    if "brief_" in item[winner_key]:
        winner_prompt = 1
    elif "brief+_" in item[winner_key]:
        winner_prompt = 2
    else:
        winner_prompt = 0
    summary.setdefault(dataset, []).append({
        "instruction": item["instruction"],
        "assistance_lvl": winner_prompt,
    })
with open("summary_2000.json", "w") as f:
    json.dump(summary, f, indent=4)
