In [5]:
import os
import random

import jsonlines

# MMLU subjects to split; each one is read from _raw/<topic>.jsonl.
topics = [
    'abstract_algebra',
    'astronomy',
    'college_biology',
    'college_chemistry',
    'college_computer_science',
    'college_mathematics',
    'college_physics',
    'computer_security',
    'elementary_mathematics',
    'global_facts',
    'high_school_biology',
    'high_school_chemistry',
    'high_school_computer_science',
    'high_school_mathematics',
    'high_school_physics',
    'high_school_statistics',
    'human_aging',
    'human_sexuality',
    'miscellaneous',
    'high_school_world_history',
    'machine_learning',
]
# Models whose answer/reasoning is extracted from each record's "models" mapping.
models = [
    'Mixtral-8x7B-Instruct-v0.1',
    'Mistral-7B-Instruct-v0.2',
    'Llama-2-13b-chat-hf',
    'Llama-2-70b-chat-hf'
]

# Number of shuffled records per topic written to the train partition;
# every remaining record goes to the test partition.
TRAIN_SIZE = 40
# Fixed seed so that re-running this cell reproduces the same split.
SPLIT_SEED = 0

# Dedicated generator: keeps the split reproducible without touching the
# state of the global `random` module.
split_rng = random.Random(SPLIT_SEED)

for topic in topics:
    raw_path = f"_raw/{topic}.jsonl"
    with jsonlines.open(raw_path) as reader:
        raw_lines = list(reader)
    # Shuffle once per topic, before the model loop, so that every model's
    # files contain the same questions in the same order.
    split_rng.shuffle(raw_lines)
    partitions = {
        "train": raw_lines[:TRAIN_SIZE],
        "test": raw_lines[TRAIN_SIZE:]
    }
    # Opening a file for writing does not create missing parent directories.
    os.makedirs(topic, exist_ok=True)
    for model in models:
        # Iterate name/records pairs instead of rebinding `raw_lines`, which
        # previously shadowed the shuffled list the partitions were cut from.
        for partition, partition_lines in partitions.items():
            lines = [{
                "question": line["question"],
                "choices": line["choices"],
                "answer": line["answer"],
                model: {
                    "answer": line["models"][model]["answer"],
                    "reasoning": line["models"][model]["reasoning"]
                }
            } for line in partition_lines]
            with jsonlines.open(f"{topic}/{model}-{partition}.jsonl", "w") as writer:
                writer.write_all(lines)

In [2]:
from new_data import *

# Subjects whose per-model test files are cross-checked below.
topics = [
    'abstract_algebra',
    'astronomy',
    'college_biology',
    'college_chemistry',
    'college_computer_science',
    'college_mathematics',
    'college_physics',
    'computer_security',
    'elementary_mathematics',
    'global_facts',
    'high_school_biology',
    'high_school_chemistry',
    'high_school_computer_science',
    'high_school_mathematics',
    'high_school_physics',
    'high_school_statistics',
    'human_aging',
    'human_sexuality',
    'miscellaneous',
    'high_school_world_history',
    'machine_learning',
]
# Models that must all share the same test questions for a given topic.
models = [
    'Mixtral-8x7B-Instruct-v0.1',
    'Mistral-7B-Instruct-v0.2',
    'Llama-2-13b-chat-hf',
    'Llama-2-70b-chat-hf'
]

# Sanity check: within a topic, the first element of every entry in the
# first "test" batch must form the same set for every model.
for topic in topics:
    reference_questions = None
    for model in models:
        loaded_batches = load_mmlu_batches(".", topic, model, "test", [60])
        questions = {entry[0] for entry in loaded_batches[0]}
        if reference_questions is not None:
            assert reference_questions == questions
            continue
        # First model seen for this topic becomes the reference.
        reference_questions = questions

In [4]:
# get the accuracy for generated dataset
from core.data import *


def get_accuracy(topic: str, model: str, batch_nums: List[int]):
    """Collect per-batch accuracies for one topic/model pair.

    Every entry of ``batch_nums`` except the last is passed to
    ``load_mmlu_batches`` for the "train" split; the last entry alone is
    passed for the "test" split.

    Returns:
        A list holding ``get_accuracy()`` of each train batch, followed by
        ``get_accuracy()`` of each test batch.
    """
    train_batches = load_mmlu_batches('.', topic, model, "train", batch_nums[:-1])
    accs = [batch.get_accuracy() for batch in train_batches]
    test_batches = load_mmlu_batches('.', topic, model, "test", [batch_nums[-1]])
    accs.extend(batch.get_accuracy() for batch in test_batches)
    return accs


# Batch sizes handed to get_accuracy: all but the last are used for the
# "train" split, the last one for the "test" split.
batch_nums = [8] * 5 + [60]

# Subjects to report on; commented-out entries are currently excluded.
topics = [
    # 'abstract_algebra',
    # 'astronomy',
    # 'college_biology',
    # 'college_chemistry',
    # 'college_computer_science',
    'college_mathematics',
    # 'college_physics',
    # 'computer_security',
    # 'elementary_mathematics',
    # 'global_facts',
    'high_school_biology',
    'high_school_chemistry',
    # 'high_school_computer_science',
    'high_school_mathematics',
    'high_school_physics',
    # 'high_school_statistics',
    # 'human_aging',
    # 'human_sexuality',
    # 'miscellaneous',
    'high_school_world_history',
    'machine_learning',
]
# Models whose generated datasets are evaluated.
models = [
    "gemma-1.1-7b-it",
    "gpt-3.5-turbo",
    "gpt-4-turbo",
    "gpt-4o",
    "Meta-Llama-3-8B-Instruct",
    "Meta-Llama-3-70B-Instruct",
    'Mistral-7B-Instruct-v0.2',
    'Mixtral-8x7B-Instruct-v0.1',
]

# One header line per topic, then one tab-indented line per model.
for topic in topics:
    print(topic)
    for model in models:
        accs = get_accuracy(topic, model, batch_nums)
        # "Oracle" is the larger of each accuracy and its complement.
        oracle = [max(a, 1 - a) for a in accs]
        print(f"\t{model}\tAccuracy: {accs}\tOracle: {oracle}")

In [12]:
# re-shuffle the data
from data import *
import jsonlines

# Subjects whose existing train/test files are merged, shuffled and re-split.
topics = [
    'abstract_algebra',
    'astronomy',
    'college_biology',
    'college_chemistry',
    'college_computer_science',
    'college_mathematics',
    'college_physics',
    'computer_security',
    'elementary_mathematics',
    'global_facts',
    'high_school_biology',
    'high_school_chemistry',
    'high_school_computer_science',
    'high_school_mathematics',
    'high_school_physics',
    'high_school_statistics',
    'human_aging',
    'human_sexuality',
    'miscellaneous',
    'high_school_world_history',
    'machine_learning',
]


def _to_record(model, entry):
    """Build the JSON-lines record for `model` from one positional entry.

    Positions 0-2 of `entry` become 'question', 'choices' and 'answer';
    positions 3 and 4 become the model's own 'answer' and 'reasoning'.
    """
    return {
        'question': entry[0],
        'choices' : entry[1],
        'answer'  : entry[2],
        model     : {
            'answer'   : entry[3],
            'reasoning': entry[4]
        }
    }


for topic in topics:
    print(topic)
    loaded = load_mmlu_batches('../datasets/mmlu/', topic, [40, 60])
    assert len(loaded) == 2
    merged = loaded[0] + loaded[1]
    merged.shuffle()

    # Cut every model's shuffled entries at index 40.
    train_entries = {model: merged.raw[model][:40] for model in merged.raw}
    test_entries = {model: merged.raw[model][40:] for model in merged.raw}

    # Convert the positional entries into records, all of train first and
    # then all of test.
    train = {
        model: [_to_record(model, entry) for entry in entries]
        for model, entries in train_entries.items()
    }
    test = {
        model: [_to_record(model, entry) for entry in entries]
        for model, entries in test_entries.items()
    }

    # write to file
    for model in merged.raw:
        train_file = f'../datasets/mmlu/{topic}/{topic}_{model}_train.jsonl'
        with jsonlines.open(train_file, 'w') as writer:
            writer.write_all(train[model])
        test_file = f'../datasets/mmlu/{topic}/{topic}_{model}_test.jsonl'
        with jsonlines.open(test_file, 'w') as writer:
            writer.write_all(test[model])

In [8]:
# transform datasets (mmlu)
import jsonlines
import os
import random

dataset_path = '../datasets/mmlu/_raw'
dataset_out_path = '../datasets/mmlu'
# Target fraction of each topic's records that ends up in the train file.
ratio = 0.4
# Dedicated seeded generator: re-running the cell reproduces the same files
# without touching the state of the global `random` module.
split_rng = random.Random(0)

# os.listdir() returns names in arbitrary order; sort them so the seeded
# generator is consumed in the same sequence on every run.
files = sorted(f for f in os.listdir(dataset_path) if f.endswith('.jsonl'))
for f in files:
    topic = f[:-len('.jsonl')]
    # create folder for each topic
    topic_path = os.path.join(dataset_out_path, topic)
    os.makedirs(topic_path, exist_ok=True)
    with jsonlines.open(os.path.join(dataset_path, f), 'r') as lines:
        raw = [(obj['question'], obj['choices'], obj['answer'], obj['models'])
               for obj in lines]
    if not raw:
        # An empty file has no first record to read the model names from;
        # skip it instead of failing on raw[0] and aborting the whole run.
        print(f'skipping {f}: no records')
        continue
    n = len(raw)
    # The model names are taken from the first record's 'models' mapping;
    # every record is indexed with those same names below.
    for model in raw[0][3]:
        model_raw = [(r[0], r[1], r[2], r[3][model]) for r in raw]
        correct_raw = [r for r in model_raw if r[2] == r[3]['answer']]
        wrong_raw = [r for r in model_raw if r[2] != r[3]['answer']]
        # Split the correct and the wrong answers separately at `ratio`, so
        # both files get a share of each group.
        # NOTE(review): the cut follows file order; shuffling only happens
        # afterwards, within each side. Confirm that this is intended.
        correct_cut = int(len(correct_raw) * ratio)
        wrong_cut = int(len(wrong_raw) * ratio)
        train = correct_raw[:correct_cut] + wrong_raw[:wrong_cut]
        test = correct_raw[correct_cut:] + wrong_raw[wrong_cut:]
        split_rng.shuffle(train)
        split_rng.shuffle(test)
        # balance the number of entries of train and test: int() truncation
        # above can leave train short of ratio * n.
        while len(train) < ratio * n:
            train.append(test.pop())
        while len(test) < (1 - ratio) * n:
            test.append(train.pop())
        # Same record layout and file naming for both splits.
        for split_name, rows in (('train', train), ('test', test)):
            out_file = os.path.join(topic_path,
                                    f'{topic}_{model}_{split_name}.jsonl')
            with jsonlines.open(out_file, 'w') as writer:
                new_lines = [{
                    'question': r[0],
                    'choices' : r[1],
                    'answer'  : r[2],
                    model     : r[3]
                } for r in rows]
                writer.write_all(new_lines)