In [1]:
import pandas as pd
from datasets import load_dataset
from tqdm import tqdm
import pickle

In [2]:
def equally_spaced_sampling(ordered_list, num_samples):
    """Pick ``num_samples`` items from ``ordered_list`` at a fixed stride.

    The stride is ``len(ordered_list) // num_samples``. Items are taken starting
    at index 0 and the result is truncated to the first ``num_samples`` picks,
    so elements past ``stride * num_samples`` are never selected.

    Args:
        ordered_list: Any object supporting ``len()`` and slice indexing
            (e.g. a list; the notebook also passes a pandas Series).
        num_samples: Number of items to return.

    Returns:
        A slice of ``ordered_list`` (whatever type its slicing yields) holding
        ``num_samples`` items; an empty slice when ``num_samples`` is 0.

    Raises:
        ValueError: If ``num_samples`` is negative or larger than
            ``len(ordered_list)``.
    """
    if num_samples < 0:
        raise ValueError("Number of samples requested must be non-negative")
    if num_samples > len(ordered_list):
        raise ValueError("Number of samples requested is more than the number of elements in the list")
    if num_samples == 0:
        # Avoid the division by zero below; asking for nothing yields nothing.
        return ordered_list[:0]
    step = len(ordered_list) // num_samples
    return ordered_list[::step][:num_samples]

In [3]:
# Number of models to keep; passed as `num_samples` to equally_spaced_sampling
# when the model list is sub-sampled.
n=60

In [4]:
# Read the 'Model' column of the CSV snapshot, then keep `n` entries picked at a
# fixed stride so the selection spans the file's ordering.
leaderboard_models = pd.read_csv('20230904.csv')['Model']
models_names = list(equally_spaced_sampling(leaderboard_models, n))

# Preview the first few sampled identifiers as the cell output.
models_names[:10]

['uni-tianyan/Uni-TianYan',
 'garage-bAInd/Camel-Platypus2-70B',
 'TheBloke/llama-2-70b-Guanaco-QLoRA-fp16',
 'WizardLM/WizardMath-70B-V1.0',
 'Weyaxi/llama-2-alpacagpt4-1000step',
 'jondurbin/airoboros-65b-gpt4-1.4-peft',
 'TheBloke/robin-65b-v2-fp16',
 'yeontaek/llama-2-13B-ensemble-v5',
 'uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b',
 'Yhyu13/oasst-rlhf-2-llama-30b-7k-steps-hf']

In [5]:
# Turn each "creator/model" identifier into the URL of its details dataset.
details_url_template = 'https://huggingface.co/datasets/open-llm-leaderboard/details_{:}__{:}'

models = []
for full_name in models_names:
    # Unpacking requires exactly one "/" in the identifier; anything else raises.
    creator, model = full_name.split("/")
    models.append(details_url_template.format(creator, model))

In [6]:
# Top-level benchmark configurations requested for every model.
scenarios = [
    'harness_arc_challenge_25',
    'harness_hellaswag_10',
    'harness_hendrycksTest_5',
    'harness_truthfulqa_mc_0',
    'harness_winogrande_5',
    'harness_gsm8k_5',
]

# Subject names used to build the per-subject hendrycksTest configuration names.
_mmlu_subjects = [
    'abstract_algebra', 'anatomy', 'astronomy', 'business_ethics',
    'clinical_knowledge', 'college_biology', 'college_chemistry',
    'college_computer_science', 'college_mathematics', 'college_medicine',
    'college_physics', 'computer_security', 'conceptual_physics',
    'econometrics', 'electrical_engineering', 'elementary_mathematics',
    'formal_logic', 'global_facts', 'high_school_biology',
    'high_school_chemistry', 'high_school_computer_science',
    'high_school_european_history', 'high_school_geography',
    'high_school_government_and_politics', 'high_school_macroeconomics',
    'high_school_mathematics', 'high_school_microeconomics',
    'high_school_physics', 'high_school_psychology',
    'high_school_statistics', 'high_school_us_history',
    'high_school_world_history', 'human_aging', 'human_sexuality',
    'international_law', 'jurisprudence', 'logical_fallacies',
    'machine_learning', 'management', 'marketing', 'medical_genetics',
    'miscellaneous', 'moral_disputes', 'moral_scenarios', 'nutrition',
    'philosophy', 'prehistory', 'professional_accounting',
    'professional_law', 'professional_medicine',
    'professional_psychology', 'public_relations', 'security_studies',
    'sociology', 'us_foreign_policy', 'virology', 'world_religions',
]

# Every per-subject configuration follows 'harness_hendrycksTest_<subject>_5'.
mmlu_subscenarios = ['harness_hendrycksTest_' + subject + '_5' for subject in _mmlu_subjects]


# Load a previously pickled object from 'data.pickle' into `data2`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that you produced yourself or otherwise trust.
# NOTE(review): `data2` is not referenced by the cells visible here — confirm a
# later cell still needs it (and that 'data.pickle' exists on a fresh run).
with open('data.pickle', 'rb') as handle:
    data2 = pickle.load(handle)

In [7]:
# Build the results table up front: one entry per model (keyed by the dataset
# path without the site prefix), holding a None placeholder for every
# configuration that will be requested.
data = {}
for model_url in tqdm(models):
    repo_id = model_url.replace('https://huggingface.co/datasets/', '')
    data[repo_id] = dict.fromkeys(mmlu_subscenarios + scenarios)

100%|██████████| 60/60 [00:00<00:00, 103605.70it/s]


In [8]:
# Fetch, for every model and every configuration, the `metric` column of the
# 'latest' split of that model's details dataset and store it in `data`.
# Configurations that fail to load are left as None and recorded as SKIP.
skipped = 0   # number of models with at least one skipped configuration
log = []      # one "OK ..." / "SKIP ..." entry per (model, configuration)
for model2 in tqdm(models):
    skipped_aux = 0  # skipped configurations for the current model
    model = model2.replace('https://huggingface.co/datasets/','')
    for s in mmlu_subscenarios+scenarios:
        # Choose which column to read based on the configuration name.
        if 'arc' in s or 'hellaswag' in s:
            metric = 'acc_norm'
        elif 'truthfulqa' in s:
            metric = 'mc2'
        else:
            metric = 'acc'

        try:
            data[model][s] = load_dataset(model, s, split='latest')[metric]
        except Exception as err:
            # Best-effort download: keep going when one configuration fails,
            # but catch only Exception so that KeyboardInterrupt / SystemExit
            # still stop this long-running loop, and show why it was skipped.
            data[model][s] = None
            message = "\nSKIP {:} {:}\n".format(model,s)
            print(message)
            print("reason: {!r}".format(err))
            skipped_aux += 1
            log.append(message)
        else:
            message = "\nOK {:} {:}\n".format(model,s)
            print(message)
            log.append(message)

    if skipped_aux > 0:
        skipped += 1

    # Checkpoint after every model so an interrupted run keeps what it fetched.
    with open('leaderboard_raw.pickle', 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

    print("\nModels skipped so far: {:}\n".format(skipped))

  0%|          | 0/60 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

  table = cls._concat_blocks(blocks, axis=0)



OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

  2%|▏         | 1/60 [02:37<2:35:03, 157.69s/it]


OK open-llm-leaderboard/details_uni-tianyan__Uni-TianYan harness_gsm8k_5


Models skipped so far: 0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

  3%|▎         | 2/60 [05:12<2:30:46, 155.98s/it]


OK open-llm-leaderboard/details_garage-bAInd__Camel-Platypus2-70B harness_gsm8k_5


Models skipped so far: 0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

  5%|▌         | 3/60 [07:50<2:29:06, 156.95s/it]


OK open-llm-leaderboard/details_TheBloke__llama-2-70b-Guanaco-QLoRA-fp16 harness_gsm8k_5


Models skipped so far: 0


SKIP open-llm-leaderboard/details_WizardLM__WizardMath-70B-V1.0 harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_WizardLM__WizardMath-70B-V1.0 harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_WizardLM__WizardMath-70B-V1.0 harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_WizardLM__WizardMath-70B-V1.0 harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_WizardLM__WizardMath-70B-V1.0 harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_WizardLM__WizardMath-70B-V1.0 harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_WizardLM__WizardMath-70B-V1.0 harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/details_WizardLM__WizardMath-70B-V1.0 harness_hendrycksTest_college_computer_science_5


SKIP open-llm-leaderboard/detail

  7%|▋         | 4/60 [10:09<2:20:00, 150.01s/it]


OK open-llm-leaderboard/details_WizardLM__WizardMath-70B-V1.0 harness_gsm8k_5


Models skipped so far: 1


SKIP open-llm-leaderboard/details_Weyaxi__llama-2-alpacagpt4-1000step harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_Weyaxi__llama-2-alpacagpt4-1000step harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_Weyaxi__llama-2-alpacagpt4-1000step harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_Weyaxi__llama-2-alpacagpt4-1000step harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_Weyaxi__llama-2-alpacagpt4-1000step harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_Weyaxi__llama-2-alpacagpt4-1000step harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_Weyaxi__llama-2-alpacagpt4-1000step harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/details_Weyaxi__llama-2-alpacagpt4-1000step harness_hendrycksTest_college_computer_science

  8%|▊         | 5/60 [12:22<2:11:34, 143.53s/it]


OK open-llm-leaderboard/details_Weyaxi__llama-2-alpacagpt4-1000step harness_gsm8k_5


Models skipped so far: 2



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 10%|█         | 6/60 [15:01<2:13:55, 148.80s/it]


OK open-llm-leaderboard/details_jondurbin__airoboros-65b-gpt4-1.4-peft harness_gsm8k_5


Models skipped so far: 2



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 12%|█▏        | 7/60 [17:36<2:13:30, 151.14s/it]


OK open-llm-leaderboard/details_TheBloke__robin-65b-v2-fp16 harness_gsm8k_5


Models skipped so far: 2



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


SKIP open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


SKIP open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 13%|█▎        | 8/60 [20:19<2:13:59, 154.61s/it]


SKIP open-llm-leaderboard/details_yeontaek__llama-2-13B-ensemble-v5 harness_gsm8k_5


Models skipped so far: 3



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 15%|█▌        | 9/60 [23:02<2:13:50, 157.47s/it]


OK open-llm-leaderboard/details_uukuguy__speechless-llama2-hermes-orca-platypus-wizardlm-13b harness_gsm8k_5


Models skipped so far: 3



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 17%|█▋        | 10/60 [25:38<2:10:50, 157.01s/it]


OK open-llm-leaderboard/details_Yhyu13__oasst-rlhf-2-llama-30b-7k-steps-hf harness_gsm8k_5


Models skipped so far: 3



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 18%|█▊        | 11/60 [28:13<2:07:42, 156.39s/it]


OK open-llm-leaderboard/details_Gryphe__MythoMix-L2-13b harness_gsm8k_5


Models skipped so far: 3



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 20%|██        | 12/60 [30:49<2:05:02, 156.30s/it]


OK open-llm-leaderboard/details_yeontaek__Platypus2xOpenOrca-13B-IA3-v4 harness_gsm8k_5


Models skipped so far: 3



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 22%|██▏       | 13/60 [33:28<2:03:02, 157.07s/it]


OK open-llm-leaderboard/details_augtoma__qCammel-13 harness_gsm8k_5


Models skipped so far: 3



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 23%|██▎       | 14/60 [36:12<2:02:03, 159.20s/it]


OK open-llm-leaderboard/details_Undi95__ReMM-L2-13B harness_gsm8k_5


Models skipped so far: 3


SKIP open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b harness_hendryck

 25%|██▌       | 15/60 [38:27<1:53:54, 151.87s/it]


OK open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b harness_gsm8k_5


Models skipped so far: 4



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 27%|██▋       | 16/60 [41:03<1:52:15, 153.09s/it]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-LoRa harness_gsm8k_5


Models skipped so far: 4



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 28%|██▊       | 17/60 [43:43<1:51:15, 155.24s/it]


OK open-llm-leaderboard/details_duliadotio__dulia-13b-8k-alpha harness_gsm8k_5


Models skipped so far: 4



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 30%|███       | 18/60 [46:13<1:47:24, 153.43s/it]


OK open-llm-leaderboard/details_yeontaek__Platypus2-13B-QLoRa harness_gsm8k_5


Models skipped so far: 4


SKIP open-llm-leaderboard/details_meta-llama__Llama-2-13b-chat-hf harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_meta-llama__Llama-2-13b-chat-hf harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_meta-llama__Llama-2-13b-chat-hf harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_meta-llama__Llama-2-13b-chat-hf harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_meta-llama__Llama-2-13b-chat-hf harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_meta-llama__Llama-2-13b-chat-hf harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_meta-llama__Llama-2-13b-chat-hf harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/details_meta-llama__Llama-2-13b-chat-hf harness_hendrycksTest_college_computer_science_5


SKIP open-llm-leaderboard/d

 32%|███▏      | 19/60 [49:34<1:54:46, 167.96s/it]


OK open-llm-leaderboard/details_meta-llama__Llama-2-13b-chat-hf harness_gsm8k_5


Models skipped so far: 5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 33%|███▎      | 20/60 [52:01<1:47:37, 161.44s/it]


OK open-llm-leaderboard/details_CalderaAI__13B-Ouroboros harness_gsm8k_5


Models skipped so far: 5


SKIP open-llm-leaderboard/details_openaccess-ai-collective__manticore-13b-chat-pyg harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_openaccess-ai-collective__manticore-13b-chat-pyg harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_openaccess-ai-collective__manticore-13b-chat-pyg harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_openaccess-ai-collective__manticore-13b-chat-pyg harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_openaccess-ai-collective__manticore-13b-chat-pyg harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_openaccess-ai-collective__manticore-13b-chat-pyg harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_openaccess-ai-collective__manticore-13b-chat-pyg harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/deta

 35%|███▌      | 21/60 [54:05<1:37:40, 150.26s/it]


OK open-llm-leaderboard/details_openaccess-ai-collective__manticore-13b-chat-pyg harness_gsm8k_5


Models skipped so far: 6



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 37%|███▋      | 22/60 [56:38<1:35:46, 151.24s/it]


OK open-llm-leaderboard/details_TheBloke__vicuna-13B-1.1-HF harness_gsm8k_5


Models skipped so far: 6


SKIP open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k harness_hendrycksTest_college_computer_science_5


SKIP open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k

 38%|███▊      | 23/60 [58:39<1:27:39, 142.15s/it]


OK open-llm-leaderboard/details_Open-Orca__LlongOrca-7B-16k harness_gsm8k_5


Models skipped so far: 7



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 40%|████      | 24/60 [1:01:30<1:30:22, 150.61s/it]


OK open-llm-leaderboard/details_luffycodes__mcq-hal-vicuna-13b-v1.5 harness_gsm8k_5


Models skipped so far: 7



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 42%|████▏     | 25/60 [1:04:03<1:28:15, 151.31s/it]


OK open-llm-leaderboard/details_jondurbin__airoboros-l2-13b-gpt4-2.0 harness_gsm8k_5


Models skipped so far: 7



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 43%|████▎     | 26/60 [1:06:40<1:26:46, 153.12s/it]


OK open-llm-leaderboard/details_lmsys__vicuna-7b-v1.5 harness_gsm8k_5


Models skipped so far: 7



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 45%|████▌     | 27/60 [1:09:14<1:24:20, 153.34s/it]


OK open-llm-leaderboard/details_PocketDoc__Dans-PileOfSets-Mk1-llama-13b-merged harness_gsm8k_5


Models skipped so far: 7



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 47%|████▋     | 28/60 [1:11:49<1:22:07, 153.98s/it]


OK open-llm-leaderboard/details_LinkSoul__Chinese-Llama-2-7b harness_gsm8k_5


Models skipped so far: 7



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


SKIP open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


SKIP open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 48%|████▊     | 29/60 [1:14:16<1:18:28, 151.88s/it]


SKIP open-llm-leaderboard/details_xzuyn__LLaMa-2-PeanutButter_v4-7B harness_gsm8k_5


Models skipped so far: 8



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 50%|█████     | 30/60 [1:16:53<1:16:38, 153.28s/it]


OK open-llm-leaderboard/details_Aspik101__trurl-2-7b-pl-instruct_unload harness_gsm8k_5


Models skipped so far: 8



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 52%|█████▏    | 31/60 [1:19:31<1:14:44, 154.65s/it]


OK open-llm-leaderboard/details_Aspik101__Llama-2-7b-hf-instruct-pl-lora_unload harness_gsm8k_5


Models skipped so far: 8



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


SKIP open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 53%|█████▎    | 32/60 [1:22:08<1:12:34, 155.53s/it]


SKIP open-llm-leaderboard/details_TinyPixel__llama2-7b-instruct harness_gsm8k_5


Models skipped so far: 9



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 55%|█████▌    | 33/60 [1:24:44<1:10:02, 155.65s/it]


OK open-llm-leaderboard/details_elyza__ELYZA-japanese-Llama-2-7b-fast-instruct harness_gsm8k_5


Models skipped so far: 9



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 57%|█████▋    | 34/60 [1:27:38<1:09:52, 161.25s/it]


OK open-llm-leaderboard/details_jondurbin__airoboros-7b-gpt4-1.4 harness_gsm8k_5


Models skipped so far: 9



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 58%|█████▊    | 35/60 [1:30:37<1:09:19, 166.36s/it]


OK open-llm-leaderboard/details_mosaicml__mpt-7b-8k-chat harness_gsm8k_5


Models skipped so far: 9



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 60%|██████    | 36/60 [1:34:20<1:13:24, 183.54s/it]


OK open-llm-leaderboard/details_openlm-research__open_llama_13b harness_gsm8k_5


Models skipped so far: 9


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_hendrycksTest_college_computer_science_5


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_hendrycksTest_college_mathematics_5


SKIP open-llm-leaderb

 62%|██████▏   | 37/60 [1:34:33<50:39, 132.16s/it]  


SKIP open-llm-leaderboard/details_LMFlow__Robin-7b-v2 harness_gsm8k_5


Models skipped so far: 10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


SKIP open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 63%|██████▎   | 38/60 [1:37:09<51:10, 139.57s/it]


SKIP open-llm-leaderboard/details_AGI-inc__lora_moe_7b harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 65%|██████▌   | 39/60 [1:39:44<50:24, 144.00s/it]


OK open-llm-leaderboard/details_TheBloke__koala-7B-HF harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 67%|██████▋   | 40/60 [1:42:25<49:44, 149.24s/it]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-7B-Instruct harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 68%|██████▊   | 41/60 [1:45:07<48:25, 152.92s/it]


OK open-llm-leaderboard/details_klosax__open_llama_13b_600bt_preview harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 70%|███████   | 42/60 [1:47:42<46:03, 153.50s/it]


OK open-llm-leaderboard/details_togethercomputer__RedPajama-INCITE-Base-7B-v0.1 harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 72%|███████▏  | 43/60 [1:50:17<43:39, 154.06s/it]


OK open-llm-leaderboard/details_digitous__Javalion-R harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 73%|███████▎  | 44/60 [1:52:54<41:19, 154.95s/it]


OK open-llm-leaderboard/details_digitous__Skegma-GPTJ harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 75%|███████▌  | 45/60 [1:55:33<39:04, 156.28s/it]


OK open-llm-leaderboard/details_TehVenom__ChanMalion harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 77%|███████▋  | 46/60 [1:58:11<36:34, 156.75s/it]


OK open-llm-leaderboard/details_KoboldAI__OPT-13B-Nerybus-Mix harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 78%|███████▊  | 47/60 [2:00:53<34:16, 158.17s/it]


OK open-llm-leaderboard/details_KoboldAI__OPT-6.7B-Erebus harness_gsm8k_5


Models skipped so far: 11



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


SKIP open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 80%|████████  | 48/60 [2:03:52<32:53, 164.47s/it]


SKIP open-llm-leaderboard/details_conceptofmind__Open-LLongMA-3b harness_gsm8k_5


Models skipped so far: 12



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 82%|████████▏ | 49/60 [2:06:32<29:54, 163.15s/it]


OK open-llm-leaderboard/details_Writer__camel-5b-hf harness_gsm8k_5


Models skipped so far: 12



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 83%|████████▎ | 50/60 [2:09:11<26:57, 161.79s/it]


OK open-llm-leaderboard/details_pythainlp__wangchanglm-7.5B-sft-en-sharded harness_gsm8k_5


Models skipped so far: 12



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 85%|████████▌ | 51/60 [2:11:38<23:37, 157.53s/it]


OK open-llm-leaderboard/details_facebook__opt-iml-max-1.3b harness_gsm8k_5


Models skipped so far: 12



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 87%|████████▋ | 52/60 [2:14:16<21:02, 157.75s/it]


OK open-llm-leaderboard/details_RWKV__rwkv-raven-1b5 harness_gsm8k_5


Models skipped so far: 12


SKIP open-llm-leaderboard/details_rinna__bilingual-gpt-neox-4b harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_rinna__bilingual-gpt-neox-4b harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_rinna__bilingual-gpt-neox-4b harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_rinna__bilingual-gpt-neox-4b harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_rinna__bilingual-gpt-neox-4b harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_rinna__bilingual-gpt-neox-4b harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_rinna__bilingual-gpt-neox-4b harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/details_rinna__bilingual-gpt-neox-4b harness_hendrycksTest_college_computer_science_5


SKIP open-llm-leaderboard/details_rinna__bilingual-gpt-neox

 88%|████████▊ | 53/60 [2:16:35<17:43, 151.90s/it]


OK open-llm-leaderboard/details_rinna__bilingual-gpt-neox-4b harness_gsm8k_5


Models skipped so far: 13


SKIP open-llm-leaderboard/details_YeungNLP__firefly-bloom-2b6-v2 harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_YeungNLP__firefly-bloom-2b6-v2 harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_YeungNLP__firefly-bloom-2b6-v2 harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_YeungNLP__firefly-bloom-2b6-v2 harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_YeungNLP__firefly-bloom-2b6-v2 harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_YeungNLP__firefly-bloom-2b6-v2 harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_YeungNLP__firefly-bloom-2b6-v2 harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/details_YeungNLP__firefly-bloom-2b6-v2 harness_hendrycksTest_college_computer_science_5


SKIP open-llm-leaderboard/details_Y

 90%|█████████ | 54/60 [2:18:50<14:41, 146.94s/it]


OK open-llm-leaderboard/details_YeungNLP__firefly-bloom-2b6-v2 harness_gsm8k_5


Models skipped so far: 14



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 92%|█████████▏| 55/60 [2:21:56<13:13, 158.63s/it]


OK open-llm-leaderboard/details_yhyhy3__med-orca-instruct-33b harness_gsm8k_5


Models skipped so far: 14



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 93%|█████████▎| 56/60 [2:24:42<10:43, 160.84s/it]


OK open-llm-leaderboard/details_PygmalionAI__pygmalion-350m harness_gsm8k_5


Models skipped so far: 14



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 95%|█████████▌| 57/60 [2:27:28<08:07, 162.49s/it]


OK open-llm-leaderboard/details_vicgalle__alpaca-7b harness_gsm8k_5


Models skipped so far: 14


SKIP open-llm-leaderboard/details_microsoft__CodeGPT-small-py harness_hendrycksTest_abstract_algebra_5


SKIP open-llm-leaderboard/details_microsoft__CodeGPT-small-py harness_hendrycksTest_anatomy_5


SKIP open-llm-leaderboard/details_microsoft__CodeGPT-small-py harness_hendrycksTest_astronomy_5


SKIP open-llm-leaderboard/details_microsoft__CodeGPT-small-py harness_hendrycksTest_business_ethics_5


SKIP open-llm-leaderboard/details_microsoft__CodeGPT-small-py harness_hendrycksTest_clinical_knowledge_5


SKIP open-llm-leaderboard/details_microsoft__CodeGPT-small-py harness_hendrycksTest_college_biology_5


SKIP open-llm-leaderboard/details_microsoft__CodeGPT-small-py harness_hendrycksTest_college_chemistry_5


SKIP open-llm-leaderboard/details_microsoft__CodeGPT-small-py harness_hendrycksTest_college_computer_science_5


SKIP open-llm-leaderboard/details_microsoft__CodeGPT-small-py harnes

 97%|█████████▋| 58/60 [2:29:50<05:12, 156.37s/it]


OK open-llm-leaderboard/details_microsoft__CodeGPT-small-py harness_gsm8k_5


Models skipped so far: 15



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

 98%|█████████▊| 59/60 [2:32:30<02:37, 157.20s/it]


OK open-llm-leaderboard/details_EleutherAI__gpt-neo-125m harness_gsm8k_5


Models skipped so far: 15



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_abstract_algebra_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_anatomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_astronomy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_business_ethics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_clinical_knowledge_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_college_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_college_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_college_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_college_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_college_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_college_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_computer_security_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_conceptual_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_econometrics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_electrical_engineering_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_elementary_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_formal_logic_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_global_facts_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_biology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_chemistry_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_computer_science_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_european_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_geography_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_government_and_politics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_macroeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_mathematics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_microeconomics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_physics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_statistics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_us_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_high_school_world_history_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_human_aging_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_human_sexuality_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_international_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_jurisprudence_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_logical_fallacies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_machine_learning_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_management_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_marketing_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_medical_genetics_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_miscellaneous_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_moral_disputes_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_moral_scenarios_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_nutrition_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_philosophy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_prehistory_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_professional_accounting_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_professional_law_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_professional_medicine_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_professional_psychology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_public_relations_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_security_studies_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_sociology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_us_foreign_policy_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_virology_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_world_religions_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_arc_challenge_25



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hellaswag_10



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_hendrycksTest_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_truthfulqa_mc_0



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_winogrande_5



Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

100%|██████████| 60/60 [2:35:13<00:00, 155.22s/it]


OK open-llm-leaderboard/details_Locutusque__gpt2-conversational-or-qa harness_gsm8k_5


Models skipped so far: 15






In [9]:
# Debug check: show which scenario config name (`s`) and metric key (`metric`)
# are currently bound in the kernel. Neither name is defined in this cell;
# presumably both are left over from the last iteration of the download loop
# in an earlier cell -- TODO confirm, since this makes the cell depend on
# execution order.
print(f"{s} {metric}")

harness_gsm8k_5 acc


In [10]:
# Spot-check: fetch the 'latest' split of config `s` from the dataset repo
# `model` and let the notebook display the resulting Dataset repr (column
# names and row count). `model` and `s` are not defined in this cell;
# presumably they are the values left by the last iteration of the download
# loop in an earlier cell -- TODO confirm.
load_dataset(
    path=model,
    name=s,
    split="latest",
)

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/57 [00:00<?, ?it/s]

Dataset({
    features: ['acc', 'answer', 'cont_tokens', 'example', 'full_prompt', 'hashes', 'input_tokens', 'instruction', 'num_asked_few_shots', 'num_effective_few_shots', 'padded', 'predictions', 'question', 'truncated'],
    num_rows: 1319
})