### Process MMLU

In [1]:
from datasets import load_dataset
import os
import pandas as pd
from utils import save_to
# Hugging Face Hub dataset id passed to load_dataset.
name = "lighteval/mmlu"

# The 57 subject configs to load, one per line, in alphabetical order.
configs = [
    'abstract_algebra',
    'anatomy',
    'astronomy',
    'business_ethics',
    'clinical_knowledge',
    'college_biology',
    'college_chemistry',
    'college_computer_science',
    'college_mathematics',
    'college_medicine',
    'college_physics',
    'computer_security',
    'conceptual_physics',
    'econometrics',
    'electrical_engineering',
    'elementary_mathematics',
    'formal_logic',
    'global_facts',
    'high_school_biology',
    'high_school_chemistry',
    'high_school_computer_science',
    'high_school_european_history',
    'high_school_geography',
    'high_school_government_and_politics',
    'high_school_macroeconomics',
    'high_school_mathematics',
    'high_school_microeconomics',
    'high_school_physics',
    'high_school_psychology',
    'high_school_statistics',
    'high_school_us_history',
    'high_school_world_history',
    'human_aging',
    'human_sexuality',
    'international_law',
    'jurisprudence',
    'logical_fallacies',
    'machine_learning',
    'management',
    'marketing',
    'medical_genetics',
    'miscellaneous',
    'moral_disputes',
    'moral_scenarios',
    'nutrition',
    'philosophy',
    'prehistory',
    'professional_accounting',
    'professional_law',
    'professional_medicine',
    'professional_psychology',
    'public_relations',
    'security_studies',
    'sociology',
    'us_foreign_policy',
    'virology',
    'world_religions',
]

# Read the Hugging Face token once; a missing HF_AUTH_TOKEN raises KeyError
# before any download is attempted.
hf_token = os.environ['HF_AUTH_TOKEN']

# Collect the 'dev' split of every subject config, then concatenate a single
# time. Calling pd.concat inside the loop re-copies every previously
# accumulated row on each iteration.
dev_frames = []
for config in configs:
    dataset = load_dataset(name, config, token=hf_token)
    dev_frames.append(pd.DataFrame(dataset['dev']))

# Row labels from each per-config frame are kept (no ignore_index), so the
# result has the same repeated labels the incremental concat produced.
# pd.concat rejects an empty list, so fall back to an empty frame in that case.
df = pd.concat(dev_frames) if dev_frames else pd.DataFrame()

df.head()

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,question,subject,choices,answer
0,Find all c in Z_3 such that Z_3[x]/(x^2 + c) i...,abstract_algebra,"[0, 1, 2, 3]",1
1,Statement 1 | If aH is an element of a factor ...,abstract_algebra,"[True, True, False, False, True, False, False,...",1
2,Statement 1 | Every element of a group generat...,abstract_algebra,"[True, True, False, False, True, False, False,...",2
3,Statement 1| Every function from a finite set ...,abstract_algebra,"[True, True, False, False, True, False, False,...",0
4,Find the characteristic of the ring 2Z.,abstract_algebra,"[0, 3, 12, 30]",0


In [2]:
# Show full cell contents when frames are displayed (no column-width truncation).
pd.set_option('display.max_colwidth', None)

# Rename columns: question -> user, subject -> dataset.
df = df.rename(columns={"question": "user", "subject": "dataset"})

# Prefix every subject name with "mmlu_".
df['dataset'] = df['dataset'].map(lambda subject: "mmlu_" + subject)
def fix_question(row):
    """Return the row's question text followed by its lettered answer options.

    Reads ``row['user']`` (the question string) and ``row['choices']`` (the
    answer options). Each option becomes one ``(<letter>) <choice>`` line,
    lettered from A in order. Only the letters A-H are available, so a row
    with more than eight choices raises ``IndexError``.
    """
    labels = "ABCDEFGH"
    option_lines = []
    for position, choice in enumerate(row['choices']):
        option_lines.append(f"({labels[position]}) {choice}")
    # The newline after the question is always emitted, even with no options.
    return row['user'] + "\n" + "\n".join(option_lines)
# Overwrite 'user' with the question text plus its lettered answer options,
# computed row by row (axis=1) from the 'user' and 'choices' columns.
df['user'] = df.apply(fix_question, axis=1)
def fix_answer(row):
    """Return the correct option formatted as ``(<letter>) <choice text>``.

    ``row['answer']`` is used as a position into both the letter table (A-H)
    and ``row['choices']``, so the letter and the text always refer to the
    same option.
    """
    position = row['answer']
    label = "ABCDEFGH"[position]
    choice = row['choices'][position]
    return f"({label}) {choice}"
# Overwrite 'answer' (used by fix_answer as a position into 'choices') with
# the formatted "(<letter>) <choice text>" string.
df['answer'] = df.apply(fix_answer, axis=1)
# These rows get an empty string in the 'thought' column.
df['thought'] = ""
# Keep only these four columns, in this order; 'choices' is dropped here.
# NOTE(review): because 'choices' is dropped and 'answer' is overwritten, this
# cell cannot be re-run without first re-running the cell that loads df.
df = df[['dataset', 'user', 'thought', 'answer']]
df.head()

Unnamed: 0,dataset,user,thought,answer
0,mmlu_abstract_algebra,Find all c in Z_3 such that Z_3[x]/(x^2 + c) is a field.\n(A) 0\n(B) 1\n(C) 2\n(D) 3,,(B) 1
1,mmlu_abstract_algebra,"Statement 1 | If aH is an element of a factor group, then |aH| divides |a|. Statement 2 | If H and K are subgroups of G then HK is a subgroup of G.\n(A) True, True\n(B) False, False\n(C) True, False\n(D) False, True",,"(B) False, False"
2,mmlu_abstract_algebra,"Statement 1 | Every element of a group generates a cyclic subgroup of the group. Statement 2 | The symmetric group S_10 has 10 elements.\n(A) True, True\n(B) False, False\n(C) True, False\n(D) False, True",,"(C) True, False"
3,mmlu_abstract_algebra,"Statement 1| Every function from a finite set onto itself must be one to one. Statement 2 | Every subgroup of an abelian group is abelian.\n(A) True, True\n(B) False, False\n(C) True, False\n(D) False, True",,"(A) True, True"
4,mmlu_abstract_algebra,Find the characteristic of the ring 2Z.\n(A) 0\n(B) 3\n(C) 12\n(D) 30,,(A) 0


In [3]:
# Load the existing few-shot prompt pool and split it into MMLU / non-MMLU rows.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
few_shot_prompts = pd.read_json("/home/cd2853/rational_metareasoning/data/few_shot_prompts.json")
is_mmlu = few_shot_prompts['dataset'].str.contains("mmlu")
few_shot_prompts_mmlu = few_shot_prompts[is_mmlu]
few_shot_prompts_non_mmlu = few_shot_prompts[~is_mmlu]

# Existing MMLU prompts are placed first so that, when thought lengths tie,
# the stable sort below keeps the already-stored row over the new one.
df = pd.concat([few_shot_prompts_mmlu, df])

# If the same prompt ('user') appears more than once, keep the row with the
# longest thought. Sorting the 'thought' strings directly would order them
# alphabetically rather than by length, so sort on the character count.
# Missing thoughts count as length 0. mergesort is stable, which makes the
# tie-breaking (and the final row order among equal labels) deterministic.
df = (
    df.sort_values(
        'thought',
        key=lambda thoughts: thoughts.fillna("").astype(str).str.len(),
        ascending=False,
        kind="mergesort",
    )
    .drop_duplicates('user')
    .sort_index(kind="mergesort")
)

# Put the untouched non-MMLU prompts back in front of the merged MMLU rows.
df = pd.concat([few_shot_prompts_non_mmlu, df])
df.head()

Unnamed: 0,dataset,user,thought,answer
0,folio,"Premises:\n- Phuoc Binh national park is a national park in Vietnam. \n- Any national park in Vietnam is classified as a nature reserve. \n- There is a national park in Vietnam classified as a UNESCO World Heritage Site.\n- All national parks in Vietnam are either managed by the Ministry of Agriculture or managed by the People's Committee. \n- Phuoc Binh is not managed by the Ministry of Agriculture.\nConclusion:\nThere is a nature reserve in Vietnam.\nQuestion: is the previous conclusion True, False or Unknown given the premises?","To determine if the conclusion ""There is a nature reserve in Vietnam"" is true, we need to verify if any of the premises imply the existence of a nature reserve in Vietnam.\n\n1. The first premise states that Phuoc Binh national park is a national park in Vietnam.\n2. The second premise states that any national park in Vietnam is classified as a nature reserve.\n\nSince Phuoc Binh national park is a national park in Vietnam (premise 1), and any national park in Vietnam is classified as a nature reserve (premise 2), it follows that Phuoc Binh national park is a nature reserve.\n\nTherefore, there is at least one nature reserve in Vietnam.\n",True
1,folio,"Premises:\n- All products designed by Apple are sold at Apple Stores.\n- All products with Apple logos are designed by Apple.\n- All Macbooks have Apple logos.\n- All products with Apple M2 chips are Mackbooks.\n- A Thinkpad X1 is not both sold in Apple Stores and is a Macbook.\nConclusion:\nThe Thinkpad X1 either has an Apple M2 chip or is a Macbook.\nQuestion: is the previous conclusion True, False or Unknown given the premises?","I need to analyze the premises and see if they logically lead to the conclusion. Let's break down the premises and see if the conclusion follows.\n\n1. All products designed by Apple are sold at Apple Stores.\n2. All products with Apple logos are designed by Apple.\n3. All Macbooks have Apple logos.\n4. All products with Apple M2 chips are Macbooks.\n5. A Thinkpad X1 is not both sold in Apple Stores and is a Macbook.\n\nThe conclusion is: The Thinkpad X1 either has an Apple M2 chip or is a Macbook.\n\nLet's analyze the premises step by step:\n\n- From premise 4, if a product has an Apple M2 chip, it must be a Macbook.\n- From premise 3, if something is a Macbook, it has an Apple logo.\n- From premise 2, if something has an Apple logo, it is designed by Apple.\n- From premise 1, if something is designed by Apple, it is sold at Apple Stores.\n- Premise 5 states that a Thinkpad X1 is not both sold in Apple Stores and is a Macbook.\n\nNow, let's see if the Thinkpad X1 can have an Apple M2 chip or be a Macbook:\n- If the Thinkpad X1 had an Apple M2 chip, it would be a Macbook (by premise 4).\n- If the Thinkpad X1 were a Macbook, it would have an Apple logo (by premise 3), be designed by Apple (by premise 2), and be sold at Apple Stores (by premise 1).\n\nPremise 5 contradicts the possibility of the Thinkpad X1 being a Macbook or having an Apple M2 chip because it would then be sold at Apple Stores, which it is not.\n\nTherefore, the conclusion that the Thinkpad X1 either has an Apple M2 chip or is a Macbook is False.\n",False
2,folio,"Premises:\n- Ernest Pohl was a Polish football player. \n- A football player in the Polish First Division has scored over 180 goals. \n- Ernest Pohl scored more than 180 goals in the Polish First Division. \n- Górnik Zabrze's stadium was named after a soccer player from Ruda Śląska. \n- Ernest Pohl is from Ruda Śląska. \nConclusion:\nGórnik Zabrze's stadium was named after Ernest Pohl.\nQuestion: is the previous conclusion True, False or Unknown given the premises?","I need to determine whether the conclusion logically follows from the given premises. Here are the premises:\n\n1. Ernest Pohl was a Polish football player.\n2. A football player in the Polish First Division has scored over 180 goals.\n3. Ernest Pohl scored more than 180 goals in the Polish First Division.\n4. Górnik Zabrze's stadium was named after a soccer player from Ruda Śląska.\n5. Ernest Pohl is from Ruda Śląska.\n\nThe conclusion is that Górnik Zabrze's stadium was named after Ernest Pohl. \n\nWhile the premises provide information that Ernest Pohl is a notable football player from Ruda Śląska and that Górnik Zabrze's stadium was named after a soccer player from Ruda Śląska, they do not explicitly state that the stadium was named after Ernest Pohl. There could be other soccer players from Ruda Śląska.\n",Unknown
3,folio,"Premises:\n- All brain study designs are either block designs or event-related designs. \n- All event-related brain study designs are brain image acquisition.\n- All brain image acquisition in brain study designs is preceded by data processing.\n- Nothing in brain study designs preceded by data processing analyzes data.\n- Picture memory is a type of brain study design that is not either event-related or analyzing data.\nConclusion:\nIf picture memory is not analyzing data, then picture memory is a block design and analyzing data.\nQuestion: is the previous conclusion True, False or Unknown given the premises?","Let's analyze the premises and the conclusion step by step.\n\n1. All brain study designs are either block designs or event-related designs.\n2. All event-related brain study designs are brain image acquisition.\n3. All brain image acquisition in brain study designs is preceded by data processing.\n4. Nothing in brain study designs preceded by data processing analyzes data.\n5. Picture memory is a type of brain study design that is not either event-related or analyzing data.\n\nGiven these premises, the conclusion states:\n""If picture memory is not analyzing data, then picture memory is a block design and analyzing data.""\n\nLet's break down the conclusion:\n- From premise 5, we know that picture memory is not analyzing data.\n- Picture memory is a type of brain study design, but it is not event-related (premise 5).\n- Therefore, by premise 1, picture memory must be a block design.\n\nNow, the conclusion states that picture memory is both a block design and analyzing data. But from premise 5, we know that picture memory is not analyzing data. This directly contradicts the conclusion.\n",False
4,folio,"Premises:\n- New York City is Located in the United States of America.\n- The United States of America is part of North America.\n- North America is in the western hemisphere of the earth.\n- New York City is a highly developed city.\n- If place A is located in place B and place B is located in place C, then place A is located in place C.\nConclusion:\nNew York City is located in New York State.\nQuestion: is the previous conclusion True, False or Unknown given the premises?","The premises given do not provide any information about the specific location of New York City within New York State. They only describe the broader geographical context and the development status of New York City. The logical chain provided relates to New York City being part of the United States, North America, and the western hemisphere but does not mention New York State.\n",Unknown


In [4]:
# Persist the merged prompt table with the project helper save_to (imported from utils).
# This is the same path the existing prompts were loaded from with pd.read_json,
# so the input file is overwritten by this call.
# NOTE(review): the on-disk format is decided inside save_to — presumably JSON
# given the file extension; confirm against utils.
save_to(df, "/home/cd2853/rational_metareasoning/data/few_shot_prompts.json")