In [1]:
from glob import glob
import pandas as pd
import json
import time
import random
from datasets import load_dataset
import random
from tqdm import tqdm
from together import Together
import os
import torch

In [5]:
# Inspect the stored result for one sampled question. Each `response_dict` value is an
# (agent_contexts, answer) tuple, so `[0]` selects the per-agent chat transcripts.
# NOTE(review): this cell needs `response_dict` to have been filled by the debate cell first.
example_question = (
    "Can you answer the following question as accurately as possible?  "
    "In Carruther's view, the justification for moral criticism for things that we do (or don't to do) to an animal derives from: "
    "A) the animal's direct moral standing., "
    "B) the animals capacity for suffering., "
    "C) the bad qualities of character that our actions evince., "
    "D) none of the above "
    "Explain your answer, putting the answer in the form (X) at the end of your response."
)
response_dict[example_question][0]

[[{'role': 'user',
   'content': "You are a humanities professor who has a deep understanding of human culture, history, philosophy, and the arts. Can you answer the following question as accurately as possible?  In Carruther's view, the justification for moral criticism for things that we do (or don't to do) to an animal derives from: A) the animal's direct moral standing., B) the animals capacity for suffering., C) the bad qualities of character that our actions evince., D) none of the above Explain your answer, putting the answer in the form (X) at the end of your response."},
  {'role': 'assistant',
   'content': "This is an answer from a humanities professor who has a deep understanding of human culture, history, philosophy, and the arts. A question that delves into the complex and nuanced world of animal ethics!\n\nIn Carruthers' view, the justification for moral criticism for things that we do (or don't do) to an animal derives from:\n\nB) the animal's capacity for suffering.\n\

In [12]:
def construct_message(agents, question, idx):
    """Build the next user-turn message for one debating agent.

    Args:
        agents: Chat histories (lists of ``{"role", "content"}`` dicts) of the
            other agents. When empty, a generic self-check prompt is returned.
        question: Accepted for interface compatibility; it does not appear in
            the generated prompt text.
        idx: Position, within each history, of the message whose ``"content"``
            is quoted back to the agent.

    Returns:
        dict: A ``{"role": "user", "content": ...}`` chat message.
    """
    if len(agents) == 0:
        return {
            "role": "user",
            "content": "Can you double check that your answer is correct. Put your final answer in the form (X) at the end of your response.",
        }

    # One fenced quote per peer, in the order the peers were supplied.
    quoted_solutions = "".join(
        "\n\n One agent solution: ```{}```".format(history[idx]["content"])
        for history in agents
    )
    opening = "These are the solutions to the problem from other agents: "
    closing = "\n\n Using the reasoning from other agents as additional advice, can you give an updated answer? Examine your solution and that other agents step by step. Put your answer in the form (X) at the end of your response."

    return {"role": "user", "content": opening + quoted_solutions + closing}


def construct_assistant_message(completion):
    """Wrap the first choice of a chat completion as an assistant chat message.

    Args:
        completion: Response object exposing ``choices[0].message.content``.

    Returns:
        dict: ``{"role": "assistant", "content": <text of the first choice>}``.
    """
    first_choice = completion.choices[0]
    return {"role": "assistant", "content": first_choice.message.content}


def generate_answer(answer_context, model="meta-llama/Llama-3-8b-chat-hf"):
    """Request a chat completion for the given conversation.

    The conversation is taken from the ``answer_context`` argument. Previously
    the body read a global named ``agent_context``, which only worked when the
    caller happened to have a variable of that exact name in scope.

    Args:
        answer_context: List of ``{"role", "content"}`` chat messages to send.
        model: Model identifier passed to the API; defaults to the model this
            notebook was originally hard-coded to use.

    Returns:
        The completion object returned by ``client.chat.completions.create``.
        ``client`` is a module-level object that must be defined before this
        function is called.
    """
    return client.chat.completions.create(
        model=model,
        messages=answer_context,
    )


def parse_question_answer(df, ix):
    """Format one multiple-choice row as a prompt and return it with its answer.

    Columns are read by position: 0 is the question text, 1-4 are choices
    A-D, and 5 is the answer.

    Args:
        df: DataFrame with at least six columns in the order described above.
        ix: Integer row position (as used by ``DataFrame.iloc``).

    Returns:
        tuple: ``(prompt, answer)`` where ``prompt`` is the formatted question
        string and ``answer`` is the value from column 5 as a native Python
        object.
    """
    question, a, b, c, d = (df.iloc[ix, col] for col in range(5))

    question = "Can you answer the following question as accurately as possible? {}: A) {}, B) {}, C) {}, D) {} Explain your answer, putting the answer in the form (X) at the end of your response.".format(question, a, b, c, d)

    answer = df.iloc[ix, 5]
    # Numeric columns yield NumPy scalars (e.g. numpy.int64), which json.dump
    # rejects; unwrap them so the answer can be serialised with the transcripts.
    if hasattr(answer, "item"):
        answer = answer.item()

    return question, answer

In [3]:
# Fetch the "moral_disputes" subject of MMLU via Hugging Face `datasets`.
dataset = load_dataset("cais/mmlu", "moral_disputes")

# Only the test split is converted to a DataFrame; the other splits in
# `dataset` are left untouched.
df = dataset["test"].to_pandas()

Downloading readme:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/138k [00:00<?, ?B/s]

Downloading data: 100%|███████████████████████████████████████████████████| 60.9k/60.9k [00:02<00:00, 29.8kB/s]
Downloading data: 100%|███████████████████████████████████████████████████| 10.7k/10.7k [00:00<00:00, 80.7kB/s]
Downloading data: 100%|███████████████████████████████████████████████████| 4.41k/4.41k [00:00<00:00, 37.9kB/s]


Generating test split:   0%|          | 0/346 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/38 [00:00<?, ? examples/s]

Generating dev split:   0%|          | 0/5 [00:00<?, ? examples/s]

In [6]:
# Spread each row's `choices` sequence into four named columns, keeping the
# original row index so the frames line up when concatenated.
choices_df = pd.DataFrame(
    df["choices"].tolist(),
    index=df.index,
    columns=["choice_a", "choice_b", "choice_c", "choice_d"],
)

# Swap the packed `choices` column for the four expanded ones.
expanded_df = pd.concat([df.drop(columns=["choices"]), choices_df], axis=1)

# Keep only the columns the prompt builder reads, in positional order:
# question, the four choices, then the answer.
new_column_order = ["question", "choice_a", "choice_b", "choice_c", "choice_d", "answer"]
rearranged_df = expanded_df[new_column_order]

# CSV export (disabled). NOTE(review): the debate cell reads
# "mmlu_moral_disputes.csv" from the working directory, so this export has to
# be run once with a matching path for that cell to find its input.
#rearranged_df.to_csv("/content/drive/MyDrive/mind2mind/baseline/mmlu/mmlu_moral_disputes.csv", index=False)

In [17]:
# Persona prefixes: agent k's opening user message starts with personas[k].
personas = ['You are a humanities professor who has a deep understanding of human culture, history, philosophy, and the arts.',
           'You are a mathematician who has strong quantitative skills and who provides analytical and logical perspectives, often using mathematical principles and models to address questions.',
           'You are a doctor who provides medical and health-related expertise, focusing on the biological, psychological, and physiological aspects of issues.']

agents = 3
rounds = 3
if agents > len(personas):
    raise ValueError("agents ({}) exceeds the number of personas ({})".format(agents, len(personas)))

tasks = glob("mmlu_moral_disputes.csv")
if not tasks:
    # Fail early with a clear message instead of the opaque IndexError that
    # random.choice raises on an empty list.
    raise FileNotFoundError(
        "mmlu_moral_disputes.csv not found in the working directory; "
        "export rearranged_df to that path before running this cell."
    )

dfs = [pd.read_csv(task) for task in tasks]


def _json_default(value):
    """json.dump fallback: unwrap objects exposing .item() (e.g. NumPy scalars)."""
    if hasattr(value, "item"):
        return value.item()
    raise TypeError("Object of type {} is not JSON serializable".format(type(value).__name__))


random.seed(0)
response_dict = {}

for _ in tqdm(range(100)):
    # `task_df` rather than `df`, so the frame loaded from Hugging Face earlier
    # in the notebook is not clobbered when this cell runs.
    task_df = random.choice(dfs)
    idx = random.randint(0, len(task_df) - 1)

    question, answer = parse_question_answer(task_df, idx)

    # One independent chat history per agent, each opened with its persona.
    agent_contexts = [[{"role": "user", "content": personas[agent] + ' ' + question}] for agent in range(agents)]

    for round_idx in range(rounds):
        for agent_idx, agent_context in enumerate(agent_contexts):
            print('ROUND: %d, AGENT: %d' % (round_idx, agent_idx))
            if round_idx != 0:
                agent_contexts_other = agent_contexts[:agent_idx] + agent_contexts[agent_idx+1:]
                # Histories alternate user/assistant, so index 2*round_idx - 1 is
                # each peer's assistant reply from the previous round.
                message = construct_message(agent_contexts_other, question, 2 * round_idx - 1)
                agent_context.append(message)

            print(agent_context)
            completion = generate_answer(agent_context)

            assistant_message = construct_assistant_message(completion)
            agent_context.append(assistant_message)
            print(completion)

    response_dict[question] = (agent_contexts, answer)

# Context manager guarantees the file is flushed and closed; the default hook
# keeps NumPy scalar answers from aborting the dump after all the API calls.
with open("mmlu_{}_{}.json".format(agents, rounds), "w") as out_file:
    json.dump(response_dict, out_file, default=_json_default)

  0%|                                                                                  | 0/100 [00:00<?, ?it/s]

ROUND: 0, AGENT: 0
[{'role': 'user', 'content': "You are a humanities professor who has a deep understanding of human culture, history, philosophy, and the arts. Can you answer the following question as accurately as possible?  In Carruther's view, the justification for moral criticism for things that we do (or don't to do) to an animal derives from: A) the animal's direct moral standing., B) the animals capacity for suffering., C) the bad qualities of character that our actions evince., D) none of the above Explain your answer, putting the answer in the form (X) at the end of your response."}]
id='888b85efef51cf41-SJC' object=<ObjectType.ChatCompletion: 'chat.completion'> created=1716535229 model='meta-llama/Llama-3-8b-chat-hf' choices=[ChatCompletionChoicesData(index=0, logprobs=None, finish_reason=<FinishReason.StopSequence: 'stop'>, message=ChatCompletionMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content="A question that delves into the complex and nuanced world of animal e

  1%|▋                                                                         | 1/100 [00:23<38:17, 23.20s/it]

id='888b866c2e10cf41-SJC' object=<ObjectType.ChatCompletion: 'chat.completion'> created=1716535249 model='meta-llama/Llama-3-8b-chat-hf' choices=[ChatCompletionChoicesData(index=0, logprobs=None, finish_reason=<FinishReason.StopSequence: 'stop'>, message=ChatCompletionMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='After re-examining my initial response and the solutions provided by other agents, I am confident that the correct answer is:\n\n(X) B) the animal\'s capacity for suffering.\n\nThe reasoning from other agents provides additional insights and reinforces my original answer. Here\'s a step-by-step breakdown of the reasoning:\n\n1. Carruthers argues that animals have inherent value and moral standing because they are capable of experiencing pleasure and pain.\n2. The capacity for suffering is a fundamental aspect of an animal\'s well-being, and our treatment of them is morally significant because it affects their well-being and capacity for suffering.\n3. Carruthers 

  2%|█▍                                                                        | 2/100 [00:40<32:34, 19.94s/it]

id='888b86d9fe05cf41-SJC' object=<ObjectType.ChatCompletion: 'chat.completion'> created=1716535267 model='meta-llama/Llama-3-8b-chat-hf' choices=[ChatCompletionChoicesData(index=0, logprobs=None, finish_reason=<FinishReason.StopSequence: 'stop'>, message=ChatCompletionMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content="Thank you for providing additional insights from other agents. Upon re-examining my initial response and the solutions from other agents, I agree that the correct answer is indeed:\n\n(X) D) the difference in social setting\n\nThe consensus among the agents is that the study found a significant decrease in heroin use among Vietnam veterans after their return home, and the factor that Shapiro cites as explaining this decrease is the difference in social setting. Both the Vietnam Veterans Administration's study and the Vietnam Era Twin Study suggest that the threat of punishment, stigma, and changed social norms in the military environment led to a significant redu

  2%|█▍                                                                        | 2/100 [00:55<45:22, 27.78s/it]


KeyboardInterrupt: 