In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Interact with Llama model
def generate_completion(model, tokenizer, messages, max_length = 1000, temperature = 0.7):
    prompt = ""
    for message in messages:
        role = message["role"]
        content = message["content"]
        
        if role == "system":
            prompt += f"System: {content}\n"
        elif role == "user":
            prompt += f"User: {content}\n"
        elif role == "assistant":
            prompt += f"Assistant: {content}\n"
    
    prompt += "Assistant: "
    
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    input_ids = inputs.input_ids
        
    outputs = model.generate(input_ids, max_length=max_length, temperature=temperature)
            
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = generated_text[len(prompt):]
    
    completion = {
        "choices": [{
            "message": {
                "role": "assistant",
                "content": response.strip()
            },
            "finish_reason": "stop"
        }],
        "usage": {
            "prompt_tokens": len(inputs["input_ids"][0]),
            "completion_tokens": len(outputs[0]) - len(inputs["input_ids"][0]),
            "total_tokens": len(outputs[0])
        }
    }
    
    return completion

In [10]:
import openai
import json
import numpy as np
import random

def construct_message(agents, question, idx):
    if len(agents) == 0:
        return {"role": "user", "content": "Can you double check that your answer is correct. Please reiterate your answer, with your final answer a single numerical number, in the form \\boxed{{answer}}."}

    prefix_string = "These are the solutions to the problem from other agents: "

    for agent in agents:
        agent_response = agent[idx]["content"]
        response = "\n\n One agent solution: ```{}```".format(agent_response)

        prefix_string = prefix_string + response

    prefix_string = prefix_string + """\n\n Using the solutions from other agents as additional information, can you provide your answer to the math problem? \n The original math problem is {}. Your final answer should be a single numerical number, in the form \\boxed{{answer}}, at the end of your response.""".format(question)
    return {"role": "user", "content": prefix_string}


def construct_assistant_message(completion):
    content = completion["choices"][0]["message"]["content"]
    return {"role": "assistant", "content": content}


def read_jsonl(path: str):
    with open(path) as fh:
        return [json.loads(line) for line in fh.readlines() if line]

if __name__ == "__main__":
    agents = 3
    rounds = 2
    random.seed(0)

    generated_description = {}

    questions = read_jsonl("/Users/akommula/code/research/multiagent_debate/grade-school-math/grade_school_math/data/test.jsonl")
    random.shuffle(questions)

    for data in questions[:100]:
        question = data['question']
        answer = data['answer']

        agent_contexts = [[{"role": "user", "content": """Can you solve the following math problem? {} Explain your reasoning. Your final answer should be a single numerical number, in the form \\boxed{{answer}}, at the end of your response. """.format(question)}] for agent in range(agents)]

        for round in range(rounds):        
            for i, agent_context in enumerate(agent_contexts):
                if round != 0:
                    agent_contexts_other = agent_contexts[:i] + agent_contexts[i+1:]
                    message = construct_message(agent_contexts_other, question, 2*round - 1)
                    agent_context.append(message)

                print(f"Round: {round}, Agent: {i}, Other Context: {agent_contexts_other}; Agent Context: {agent_context}.\n\n\n")

                completion = generate_completion(model, tokenizer, agent_context)

                print(f"Completion: {completion}\n\n\n")

                assistant_message = construct_assistant_message(completion)
                agent_context.append(assistant_message)

        generated_description[question] = (agent_contexts, answer)
        break

    json.dump(generated_description, open("gsm_{}_{}.json".format(agents, rounds), "w"))
    
    import pdb
    pdb.set_trace()
    print(answer)
    print(agent_context)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Round: 0
Agent Context:  [{'role': 'user', 'content': 'Can you solve the following math problem? Amber, Micah, and Ahito ran 52 miles in total. Amber ran 8 miles. Micah ran 3.5 times what Amber ran. How many miles did Ahito run? Explain your reasoning. Your final answer should be a single numerical number, in the form \\boxed{answer}, at the end of your response. '}]
Original Context: [{'role': 'user', 'content': 'Can you solve the following math problem? Amber, Micah, and Ahito ran 52 miles in total. Amber ran 8 miles. Micah ran 3.5 times what Amber ran. How many miles did Ahito run? Explain your reasoning. Your final answer should be a single numerical number, in the form \\boxed{answer}, at the end of your response. '}]
Modified Prompt: User: Can you solve the following math problem? Amber, Micah, and Ahito ran 52 miles in total. Amber ran 8 miles. Micah ran 3.5 times what Amber ran. How many miles did Ahito run? Explain your reasoning. Your final answer should be a single numerical

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


tensor([128000,   1502,     25,   3053,    499,  11886,    279,   2768,   7033,
          3575,     30,  47764,     11,  28095,   1494,     11,    323,  16770,
          6491,  10837,    220,   4103,   8931,    304,   2860,     13,  47764,
         10837,    220,     23,   8931,     13,  28095,   1494,  10837,    220,
            18,     13,     20,   3115,   1148,  47764,  10837,     13,   2650,
          1690,   8931,   1550,  16770,   6491,   1629,     30,  83017,    701,
         33811,     13,   4718,   1620,   4320,   1288,    387,    264,   3254,
         35876,   1396,     11,    304,    279,   1376,   1144,  80175,     90,
          9399,   2186,    520,    279,    842,    315,    701,   2077,     13,
           720,  72803,     25,    220,     16,      8,    220,   4103,    489,
           220,     23,    489,    220,     18,     13,     20,  25800,    220,
            23,    284,    220,   4103,    489,    220,     23,    489,    220,
          1544,    284,    220,   4044, 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


tensor([128000,   1502,     25,   3053,    499,  11886,    279,   2768,   7033,
          3575,     30,  47764,     11,  28095,   1494,     11,    323,  16770,
          6491,  10837,    220,   4103,   8931,    304,   2860,     13,  47764,
         10837,    220,     23,   8931,     13,  28095,   1494,  10837,    220,
            18,     13,     20,   3115,   1148,  47764,  10837,     13,   2650,
          1690,   8931,   1550,  16770,   6491,   1629,     30,  83017,    701,
         33811,     13,   4718,   1620,   4320,   1288,    387,    264,   3254,
         35876,   1396,     11,    304,    279,   1376,   1144,  80175,     90,
          9399,   2186,    520,    279,    842,    315,    701,   2077,     13,
           720,  72803,     25,    220,   4194,   2028,    374,    459,   1825,
          3575,     13, 128001]) <class 'torch.Tensor'>
Outputs: tensor([[128000,   1502,     25,   3053,    499,  11886,    279,   2768,   7033,
           3575,     30,  47764,     11,  28095,   149

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


tensor([128000,   1502,     25,   3053,    499,  11886,    279,   2768,   7033,
          3575,     30,  47764,     11,  28095,   1494,     11,    323,  16770,
          6491,  10837,    220,   4103,   8931,    304,   2860,     13,  47764,
         10837,    220,     23,   8931,     13,  28095,   1494,  10837,    220,
            18,     13,     20,   3115,   1148,  47764,  10837,     13,   2650,
          1690,   8931,   1550,  16770,   6491,   1629,     30,  83017,    701,
         33811,     13,   4718,   1620,   4320,   1288,    387,    264,   3254,
         35876,   1396,     11,    304,    279,   1376,   1144,  80175,     90,
          9399,   2186,    520,    279,    842,    315,    701,   2077,     13,
           720,  72803,     25,    220,     16,     13,   3053,    499,  11886,
           279,   2768,   7033,   3575,     30,  47764,     11,  28095,   1494,
            11,    323,  16770,   6491,  10837,    220,   4103,   8931,    304,
          2860,     13,  47764,  10837, 

ValueError: Input length of input_ids is 1937, but `max_length` is set to 1000. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.