In [16]:
# Imports

from dataclasses import dataclass
from typing import List

import random

import asyncio
import re
import math
import random
import numpy as np
from sympy import simplify

random.seed(0)

from async_engine.batched_api import BatchingAPI
from async_engine.api import API

from src.prompts.adapt import gameof24 as llama_prompts
from utils import parse_suggestions, create_box

In [17]:
# State class

@dataclass(frozen=True)
class GameOf24State:
    # game of 24 puzzle, for example 1 1 4 6
    puzzle: str

    # initialized to the same value as puzzle, but is updated as the game progresses
    current_state: str

    steps: List[str]

    #Randomness used for resampling (random seed)
    randomness: int

    def __hash__(self):
        return hash((self.puzzle, self.current_state, " -> ".join(self.steps)))
    
    def items(self):
        return self.puzzle, self.current_state, self.steps, self.randomness
    
    def duplicate(self, randomness=None):
        return GameOf24State(
            puzzle=self.puzzle,
            current_state=self.current_state,
            steps=self.steps,
            randomness=randomness if randomness is not None else self.randomness)

In [18]:
#Testing the game of 24

states = []
puzzle = "1 1 4 6"
example = GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000))

# for step in range(2):
#     print(f"Step {step} : Stepping")

print("Initial State")
print(example.items(), "\n")

print("Step: 0")
example = GameOf24State(puzzle="1 1 4 6", current_state="1 4 6", steps=example.steps + ["1 * 1 = 1"], randomness=random.randint(0, 1000))
print(example.items(), "\n")


print("Step: 1")
example = GameOf24State(puzzle="1 1 4 6", current_state="1 24", steps=example.steps + ["4 * 6 = 24"], randomness=random.randint(0, 1000))
print(example.items(), "\n")

print("Step: 2")
example = GameOf24State(puzzle="1 1 4 6", current_state="24", steps=example.steps + ["1 * 24 = 24"], randomness=random.randint(0, 1000))
print(example.items(), "\n")

Initial State
('1 1 4 6', '1 1 4 6', [], 864) 

Step: 0
('1 1 4 6', '1 4 6', ['1 * 1 = 1'], 394) 

Step: 1
('1 1 4 6', '1 24', ['1 * 1 = 1', '4 * 6 = 24'], 776) 

Step: 2
('1 1 4 6', '24', ['1 * 1 = 1', '4 * 6 = 24', '1 * 24 = 25'], 911) 



In [19]:
#Reflexion agent :O

class GameOf24Agent:

    @staticmethod
    async def step(state: GameOf24State, api, namespace, reflection: list)-> GameOf24State:
        """
        Given a state, returns the next state one.
        """

        # set up the prompt, based on the current state

        # ToT uses bfs_prompt to generate next steps but then uses
        # the cot_prompt to get the final expression. 
        # For example, input : 1 1 4 6
        # Step 0 : '1 - 1 = 0 (left: 0 4 6)'          BFS prompt
        # Step 1 : '0 + 4 = 4 (left: 4 6)'            BFS prompt
        # Step 2 : '4 * 6 = 24 (left: 24)'            BFS prompt
        # Step 3 : Answer : ((1 - 1) + 4) * 6 = 24    CoT prompt


        # set up the prompt, based on the current state

        current_state = state.current_state
        
        if current_state.strip() == "24":
            # CoT prompt
            steps = "\n".join(state.steps) + "\n"
            
            if len(reflection) == 0:
                prompt = llama_prompts.cot_prompt.format(input=state.puzzle) + "Steps:\n" + steps + "Answer: "
            else:
                prompt = llama_prompts.bfs_reflexion_prompt.format(input=current_state, puzzle = "1 1 4 6", reflection=reflection[0]) 
            

            # Set up CoT prompt
            # if any(author in api.model for author in ["meta", "google", "mistral", "gpt-4o"]):
            #     prompt = llama_prompts.cot_prompt.format(input=state.puzzle) + "Steps:\n" + steps + "Answer: "
            # else:
            #     prompt = totor_prompts.cot_prompt.format(input=state.puzzle) + "Steps:\n" + steps

            # Get the final expression
            suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            # State does not change, only the steps
            selected_suggestion = suggestions
            selected_state = state.current_state
            


        else:
            if len(reflection) == 0:
                prompt = llama_prompts.bfs_prompt.format(input=current_state) 
            else:
                prompt = llama_prompts.bfs_reflexion_prompt.format(input=current_state, puzzle = "1 1 4 6", reflection=reflection[0]) 
                
            # Set up BFS prompt
            # if any(author in api.model for author in ["meta", "google", "mistral", "gpt-4o"]):
            #     prompt = llama_prompts.bfs_prompt.format(input=current_state) + "Keep in mind the following critique from the last step: \n" + reflexion_suggestions
            # else:
            #     prompt = totor_prompts.bfs_prompt.format(input=current_state) + "Keep in mind the following critique from the last step: \n" + reflexion_suggestions

            # Get the next state
            # suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            # parse suggestions, based on the current state
            parsed_suggestions = parse_suggestions(suggestions)
            if parsed_suggestions == []:
                print(f"No suggestions were parsed from state: {state}")
                print(f"\nPrompt: {prompt}\nSuggestions: {suggestions}\nParsed suggestions: {' | '.join(parsed_suggestions)}\n")
                assert False, "No suggestions found."
            
            suggestions = parsed_suggestions
            
            random.seed(state.randomness)
            selected_suggestion = random.choice(suggestions)
            selected_state = GameOf24Agent.parse_next_state(selected_suggestion)

        # set up new state object
        next_state = GameOf24State(
            puzzle=state.puzzle,
            current_state=selected_state,
            steps=state.steps + [selected_suggestion],
            randomness=random.randint(0, 1000)
        )
        return next_state
    
    @staticmethod
    def parse_next_state(suggestion: str) -> str:
        return suggestion.split('left: ')[-1].split(')')[0]
    
    @staticmethod
    def verify(state: GameOf24State)-> dict:
            """
            Verifies the output of a given task
                1. Checks if the numbers used are the same as the ones provided.
                2. Checks if the operations performed result to 24.

            States 
                {"r": 0} : Not finished.
                {"r": 1} : Finished and correct.
                {"r": -1} : Finished and incorrect.
            """
            current_states = state.current_state.split(" ")
            if len(current_states) !=1 or len(state.steps)<=3:
                # More than one number left
                return {'r':0}
            elif current_states[0] != "24":
                # One number left and it is not 24
                return {'r':-1}
            else:
                # One number left and it is 24
                expression = state.steps[-1].lower().replace('answer: ', '').split('=')[0]
                numbers = re.findall(r'\d+', expression)
                problem_numbers = re.findall(r'\d+', state.puzzle)
                if sorted(numbers) != sorted(problem_numbers):
                    # Numbers used are not the same as the ones provided
                    return {'r': -1}
                try:
                    if simplify(expression) == 24:
                        return {'r': 1}
                    else:
                        # Operations performed do not result to 24
                        return {'r': -1}
                except Exception as e:
                    print(e)
                    return {'r': -1}

    @staticmethod
    def generate_reflection(puzzle: str, steps, state: GameOf24State, api, namespace) -> str:
        prompt = llama_prompts.reflexion_prompt.format(puzzle=puzzle, steps=steps)
        reflection = api.buffered_request(prompt, key=hash(state), namespace=namespace)
        return reflection


# Solve 1 1 4 6 puzzle:

In [20]:
# Initialization

step_api_config = eval_api_config = {
    "max_tokens": 1000,
    "temperature": 0,
    "top_p": 1,
    "request_timeout": 120,
    "top_k": 50
}

# eligible providers ["TogehterAI", "OpenAI", "Groq"]
model = "llama-3.3-70b-versatile"
provider = "Groq"
models = {
    "step": {"model_name":model, "provider":provider},
    "eval": {"model_name":model, "provider":provider},
}

api = API(eval_api_config, models=models.values(), resources=2, verbose=False)

puzzle = "1 1 4 6"
num_steps = 4
num_agents = 2




In [21]:
for agent_id, state in enumerate(states):
    print(agent_id)
    print(state)

In [22]:
# Attempting to solve the puzzle (without reflexion)

#Create initial state/environment
states = []
for _ in range(num_agents):
    states.append(GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000)))
step_batcher = BatchingAPI(api, batch_size=1, timeout=2, model=models["step"]["model_name"], tab="step")

finished_states = []

#Stepping
for step in range(num_steps):
    
    print(f"Step {step} : Stepping")
    agent_tasks = [
        asyncio.create_task(
        GameOf24Agent.step(state, step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"), reflection="")
        )
        for agent_id, state in enumerate(states)
    ]
    states = await asyncio.gather(*agent_tasks)
    for agent_id, state in enumerate(states):
        print(f"Current step for agent {agent_id}: {state.steps[-1]} \n")

    # Evaluate whether a puzzle has been solved
    i = 0
    while i < len(states):
        print("i: ", i)
        if GameOf24Agent.verify(states[i]) == {"r": 1}:
            print(f"Puzzle finished: {states[i].puzzle}")
            finished_states.append(states.pop(i))
        else:
            i += 1

    # If all puzzles have been solved, break
    if len(states) == 0:
        break

Step 0 : Stepping
Current step for agent 0: 6 + 4 = 10 (left: 1 1 10) 

Current step for agent 1: 4 - 1 = 3 (left: 1 3 6) 

i:  0
i:  1
Step 1 : Stepping
Current step for agent 0: 1 + 10 = 11 (left: 1 11) 

Current step for agent 1: 1 * 6 = 6 (left: 3 6) 

i:  0
i:  1
Step 2 : Stepping
Current step for agent 0: 11 - 1 = 10 (left: 10) 

Current step for agent 1: 3 * 6 = 18 (left: 18) 

i:  0
i:  1
Step 3 : Stepping
Current step for agent 0: 10 - 10 = 0 (left: 0) 

Current step for agent 1: No possible next steps, only one number remains. 

i:  0
i:  1


In [23]:
print(states)

[GameOf24State(puzzle='1 1 4 6', current_state='0', steps=['6 + 4 = 10 (left: 1 1 10)', '1 + 10 = 11 (left: 1 11)', '11 - 1 = 10 (left: 10)', '10 - 10 = 0 (left: 0)'], randomness=983), GameOf24State(puzzle='1 1 4 6', current_state='No possible next steps, only one number remains.', steps=['4 - 1 = 3 (left: 1 3 6)', '1 * 6 = 6 (left: 3 6)', '3 * 6 = 18 (left: 18)', 'No possible next steps, only one number remains.'], randomness=486)]


In [24]:
# Generate reflexions if the puzzle is not solved

agent_reflections = [
    asyncio.create_task(
    GameOf24Agent.generate_reflection(puzzle=puzzle, steps=state.steps, state=state, api=step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
    )
    for agent_id, state in enumerate(states)
]
reflection = await asyncio.gather(*agent_reflections)


for agent_id, reflection in enumerate(reflection):
    print(f"Reflection {agent_id}: {reflection}")

Reflection 0: The previous attempt contains a mistake. The error occurs when the operation results in a number that moves further away from the target of 24, specifically the step "11 - 1 = 10" which reduces the value instead of increasing it or maintaining a useful intermediate result. 

This mistake can be generalized as a failure to consider the overall goal and the most promising paths towards achieving it. To avoid similar mistakes, it's essential to evaluate each potential operation based on its likelihood of leading to the target result of 24, favoring operations that increase the value or create useful intermediate results.
Reflection 1: The mistake in the previous attempt is the sequential approach, which led to a dead end with only one number remaining. This mistake can be avoided by considering all possible combinations of operations and numbers from the start, rather than progressing step-by-step with a fixed sequence. A more effective approach would be to explore multiple 

In [25]:
print(len(reflection))

582


In [26]:
# Reattempting to solve the puzzle (with reflexion)

#Resetting
states = []
for _ in range(num_agents):
    states.append(GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000)))
finished_states = []



#Stepping
for step in range(num_steps):
    
    print(f"Step {step} : Stepping")
    agent_tasks = [
        asyncio.create_task(
        GameOf24Agent.step(state, step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"), reflection=reflection[agent_id])
        )
        for agent_id, state in enumerate(states)
    ]

    states = await asyncio.gather(*agent_tasks)

    for agent_id, state in enumerate(states):
        print(f"Current step for agent {agent_id}: {state.steps[-1]} \n")

    # Evaluate whether a puzzle has been solved
    i = 0
    while i < len(states):
        print("i: ", i)
        if GameOf24Agent.verify(states[i]) == {"r": 1}:
            print(f"Puzzle finished: {states[i].puzzle}")
            finished_states.append(states.pop(i))
        else:
            i += 1

    # If all puzzles have been solved, break
    if len(states) == 0:
        break

Step 0 : Stepping
Current step for agent 0: 4 * 6 = 24 (left: 1 24) 

Current step for agent 1: 1 + 6 = 7 (left: 1 4 7) 

i:  0
i:  1
Step 1 : Stepping
Current step for agent 0: 1 * 24 = 24 (left: 24) 

Current step for agent 1: 1 - 7 = -6 (left: -6 4) 

i:  0
i:  1
Step 2 : Stepping
Current step for agent 0: 24 / 2 = 12 (left: 12)
24 * 2 = 48 (not possible since 2 is not in the input)
24 + 2 = 26 (not possible since 2 is not in the input)
24 - 2 = 22 (not possible since 2 is not in the input)
Since there is only one number, the possible next steps are limited to operations with itself or the number 1 which is not in the input, however one possible operation is 
24 / 1 = 24 (left: 24)
24 * 1 = 24 (not possible since 1 is not in the input)
24 + 1 = 25 (not possible since 1 is not in the input)
24 - 1 = 23 (not possible since 1 is not in the input)
24 / 24 = 1 (left: 1)
24 * 24 = 576 (left: 576)
24 + 24 = 48 (left: 48)
24 - 24 = 0 (left: 0) 

Current step for agent 1: -6 + 4 = -2 (left: 

In [27]:
print(states)

[GameOf24State(puzzle='1 1 4 6', current_state='24', steps=['4 * 6 = 24 (left: 1 24)', '1 * 24 = 24 (left: 24)', '24 / 2 = 12 (left: 12)\n24 * 2 = 48 (not possible since 2 is not in the input)\n24 + 2 = 26 (not possible since 2 is not in the input)\n24 - 2 = 22 (not possible since 2 is not in the input)\nSince there is only one number, the possible next steps are limited to operations with itself or the number 1 which is not in the input, however one possible operation is \n24 / 1 = 24 (left: 24)\n24 * 1 = 24 (not possible since 1 is not in the input)\n24 + 1 = 25 (not possible since 1 is not in the input)\n24 - 1 = 23 (not possible since 1 is not in the input)\n24 / 24 = 1 (left: 1)\n24 * 24 = 576 (left: 576)\n24 + 24 = 48 (left: 48)\n24 - 24 = 0 (left: 0)', '24 / 2 = 12 (left: 12)\n24 * 2 = 48 (left: 48) \n24 + 2 = 26 (left: 26)\n24 - 2 = 22 (left: 22)\n2 + 24 = 26 (left: 26)\n2 * 24 = 48 (left: 48)\n2 - 24 = -22 (left: -22)\n2 / 24 = 1/12 (left: 1/12)'], randomness=793), GameOf24Sta

In [28]:
agent_reflections = [
    asyncio.create_task(
    GameOf24Agent.generate_reflection(puzzle=puzzle, steps=state.steps, state=state, api=step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
    )
    for agent_id, state in enumerate(states)
]
reflection = await asyncio.gather(*agent_reflections)


for agent_id, reflection in enumerate(reflection):
    print(f"Reflection {agent_id}: {reflection}")

Reflection 0: The previous attempt at solving the puzzle contains a mistake in that it introduces numbers not present in the original input, such as 2, and explores operations with these non-input numbers. This mistake can be avoided by strictly adhering to the original input numbers (1, 1, 4, 6) and only using the basic arithmetic operations (+, -, *, /) with these numbers. A general reflection is that in math puzzles like the game of 24, it's crucial to stay within the given constraints and avoid introducing extraneous elements that are not part of the original problem statement.
Reflection 1: The previous attempt contains a mistake. The error occurs when the operation "-2 + -2 = -4" is performed, as there is only one "-2" left to operate on, and it's being added to itself, which is not a valid operation in this context.

This mistake can be avoided by ensuring that each operation involves two distinct numbers from the remaining pool. A general reflection is that players should caref

In [29]:
# Reattempting to solve for the second time the puzzle (with reflexion)

#Resetting
states = []
for _ in range(num_agents):
    states.append(GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000)))
finished_states = []



#Stepping
for step in range(num_steps):
    
    print(f"Step {step} : Stepping")
    agent_tasks = [
        asyncio.create_task(
        GameOf24Agent.step(state, step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"), reflection=reflection[agent_id])
        )
        for agent_id, state in enumerate(states)
    ]

    states = await asyncio.gather(*agent_tasks)

    for agent_id, state in enumerate(states):
        print(f"Current step for agent {agent_id}: {state.steps[-1]} \n")

    # Evaluate whether a puzzle has been solved
    i = 0
    while i < len(states):
        print("i: ", i)
        if GameOf24Agent.verify(states[i]) == {"r": 1}:
            print(f"Puzzle finished: {states[i].puzzle}")
            finished_states.append(states.pop(i))
        else:
            i += 1

    # If all puzzles have been solved, break
    if len(states) == 0:
        break

Step 0 : Stepping
Current step for agent 0: 4 - 6 = -2 (left: -2 1 1) 

Current step for agent 1: 4 + 1 = 5 (left: 1 5 6) 

i:  0
i:  1
Step 1 : Stepping
Current step for agent 0: 1 - 1 = 0 (left: -2 0) 

Current step for agent 1: 6 / 1 = 6 (left: 5 6) 

i:  0
i:  1
Step 2 : Stepping
Current step for agent 0: -2 + 0 = 0 - 2 (left: -2) 

Current step for agent 1: 5 * 6 = 30 (left: 30) 

i:  0
i:  1
Step 3 : Stepping
Current step for agent 0: -2 - -2 = 0 (left: 0) 

Current step for agent 1: 30 - 30 = 0 (left: 0) 

i:  0
i:  1
