In [2]:
from dataclasses import dataclass
from typing import List


@dataclass(frozen=True)
class GameOf24State:
    # game of 24 puzzle, for example 1 1 4 6
    puzzle: str

    # initialized to the same value as puzzle, but is updated as the game progresses
    current_state: str

    steps: List[str]

    #Randomness used for resampling (random seed)
    randomness: int

    def __hash__(self):
        return hash((self.puzzle, self.current_state, " -> ".join(self.steps)))
    
    def items(self):
        return self.puzzle, self.current_state, self.steps, self.randomness
    
    def duplicate(self, randomness=None):
        return GameOf24State(
            puzzle=self.puzzle,
            current_state=self.current_state,
            steps=self.steps,
            randomness=randomness if randomness is not None else self.randomness)

In [3]:
#Testing the game of 24

import random

states = []
puzzle = "1 1 4 6"
game_env = GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000))

# for step in range(2):
#     print(f"Step {step} : Stepping")

print(game_env.items)

print("Stepping...")
game_env = GameOf24State(puzzle="1 1 4 6", current_state="1 4 6", steps=game_env.steps + ["1 * 1 = 1"], randomness=random.randint(0, 1000))

print(game_env.items)

print("Stepping...")


game_env = GameOf24State(puzzle="1 1 4 6", current_state="1 24", steps=game_env.steps + ["4 * 6 = 24"], randomness=random.randint(0, 1000))

print(game_env.items)

<bound method GameOf24State.items of GameOf24State(puzzle='1 1 4 6', current_state='1 1 4 6', steps=[], randomness=853)>
Stepping...
<bound method GameOf24State.items of GameOf24State(puzzle='1 1 4 6', current_state='1 4 6', steps=['1 * 1 = 1'], randomness=932)>
Stepping...
<bound method GameOf24State.items of GameOf24State(puzzle='1 1 4 6', current_state='1 24', steps=['1 * 1 = 1', '4 * 6 = 24'], randomness=842)>


In [None]:
#Reflexion agent :O

import asyncio
import re
import math
import random
import numpy as np
from sympy import simplify

random.seed(0)

from src.prompts.totor import gameof24 as totor_prompts
from src.prompts.adapt import gameof24 as llama_prompts
from src.states.gameof24 import GameOf24State
from utils import parse_suggestions, create_box


class GameOf24Agent:

    @staticmethod
    async def step(state: GameOf24State, api, namespace)-> GameOf24State:
        """
        Given a state, returns the next state one.
        """

        # set up the prompt, based on the current state

        # ToT uses bfs_prompt to generate next steps but then uses
        # the cot_prompt to get the final expression. 
        # For example, input : 1 1 4 6
        # Step 0 : '1 - 1 = 0 (left: 0 4 6)'          BFS prompt
        # Step 1 : '0 + 4 = 4 (left: 4 6)'            BFS prompt
        # Step 2 : '4 * 6 = 24 (left: 24)'            BFS prompt
        # Step 3 : Answer : ((1 - 1) + 4) * 6 = 24    CoT prompt


        # set up the prompt, based on the current state
        current_state = state.current_state
        print(current_state)
        if current_state.strip() == "24":
            # CoT prompt
            steps = "\n".join(state.steps) + "\n"
            
            # Set up CoT prompt
            if any(author in api.model for author in ["meta", "google", "mistral", "gpt-4o"]):
                prompt = llama_prompts.cot_prompt.format(input=state.puzzle) + "Steps:\n" + steps + "Answer: "
            else:
                prompt = totor_prompts.cot_prompt.format(input=state.puzzle) + "Steps:\n" + steps

            # Get the final expression
            suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            # State does not change, only the steps
            selected_suggestion = suggestions
            selected_state = state.current_state

            prompt = "We are playing game of 24 make sure not to use duplicates" + "This is the current state" + state.current_state
            # Get the next state
            suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)
        else:
            reflexion_prompt = "You are reflecting"
            # Get the next state
            reflexion_suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)
                
            # Set up BFS prompt
            if any(author in api.model for author in ["meta", "google", "mistral", "gpt-4o"]):
                prompt = llama_prompts.bfs_prompt.format(input=current_state)
            else:
                prompt = totor_prompts.bfs_prompt.format(input=current_state)

            # Get the next state
            suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            # parse suggestions, based on the current state
            parsed_suggestions = parse_suggestions(suggestions)
            if parsed_suggestions == []:
                print(f"No suggestions were paresed from state: {state}")
                print(f"\nPrompt: {prompt}\nSuggestions: {suggestions}\nParsed suggestions: {' | '.join(parsed_suggestions)}\n")
                assert False, "No suggestions found."
            
            suggestions = parsed_suggestions
            
            random.seed(state.randomness)
            selected_suggestion = random.choice(suggestions)
            selected_state = GameOf24Agent.parse_next_state(selected_suggestion)

        # set up new state object
        next_state = GameOf24State(
            puzzle=state.puzzle,
            current_state=selected_state,
            steps=state.steps + [selected_suggestion],
            randomness=random.randint(0, 1000)
        )
        return next_state
    
    @staticmethod
    def parse_next_state(suggestion: str) -> str:
        return suggestion.split('left: ')[-1].split(')')[0]

    @staticmethod
    async def reflection(state: GameOf24State, api, namespace):
        prompt = "We are playing game of 24 make sure not to use duplicates" + "This is the current state" + state.current_state
        # Get the next state
        suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

        return suggestions


In [25]:
from async_engine.batched_api import BatchingAPI
from async_engine.api import API


step_api_config = eval_api_config = {
    "max_tokens": 100,
    "temperature": 0.7,
    "top_p": 1,
    "request_timeout": 120,
    "top_k": 50
}

model = "llama-3.2-11b-vision-preview"
# provider = "TogetherAI" if "meta" in model else "OpenAI"
provider = "Groq"
# provider = "TogetherAI"
models = {
    "step": {"model_name":model, "provider":provider},
    "eval": {"model_name":model, "provider":provider},
}

api = API(eval_api_config, models=models.values(), resources=2, verbose=False)


states = []
puzzle = "1 1 4 6"
num_steps = 4

#Create initial state/environment
game_env = GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000))
step_batcher = BatchingAPI(api, batch_size=1, timeout=2, model=models["step"]["model_name"], tab="step")

states.append(game_env)

agent_coroutines = []
for step in range(num_steps):
    
    print(f"Step {step} : Stepping")
    
    #Stepping - This does not work for us, without using asyncio.create_task(), why?
    agent_tasks = [
        asyncio.create_task(
        GameOf24Agent.step(state, step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
        )
        for agent_id, state in enumerate(states)
    ]
    states = await asyncio.gather(*agent_tasks)

    print(states)

    agent_tasks = [
        asyncio.create_task(
            GameOf24Agent.reflection(state, step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
        )
        for agent_id, state in enumerate(states)
    ]
    reflections = await asyncio.gather(*agent_tasks)
    # print(reflections)




Step 0 : Stepping
1 1 4 6
[GameOf24State(puzzle='1 1 4 6', current_state='1 2 4', steps=['1. 1 + 1 = 2 (left: 1 2 4)'], randomness=952)]
Step 1 : Stepping
1 2 4
[GameOf24State(puzzle='1 1 4 6', current_state="To find the next possible steps for the input 1 2 4, we'll apply the same operations as before: addition, subtraction, multiplication, and division.", steps=['1. 1 + 1 = 2 (left: 1 2 4)', "To find the next possible steps for the input 1 2 4, we'll apply the same operations as before: addition, subtraction, multiplication, and division."], randomness=690)]
Step 2 : Stepping
To find the next possible steps for the input 1 2 4, we'll apply the same operations as before: addition, subtraction, multiplication, and division.
[GameOf24State(puzzle='1 1 4 6', current_state='Possible next steps:', steps=['1. 1 + 1 = 2 (left: 1 2 4)', "To find the next possible steps for the input 1 2 4, we'll apply the same operations as before: addition, subtraction, multiplication, and division.", 'Possi