In [1]:
# Imports

from dataclasses import dataclass
from typing import List

import random

import asyncio
import re
import math
import random
import numpy as np
from sympy import simplify

random.seed(0)

from async_engine.batched_api import BatchingAPI
from async_engine.api import API

from src.prompts.adapt import gameof24 as llama_prompts
from utils import parse_suggestions, create_box

In [2]:
# State class

@dataclass(frozen=True)
class GameOf24State:
    """Immutable snapshot of one agent's progress on a Game of 24 puzzle.

    Fields:
        puzzle: the game of 24 puzzle, for example "1 1 4 6".
        current_state: initialized to the same value as ``puzzle`` and updated
            as the game progresses.
        steps: the steps recorded so far, oldest first.
        randomness: random seed used for resampling.
    """

    puzzle: str
    current_state: str
    steps: List[str]
    randomness: int

    def __hash__(self):
        # The step list is flattened into one string so that it can be hashed;
        # `randomness` does not take part in the hash.
        step_trail = " -> ".join(self.steps)
        return hash((self.puzzle, self.current_state, step_trail))

    def items(self):
        """Return ``(puzzle, current_state, steps, randomness)`` as a tuple."""
        return (self.puzzle, self.current_state, self.steps, self.randomness)

    def duplicate(self, randomness=None):
        """Return a copy of this state, optionally with a different ``randomness``.

        When ``randomness`` is ``None`` the current value is kept. The copy
        refers to the same ``steps`` list object as the original.
        """
        if randomness is None:
            randomness = self.randomness
        return GameOf24State(
            puzzle=self.puzzle,
            current_state=self.current_state,
            steps=self.steps,
            randomness=randomness,
        )

In [3]:
#Testing the game of 24

states = []
puzzle = "1 1 4 6"

# Hand-written walkthrough of one solution path:
# (heading to print, numbers left after the step, the step itself).
walkthrough = [
    ("Step: 0", "1 4 6", "1 * 1 = 1"),
    ("Step: 1", "1 24", "4 * 6 = 24"),
    ("Step: 2", "24", "1 * 24 = 24"),
]

example = GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000))
print("Initial State")
print(example.items(), "\n")

# Each step builds a brand-new state whose step list extends the previous one.
for heading, remaining, move in walkthrough:
    print(heading)
    example = GameOf24State(
        puzzle="1 1 4 6",
        current_state=remaining,
        steps=[*example.steps, move],
        randomness=random.randint(0, 1000),
    )
    print(example.items(), "\n")

Initial State
('1 1 4 6', '1 1 4 6', [], 864) 

Step: 0
('1 1 4 6', '1 4 6', ['1 * 1 = 1'], 394) 

Step: 1
('1 1 4 6', '1 24', ['1 * 1 = 1', '4 * 6 = 24'], 776) 

Step: 2
('1 1 4 6', '24', ['1 * 1 = 1', '4 * 6 = 24', '1 * 24 = 24'], 911) 



In [4]:
#Reflexion agent :O

class GameOf24Agent:
    """Static helpers that advance, verify and reflect on Game of 24 states."""

    @staticmethod
    async def step(state: "GameOf24State", api, namespace, reflexion: list) -> "GameOf24State":
        """
        Given a state, return the next state.

        ToT uses bfs_prompt to generate next steps but then uses the
        cot_prompt to get the final expression.
        For example, input : 1 1 4 6
        Step 0 : '1 - 1 = 0 (left: 0 4 6)'          BFS prompt
        Step 1 : '0 + 4 = 4 (left: 4 6)'            BFS prompt
        Step 2 : '4 * 6 = 24 (left: 24)'            BFS prompt
        Step 3 : Answer : ((1 - 1) + 4) * 6 = 24    CoT prompt

        Args:
            state: the state to advance.
            api: object exposing an awaitable ``buffered_request(prompt, key=, namespace=)``.
            namespace: forwarded unchanged to ``api.buffered_request``.
            reflexion: reflexions gathered so far; when non-empty, only the
                first entry is inserted into the prompt.

        Returns:
            A new state with the chosen step appended to ``steps``.

        Raises:
            AssertionError: if no suggestion could be parsed from the reply.
        """
        current_state = state.current_state

        if current_state.strip() == "24":
            # CoT prompt: only 24 is left, so ask for the final expression.
            steps = "\n".join(state.steps) + "\n"
            prompt = llama_prompts.cot_prompt.format(input=state.puzzle) + "Steps:\n" + steps + "Answer: "

            suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            # State does not change, only the steps
            selected_suggestion = suggestions
            selected_state = state.current_state

        else:
            if len(reflexion) == 0:
                prompt = llama_prompts.bfs_prompt.format(input=current_state)
            else:
                # Use the puzzle carried by the state rather than a hard-coded
                # one, so the prompt stays correct for any puzzle.
                prompt = llama_prompts.bfs_reflexion_prompt.format(
                    input=current_state, puzzle=state.puzzle, reflexion=reflexion[0]
                )

            suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            # parse suggestions, based on the current state
            parsed_suggestions = parse_suggestions(suggestions)
            if parsed_suggestions == []:
                print(f"No suggestions were parsed from state: {state}")
                print(f"\nPrompt: {prompt}\nSuggestions: {suggestions}\nParsed suggestions: {' | '.join(parsed_suggestions)}\n")
                # Raised explicitly: a bare `assert` is removed under `python -O`.
                raise AssertionError("No suggestions found.")

            # Reseeds the module-level RNG, so both the choice below and the
            # next state's `randomness` are determined by `state.randomness`.
            random.seed(state.randomness)
            selected_suggestion = random.choice(parsed_suggestions)
            selected_state = GameOf24Agent.parse_next_state(selected_suggestion)

        # set up new state object
        next_state = GameOf24State(
            puzzle=state.puzzle,
            current_state=selected_state,
            steps=state.steps + [selected_suggestion],
            randomness=random.randint(0, 1000)
        )
        return next_state

    @staticmethod
    def parse_next_state(suggestion: str) -> str:
        """Return the text between the last ``'left: '`` and the next ``')'``.

        When the suggestion has no ``'left: '`` marker, the text up to the
        first ``')'`` (or the whole suggestion) is returned unchanged.
        """
        return suggestion.split('left: ')[-1].split(')')[0]

    @staticmethod
    def verify(state: "GameOf24State") -> dict:
        """
        Verifies the output of a given task
            1. Checks if the numbers used are the same as the ones provided.
            2. Checks if the operations performed result to 24.

        States
            {"r": 0} : Not finished.
            {"r": 1} : Finished and correct.
            {"r": -1} : Finished and incorrect.
        """
        # Split on any whitespace so that stray spaces around the last number
        # ("24 ") are tolerated, matching the `.strip()` used in `step`.
        remaining = state.current_state.split()
        if len(remaining) > 1 or len(state.steps) <= 3:
            # More than one number left, or too few steps recorded so far
            return {'r': 0}
        if not remaining or remaining[0] != "24":
            # One number left and it is not 24
            return {'r': -1}

        # One number left and it is 24: check the final answer expression.
        expression = state.steps[-1].lower().replace('answer: ', '').split('=')[0]
        numbers = re.findall(r'\d+', expression)
        problem_numbers = re.findall(r'\d+', state.puzzle)
        if sorted(numbers) != sorted(problem_numbers):
            # Numbers used are not the same as the ones provided
            return {'r': -1}
        try:
            # NOTE(review): `expression` is model-generated text and sympy
            # parses strings with eval; consider validating it first.
            if simplify(expression) == 24:
                return {'r': 1}
            # Operations performed do not result to 24
            return {'r': -1}
        except Exception as e:
            print(e)
            return {'r': -1}

    @staticmethod
    async def generate_reflexion(puzzle: str, steps, state: "GameOf24State", api, namespace) -> str:
        """Request a reflexion on ``steps`` for ``puzzle`` and return the reply.

        A coroutine, so it can be awaited directly or wrapped in
        ``asyncio.create_task``.
        """
        prompt = llama_prompts.reflexion_prompt.format(puzzle=puzzle, steps=steps)
        return await api.buffered_request(prompt, key=hash(state), namespace=namespace)

    @staticmethod
    async def generate_summary(reflexion, state: "GameOf24State", api, namespace) -> str:
        """Request a summary of ``reflexion`` and return the reply.

        A coroutine, so it can be awaited directly or wrapped in
        ``asyncio.create_task``.
        """
        prompt = llama_prompts.summary_prompt.format(reflexion=reflexion)
        return await api.buffered_request(prompt, key=hash(state), namespace=namespace)


# Solve 1 1 4 6 puzzle:

In [5]:
# Initialization

# Request settings; `step_api_config` and `eval_api_config` are two names for
# the same dict object.
eval_api_config = dict(
    max_tokens=1000,
    temperature=0,
    top_p=1,
    request_timeout=120,
    top_k=50,
)
step_api_config = eval_api_config

# eligible providers ["TogetherAI", "OpenAI", "Groq"]
provider = "Groq"
model = "llama-3.3-70b-versatile"

# Both roles currently use the same model and provider.
models = {
    role: {"model_name": model, "provider": provider}
    for role in ("step", "eval")
}

api = API(eval_api_config, models=models.values(), resources=2, verbose=False)

# Experiment parameters read by the solving functions further down.
puzzle = "1 1 4 6"
num_agents = 2
num_steps = 4




In [6]:
# Print each agent index followed by its state, one per line; nothing is
# printed when `states` is empty.
for agent_id, state in enumerate(states):
    print(agent_id, state, sep="\n")

In [7]:
# BatchingAPI built on top of `api`; this is the object handed to the agent
# calls below as their `api` argument.
step_batcher = BatchingAPI(
    api,
    batch_size=1,
    timeout=2,
    model=models["step"]["model_name"],
    tab="step",
)

In [8]:
# Module-level reflexion placeholder (empty string = no reflexion yet).
# NOTE(review): the functions visible in this notebook receive `reflexion` as
# a parameter or set their own local, so this global does not appear to be
# read by them - confirm before relying on it.
reflexion = ""

In [9]:
# Attempting to solve the puzzle
async def solvePuzzle(reflexion):
    """Step `num_agents` agents through the global `puzzle` for up to `num_steps` rounds.

    Every round, all remaining agents take one `GameOf24Agent.step`
    concurrently (through `step_batcher`), the step they took is printed, and
    any state that verifies as ``{"r": 1}`` is set aside.

    Args:
        reflexion: passed unchanged to every `GameOf24Agent.step` call.

    Returns:
        The list of states that were not verified as solved; empty when every
        agent solved the puzzle.
    """
    # Create initial state/environment: one fresh state per agent.
    states = [
        GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000))
        for _ in range(num_agents)
    ]
    finished_states = []

    for step in range(num_steps):
        print(f"Step {step} : Stepping")

        # Launch one task per remaining agent, then wait for all of them.
        pending = []
        for agent_id, state in enumerate(states):
            namespace = (0, f"Agent: {agent_id}", f"Step : {step}")
            pending.append(
                asyncio.create_task(
                    GameOf24Agent.step(state, step_batcher, namespace=namespace, reflexion=reflexion)
                )
            )
        states = await asyncio.gather(*pending)

        for agent_id, state in enumerate(states):
            print(f"Current step for agent {agent_id}: {state.steps[-1]} \n")

        # Separate the solved states from the ones that keep going.
        still_running = []
        for candidate in states:
            if GameOf24Agent.verify(candidate) == {"r": 1}:
                print(f"Puzzle finished: {candidate.puzzle}")
                finished_states.append(candidate)
            else:
                still_running.append(candidate)
        states = still_running

        # If all puzzles have been solved, stop early.
        if not states:
            break

    return states

In [None]:
async def makeReflexion(reflexion_type, num_reflexions, k, states, reflexion):
    """Generate reflexions for `states` and shape them according to `reflexion_type`.

    Args:
        reflexion_type: "list" (return all reflexions), "k most recent"
            (return the last `k`), or "summary" (return one summary per state).
            Any other value prints "unknown type" and returns the reflexions.
        num_reflexions: 1 replaces `reflexion` with one fresh round; a value
            above 1 appends that many rounds to the existing reflexions; any
            other value leaves `reflexion` as passed in.
        k: number of most recent reflexions kept for "k most recent".
        states: states to reflect on; each round yields one reflexion per state.
        reflexion: reflexions gathered so far. The caller's object is never
            mutated; an empty value is treated as "none yet".

    Returns:
        A list of reflexions or summaries (or `reflexion` unchanged when
        nothing was generated).
    """
    step = 3  # only used to label the request namespace

    async def collect_round():
        """Request one reflexion per state concurrently; results are in state order."""
        tasks = [
            asyncio.create_task(
                GameOf24Agent.generate_reflexion(puzzle=puzzle, steps=state.steps, state=state, api=step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
            )
            for agent_id, state in enumerate(states)
        ]
        return await asyncio.gather(*tasks)

    if num_reflexions == 1:
        reflexion = await collect_round()
    elif num_reflexions > 1:
        # Work on a private list: the first call passes "" (which has no
        # append), and the caller's list should not be changed in place.
        if not reflexion:
            reflexion = []
        elif isinstance(reflexion, str):
            reflexion = [reflexion]
        else:
            reflexion = list(reflexion)
        for _ in range(num_reflexions):
            reflexion.extend(await collect_round())

    if reflexion_type == "list":
        print("reflexion", reflexion)
        return reflexion

    elif reflexion_type == "k most recent":
        # `seq[-0:]` is the whole sequence, so k == 0 needs its own case.
        reflexion = reflexion[-k:] if k else reflexion[:0]
        print("reflexion", reflexion)
        return reflexion

    elif reflexion_type == "summary":
        summary_tasks = [
            asyncio.create_task(
                GameOf24Agent.generate_summary(reflexion=reflexion, state=state, api=step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
            )
            for agent_id, state in enumerate(states)
        ]
        # Gather the tasks created just above (one summary per state).
        summary = await asyncio.gather(*summary_tasks)
        print("summary", summary)
        return summary
    else:
        print("unknown type")

    return reflexion

In [34]:
async def runReflexionGameOf24(typeOfReflexion, num_iterations, k):
    """Solve once without reflexion, then alternate reflecting and re-solving.

    Args:
        typeOfReflexion: reflexion type forwarded to `makeReflexion`.
        num_iterations: number of reflect-then-solve rounds after the first attempt.
        k: forwarded to `makeReflexion`.

    Returns:
        None; the states of the first attempt are printed.
    """
    # First attempt: no reflexion available yet.
    reflexion = ""
    states = await solvePuzzle(reflexion)
    print(states)

    # Round r (1-based) requests r reflexions before solving again.
    for round_number in range(1, num_iterations + 1):
        reflexion = await makeReflexion(typeOfReflexion, round_number, k, states, reflexion)
        states = await solvePuzzle(reflexion)

In [35]:
# Run the "list" reflexion variant with 2 reflect-and-retry iterations.
# k=2 is passed along but is only used by the "k most recent" type.
await runReflexionGameOf24("list", 2, 2)

Step 0 : Stepping
Current step for agent 0: 1 * 6 = 6 (left: 1 4 6) 

Current step for agent 1: 6 * 1 = 6 (left: 1 4 6) 

Step 1 : Stepping
Current step for agent 0: 4 / 6 = 0.6667 (left: 0.6667 1) 

Current step for agent 1: 4 / 1 = 4 (left: 4 6) 

Step 2 : Stepping
Current step for agent 0: 0.6667 - 1 = -0.3333 (left: -0.3333) 

Current step for agent 1: 6 / 4 = 1.5 (left: 1.5) 

Step 3 : Stepping
Current step for agent 0: -0.3333 + -0.3333 = -0.6666 (left: -0.6666) 

Current step for agent 1: No possible next steps, only one number remains. 

[GameOf24State(puzzle='1 1 4 6', current_state='-0.6666', steps=['1 * 6 = 6 (left: 1 4 6)', '4 / 6 = 0.6667 (left: 0.6667 1)', '0.6667 - 1 = -0.3333 (left: -0.3333)', '-0.3333 + -0.3333 = -0.6666 (left: -0.6666)'], randomness=495), GameOf24State(puzzle='1 1 4 6', current_state='No possible next steps, only one number remains.', steps=['6 * 1 = 6 (left: 1 4 6)', '4 / 1 = 4 (left: 4 6)', '6 / 4 = 1.5 (left: 1.5)', 'No possible next steps, only on

CancelledError: 

In [36]:
# Run the "summary" reflexion variant with 2 reflect-and-retry iterations.
# The recorded output below ends in an AttributeError because the prompts
# module has no `summary_prompt` attribute.
await runReflexionGameOf24("summary", 2, 2)

Step 0 : Stepping
Current step for agent 0: 1 + 6 = 7 (left: 1 4 7) 

Current step for agent 1: 4 * 1 = 4 (left: 1 4 6) 

Step 1 : Stepping
Current step for agent 0: 4 + 7 = 11 (left: 11 1) 

Current step for agent 1: 4 + 6 = 10 (left: 1 10) 

Step 2 : Stepping
Current step for agent 0: 1 - 11 = -10 (left: -10) 

Current step for agent 1: 10 - 1 = 9 (left: 9) 

Step 3 : Stepping
Current step for agent 0: None 

Current step for agent 1: 9 - 9 = 0 (left: 0) 

[GameOf24State(puzzle='1 1 4 6', current_state='None', steps=['1 + 6 = 7 (left: 1 4 7)', '4 + 7 = 11 (left: 11 1)', '1 - 11 = -10 (left: -10)', 'None'], randomness=21), GameOf24State(puzzle='1 1 4 6', current_state='0', steps=['4 * 1 = 4 (left: 1 4 6)', '4 + 6 = 10 (left: 1 10)', '10 - 1 = 9 (left: 9)', '9 - 9 = 0 (left: 0)'], randomness=255)]


AttributeError: module 'src.prompts.adapt.gameof24' has no attribute 'summary_prompt'