In [1]:
# Imports
from lazykey import AsyncKeyHandler
from groq import AsyncGroq
from dataclasses import dataclass
from typing import List
import pickle

import random

import asyncio
import re
import math
import random
import numpy as np
from sympy import simplify

random.seed(0)

from async_engine.batched_api import BatchingAPI
from async_engine.api import API

from src.prompts.adapt import gameof24 as llama_prompts
from utils import parse_suggestions, create_box

In [2]:
#Setup lazykey
api_keys = ["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", "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", "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"]
client = AsyncGroq

api = AsyncKeyHandler(api_keys, client)

completion = await api.request(
    messages=[
        {
            "role": "user",
            "content": "Cats or dogs?",
        }
    ],
    model="llama-3.3-70b-versatile",
)
print(completion)

ChatCompletion(id='chatcmpl-dc6b4c4e-e471-4fdc-8115-29f25799329b', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="As a neutral AI, I don't have personal preferences, but I can tell you about the pros and cons of having cats or dogs as pets.\n\nCats are often low-maintenance, independent, and affectionate in their own way. They are usually self-cleaning, don't need to be taken out for walks, and are generally quieter than dogs. However, they can be aloof andscratching furniture might be a problem.\n\nDogs, on the other hand, are often social, loyal, and loving. They thrive on attention and exercise, which can be great for people who enjoy outdoor activities. They can be trained to behave well and perform tricks, but they do require regular walks and training.\n\nUltimately, the choice between cats and dogs depends on your lifestyle, living situation, and personal preferences. Both can make wonderful pets, and it's essential to consid

In [3]:
# State class

@dataclass(frozen=True)
class GameOf24State:
    # game of 24 puzzle, for example 1 1 4 6
    puzzle: str

    # initialized to the same value as puzzle, but is updated as the game progresses
    current_state: str

    steps: List[str]

    #Randomness used for resampling (random seed)
    randomness: int

    def __hash__(self):
        return hash((self.puzzle, self.current_state, " -> ".join(self.steps)))
    
    def items(self):
        return self.puzzle, self.current_state, self.steps, self.randomness
    
    def duplicate(self, randomness=None):
        return GameOf24State(
            puzzle=self.puzzle,
            current_state=self.current_state,
            steps=self.steps,
            randomness=randomness if randomness is not None else self.randomness)

In [7]:
#Testing the game of 24

states = []
puzzle = "1 1 4 6"
example = GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000))

# for step in range(2):
#     print(f"Step {step} : Stepping")

print("Initial State")
print(example.items(), "\n")

print("Step: 0")
example = GameOf24State(puzzle="1 1 4 6", current_state="1 4 6", steps=example.steps + ["1 * 1 = 1"], randomness=random.randint(0, 1000))
print(example.items(), "\n")


print("Step: 1")
example = GameOf24State(puzzle="1 1 4 6", current_state="1 24", steps=example.steps + ["4 * 6 = 24"], randomness=random.randint(0, 1000))
print(example.items(), "\n")

print("Step: 2")
example = GameOf24State(puzzle="1 1 4 6", current_state="24", steps=example.steps + ["1 * 24 = 24"], randomness=random.randint(0, 1000))
print(example.items(), "\n")

Initial State
('1 1 4 6', '1 1 4 6', [], 864) 

Step: 0
('1 1 4 6', '1 4 6', ['1 * 1 = 1'], 394) 

Step: 1
('1 1 4 6', '1 24', ['1 * 1 = 1', '4 * 6 = 24'], 776) 

Step: 2
('1 1 4 6', '24', ['1 * 1 = 1', '4 * 6 = 24', '1 * 24 = 24'], 911) 



In [7]:
#Reflexion agent :O

class GameOf24Agent:

    @staticmethod
    async def step(state: GameOf24State, api, namespace, reflexion: list)-> GameOf24State:
        """
        Given a state, returns the next state one.
        """

        # set up the prompt, based on the current state

        # ToT uses bfs_prompt to generate next steps but then uses
        # the cot_prompt to get the final expression. 
        # For example, input : 1 1 4 6
        # Step 0 : '1 - 1 = 0 (left: 0 4 6)'          BFS prompt
        # Step 1 : '0 + 4 = 4 (left: 4 6)'            BFS prompt
        # Step 2 : '4 * 6 = 24 (left: 24)'            BFS prompt
        # Step 3 : Answer : ((1 - 1) + 4) * 6 = 24    CoT prompt


        # set up the prompt, based on the current state

        current_state = state.current_state
        
        if current_state.strip() == "24":
            # CoT prompt
            steps = "\n".join(state.steps) + "\n"
            

            prompt = llama_prompts.cot_prompt.format(input=state.puzzle) + "Steps:\n" + steps + "Answer: "

            # Set up CoT prompt
            # if any(author in api.model for author in ["meta", "google", "mistral", "gpt-4o"]):
            #     prompt = llama_prompts.cot_prompt.format(input=state.puzzle) + "Steps:\n" + steps + "Answer: "
            # else:
            #     prompt = totor_prompts.cot_prompt.format(input=state.puzzle) + "Steps:\n" + steps

            # Get the final expression
            suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            # State does not change, only the steps
            selected_suggestion = suggestions
            selected_state = state.current_state
            


        else:
            if len(reflexion) == 0:
                prompt = llama_prompts.bfs_prompt.format(input=current_state) 
            else:
                prompt = llama_prompts.bfs_reflexion_prompt.format(input=current_state, puzzle = "1 1 4 6", reflexion=reflexion[0]) 
                
            # Set up BFS prompt
            # if any(author in api.model for author in ["meta", "google", "mistral", "gpt-4o"]):
            #     prompt = llama_prompts.bfs_prompt.format(input=current_state) + "Keep in mind the following critique from the last step: \n" + reflexion_suggestions
            # else:
            #     prompt = totor_prompts.bfs_prompt.format(input=current_state) + "Keep in mind the following critique from the last step: \n" + reflexion_suggestions

            # Get the next state
            # suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            suggestions = await api.buffered_request(prompt, key=hash(state), namespace=namespace)

            # parse suggestions, based on the current state
            parsed_suggestions = parse_suggestions(suggestions)
            if parsed_suggestions == []:
                print(f"No suggestions were parsed from state: {state}")
                print(f"\nPrompt: {prompt}\nSuggestions: {suggestions}\nParsed suggestions: {' | '.join(parsed_suggestions)}\n")
                assert False, "No suggestions found."
            
            suggestions = parsed_suggestions
            
            random.seed(state.randomness)
            selected_suggestion = random.choice(suggestions)
            selected_state = GameOf24Agent.parse_next_state(selected_suggestion)

        # set up new state object
        next_state = GameOf24State(
            puzzle=state.puzzle,
            current_state=selected_state,
            steps=state.steps + [selected_suggestion],
            randomness=random.randint(0, 1000)
        )
        return next_state
    
    @staticmethod
    def parse_next_state(suggestion: str) -> str:
        return suggestion.split('left: ')[-1].split(')')[0]
    
    @staticmethod
    def verify(state: GameOf24State)-> dict:
            """
            Verifies the output of a given task
                1. Checks if the numbers used are the same as the ones provided.
                2. Checks if the operations performed result to 24.

            States 
                {"r": 0} : Not finished.
                {"r": 1} : Finished and correct.
                {"r": -1} : Finished and incorrect.
            """
            current_states = state.current_state.split(" ")
            if len(current_states) !=1 or len(state.steps)<=3:
                # More than one number left
                return {'r':0}
            elif current_states[0] != "24":
                # One number left and it is not 24
                return {'r':-1}
            else:
                # One number left and it is 24
                expression = state.steps[-1].lower().replace('answer: ', '').split('=')[0]
                numbers = re.findall(r'\d+', expression)
                problem_numbers = re.findall(r'\d+', state.puzzle)
                if sorted(numbers) != sorted(problem_numbers):
                    # Numbers used are not the same as the ones provided
                    return {'r': -1}
                try:
                    if simplify(expression) == 24:
                        return {'r': 1}
                    else:
                        # Operations performed do not result to 24
                        return {'r': -1}
                except Exception as e:
                    print(e)
                    return {'r': -1}

    @staticmethod
    def generate_reflexion(puzzle: str, steps: object, state: GameOf24State, api: API, namespace: tuple) -> str:
        prompt = llama_prompts.reflexion_prompt.format(puzzle=puzzle, steps=steps)
        reflexion = api.buffered_request(prompt, key=hash(state), namespace=namespace)
        return reflexion
    
    @staticmethod
    def evaluate_step(puzzle: str, steps: object, state: GameOf24State, api: API, namespace: tuple)-> str:
        prompt = llama_prompts.evaluate_prompt.format(puzzle=puzzle, steps=steps)
        evalution = api.buffered_request(prompt, key=hash(state), namespace=namespace)
        return evalution


# Solve 1 1 4 6 puzzle:

In [8]:
# Initialization

step_api_config = eval_api_config = {
    "max_tokens": 1000,
    "temperature": 0,
    "top_p": 1,
    "request_timeout": 120,
    "top_k": 50
}

# eligible providers ["TogehterAI", "OpenAI", "Groq"]
model = "llama-3.3-70b-versatile"
provider = "Groq"
models = {
    "step": {"model_name":model, "provider":provider},
    "eval": {"model_name":model, "provider":provider},
}

# api = API(eval_api_config, models=models.values(), resources=2, verbose=False)

puzzle = "1 1 4 6"
num_steps = 4
num_agents = 2




In [None]:
# Attempting to solve the puzzle (without reflexion)

#Create initial state/environment
states = []
for _ in range(num_agents):
    states.append(GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000)))
step_batcher = BatchingAPI(api, batch_size=1, timeout=2, model=models["step"]["model_name"], tab="step")

finished_states = []

#Stepping
for step in range(num_steps):
    
    print(f"Step {step} : Stepping")
    agent_tasks = [
        asyncio.create_task(
        GameOf24Agent.step(state, step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"), reflexion="")
        )
        for agent_id, state in enumerate(states)
    ]
    states = await asyncio.gather(*agent_tasks)
    for agent_id, state in enumerate(states):
        print(f"Current step for agent {agent_id}: {state.steps[-1]} \n")

    # Evaluate whether a puzzle has been solved
    i = 0
    while i < len(states):
        if GameOf24Agent.verify(states[i]) == {"r": 1}:
            print(f"Puzzle finished: {states[i].puzzle}")
            finished_states.append(states.pop(i))
        else:
            i += 1

    # If all puzzles have been solved, break
    if len(states) == 0:
        break

Step 0 : Stepping


In [74]:
print(len(finished_states))
print(len(states))

0
0


In [15]:
# Generate reflexions if the puzzle is not solved

agent_evaluations = [
    asyncio.create_task(
    GameOf24Agent.evaluate_step(puzzle=puzzle, steps=state.steps, state=state, api=step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
    )
    for agent_id, state in enumerate(states)
]
evaluation = await asyncio.gather(*agent_evaluations)

for agent_id, agent_evaluation in enumerate(evaluation):
    print(f"reflexion {agent_id}: {agent_evaluation} \n")

reflexion 0: To evaluate the given solution attempt for the game of 24 with the numbers 1, 1, 4, and 6, let's break it down step by step.

1. Evaluate step:
   - The first step is 4 / 1 = 4. With the numbers 4 and 1, we can indeed get 4. The numbers left are 4, 1, and 6. 
     - 4 * 6 = 24 (sure)
     - 4 + 6 = 10 (still a reasonable range, but we need to use the 1 as well)
     - Given the numbers, it seems likely we can reach 24, but let's continue evaluating the steps as given.

2. Check whether each step is valid:
   - The first step is 4 / 1 = 4. This is mathematically correct, and it uses available numbers. The result is used in the next step, so this step is valid.
   - The second step is 1 + 4 = 5. This is mathematically correct, and it uses available numbers. However, the result of this step (5) and the remaining number (6) are used to calculate 5 - 6 = -1. This step is mathematically correct but leads to a negative number, which might not be useful for reaching 24 directly. H

Testing evaluate prompt:

In [80]:
from tqdm import tqdm

test_list = []
for i in tqdm(range(10)):
    states = []
    for _ in range(1):
        states.append(GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000)))
    step_batcher = BatchingAPI(api, batch_size=1, timeout=2, model=models["step"]["model_name"], tab="step")

    finished_states = []

    #Stepping
    for step in range(num_steps):
        
        agent_tasks = [
            asyncio.create_task(
            GameOf24Agent.step(state, step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"), reflexion="")
            )
            for agent_id, state in enumerate(states)
        ]
        states = await asyncio.gather(*agent_tasks)


        # Evaluate whether a puzzle has been solved
        i = 0
        while i < len(states):
            if GameOf24Agent.verify(states[i]) == {"r": 1}:
                print(f"Puzzle finished: {states[i].puzzle}")
                finished_states.append(states.pop(i))
            else:
                i += 1

        # If all puzzles have been solved, break
        if len(states) == 0:
            break
    test_list.append(states)

 60%|██████    | 6/10 [01:33<01:09, 17.41s/it]

Rate limit error, sleeping for 5 seconds
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jjw28gc7e3ps2k7jvffye5x3` service tier `on_demand` on : Limit 100000, Used 100063, Requested 154. Please try again in 3m8.165s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': '', 'code': 'rate_limit_exceeded'}}
Rate limit error, sleeping for 15 seconds
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jjw28gc7e3ps2k7jvffye5x3` service tier `on_demand` on : Limit 100000, Used 100057, Requested 154. Please try again in 3m2.916s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': '', 'code': 'rate_limit_exceeded'}}


 60%|██████    | 6/10 [01:53<01:15, 18.98s/it]


CancelledError: 

Rate limit error, sleeping for 45 seconds
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jjw28gc7e3ps2k7jvffye5x3` service tier `on_demand` on : Limit 100000, Used 100039, Requested 154. Please try again in 2m46.91s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': '', 'code': 'rate_limit_exceeded'}}
Rate limit error, sleeping for 60 seconds
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jjw28gc7e3ps2k7jvffye5x3` service tier `on_demand` on : Limit 100000, Used 99987, Requested 154. Please try again in 2m1.679999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': '', 'code': 'rate_limit_exceeded'}}
Rate limit error, sleeping for 60 seconds
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organi

In [76]:
with open('evaluate_prompt_list.txt', 'r') as file:
    test_list = [line.strip() for line in file]
print(len(test_list))

10


Agent 0: 

Test 1  fail: Step 0
Test 2  fail: Step 1
Test 3  fail: Step 1
Test 4  fail: Step 0
Test 5  fail: Step 0
Test 6  fail: Step 0
Test 7  fail: Step 1
Test 8  fail: Step 0
Test 9  fail: Step 0
Test 10 fail: Step 0 

In [77]:
agent_evaluations = [
    asyncio.create_task(
    GameOf24Agent.evaluate_step(puzzle=puzzle, steps=state.steps, state=state, api=step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
    )
    for agent_id, state in enumerate(test_list)
]
evaluation = await asyncio.gather(*agent_evaluations)

for agent_id, agent_evaluation in enumerate(evaluation):
    print(f"reflexion {agent_id}: {agent_evaluation} \n")

AttributeError: 'str' object has no attribute 'steps'

In [13]:
# Generate reflexions if the puzzle is not solved

agent_reflexions = [
    asyncio.create_task(
    GameOf24Agent.generate_reflexion(puzzle=puzzle, steps=state.steps, state=state, api=step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
    )
    for agent_id, state in enumerate(states)
]
reflexion = await asyncio.gather(*agent_reflexions)

for agent_id, agent_reflexion in enumerate(reflexion):
    print(f"reflexion {agent_id}: {agent_reflexion}")

reflexion 0: The previous attempt at solving the puzzle contains a mistake. The error occurs when the player multiplies 1 by 9, resulting in 9, which does not bring them closer to the goal of reaching 24. This mistake can be avoided by carefully evaluating the remaining numbers and available operations to ensure each step progresses towards the target.

A general reflection is that players should prioritize operations that increase the value or create more favorable combinations, rather than repeating or stagnating the current value. This requires a strategic approach, considering the potential outcomes of each operation and the remaining numbers to be used.
reflexion 1: Reflection:
The mistake in the previous attempt is the operation sequence, which led to a dead end with no possible way to reach 24. A similar mistake can be avoided by considering the properties of the numbers and the operations. Specifically, using division with a small divisor (1 in this case) may limit subsequent o

In [10]:
# Reattempting to solve the puzzle (with reflexion)

#Resetting
states = []
for _ in range(num_agents):
    states.append(GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000)))
finished_states = []



#Stepping
for step in range(num_steps):
    
    print(f"Step {step} : Stepping")
    agent_tasks = [
        asyncio.create_task(
        GameOf24Agent.step(state, step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"), reflexion=reflexion[agent_id])
        )
        for agent_id, state in enumerate(states)
    ]

    states = await asyncio.gather(*agent_tasks)

    for agent_id, state in enumerate(states):
        print(f"Current step for agent {agent_id}: {state.steps[-1]} \n")

    # Evaluate whether a puzzle has been solved
    i = 0
    while i < len(states):
        print("i: ", i)
        if GameOf24Agent.verify(states[i]) == {"r": 1}:
            print(f"Puzzle finished: {states[i].puzzle}")
            finished_states.append(states.pop(i))
        else:
            i += 1

    # If all puzzles have been solved, break
    if len(states) == 0:
        break

Step 0 : Stepping
Current step for agent 0: 4 * 6 = 24 (left: 1 24) 

Current step for agent 1: 1 / 4 = 0.25 (left: 0.25 1 6) 

i:  0
i:  1
Step 1 : Stepping
Current step for agent 0: 1 * 24 = 24 (left: 24) 

Current step for agent 1: 6 / 1 = 6 (left: 0.25 6) 

i:  0
i:  1
Step 2 : Stepping
Current step for agent 0: 24 / 2 = 12 (left: 12)
24 * 2 = 48 (not possible since 2 is not in the input)
24 + 2 = 26 (not possible since 2 is not in the input)
24 - 2 = 22 (not possible since 2 is not in the input)
24 / 1 = 24 (left: 24)
24 * 1 = 24 (left: 24)
24 + 1 = 25 (not possible since 1 is not in the input)
24 - 1 = 23 (not possible since 1 is not in the input)
24 + 24 = 48 (left: 48)
24 * 24 = 576 (left: 576)
24 - 24 = 0 (left: 0)
24 / 24 = 1 (left: 1)
24 / 3 = 8 (not possible since 3 is not in the input)
24 / 4 = 6 (not possible since 4 is not in the input)
24 / 6 = 4 (not possible since 6 is not in the input)
24 / 8 = 3 (not possible since 8 is not in the input)
24 / 12 = 2 (not possible si

In [11]:
print(states)

[GameOf24State(puzzle='1 1 4 6', current_state='24', steps=['4 * 6 = 24 (left: 1 24)', '1 * 24 = 24 (left: 24)', '24 / 2 = 12 (left: 12)\n24 * 2 = 48 (not possible since 2 is not in the input)\n24 + 2 = 26 (not possible since 2 is not in the input)\n24 - 2 = 22 (not possible since 2 is not in the input)\n24 / 1 = 24 (left: 24)\n24 * 1 = 24 (left: 24)\n24 + 1 = 25 (not possible since 1 is not in the input)\n24 - 1 = 23 (not possible since 1 is not in the input)\n24 + 24 = 48 (left: 48)\n24 * 24 = 576 (left: 576)\n24 - 24 = 0 (left: 0)\n24 / 24 = 1 (left: 1)\n24 / 3 = 8 (not possible since 3 is not in the input)\n24 / 4 = 6 (not possible since 4 is not in the input)\n24 / 6 = 4 (not possible since 6 is not in the input)\n24 / 8 = 3 (not possible since 8 is not in the input)\n24 / 12 = 2 (not possible since 12 is not in the input)\n24 * 3 = 72 (not possible since 3 is not in the input)\n24 * 4 = 96 (not possible since 4 is not in the input)\n24 * 6 = 144 (not possible since 6 is not in th

In [12]:
agent_reflexions = [
    asyncio.create_task(
    GameOf24Agent.generate_reflexion(puzzle=puzzle, steps=state.steps, state=state, api=step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"))
    )
    for agent_id, state in enumerate(states)
]
reflexion.append(await asyncio.gather(*agent_reflexions))


for agent_id, agent_reflexion in enumerate(new_reflexions):
    reflexion.append(agent_reflexion)
    print(f"reflexion {agent_id}: {agent_reflexion}")

reflexion 0: Reflection:
The previous attempt at solving the puzzle appears to be overly repetitive and inefficient. The main mistake is the repeated exploration of the same operations with numbers that are not in the original input list, such as 2, 3, 8, and 12. This leads to a significant amount of redundant calculations and ignores the fact that these numbers are not available for use.

To avoid similar mistakes, it's essential to strictly adhere to the original input numbers and only explore operations that involve these numbers. This can be achieved by systematically listing all possible combinations of the input numbers and basic arithmetic operations, and then evaluating each combination to determine if it yields the desired result of 24.

A general reflection is that brute-force approaches can be inefficient and prone to errors when dealing with complex problems. A more systematic and constrained approach can help to avoid redundant calculations and increase the chances of find

In [13]:
print(len(reflexion))

4


In [14]:
# Reattempting to solve for the second time the puzzle (with reflexion)

#Resetting
states = []
for _ in range(num_agents):
    states.append(GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000)))
finished_states = []



#Stepping
for step in range(num_steps):
    
    print(f"Step {step} : Stepping")
    agent_tasks = [
        asyncio.create_task(
        GameOf24Agent.step(state, step_batcher, namespace=(0, f"Agent: {agent_id}", f"Step : {step}"), reflexion=reflexion[agent_id] + "\n" + reflexion[agent_id+num_agents])
        )
        for agent_id, state in enumerate(states)
    ]

    states = await asyncio.gather(*agent_tasks)

    for agent_id, state in enumerate(states):
        print(f"Current step for agent {agent_id}: {state.steps[-1]} \n")

    # Evaluate whether a puzzle has been solved
    i = 0
    while i < len(states):
        print("i: ", i)
        if GameOf24Agent.verify(states[i]) == {"r": 1}:
            print(f"Puzzle finished: {states[i].puzzle}")
            finished_states.append(states.pop(i))
        else:
            i += 1

    # If all puzzles have been solved, break
    if len(states) == 0:
        break

Step 0 : Stepping
Current step for agent 0: 1 + 6 = 7 (left: 1 4 7) 

Current step for agent 1: 1 / 1 = 1 (left: 1 4 6) 

i:  0
i:  1
Step 1 : Stepping
Current step for agent 0: 7 + 1 = 8 (left: 4 8) 

Current step for agent 1: 1 * 6 = 6 (left: 6 4) 

i:  0
i:  1
Step 2 : Stepping
Current step for agent 0: 4 + 8 = 12 (left: 12) 

Current step for agent 1: 6 * 4 = 24 (left: 24) 

i:  0
i:  1
Step 3 : Stepping
Current step for agent 0: 12 - 12 = 0 (left: 0) 

Current step for agent 1: 24 / 2 = 12 (left: 12)
24 * 2 = 48 (not possible since 2 is not in the input)
24 + 2 = 26 (not possible since 2 is not in the input)
24 - 2 = 22 (not possible since 2 is not in the input)
24 / 1 = 24 (left: 24)
24 * 1 = 24 (left: 24)
24 + 1 = 25 (not possible since 1 is not in the input)
24 - 1 = 23 (not possible since 1 is not in the input)
24 + 24 = 48 (left: 48)
24 * 24 = 576 (left: 576)
24 - 24 = 0 (left: 0)
24 / 24 = 1 (left: 1)
24 / 3 = 8 (not possible since 3 is not in the input)
24 / 4 = 6 (not poss