First, we will define our functions

In [1]:
import numpy as np
# Index of the first stack line ("A | ...") inside a rendered game state:
# line 0 is the header sentence, line 1 is blank and line 2 is the opening code fence.
STACK_OFFSET = 3
# Stack labels, in the order in which their lines appear in the rendered state.
STACK_LETTERS = "ABC"
# Prefix placed in front of a rendered state after a move has been rejected.
INCORRECT_MOVE_LANGUAGE = "Invalid move. "


def process_move(move, previous_state):
    """Apply one move to a rendered game state.

    Args:
        move: Two-character string made of a disc digit followed by the
            target stack letter, e.g. ``"1B"``.
        previous_state: Rendered game state whose stack lines start at line
            index ``STACK_OFFSET`` and look like ``"A |  2 1 0"`` (the
            right-most disc is the top of the stack).

    Returns:
        The rendered state after the move, or ``None`` when the move is
        malformed, the disc is not on top of any stack, the disc is already
        on the target stack, or the disc would land on a smaller disc.
    """
    if len(move) != 2:
        return None
    move_disc, move_stack = move[0], move[1]
    if not move_disc.isdigit() or move_stack not in STACK_LETTERS:
        return None

    state_lines = previous_state.split("\n")
    # Last visible character of every stack line: the top disc, or "|" when the stack is empty.
    top_discs = [
        state_lines[STACK_OFFSET + stack_index].strip()[-1]
        for stack_index in range(len(STACK_LETTERS))
    ]
    if move_disc not in top_discs:
        return None

    source_index = top_discs.index(move_disc)
    target_index = STACK_LETTERS.index(move_stack)
    # "Moving" a disc onto the stack it already sits on is not a move;
    # get_valid_moves never offers it, so it is rejected here as well.
    if source_index == target_index:
        return None

    target_top = top_discs[target_index]
    # Discs are single digits, so comparing the characters matches numeric order.
    if target_top.isdigit() and move_disc > target_top:
        return None

    # Every disc is rendered as " <digit>", so dropping two characters pops the top disc.
    state_lines[STACK_OFFSET + source_index] = state_lines[STACK_OFFSET + source_index][:-2]
    state_lines[STACK_OFFSET + target_index] += " " + move_disc

    return "\n".join(state_lines)
    
    
    

def get_next_state(previous_state, response):
    """Return the rendered state that follows a player's response.

    Only the first whitespace-delimited token of ``response`` is treated as
    the move, so a reply such as ``"1B\\nbecause ..."`` is still read as
    ``"1B"``; an empty reply counts as an invalid move.

    Args:
        previous_state: Rendered state before the move; it may already carry
            the ``INCORRECT_MOVE_LANGUAGE`` prefix from an earlier rejection.
        response: Raw text of the player's reply.

    Returns:
        The new state when ``process_move`` accepts the move, otherwise the
        previous state prefixed with ``INCORRECT_MOVE_LANGUAGE``. The prefix
        never appears more than once.
    """
    # split() without arguments splits on any whitespace run (spaces, tabs, newlines).
    tokens = response.split()
    move = tokens[0] if tokens else ""

    next_state = process_move(move, previous_state)
    if next_state is None:
        return INCORRECT_MOVE_LANGUAGE + previous_state.replace(INCORRECT_MOVE_LANGUAGE, "")
    return next_state.replace(INCORRECT_MOVE_LANGUAGE, "")
    
    
def get_valid_moves(current_state):
    """List every legal move for a rendered game state.

    Each move is the disc digit followed by the target stack letter. Moves
    are ordered by source stack (A, B, C) and then by target stack.
    """
    stack_lines = current_state.split("\n")[STACK_OFFSET:STACK_OFFSET + 3]
    # Top disc of each stack, or "|" for an empty stack.
    tops = [line.strip()[-1] for line in stack_lines]
    # "|" sorts after every digit, so an empty stack accepts any disc.
    return [
        top + STACK_LETTERS[target]
        for source, top in enumerate(tops)
        if top.isdigit()
        for target in range(3)
        if target != source and tops[target] > top
    ]

def get_random_move(current_state):
    """Pick one of the currently legal moves at random via ``np.random.choice``."""
    candidates = get_valid_moves(current_state)
    return np.random.choice(candidates)

def get_full_stack(num_discs):
    """Render a complete stack of ``num_discs`` discs, largest first.

    Every disc is written as a space followed by its number, so three discs
    give ``" 2 1 0"`` and zero discs give an empty string.
    """
    return "".join(f" {disc}" for disc in range(num_discs - 1, -1, -1))


def get_game_start_end(num_discs):
    """Build the rendered starting and winning states for ``num_discs`` discs.

    Returns:
        A ``(starting_game, ending_game)`` tuple: every disc on stack A in the
        first state, every disc on stack C in the second.
    """
    full_stack = get_full_stack(num_discs)

    def render(stack_a, stack_b, stack_c):
        # One header line, a blank line, then the three stacks inside a code fence.
        return "\n".join([
            "The current state of the game is:",
            "",
            "```",
            f"A | {stack_a}",
            f"B | {stack_b}",
            f"C | {stack_c}",
            "```",
        ])

    return render(full_stack, "", ""), render("", "", full_stack)
    
                
    
    
    

I used this chunk to debug the gameplay

In [12]:
num_discs = 3
max_moves = 20  # Upper bound on random moves so the cell always terminates.
num_iterations = 0

current_state, ending_state = get_game_start_end(num_discs)

# Play random legal moves, printing the state and its legal moves after each one,
# until the puzzle is solved or the move budget is used up.
while current_state != ending_state and num_iterations < max_moves:
    num_iterations += 1
    move = get_random_move(current_state)
    current_state = get_next_state(current_state, move)

    print(current_state)
    print(get_valid_moves(current_state))

# Only claim completion when the winning state was actually reached.
if current_state == ending_state:
    print(f"Completed the game at {num_iterations} iterations")
else:
    print(f"Stopped after {num_iterations} iterations without completing the game")

The current state of the game is:

```
A |  2 1
B |  0
C | 
```
['1C', '0A', '0C']
The current state of the game is:

```
A |  2 1 0
B | 
C | 
```
['0B', '0C']
The current state of the game is:

```
A |  2 1
B | 
C |  0
```
['1B', '0A', '0B']
The current state of the game is:

```
A |  2
B |  1
C |  0
```
['1A', '0A', '0B']
The current state of the game is:

```
A |  2 1
B | 
C |  0
```
['1B', '0A', '0B']
The current state of the game is:

```
A |  2 1 0
B | 
C | 
```
['0B', '0C']
The current state of the game is:

```
A |  2 1
B | 
C |  0
```
['1B', '0A', '0B']
The current state of the game is:

```
A |  2
B |  1
C |  0
```
['1A', '0A', '0B']
The current state of the game is:

```
A |  2
B |  1 0
C | 
```
['2C', '0A', '0C']
The current state of the game is:

```
A |  2
B |  1
C |  0
```
['1A', '0A', '0B']
The current state of the game is:

```
A |  2 0
B |  1
C | 
```
['0B', '0C', '1C']
The current state of the game is:

```
A |  2
B |  1
C |  0
```
['1A', '0A', '0B']
The current stat

In [42]:
from config import OPENAI_API_KEY
import os
from openai import OpenAI
import re
import time



os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

def llm_plays_tower_hanoi(num_discs, model = "gpt-4.1", restrict_token_length = False, max_num_iterations = 50, seconds_sleep_per_step = 1, temperature = 1):
    """Have an OpenAI chat model play the Tower of Hanoi, one move per request.

    The system prompt, every game state (with its list of valid moves) and
    every reply from the model are printed as the game progresses.

    Args:
        num_discs: The number of discs to include in the game.
        model: The name of the chat model to use.
        restrict_token_length: If True, the model only receives the system
            prompt, the previous state, its own previous move and the current
            state, instead of the full conversation history.
        max_num_iterations: The maximum number of moves requested from the model.
        seconds_sleep_per_step: Seconds to sleep after each move. Useful if
            you are hitting rate limits.
        temperature: The sampling temperature passed to the model.

    Returns:
        None. The outcome is reported on standard output.
    """
    sys_prompt = f"""
You are a puzzle master completing the tower of Hanoi puzzle. 

This puzzle will be represented to you in this fashion:

A | {get_full_stack(num_discs)}
B | 
C | 

There are three stacks: A, B, and C. 

There are {num_discs} discs on the stacks, represented as numbers, with 0 being the smallest and {num_discs - 1} being the largest. Each digit is a disc

Your goal is to move all of the discs from stack A to stack C, in the proper order. 

You must follow the following rules: 
1. Only one disk may be moved at a time.
2. Each move consists of taking the top disk from one of the stacks and placing it on the top of another stack or on an empty stack.
3. No disk may be placed on top of a disk that is smaller than it. (so, disk 1 cannot be placed on top of disk 0)

The game will be represented to you in an intermediate state, where the right-most disc represents the "top" of each stack. 


At the start of the game, the puzzle looked like:

A | {get_full_stack(num_discs)}
B | 
C | 

To win, you must move all the discs to stack C, so the puzzle will look like:

A | 
B | 
C | {get_full_stack(num_discs)}

Your role is to plan ahead and make the best move based on the current state of the puzzle, so that the puzzle moves closer to the winning state. 

To make a move, state the number of the disk and the stack you want to move it to in the following manner:
```
1B
```

The move `1B` moves the disc 1 to the top of stack B. The move `0C` moves the disc 0 to the top of stack C.

Do not state anything other than the move in your response.
"""

    print(sys_prompt)

    client = OpenAI()

    def describe(state):
        # Always derive the move list from the state it accompanies, so the
        # model never sees the legal moves of an earlier position.
        return state + "\nValid Moves are: " + "[`" + "`, `".join(get_valid_moves(state)) + "`]"

    current_state, ending_state = get_game_start_end(num_discs)
    num_iterations = 0

    # The opening message lists the valid moves too, like every later state message.
    messages = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": describe(current_state)},
    ]

    # Request at most max_num_iterations moves from the model.
    while current_state != ending_state and num_iterations < max_num_iterations:
        num_iterations += 1

        response = client.chat.completions.create(
            model=model,
            temperature=temperature,
            messages=messages
        )
        # Treat a missing message body as an empty (and therefore invalid) move.
        move = response.choices[0].message.content or ""

        previous_state = describe(current_state)
        print("-"*20)
        print(previous_state)
        print("-"*20)
        print(move)

        messages.append({"role": "assistant", "content": move})

        # Drop punctuation such as backticks before the move is parsed.
        current_state = get_next_state(current_state, re.sub(r"[^\w\s]", "", move))

        if restrict_token_length:
            ## Only pass in the previous turn and the current state of the game
            messages = [
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": previous_state.replace(INCORRECT_MOVE_LANGUAGE, "")},
                # The move was produced by the model, so it is an assistant turn.
                {"role": "assistant", "content": move},
                {"role": "user", "content": describe(current_state)},
            ]
        else:
            # Append the new game state as a user message
            messages.append({"role": "user", "content": describe(current_state)})

        time.sleep(seconds_sleep_per_step) ## Prevent myself from hitting rate limits.

    if current_state == ending_state:
        print(f"Completed the game at {num_iterations} iterations")
    else:
        print(f"Stoped gameplay by reaching the max number of iterations ({max_num_iterations})")

In [37]:
# Show the signature and documentation of the chat-completions `create` method.
# NOTE(review): the visible cells only assign `client` as a local inside functions, so this
# presumably relied on a module-level `client` from an earlier kernel session — confirm.
help(client.chat.completions.create)

Help on method create in module openai.resources.chat.completions:

create(*, messages: 'Iterable[ChatCompletionMessageParam]', model: 'Union[str, ChatModel]', frequency_penalty: 'Optional[float] | NotGiven' = NOT_GIVEN, function_call: 'completion_create_params.FunctionCall | NotGiven' = NOT_GIVEN, functions: 'Iterable[completion_create_params.Function] | NotGiven' = NOT_GIVEN, logit_bias: 'Optional[Dict[str, int]] | NotGiven' = NOT_GIVEN, logprobs: 'Optional[bool] | NotGiven' = NOT_GIVEN, max_tokens: 'Optional[int] | NotGiven' = NOT_GIVEN, n: 'Optional[int] | NotGiven' = NOT_GIVEN, parallel_tool_calls: 'bool | NotGiven' = NOT_GIVEN, presence_penalty: 'Optional[float] | NotGiven' = NOT_GIVEN, response_format: 'completion_create_params.ResponseFormat | NotGiven' = NOT_GIVEN, seed: 'Optional[int] | NotGiven' = NOT_GIVEN, service_tier: "Optional[Literal['auto', 'default']] | NotGiven" = NOT_GIVEN, stop: 'Union[Optional[str], List[str]] | NotGiven' = NOT_GIVEN, stream: 'Optional[Literal[Fa

Attempt 1: the model plays the game with context of all previous moves. 

In [35]:
# Baseline run with the function defaults: full conversation history, temperature 1.
llm_plays_tower_hanoi(num_discs=3)




You are a puzzle master completing the tower of Hanoi puzzle. 

This puzzle will be represented to you in this fashion:

A |  2 1 0
B | 
C | 

There are three stacks: A, B, and C. 

There are 3 discs on the stacks, represented as numbers, with 0 being the smallest and 2 being the largest. Each digit is a disc

Your goal is to move all of the discs from stack A to stack C, in the proper order. 

You must follow the following rules: 
1. Only one disk may be moved at a time.
2. Each move consists of taking the top disk from one of the stacks and placing it on the top of another stack or on an empty stack.
3. No disk may be placed on top of a disk that is smaller than it. (so, disk 1 cannot be placed on top of disk 0)

The game will be represented to you in an intermediate state, where the right-most disc represents the "top" of each stack. 


At the start of the game, the puzzle looked like:

A |  2 1 0
B | 
C | 

To win, you must move all the discs to stack C, so the puzzle will look li

Attempt 2: does lowering the temperature help the model make better moves?

In [40]:
# Same game with the sampling temperature lowered to 0.1.
llm_plays_tower_hanoi(num_discs=3, temperature=0.1)


You are a puzzle master completing the tower of Hanoi puzzle. 

This puzzle will be represented to you in this fashion:

A |  2 1 0
B | 
C | 

There are three stacks: A, B, and C. 

There are 3 discs on the stacks, represented as numbers, with 0 being the smallest and 2 being the largest. Each digit is a disc

Your goal is to move all of the discs from stack A to stack C, in the proper order. 

You must follow the following rules: 
1. Only one disk may be moved at a time.
2. Each move consists of taking the top disk from one of the stacks and placing it on the top of another stack or on an empty stack.
3. No disk may be placed on top of a disk that is smaller than it. (so, disk 1 cannot be placed on top of disk 0)

The game will be represented to you in an intermediate state, where the right-most disc represents the "top" of each stack. 


At the start of the game, the puzzle looked like:

A |  2 1 0
B | 
C | 

To win, you must move all the discs to stack C, so the puzzle will look li

--------------------
Invalid move. The current state of the game is:

```
A | 
B |  1 0
C |  2
```
Valid Moves are: [`0A`, `0C`, `2A`]
--------------------
0A
--------------------
The current state of the game is:

```
A |  0
B |  1
C |  2
```
Valid Moves are: [`0B`, `0C`, `1C`]
--------------------
0C
--------------------
The current state of the game is:

```
A | 
B |  1
C |  2 0
```
Valid Moves are: [`1A`, `0A`, `0B`]
--------------------
1C
--------------------
Invalid move. The current state of the game is:

```
A | 
B |  1
C |  2 0
```
Valid Moves are: [`1A`, `0A`, `0B`]
--------------------
0B
Stoped gameplay by reaching the max number of iterations (50)


Attempt 3: can the game run if we remove previous context?

In [43]:
# Same game, but the model only receives the most recent turn instead of the full history.
llm_plays_tower_hanoi(num_discs=3, restrict_token_length=True)


You are a puzzle master completing the tower of Hanoi puzzle. 

This puzzle will be represented to you in this fashion:

A |  2 1 0
B | 
C | 

There are three stacks: A, B, and C. 

There are 3 discs on the stacks, represented as numbers, with 0 being the smallest and 2 being the largest. Each digit is a disc

Your goal is to move all of the discs from stack A to stack C, in the proper order. 

You must follow the following rules: 
1. Only one disk may be moved at a time.
2. Each move consists of taking the top disk from one of the stacks and placing it on the top of another stack or on an empty stack.
3. No disk may be placed on top of a disk that is smaller than it. (so, disk 1 cannot be placed on top of disk 0)

The game will be represented to you in an intermediate state, where the right-most disc represents the "top" of each stack. 


At the start of the game, the puzzle looked like:

A |  2 1 0
B | 
C | 

To win, you must move all the discs to stack C, so the puzzle will look li

--------------------
The current state of the game is:

```
A |  0
B |  2
C |  1
```
Valid Moves are: [`0B`, `0C`, `1B`]
--------------------
0B
--------------------
The current state of the game is:

```
A | 
B |  2 0
C |  1
```
Valid Moves are: [`0A`, `0C`, `1A`]
--------------------
1B
--------------------
Invalid move. The current state of the game is:

```
A | 
B |  2 0
C |  1
```
Valid Moves are: [`0A`, `0C`, `1A`]
--------------------
0C
Stoped gameplay by reaching the max number of iterations (50)


I'm a bit surprised at how bad the LLM is at playing the Tower of Hanoi, so here I'll replicate the procedure from Apple's paper to see whether the results are any different.

In [57]:
def replicate_apple_procedure(num_discs, model = "gpt-4.1"):
    """Ask a chat model for a complete Tower of Hanoi solution in a single request.

    Sends one system prompt and one user prompt describing the puzzle for
    ``num_discs`` disks, then prints both prompts followed by the model's reply.

    Args:
        num_discs: Number of disks initially stacked on the first peg.
        model: Name of the chat model to query.
    """
    # Disks listed from largest to smallest, e.g. "3,2,1" for three disks.
    descending_discs = ",".join(map(str, range(num_discs, 0, -1)))
    divider = "-" * 20

    system_prompt = f"""You are a helpful assistant. Solve this puzzle for me.
    There are three pegs and {num_discs} disks of different sizes stacked on the first peg. The disks are numbered from 1 (smallest) to n (largest). Disk moves in this puzzle should follow:
    1. Only one disk can be moved at a time.
    2. Each move consists of taking the upper disk from one stack and placing it on top of another stack.
    3. A larger disk may not be placed on top of a smaller disk.
    The goal is to move the entire stack to the third peg.
    Example: With 3 disks numbered 1 (smallest), 2, and 3 (largest), the initial state is [[3, 2, 1], [], []], and a solution might be:
    moves = [[1, 0, 2], [2, 0, 1], [1, 2, 1], [3, 0, 2], [1, 1, 0], [2, 1, 2], [1, 0, 2]]

    Thismeans: Move disk  1 from peg 0 to peg 2, then move disk 2 from peg 0 to peg 1, and so on.
    Requirements:
    • When exploring potential solutions in your thinking process, always include the corresponding complete list of moves.
    • The positions are 0-indexed (the leftmost peg is 0).
    • Ensure your final answer includes the complete list of moves in the format:
           moves = [[disk id, from peg, to peg], ...]
    """

    user_prompt = f"""
    The initial state of the game is: [[{descending_discs}], [], []]

    The winning state of the game is: [[], [], [{descending_discs}]]

    Share your complete list of moves to move from the initial state to the winning state
    """

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # The request is made before anything is printed, as in a single-shot query.
    completion = OpenAI().chat.completions.create(model=model, messages=conversation)

    for section in (system_prompt, user_prompt):
        print(section)
        print(divider)
    print(completion.choices[0].message.content)



In [58]:
# Single-request replication with three disks.
replicate_apple_procedure(num_discs=3)

You are a helpful assistant. Solve this puzzle for me.
    There are three pegs and 3 disks of different sizes stacked on the first peg. The disks are numbered from 1 (smallest) to n (largest). Disk moves in this puzzle should follow:
    1. Only one disk can be moved at a time.
    2. Each move consists of taking the upper disk from one stack and placing it on top of another stack.
    3. A larger disk may not be placed on top of a smaller disk.
    The goal is to move the entire stack to the third peg.
    Example: With 3 disks numbered 1 (smallest), 2, and 3 (largest), the initial state is [[3, 2, 1], [], []], and a solution might be:
    moves = [[1, 0, 2], [2, 0, 1], [1, 2, 1], [3, 0, 2], [1, 1, 0], [2, 1, 2], [1, 0, 2]]

    Thismeans: Move disk  1 from peg 0 to peg 2, then move disk 2 from peg 0 to peg 1, and so on.
    Requirements:
    • When exploring potential solutions in your thinking process, always include the corresponding complete list of moves.
    • The positions are

In [None]:
# Single-request replication with nine disks.
replicate_apple_procedure(num_discs=9)