In [None]:
import random
import pandas as pd
from IPython.display import display
from collections import defaultdict

### Global Trackers and Dictionaries

In [None]:
# Board wiring shared by the marking helpers and the simulation.
pipes = dict()  # (x, y) of a pipe end -> (x, y) of the opposite end
blocks = dict()  # row_y -> {x: original tile}, kept so a blocked row can be restored

# Per-run statistics, accumulated across episodes.
bucket_tracker = dict.fromkeys(range(5), 0)  # bucket index -> landings (5 buckets, indices 0..4)
ledge_tracker = defaultdict(int)  # ((start_x, y), frozenset of pressed buttons) -> visits
spike_tracker = defaultdict(int)  # spike row y -> hits
pipe_tracker = defaultdict(int)  # (x, y) of a pipe end -> uses
button_tracker = defaultdict(int)  # (x, y) of a button tile -> presses

### Board Functions

In [None]:
# Q-table shared by every episode.
# Keys are states of the form (position, frozenset of pressed button coords), where
# position is (start_x, y) for a ledge or ('block', y) for a blocked row.
# Values are dicts mapping a column choice -> its current Q-value.
q_table = {}

def generate_grid(width, height):
    """Build the empty peg board.

    Returns a dict mapping (x, y) -> tile for every cell of a width x height
    board. Tiles are 'O' (peg) or ' ' (gap) in a checkerboard layout. Rows are
    generated from the top of the board downwards, so the first key inserted
    has y == height - 1.
    """
    board = {}
    for row_from_top in range(height):
        board_y = height - 1 - row_from_top
        for col in range(width):
            # a peg sits wherever the column and the row-from-top parities differ
            has_peg = (row_from_top + col) % 2 == 1
            board[(col, board_y)] = 'O' if has_peg else ' '
    return board

def mark_ledge(grid, start_x, length, ledge_y, button_x=None):
    """Draw a horizontal ledge of `length` tiles on row `ledge_y`, starting at `start_x`.

    Every tile becomes '_' except the one at column `button_x` (if it lies on
    the ledge), which becomes the button tile '⬒' and is registered in
    button_tracker. The ledge itself is registered in ledger_tracker's
    no-buttons-pressed state; existing counts are left untouched.
    """
    for col in range(start_x, start_x + length):
        is_button = col == button_x
        grid[(col, ledge_y)] = '⬒' if is_button else '_'
        if is_button:
            # make sure the button shows up in the statistics even if never pressed
            button_tracker.setdefault((col, ledge_y), 0)

    # register the ledge under the "no buttons pressed" state
    ledge_tracker.setdefault(((start_x, ledge_y), frozenset()), 0)

def mark_spike(grid, start_x, length, spike_y):
    """Place `length` spike tiles ('^') on row `spike_y`, starting at column `start_x`.

    Also registers the row in spike_tracker (count starts at 0, existing
    counts are kept).
    """
    grid.update({(col, spike_y): '^' for col in range(start_x, start_x + length)})
    spike_tracker.setdefault(spike_y, 0)

def mark_pipe(grid, x, y1, y2):
    """Draw a vertical pipe in column `x` between rows `y1` and `y2` (either order).

    The higher end is drawn as '⤓', the lower end as '↥'. Rows in between
    become 'Φ' where a peg ('O') used to be and '|' otherwise. Both ends are
    linked to each other in `pipes` and registered in `pipe_tracker`.
    """
    lower, upper = sorted((y1, y2))

    def tile_for(row):
        # the upper end wins when both ends coincide (y1 == y2)
        if row == upper:
            return '⤓'
        if row == lower:
            return '↥'
        return 'Φ' if grid.get((x, row), ' ') == 'O' else '|'

    for row in range(lower, upper + 1):
        grid[(x, row)] = tile_for(row)

    upper_end, lower_end = (x, upper), (x, lower)

    # each end teleports to the other
    pipes[upper_end] = lower_end
    pipes[lower_end] = upper_end

    # register both ends for usage statistics without disturbing existing counts
    for pipe_end in (upper_end, lower_end):
        pipe_tracker[pipe_end] += 0

def mark_slide(grid, start_x, start_y, length, direction):
    """Turn pegs along a downward diagonal into slide tiles.

    Starting at (start_x, start_y), visits `length` cells, moving one row down
    per step and one column right when `direction` is "forward" or one column
    left for any other value. Only cells currently holding a peg ('O') are
    replaced: with '\\' for forward slides and '/' otherwise.
    """
    going_right = direction == "forward"
    glyph = '\\' if going_right else '/'
    dx = 1 if going_right else -1

    for step in range(length):
        cell = (start_x + dx * step, start_y - step)
        if grid.get(cell) == 'O':
            grid[cell] = glyph

def mark_block(grid, width, row_y):
    """Cover row `row_y` with block tiles ('█'), remembering what was there.

    Pipe tiles ('↥', 'Φ', '⤓', '|') are left in place. The overwritten tiles
    are stored in blocks[row_y] so the row can be restored later. Does nothing
    if the row is already recorded in `blocks`.
    """
    if row_y in blocks:
        return

    pipe_tiles = ('↥', 'Φ', '⤓', '|')
    saved = blocks[row_y] = {}
    for col in range(width):
        cell = (col, row_y)
        previous = grid.get(cell, ' ')
        if previous in pipe_tiles:
            continue  # pipes stay usable through a blocked row
        saved[col] = previous
        grid[cell] = '█'

def unmark_block(grid, row_y):
    """Restore the tiles of row `row_y` that were saved in `blocks` and forget the row.

    Does nothing when the row is not currently recorded in `blocks`.
    """
    saved = blocks.pop(row_y, None)
    if saved is None:
        return

    for col, tile in saved.items():
        grid[(col, row_y)] = tile

def mark_buckets(width, num_buckets):
    """Split the columns 0..width-1 into `num_buckets` contiguous buckets.

    Returns a dict mapping column x -> bucket index. Every bucket gets
    width // num_buckets columns; leftover columns are handed out one each,
    left to right, to buckets other than the middle one
    (index num_buckets // 2).
    """
    base_size, leftover = divmod(width, num_buckets)
    centre = num_buckets // 2

    column_to_bucket = {}
    next_col = 0
    for index in range(num_buckets):
        takes_extra = leftover > 0 and index != centre
        span = base_size + 1 if takes_extra else base_size
        for col in range(next_col, next_col + span):
            column_to_bucket[col] = index
        next_col += span
        if takes_extra:
            leftover -= 1

    return column_to_bucket

def visualize_grid(grid, width, height, ball_position=None, buckets=None):
    """Print the board as text, top row first.

    Output layout: an x-axis header (column numbers modulo 10), one line per
    row prefixed with its y value, a line with the bucket index under each
    column (blank when `buckets` is empty or None), the x-axis again, and a
    '=' divider. The ball, when `ball_position` is given, is drawn as 'X' in
    place of the tile underneath it.
    """
    axis = "   " + " ".join(str(col % 10) for col in range(width))
    lines = [axis]

    for row in range(height - 1, -1, -1):
        cells = []
        for col in range(width):
            if ball_position and (col, row) == ball_position:
                cells.append('X')
            else:
                cells.append(grid.get((col, row), ' '))
        # every cell is followed by a single space, including the last one
        lines.append(f"{row:2} " + "".join(cell + " " for cell in cells))

    if buckets:
        footer = "".join(str(buckets.get(col, ' ')) + " " for col in range(width))
    else:
        footer = "  " * width
    lines.append("   " + footer)

    lines.append(axis)
    lines.append("===" + "=" * (2 * width))

    for line in lines:
        print(line)


### Game Logic

In [None]:
def handle_blocks(grid, x, y, width, exploration_rate, state_action_pairs, pressed_buttons):
    """Let the agent pick columns on a blocked row until it picks a pipe.

    The state is (('block', y), frozenset(pressed_buttons)); its Q-table entry
    is created on first use with one zero-valued action per column. Every
    choice is appended to `state_action_pairs`. When the chosen column holds a
    pipe end on row `y`, the pipe's use is counted and the pipe's destination
    is returned.

    A column that turned out not to be a pipe is not offered again during the
    same visit. Q-values do not change inside this function, so without that
    the greedy branch would repeat the same failing choice forever when
    `exploration_rate` is 0, and would log thousands of duplicate pairs when
    it is merely small.

    Parameters:
        grid: board dict; unused here, kept so existing callers keep working.
        x: current column of the ball (replaced by the chosen column).
        y: the blocked row the ball is on.
        width: number of columns on the board.
        exploration_rate: probability of choosing a random untried column
            instead of the highest-valued one.
        state_action_pairs: list that receives (state, chosen column) tuples.
        pressed_buttons: iterable of button coordinates pressed this episode.

    Returns:
        (x, y) of the far end of the pipe that was taken.

    Raises:
        ValueError: if no column on row `y` holds a pipe end, since the row
            could never be left.
    """
    block_state = (('block', y), frozenset(pressed_buttons))  # unique state with memory of buttons

    # create q-table entry if needed
    if block_state not in q_table:
        q_table[block_state] = {col: 0 for col in range(width)}
    action_values = q_table[block_state]

    # columns that have not been tried yet during this visit
    untried = list(action_values)
    if not any((col, y) in pipes for col in untried):
        raise ValueError(f"blocked row y={y} has no pipe to exit through")

    while True:
        if random.random() < exploration_rate:
            action = random.choice(untried)  # explore
        else:
            action = max(untried, key=action_values.get)  # exploit

        state_action_pairs.append((block_state, action))  # log the decision
        x = action

        if (x, y) in pipes:
            destination = pipes[(x, y)]
            pipe_tracker[(x, y)] += 1
            return destination[0], destination[1]

        # not an exit: never offer this column again, which bounds the loop by width
        untried.remove(action)

def _touches_block(grid, x, y):
    """Return True if (x, y) is a block tile, or a pipe end with a block tile directly beside it."""
    tile = grid.get((x, y))
    if tile == '█':
        return True
    if tile in {'⤓', '↥'}:
        return grid.get((x - 1, y), '') == '█' or grid.get((x + 1, y), '') == '█'
    return False


def _find_ledge_origin(grid, x, y, width):
    """Return the registered (start_x, y) of the ledge that contains (x, y), or None.

    A ledge extends from its registered start column over the contiguous run
    of ledge-like tiles, pipe ends included. Pipe ends are drawn over ledge
    tiles, so counting only '_' and '⬒' would make a ledge look shorter than
    it is and leave its right-most tiles unmatched.
    """
    ledge_like = {'_', '⤓', '↥', '⬒'}
    for (ledge_start_x, ledge_y), _ in list(ledge_tracker):
        if ledge_y != y:
            continue
        end_x = ledge_start_x
        while end_x < width and grid.get((end_x, ledge_y)) in ledge_like:
            end_x += 1
        if ledge_start_x <= x < end_x:
            return (ledge_start_x, ledge_y)
    return None


def drop_ball(grid, width, height, start_x, buckets, exploration_rate, target_bucket):
    """Simulate one ball drop and record every decision the agent makes.

    The ball starts at (start_x, height - 1) and moves until y <= 0 or it hits
    a spike. On ledges and blocked rows the agent picks a column, either
    epsilon-greedily from `q_table` or at random; elsewhere the ball slides,
    teleports through pipes, or falls (diagonally at random when a solid tile
    is diagonally below, otherwise straight down).

    Side effects: prints the board once and a line per ledge decision / fall,
    updates the ledge, pipe, button, spike and bucket trackers, creates
    missing `q_table` entries, and mutates `grid` when a button is pressed
    (the button becomes a ledge tile and block row 5 is restored).

    Parameters:
        grid: board dict mapping (x, y) -> tile.
        width, height: board dimensions.
        start_x: column the ball is dropped from.
        buckets: dict mapping column x -> bucket index.
        exploration_rate: probability of a random choice at a decision point.
        target_bucket: bucket index that earns the positive reward.

    Returns:
        (state_action_pairs, reward): the list of (state, chosen column)
        decisions in order, and the episode reward: 0 if the ball hit a spike,
        +1 if it landed in `target_bucket`, -1 otherwise.
    """
    x, y = start_x, height - 1  # start at top row
    state_action_pairs = []  # track moves
    reward = 0
    pressed_buttons = set()  # (x, y) coordinates of the buttons pressed this episode
    visualize_grid(grid, width, height, None, buckets) # visualize grid once per episode
    while y > 0:
        # visualize_grid(grid, width, height, (x,y), buckets) # visualize grid every time step to watch ball drop

        # check for block row or adjacent to one while on a pipe
        if _touches_block(grid, x, y):
            x, y = handle_blocks(grid, x, y, width, exploration_rate, state_action_pairs, pressed_buttons)
            continue

        # handle slide tiles
        while (x, y) in grid and grid[(x, y)] in {'\\', '/'}:
            x += 1 if grid[(x, y)] == '\\' else -1
            y -= 1
            if (x, y) not in grid or grid[(x, y)] in {'O', 'Φ'}:
                break

        # handle ledges, pipes, buttons
        while grid.get((x, y)) in {'_', '⤓', '↥', '⬒'}:
            # find the registered ledge the ball is standing on
            ledge_origin = _find_ledge_origin(grid, x, y, width)
            if ledge_origin is None:
                break  # stop if no matching ledge

            ledge_state = (ledge_origin, frozenset(pressed_buttons))
            ledge_tracker[ledge_state] += 1

            # find all valid columns on this ledge
            ledge_positions = [col for col in range(width) if grid.get((col, y)) in {'_', '⤓', '↥', '⬒'}]

            # add this ledge to q_table if it's not there yet
            if ledge_state not in q_table:
                q_table[ledge_state] = {col: 0 for col in ledge_positions}

            # choose an action: explore or exploit
            action = random.choice(ledge_positions) if random.random() < exploration_rate else max(q_table[ledge_state], key=q_table[ledge_state].get)

            # print debug info
            print(f"[LEDGE @ y={y}] Agent on tile '{grid.get((x, y))}' at (x={x}, y={y}) chose x={action} ({grid.get((action, y))}). Available: {ledge_positions}")

            # store the decision
            state_action_pairs.append((ledge_state, action))

            # get the tile the agent chose
            tile = grid.get((action, y))

            if tile == '⬒':  # button was pressed
                grid[(action, y)] = '_'  # turn button into ledge
                # NOTE(review): the unblocked row is hard-coded to 5, which is the
                # row build_board blocks; buttons are not mapped to rows anywhere.
                unmark_block(grid, 5)  # remove block row
                button_tracker[(action, y)] += 1  # track button usage
                pressed_buttons.add((action, y))  # remember the press; it becomes part of later states
                x = action  # move to button spot
                ledge_tracker[ledge_state] -= 1  # cancel extra visit count
                continue  # stay on ledge and pick again

            elif (action, y) in pipes:  # pipe was selected
                destination = pipes[(action, y)]  # get destination
                pipe_tracker[(action, y)] += 1  # track pipe use
                x, y = destination  # teleport to pipe destination
                break  # stop ledge logic

            else:  # normal drop off ledge
                x = action  # move to chosen column
                # fall straight down if empty space below
                while (x, y - 1) in grid and grid[(x, y - 1)] == ' ':
                    y -= 1
                y -= 1  # drop one more row
                break  # done with ledge

        # check for block row again
        if _touches_block(grid, x, y):
            x, y = handle_blocks(grid, x, y, width, exploration_rate, state_action_pairs, pressed_buttons)
            continue

        # spike check
        if grid.get((x, y)) == '^':
            if y in spike_tracker:
                spike_tracker[y] += 1
            return state_action_pairs, 0

        # try falling diagonally
        possible_moves = []

        # check down-left/right diagonals
        if (x - 1, y - 1) in grid and grid[(x - 1, y - 1)] in {'O', '_', '\\', '/', '⤓', '↥', '⬒', '█', '^', 'Φ'}:
            possible_moves.append((x - 1, y - 1))
        if (x + 1, y - 1) in grid and grid[(x + 1, y - 1)] in {'O', '_', '\\', '/', '⤓', '↥', '⬒', '█', '^', 'Φ'}:
            possible_moves.append((x + 1, y - 1))

        if possible_moves:
            # pick a random diagonal move
            chosen_move = random.choice(possible_moves)
            print(f"[DIAGONAL FALL] Agent moves from (x={x}, y={y}) to {chosen_move}")
            x, y = chosen_move
        else:
            # fall straight down if no diagonal options
            print(f"[FALL] Agent falling from (x={x}, y={y}) to (x={x}, y={y-1})")
            y -= 1

    # final reward based on landing bucket
    bucket = buckets.get(x, 'Unknown')
    # .get keeps an off-board landing ('Unknown') from raising KeyError
    bucket_tracker[bucket] = bucket_tracker.get(bucket, 0) + 1
    reward += 1 if bucket == target_bucket else -1

    return state_action_pairs, reward



### Board Constructor

In [None]:
def build_board():
    """Create a fresh 15x30 board with its ledges, slides, spikes, pipes and block row.

    Resets the module-level `blocks` registry, then places the features in
    this order: ledges, slides, spikes, pipes, block row. The order matters:
    pipes overwrite ledge tiles, and the block row skips pipe tiles.

    Returns:
        (grid, buckets): the board dict and the column -> bucket index map
        for 5 buckets.
    """
    global blocks

    width, height = 15, 30
    num_buckets = 5
    grid = generate_grid(width, height)
    blocks = {}

    # (start_x, length, ledge_y, button_x)
    ledges = [
        (2, 5, 27, None),
        (6, 4, 24, None),
        (1, 6, 21, None),
        (9, 5, 19, None),
        (3, 7, 17, None),
        (7, 6, 15, None),
        (0, 5, 13, None),
        (9, 6, 11, None),
        (0, 9, 9, 0),
        (5, 6, 7, None),
        (4, 5, 5, None),
        (8, 4, 3, None),
        (4, 8, 2, None),
    ]
    for ledge_x, ledge_length, ledge_y, button_x in ledges:
        mark_ledge(grid, start_x=ledge_x, length=ledge_length, ledge_y=ledge_y, button_x=button_x)

    # (start_x, start_y, length, direction)
    slides = [
        (0, 28, 4, "forward"),
        (13, 23, 3, "backward"),
        (14, 16, 4, "backward"),
        (0, 12, 2, "forward"),
    ]
    for slide_x, slide_y, slide_length, direction in slides:
        mark_slide(grid, start_x=slide_x, start_y=slide_y, length=slide_length, direction=direction)

    # (start_x, length, spike_y)
    spikes = [
        (5, 4, 18),
        (7, 2, 6),
    ]
    for spike_x, spike_length, spike_y in spikes:
        mark_spike(grid, start_x=spike_x, length=spike_length, spike_y=spike_y)

    # (x, y1, y2)
    pipe_specs = [
        (6, 27, 24),
        (9, 24, 19),
        (4, 9, 5),
    ]
    for pipe_x, pipe_y1, pipe_y2 in pipe_specs:
        mark_pipe(grid, x=pipe_x, y1=pipe_y1, y2=pipe_y2)

    mark_block(grid, width, row_y=5)

    # map each x value to a bucket
    return grid, mark_buckets(width, num_buckets)

### Q-Learning

In [None]:
# q-learning parameters
learning_rate = 0.1
discount_factor = 0.9
exploration_rate = 1.0  # start fully exploratory
exploration_decay = 0.995  # reduce randomness over time
min_exploration = 0.01  # smallest possible exploration rate
episodes = 1000  # number of training episodes

# train agent
target_bucket = 2  # the bucket the agent should aim for

for episode in range(episodes):
    # every episode starts from a freshly built board (buttons and block rows reset)
    grid, buckets = build_board()

    # board dimensions are derived from the grid keys
    width = max(x for (x, y) in grid.keys()) + 1
    height = max(y for (x, y) in grid.keys()) + 1

    start_x = random.randint(0, width - 1)
    state_action_pairs, reward = drop_ball(
        grid, width, height, start_x, buckets,
              exploration_rate, target_bucket
    )

    # Update the Q-table by walking the episode backwards.
    # The only reward is the episode outcome, so it belongs to the final decision;
    # every earlier decision bootstraps from the best value of the state that
    # actually followed it. Reading the future value from the state being updated
    # would make each value feed on itself instead of on what happened next.
    next_state = None  # None marks the terminal transition
    for ledge_state, action in reversed(state_action_pairs):
        current_q = q_table[ledge_state].get(action, 0)
        if next_state is None:
            td_target = reward
        else:
            best_future_q = max(q_table[next_state].values(), default=0)
            td_target = discount_factor * best_future_q
        q_table[ledge_state][action] = current_q + learning_rate * (td_target - current_q)
        next_state = ledge_state

    # gradually reduce exploration to favor learned strategies
    exploration_rate = max(min_exploration, exploration_rate * exploration_decay)

# print training statistics
print("\nLedge Visit Statistics:")
# ledge_tracker keys are ((start_x, y), frozenset of pressed button coords).
# Order by visit count (descending); ties are broken by position and then by
# the sorted pressed-button list so the output order is deterministic.
ledge_rows = sorted(
    ledge_tracker.items(),
    key=lambda item: (-item[1], item[0][0], sorted(item[0][1])),
)
for ((ledge_x, ledge_y), pressed), visits in ledge_rows:
    # the same ledge appears once per distinct set of pressed buttons
    pressed_note = f" with buttons {sorted(pressed)} pressed" if pressed else ""
    print(f"Ledge at ({ledge_x}, {ledge_y}){pressed_note} visited {visits} times")

print("\nBucket Landing Statistics:")
for bucket_id, count in sorted(bucket_tracker.items()):
    print(f"Bucket {bucket_id}: {count} landings")

print("\nSpike Hit Statistics:")
for y, count in sorted(spike_tracker.items()):
    print(f"Spike row {y} hit {count} times")

print("\nPipe Usage Statistics:")
for (x, y), count in sorted(pipe_tracker.items()):
    print(f"Pipe at ({x}, {y}) used {count} times")

print("\nButton Press Statistics:")
for (x, y), count in sorted(button_tracker.items()):
    print(f"Button at ({x}, {y}) pressed {count} times")

# build DataFrame from Q-table
# q_table maps state -> {column: Q-value}; after the transpose each row is one
# state and each column is one board column (action)
q_df = pd.DataFrame(q_table).T

# convert index into MultiIndex: ((start_x, y), buttons_pressed) -> MultiIndex with 2 levels
# (for blocked-row states the position is ('block', y) instead of (start_x, y));
# the pressed-button frozenset is turned into a sorted tuple
q_df.index = pd.MultiIndex.from_tuples(
    [(ledge[0], tuple(sorted(ledge[1]))) for ledge in q_df.index],
    names=["position", "buttons_pressed"]
)

# sort by descending y value (position = (start_x, y))
# the key negates the second element of each position, so higher rows come first
q_df = q_df.sort_values(
    by="position",
    key=lambda idx: [-pos[1] for pos in idx]
)

# sort columns (actions/columns in environment) left to right
q_df = q_df[sorted(q_df.columns)]

# show Q-table
display(q_df)
