In [76]:
import random
import pandas as pd
from IPython.display import display

In [None]:
q_table = {}  # maps y (ledge row) -> best column choices

def generate_plinko_grid(width, height):
    grid = {}
    for y in range(height):
        for x in range(width):
            if (y % 2 == 0 and x % 2 == 1) or (y % 2 == 1 and x % 2 == 0):
                grid[(x, height - 1 - y)] = 'O'  # place pegs in a checkered pattern
            else:
                grid[(x, height - 1 - y)] = ' '  # empty spaces between pegs
    return grid

def mark_ledge(grid, start_x, length, ledge_y, ledges):
    if ledge_y not in ledges:
        ledges[ledge_y] = 0  # initialize ledge visit counter for the row
    for x in range(start_x, start_x + length):
        grid[(x, ledge_y)] = '_'  # mark ledge locations

def mark_slide(grid, start_x, start_y, length, direction):
    slide_char = '\\' if direction == "forward" else '/'
    x, y = start_x, start_y
    
    for _ in range(length):
        if (x, y) in grid and grid[(x, y)] == 'O':
            grid[(x, y)] = slide_char  # replace pegs with slides
        
        # replace diagonally in the selected direction
        if direction == "forward":
            x += 1
            y -= 1
        else:
            x -= 1
            y -= 1

def mark_buckets(width, num_buckets):
    buckets = {}
    base_size = width // num_buckets
    extra = width % num_buckets  # distribute leftover columns
    middle_bucket = num_buckets // 2  # make the middle bucket smaller if needed
    start_x = 0
    
    for i in range(num_buckets):
        size = base_size + (1 if extra > 0 and i != middle_bucket else 0)
        for x in range(start_x, start_x + size):
            buckets[x] = i  # assign bucket numbers
        start_x += size
        if extra > 0 and i != middle_bucket:
            extra -= 1
    
    return buckets

def visualize_grid(grid, width, height, ball_position=None, buckets=None):
    print("   " + " ".join(str(i % 10) for i in range(width)))  # column labels
    for y in range(height - 1, -1, -1):
        row = f"{y:2} "  # row labels
        for x in range(width):
            if ball_position and (x, y) == ball_position:
                row += 'X'  # show ball position
            else:
                row += grid.get((x, y), ' ')
            row += " "
        print(row)
    
    # print buckets below the board
    bucket_row = "   "
    for x in range(width):
        bucket_row += str(buckets.get(x, ' ')) + " " if buckets else "  "
    print(bucket_row)
    print("===" + "=" * (2 * width))  # visual separator

def select_action(y, ledge_positions, exploration_rate):
    if y not in q_table:
        q_table[y] = {col: 0 for col in ledge_positions}  # Initialize Q-values

    # gradually explore less as exploration_rate decays
    if random.random() < exploration_rate:
        return random.choice(ledge_positions)  # explore all moves
    else:
        return max(q_table[y], key=q_table[y].get)  # exploit best known move

def drop_ball(grid, width, height, start_x, ledges, buckets, exploration_rate, target_bucket, bucket_tracker):
    x, y = start_x, height - 1
    state_action_pairs = [] # track (ledge row, selected column)
    visualize_grid(grid, width, height, (x, y), buckets)

    while y > 0:

        # check if the ball is on a slide
        while (x, y) in grid and grid[(x, y)] in {'\\', '/'}:
            if grid[(x, y)] == '\\':
                x += 1  # move right
            elif grid[(x, y)] == '/':
                x -= 1  # move left
            y -= 1
            if (x, y) not in grid or grid[(x, y)] == 'O':
                break  # stop sliding at next peg

        # check if the ball is on a ledge
        if y in ledges and grid.get((x, y)) == '_':
            ledges[y] += 1  # track visits to ledges by row
            ledge_positions = [col for col in range(width) if grid.get((col, y)) == '_']

            if ledge_positions:
                action = select_action(y, ledge_positions, exploration_rate)  # Agent chooses a column
                state_action_pairs.append((y, action))  # Store (ledge level, chosen column)
                x = action  # Move to the chosen column
                print(f"Agent moved ball to column {x} on ledge at row {y}.")
                
                # ball falls straight down after ledge selection
                while (x, y - 1) in grid and grid[(x, y - 1)] == ' ':
                    y -= 1  # keep falling through open spaces
                y -= 1

        # determine possible diagonal movement
        possible_moves = []
        if (x - 1, y - 1) in grid and grid[(x - 1, y - 1)] in {'O', '_', '\\', '/'}:
            possible_moves.append((x - 1, y - 1))
        if (x + 1, y - 1) in grid and grid[(x + 1, y - 1)] in {'O', '_', '\\', '/'}:
            possible_moves.append((x + 1, y - 1))

        if possible_moves:
            x, y = random.choice(possible_moves)  # randomly choose diagonal direction
        else:
            y -= 1  # fall straight down if no diagonal move available

    bucket = buckets.get(x, 'Unknown') # determine bucket based on final x coordinate
    bucket_tracker[bucket] += 1  # track bucket landings
    reward = 1 if bucket == target_bucket else -1

    return state_action_pairs, reward



In [None]:
width, height = 15, 30
num_buckets = 5
plinko_grid = generate_plinko_grid(width, height)

ledges = {}  # dictionary to track ledges and visits

mark_ledge(plinko_grid, start_x=2, length=5, ledge_y=27, ledges=ledges)
mark_ledge(plinko_grid, start_x=6, length=4, ledge_y=24, ledges=ledges)
mark_ledge(plinko_grid, start_x=1, length=6, ledge_y=21, ledges=ledges)
mark_ledge(plinko_grid, start_x=9, length=5, ledge_y=19, ledges=ledges)
mark_ledge(plinko_grid, start_x=3, length=7, ledge_y=17, ledges=ledges)
mark_ledge(plinko_grid, start_x=7, length=6, ledge_y=15, ledges=ledges)
mark_ledge(plinko_grid, start_x=0, length=5, ledge_y=13, ledges=ledges)
mark_ledge(plinko_grid, start_x=10, length=6, ledge_y=11, ledges=ledges)
mark_ledge(plinko_grid, start_x=2, length=7, ledge_y=9, ledges=ledges)
mark_ledge(plinko_grid, start_x=5, length=6, ledge_y=7, ledges=ledges)
mark_ledge(plinko_grid, start_x=4, length=5, ledge_y=5, ledges=ledges)
mark_ledge(plinko_grid, start_x=8, length=4, ledge_y=3, ledges=ledges)
mark_ledge(plinko_grid, start_x=1, length=6, ledge_y=2, ledges=ledges)

mark_slide(plinko_grid, start_x=0, start_y=28, length=4, direction="forward")
mark_slide(plinko_grid, start_x=13, start_y=23, length=5, direction="backward")
mark_slide(plinko_grid, start_x=14, start_y=16, length=4, direction="backward")
mark_slide(plinko_grid, start_x=0, start_y=12, length=3, direction="forward")
mark_slide(plinko_grid, start_x=14, start_y=8, length=3, direction="backward")
mark_slide(plinko_grid, start_x=1, start_y=5, length=3, direction="forward")

buckets = mark_buckets(width, num_buckets) # map each x value to a bucket

# stat trackers
ledge_tracker = {y: 0 for y in ledges}
bucket_tracker = {i: 0 for i in range(num_buckets)}

# q-learning parameters
learning_rate = 0.1
discount_factor = 0.9
exploration_rate = 1.0  # start fully exploratory
exploration_decay = 0.995  # reduce randomness over time
min_exploration = 0.01  # smallest possible exploration rate
episodes = 1000  # number of training episodes

# train agent
target_bucket = 2  # the bucket the agent should aim for

for episode in range(episodes):
    start_x = random.randint(0, width - 1)  # randomly select starting position

    # drop the ball and track state-action history
    state_action_pairs, reward = drop_ball(plinko_grid, width, height, start_x, ledges, buckets, exploration_rate, target_bucket, bucket_tracker)

    # update Q-table using recorded decisions
    for y, action in reversed(state_action_pairs):
        current_q = q_table[y].get(action, 0)  # get current Q-value
        best_future_q = max(q_table.get(y, {}).values(), default=0)  # get best future Q-value
        q_table[y][action] = current_q + learning_rate * (reward + discount_factor * best_future_q - current_q)  # update Q-value

    # gradually reduce exploration to favor learned strategies
    exploration_rate = max(min_exploration, exploration_rate * exploration_decay)

# print training statistics
print("\nLedge Visit Statistics:", ledges)  # track ledge visits
print("\nBucket Landing Statistics:", bucket_tracker)  # track bucket landings

# display Q-table
display(pd.DataFrame(q_table).T.sort_index(ascending=False))

   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4
29   O   O   O   O   O   O X O   
28 \   O   O   O   O   O   O   O 
27   \ _ _ _ _ _ O   O   O   O   
26 O   \   O   O   O   O   O   O 
25   O   \   O   O   O   O   O   
24 O   O   O   _ _ _ _ O   O   O 
23   O   O   O   O   O   O   /   
22 O   O   O   O   O   O   /   O 
21   _ _ _ _ _ _ O   O   /   O   
20 O   O   O   O   O   /   O   O 
19   O   O   O   O   _ _ _ _ _   
18 O   O   O   O   O   O   O   O 
17   O   _ _ _ _ _ _ _   O   O   
16 O   O   O   O   O   O   O   / 
15   O   O   O   _ _ _ _ _ _ /   
14 O   O   O   O   O   O   /   O 
13 _ _ _ _ _ O   O   O   /   O   
12 \   O   O   O   O   O   O   O 
11   \   O   O   O   O _ _ _ _ _ 
10 O   \   O   O   O   O   O   O 
 9   O _ _ _ _ _ _ _ O   O   O   
 8 O   O   O   O   O   O   O   / 
 7   O   O   _ _ _ _ _ _ O   /   
 6 O   O   O   O   O   O   /   O 
 5   \   O _ _ _ _ _ O   O   O   
 4 O   \   O   O   O   O   O   O 
 3   O   \   O   O _ _ _ _   O   
 2 O _ _ _ _ _ _   O   O   O   O 
 1   O   O   O 

Unnamed: 0,9,10,11,12,13,3,4,5,6,7,8,0,1,2,14
27,,,,,,0.720846,0.82981,5.219705,0.44693,,,,,-0.011402,
24,1.35069,,,,,,,,1.879483,5.932829,1.165918,,,,
21,,,,,,1.132263,5.88975,1.600065,1.322612,,,,1.157768,1.463124,
19,0.725178,5.488995,1.102902,1.412364,0.480746,,,,,,,,,,
17,3.793891,,,,,6.548591,2.571568,2.872661,2.664566,2.117231,2.165774,,,,
15,0.587575,-0.007411,0.383168,0.720259,,,,,,0.121866,3.579258,,,,
13,,,,,,2.068793,1.729242,,,,,2.543489,6.63589,1.456261,
11,,-0.038882,1.595176,0.098525,0.078403,,,,,,,,,,-0.252405
9,,,,,,2.175448,2.111083,1.87972,4.649921,1.575598,2.20155,,,1.795435,
7,4.7206,1.249603,,,,,,1.377657,1.920697,2.347774,1.1414,,,,
