# **Minimal Gridworld + Q-Learning**

This notebook has two parts:
1. **Small Interactive Grid**: Let a user move around manually (via buttons or arrow keys) in a tiny grid.
2. **Q-Learning on a Larger Grid**: An environment with special tiles (ice, bumpers, pits, etc.) and Q-learning to find a policy.

In [53]:
# Install the packages this notebook imports into the current Jupyter kernel.
# pip is run as a module of `sys.executable` (the interpreter running this
# kernel) so the packages land in the environment the kernel actually uses.
import sys
# numpy is imported below as `np`
!{sys.executable} -m pip install numpy
# ipywidgets provides the buttons, labels and output areas used for manual play
!{sys.executable} -m pip install ipywidgets
# ipycanvas is imported below
!{sys.executable} -m pip install ipycanvas
# ipyevents delivers DOM events (clicks, key presses) from a widget to Python
!{sys.executable} -m pip install ipyevents



In [54]:
# libraries for displaying and user input
import ipywidgets as widgets
from IPython.display import display, clear_output
import ipycanvas
import ipyevents

# libraries for RL
import numpy as np
import math

## Part 1: Small Interactive Environment
We define a 3x3 environment. The user sees only the cells they have "visited".
- Use buttons `up`, `down`, `left`, `right` to move.
- If you step into the goal, you win!
- This small environment is purely for demonstration and manual play.

In [55]:
class SmallGridEnv:
    """Tiny grid world for manual play in which unvisited cells stay hidden.

    Positions are ``(row, col)`` tuples. The agent starts on ``(0, 0)`` and
    the goal is the bottom-right cell ``(rows - 1, cols - 1)``.
    """

    # (accepted action names, axis that changes: 0 = row / 1 = column, step)
    _MOVES = (
        (('up', 'u'), 0, -1),
        (('down', 'd'), 0, 1),
        (('left', 'l'), 1, -1),
        (('right', 'r'), 1, 1),
    )

    def __init__(self, rows=3, cols=3):
        """Create a ``rows`` x ``cols`` grid with the agent on the start cell."""
        self.rows, self.cols = rows, cols
        self.start = (0, 0)
        self.goal = (rows - 1, cols - 1)
        self.agent_pos = self.start
        # Cells the agent has stood on; render() reveals only these.
        self.visited = {self.start}

    def reset(self):
        """Move the agent back to the start, forget all visits, return the position."""
        self.agent_pos = self.start
        self.visited = {self.start}
        return self.agent_pos

    def step(self, action):
        """Move the agent one cell, clamped to the grid edges.

        ``action`` is 'up', 'down', 'left' or 'right', or the one-letter alias
        'u', 'd', 'l', 'r'. Any other value leaves the agent in place.

        Returns ``(agent_pos, done)`` where ``done`` is True when the agent
        stands on the goal cell.
        """
        row, col = self.agent_pos
        coords = [row, col]
        for names, axis, delta in self._MOVES:
            if action in names:
                moved = coords[axis] + delta
                if delta < 0:
                    # Moving up/left: never go below index 0.
                    coords[axis] = max(moved, 0)
                else:
                    # Moving down/right: never go past the last row/column.
                    last_index = (self.rows, self.cols)[axis] - 1
                    coords[axis] = min(moved, last_index)
                break
        self.agent_pos = (coords[0], coords[1])
        self.visited.add(self.agent_pos)
        return self.agent_pos, self.agent_pos == self.goal

    def render(self):
        """Print the grid as text: one line per row, cells separated by spaces.

        'A' = agent, 'G' = goal (only once visited), '.' = visited,
        '?' = unvisited.
        """
        def symbol(cell):
            if cell == self.agent_pos:
                return 'A'
            if cell not in self.visited:
                return '?'
            return 'G' if cell == self.goal else '.'

        lines = [
            ' '.join(symbol((r, c)) for c in range(self.cols))
            for r in range(self.rows)
        ]
        print('\n'.join(lines))  # text-based print

### Manual Play

#### Using buttons (needs ipywidgets library):

In [56]:
# Fresh 3x3 environment driven by the four buttons below.
env = SmallGridEnv(rows=3, cols=3)
env.reset()

# One button per move. Each is pinned to a cell of the 3x3 control grid with
# grid_area = 'row / column' (1-indexed), so the four buttons form a plus shape.
button_up = widgets.Button(
    description="Up", layout=widgets.Layout(grid_area='1 / 2'))     # Row 1, Column 2
button_down = widgets.Button(
    description="Down", layout=widgets.Layout(grid_area='3 / 2'))   # Row 3, Column 2
button_left = widgets.Button(
    description="Left", layout=widgets.Layout(grid_area='2 / 1'))   # Row 2, Column 1
button_right = widgets.Button(
    description="Right", layout=widgets.Layout(grid_area='2 / 3'))  # Row 2, Column 3

# Area the grid text is printed into
output = widgets.Output()

# Container that arranges the buttons on a 3x3 grid of fixed-size cells
controls = widgets.GridBox(
    children=[button_up, button_left, button_right, button_down],
    layout=widgets.Layout(
        width='390px',
        grid_template_rows='50px 50px 50px',
        grid_template_columns='100px 100px 100px',
        justify_items='center',
        align_items='center',
    ),
)

def on_button_click(button):
    """Handle a click on a direction button: move the agent and redraw the grid.

    Args:
        button: The clicked widget. Its ``description`` ("Up", "Down", "Left"
            or "Right") selects the move; a widget with any other description
            is ignored.
    """
    action = {
        "Up": "up",
        "Down": "down",
        "Left": "left",
        "Right": "right",
    }.get(button.description)
    if action is None:
        # Unknown button: ignore it rather than fail on an unbound `action`.
        return

    with output:
        # Clear previous output (deferred until the new grid is printed)
        clear_output(wait=True)

        # Step the environment, then show the resulting grid
        _, done = env.step(action)
        env.render()

        if done:
            # Replace the final grid with the win message and a fresh game
            clear_output(wait=True)
            print("You reached the goal! Resetting environment...")
            env.reset()
            env.render()

# Every direction button shares the one click handler
for _button in (button_up, button_down, button_left, button_right):
    _button.on_click(on_button_click)

# Show the controls first, then the area the grid is printed into
display(controls, output)

GridBox(children=(Button(description='Up', layout=Layout(grid_area='1 / 2'), style=ButtonStyle()), Button(desc…

Output()

#### Using arrow keys (needs ipyevents library):

In [57]:
# Demo: watch a label for DOM events and show the contents of the latest one.
l = widgets.Label('Click or type on me!')
l.layout.border = '2px solid red'

# Widget that displays the most recent event
h = widgets.HTML('Event info')

d = ipyevents.Event(
    source=l,
    watched_events=['click', 'keydown', 'mouseenter', 'touchmove'],
)

def handle_event(event):
    """Show every ``key: value`` pair of ``event`` in ``h``, one pair per line."""
    h.value = '<br>'.join(f'{name}: {value}' for name, value in event.items())

d.on_dom_event(handle_event)
display(l, h)

Label(value='Click or type on me!', layout=Layout(border_bottom='2px solid red', border_left='2px solid red', …

HTML(value='Event info')

In [58]:
# Fresh 4x4 environment driven by the arrow keys.
env = SmallGridEnv(rows=4, cols=4)
env.reset()

# Label that receives the key presses
l = widgets.Label('Click or type on me!')
l.layout.border = '2px solid red'

# Dedicated area for the grid text, so that clearing it between moves leaves
# the label itself in place.
key_output = widgets.Output()

d = ipyevents.Event(source=l, watched_events=['keydown'])

# `key` value of each arrow-key event -> environment action
KEY_ACTIONS = {
    'ArrowLeft': "left",
    'ArrowUp': "up",
    'ArrowRight': "right",
    'ArrowDown': "down",
}

def handle_key(event):
    """Move the agent when an arrow key is pressed on the label, then redraw.

    Args:
        event: Event dict passed by ``d.on_dom_event``; only its ``'key'``
            entry is read. Keys other than the four arrows are ignored.
    """
    action = KEY_ACTIONS.get(event['key'])
    if action is None:
        return

    with key_output:
        # Clear the previous grid (deferred until the new one is printed)
        clear_output(wait=True)

        # Step the environment, then show the resulting grid
        _, done = env.step(action)
        env.render()

        if done:
            # Replace the final grid with the win message and a fresh game
            clear_output(wait=True)
            print("You reached the goal! Resetting environment...")
            env.reset()
            env.render()

d.on_dom_event(handle_key)

display(l, key_output)

Label(value='Click or type on me!', layout=Layout(border_bottom='2px solid red', border_left='2px solid red', …