Let’s implement a basic reinforcement learning (RL) example using Q-learning, a simple but powerful RL algorithm.

We’ll use a small grid environment to demonstrate the idea. Here’s a basic Python implementation that uses no external libraries other than NumPy:

'''🧠 Q-Learning in a GridWorld
🗺 Environment Description:
4x4 Grid.

Agent starts at (0, 0).

Goal at (3, 3) → +10 reward.

Hitting walls → stay in place (the move still costs -1).

Each move → -1 reward.'''

In [2]:
import pandas as pd
import numpy as np
import random

In [3]:
# Side length of the square grid world
grid_size = 4

# Each move label mapped to the offset it adds to the agent's (x, y) state.
# 'U'/'D' change the first coordinate, 'L'/'R' change the second.
action_dict = {
    'U': (-1, 0),   # up
    'D': (1, 0),    # down
    'L': (0, -1),   # left
    'R': (0, 1),    # right
}

# Ordered action labels; a label's position in this list is its action index.
# Derived from the dict's insertion order so the two tables cannot drift apart.
actions = list(action_dict)


In [4]:

# Hyperparameters
episodes = 500   # Number of training episodes
epsilon = 0.2    # Exploration rate (probability of a random action)
gamma = 0.9      # Discount factor
alpha = 0.1      # Learning rate

# Q-table: one estimate per (x, y, action index), all starting at zero
q_shape = (grid_size, grid_size, len(actions))
Q = np.zeros(q_shape)


In [5]:
# Reward function
def get_reward(state):
    """Return the reward for arriving in ``state``.

    The cell ``(grid_size - 1, grid_size - 1)`` is the goal and yields 10;
    any other state yields -1.
    """
    goal = (grid_size - 1, grid_size - 1)
    if state == goal:
        return 10
    return -1


In [6]:
def is_valid(state):
    """Return True when both coordinates of ``state`` lie inside the grid.

    ``state`` is unpacked as a pair; each coordinate must satisfy
    ``0 <= coordinate < grid_size``.
    """
    first, second = state
    if not (0 <= first < grid_size):
        return False
    return 0 <= second < grid_size


In [7]:

# Training loop: tabular Q-learning. Every episode starts at (0, 0) and runs
# until the agent reaches the goal cell in the opposite corner.
goal = (grid_size - 1, grid_size - 1)

for episode in range(episodes):
    state = (0, 0)

    while state != goal:
        x, y = state

        # Epsilon-greedy selection: exploit the current estimates unless the
        # random draw falls below epsilon, in which case explore uniformly.
        if random.uniform(0, 1) >= epsilon:
            action_idx = np.argmax(Q[x, y])
        else:
            action_idx = random.randint(0, len(actions) - 1)

        action = actions[action_idx]
        dx, dy = action_dict[action]

        # A move that would leave the grid keeps the agent where it is.
        candidate = (x + dx, y + dy)
        next_state = candidate if is_valid(candidate) else state

        reward = get_reward(next_state)
        nx, ny = next_state

        # Q-learning update: nudge the estimate toward the bootstrapped target
        # (immediate reward plus discounted best value of the next state).
        td_target = reward + gamma * np.max(Q[nx, ny])
        Q[x, y, action_idx] += alpha * (td_target - Q[x, y, action_idx])

        state = next_state


In [8]:
# Print the greedy policy as a grid: one output line per value of i, with a
# tab-terminated "(i,j): action" entry for every cell.
# NOTE(review): the training loop shown in this file never updates Q for the
# goal cell, so the action printed for it is just the argmax of its initial
# values and carries no meaning.
for i in range(grid_size):
    row_entries = []
    for j in range(grid_size):
        best_action = actions[np.argmax(Q[i, j])]
        row_entries.append(f"({i},{j}): {best_action}")
    print("\t".join(row_entries), end="\t\n")

(0,0): R	(0,1): R	(0,2): D	(0,3): D	
(1,0): R	(1,1): R	(1,2): D	(1,3): D	
(2,0): R	(2,1): R	(2,2): R	(2,3): D	
(3,0): R	(3,1): R	(3,2): R	(3,3): U	
