In [4]:


import pygame
import numpy as np

# Initialize Pygame
pygame.init()

# Parameters
grid_size = 6
cell_size = 100
screen_size = grid_size * cell_size
screen = pygame.display.set_mode((screen_size, screen_size))
pygame.display.set_caption("Q-Learning Grid World 24MAI0114")

# Colors
white = (255, 255, 255)
black = (0, 0, 0)
red = (255, 0, 0)
green = (0, 255, 0)
blue = (0, 0, 255)

# Game elements
goal_state = (3, 3)
obstacle = (1, 2)
actions = ['up', 'down', 'left', 'right']

# Initialize Q-table
Q = np.zeros((grid_size, grid_size, len(actions)))
reward = 100
penalty = -100

# Initialize Q-table with some values to guide the agent's movement
Q[0, 0, actions.index("right")] = 1
Q[0, 1, actions.index("right")] = 1
Q[0, 2, actions.index("down")] = 1
Q[1, 2, actions.index("left")] = 1
Q[1, 1, actions.index("down")] = 1
Q[2, 1, actions.index("down")] = 1
Q[3, 1, actions.index("right")] = 1
Q[3, 2, actions.index("right")] = 1

# Helper functions
def choose_action(state):
    """Choose the best action based on the current Q-table."""
    return actions[np.argmax(Q[state[0], state[1], :])]

def take_action(state, action):
    """Move the agent and calculate reward/penalty."""
    new_state = list(state)
    if action == 'up':
        new_state[0] -= 1
    elif action == 'down':
        new_state[0] += 1
    elif action == 'left':
        new_state[1] -= 1
    else:
        new_state[1] += 1
    # Ensure new state is within bounds
    new_state[0] = max(0, min(new_state[0], grid_size - 1))
    new_state[1] = max(0, min(new_state[1], grid_size - 1))
    new_state = tuple(new_state)
    
    # Check for obstacle or goal
    if new_state == obstacle:
        return new_state, penalty
    elif new_state == goal_state:
        return new_state, reward
    else:
        return new_state, 0

# Run a loop to test the learned policy and visualize the agent’s movement
state = (0, 0)
steps = 0
max_steps = 36  # Limit the number of steps to prevent infinite loops

# Main game loop
running = True
clock = pygame.time.Clock()  # To control the speed of the loop

while running and state != goal_state and steps < max_steps:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    # Agent chooses action and moves
    action = choose_action(state)
    new_state, reward = take_action(state, action)
    state = new_state
    steps += 1
    print(f"Step {steps}: {action} -> {state}")

    # Draw the grid and elements
    screen.fill(white)
    for row in range(grid_size):
        for col in range(grid_size):
            rect = pygame.Rect(col * cell_size, row * cell_size, cell_size, cell_size)
            pygame.draw.rect(screen, black, rect, 1)  # Draw grid
            if (row, col) == goal_state:
                pygame.draw.rect(screen, green, rect)  # Draw goal
            elif (row, col) == obstacle:
                pygame.draw.rect(screen, red, rect)  # Draw obstacle
            elif (row, col) == state:
                pygame.draw.rect(screen, blue, rect)  # Draw agent

    pygame.display.flip()
    clock.tick(2)  # Controls how fast the agent moves (2 frames per second)

pygame.quit()


Step 1: right -> (0, 1)
Step 2: right -> (0, 2)
Step 3: down -> (1, 2)
Step 4: left -> (1, 1)
Step 5: down -> (2, 1)
Step 6: down -> (3, 1)
Step 7: right -> (3, 2)
Step 8: right -> (3, 3)
