In [29]:
# Scratch check: show a 5x5x1 array of zeros.
# NOTE(review): `np` is not imported in this cell; it relies on the numpy
# import in the Q-learning cell having been executed first in the same kernel.
np.zeros((5,5,1))

array([[[0.],
        [0.],
        [0.],
        [0.],
        [0.]],

       [[0.],
        [0.],
        [0.],
        [0.],
        [0.]],

       [[0.],
        [0.],
        [0.],
        [0.],
        [0.]],

       [[0.],
        [0.],
        [0.],
        [0.],
        [0.]],

       [[0.],
        [0.],
        [0.],
        [0.],
        [0.]]])

In [9]:
import pygame
import numpy as np
import random

# --- Grid geometry (in cells) and rendering (in pixels) ---
WIDTH = 8
HEIGHT = 8
CELL_SIZE = 100  # Edge length of one grid cell in pixels
WIN_WIDTH = WIDTH * CELL_SIZE
WIN_HEIGHT = HEIGHT * CELL_SIZE
FPS = 30  # Frame cap applied by the game loop

# --- Environment layout, as (x, y) cell coordinates ---
OBSTACLES = [(1, 1), (2, 2), (3, 3)]
START_POS = (0, 0)
END_POS = (WIDTH - 1, HEIGHT - 1)  # Bottom-right corner of the grid

# Action index -> meaning; the index is what the Q-table's last axis uses
ACTIONS = ["UP", "DOWN", "LEFT", "RIGHT"]

# --- Q-learning hyperparameters ---
# Probability of taking a random action instead of the greedy one.
# NOTE(review): q_learning() decays this with a floor of 0.1, which equals the
# starting value, so the decay currently has no effect.
EPSILON = 0.1
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.9
EPISODES = 1000  # Number of training episodes

# Initialize Pygame
# These statements run as soon as the cell executes: they start pygame and
# open a WIN_WIDTH x WIN_HEIGHT window as a side effect.
pygame.init()
# Display surface that draw_grid(), draw_agent() and main() render into
window = pygame.display.set_mode((WIN_WIDTH, WIN_HEIGHT))
# Clock that main() ticks once per frame with FPS as the target rate
clock = pygame.time.Clock()

# Q-table: one value per (x, y, action), all starting at zero.
# Indexed with a position tuple first, e.g. q_table[(x, y)][action].
q_table = np.zeros(shape=(WIDTH, HEIGHT, len(ACTIONS)), dtype=float)

# Render the grid lines and paint every obstacle cell
def draw_grid():
    """Draw white grid lines across the window, then fill each obstacle cell in red."""
    line_color = (255, 255, 255)
    obstacle_color = (255, 0, 0)

    # Vertical separators, one every CELL_SIZE pixels
    for col_px in range(0, WIN_WIDTH, CELL_SIZE):
        pygame.draw.line(window, line_color, (col_px, 0), (col_px, WIN_HEIGHT))

    # Horizontal separators
    for row_px in range(0, WIN_HEIGHT, CELL_SIZE):
        pygame.draw.line(window, line_color, (0, row_px), (WIN_WIDTH, row_px))

    # Obstacles are stored in cell coordinates; scale them to pixels
    for blocked in OBSTACLES:
        cell_rect = (blocked[0] * CELL_SIZE, blocked[1] * CELL_SIZE, CELL_SIZE, CELL_SIZE)
        pygame.draw.rect(window, obstacle_color, cell_rect)

# Epsilon-greedy action selection over the Q-table
def choose_action(state):
    """Pick an action index for `state`.

    With probability EPSILON a uniformly random index into ACTIONS is
    returned (exploration); otherwise the index of the largest Q-value
    stored for `state` is returned (exploitation).
    """
    should_explore = np.random.rand() < EPSILON
    if should_explore:
        action_indices = range(len(ACTIONS))
        return random.choice(action_indices)

    state_values = q_table[state]
    return np.argmax(state_values)

# Main Q-learning algorithm
def _apply_action(state, action):
    """Return the cell reached from `state` by `action`, clamped to the grid.

    Action indices follow ACTIONS: 0 = UP, 1 = DOWN, 2 = LEFT and any other
    value = RIGHT. Moves that would leave the grid keep the agent on the edge.
    """
    x, y = state
    if action == 0:  # UP
        return (x, max(0, y - 1))
    if action == 1:  # DOWN
        return (x, min(HEIGHT - 1, y + 1))
    if action == 2:  # LEFT
        return (max(0, x - 1), y)
    return (min(WIDTH - 1, x + 1), y)  # RIGHT


def q_learning():
    """Train the global `q_table` in place with tabular Q-learning.

    Runs EPISODES episodes. Each episode starts at START_POS and ends when
    the agent reaches END_POS. Every step costs -1; trying to enter an
    obstacle costs -5 and leaves the agent where it was, which matches how
    main() treats obstacles during playback. Rebinds the global EPSILON
    after every episode.
    """
    global EPSILON  # Declare EPSILON as global to modify its value
    for _ in range(EPISODES):
        state = START_POS

        while True:
            action = choose_action(state)
            target = _apply_action(state, action)

            if target in OBSTACLES:
                # Obstacles are walls: penalise the attempt and stay put, so
                # training uses the same dynamics as the playback loop.
                reward = -5
                next_state = state
            else:
                reward = -1
                next_state = target

            # Q-learning update: blend the old estimate with the one-step
            # target built from the best value available in the next state.
            best_next = np.max(q_table[next_state])
            q_table[state][action] = (
                (1 - LEARNING_RATE) * q_table[state][action]
                + LEARNING_RATE * (reward + DISCOUNT_FACTOR * best_next)
            )

            state = next_state

            # The episode ends as soon as the goal is reached
            if state == END_POS:
                break

        # Reduce epsilon over time, never below 0.1.
        # NOTE(review): EPSILON is initialised to 0.1 in the constants, so
        # with that starting value this decay has no effect.
        EPSILON = max(0.1, EPSILON * 0.99)

# Paint the agent as a filled blue cell
def draw_agent(pos):
    """Fill the grid cell at `pos` (an (x, y) pair in cell coordinates) in blue."""
    agent_color = (0, 0, 255)
    left_px = pos[0] * CELL_SIZE
    top_px = pos[1] * CELL_SIZE
    pygame.draw.rect(window, agent_color, (left_px, top_px, CELL_SIZE, CELL_SIZE))

# Game loop
def main():
    """Train the agent, then replay its greedy policy until the window is closed.

    Calls q_learning() once, places the agent at START_POS and then, on every
    frame, redraws the grid and agent and advances the agent one cell along
    the action with the highest Q-value. Moves into obstacles are ignored.
    Once the agent reaches END_POS it stays there; the loop keeps running so
    the final frame remains visible until a QUIT event arrives.
    """
    q_learning()
    current_pos = START_POS

    running = True
    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        window.fill((0, 0, 0))
        draw_grid()
        draw_agent(current_pos)

        # Only move while the goal has not been reached. Training never
        # updates the Q-values of END_POS, so acting greedily from there
        # would just pick action 0 (UP) and walk the agent off the goal.
        if current_pos != END_POS:
            action = np.argmax(q_table[current_pos])
            if action == 0:  # UP
                next_pos = (current_pos[0], max(0, current_pos[1] - 1))
            elif action == 1:  # DOWN
                next_pos = (current_pos[0], min(HEIGHT - 1, current_pos[1] + 1))
            elif action == 2:  # LEFT
                next_pos = (max(0, current_pos[0] - 1), current_pos[1])
            else:  # RIGHT
                next_pos = (min(WIDTH - 1, current_pos[0] + 1), current_pos[1])

            # Obstacles block movement: stay in place if the target is one
            if next_pos not in OBSTACLES:
                current_pos = next_pos

        pygame.display.update()
        clock.tick(FPS)

if __name__ == "__main__":
    try:
        main()
    finally:
        # Always shut pygame down, even if training or playback raises or is
        # interrupted; otherwise the window is left open in a live kernel.
        pygame.quit()
