In [17]:
import pygame
import pickle
import random
from collections import deque
import numpy as np
import time

'''
# Reuse existing classes from training
class MazeEnv:
    def __init__(self, maze):
        # ... [Copy full MazeEnv class from Code2.ipynb]
        # Modify initialization for testing
        self.episode_count = 1000  # Disable fixed cup logic
'''

WALL = '#'
EMPTY = '.'
sample_maze = [
    "###############",
    "#.............#",
    "#.#.#####.#.#.#",
    "#.#.....#.#.#.#",
    "#.#..##.#.#.#.#",
    "#.#.....#.#.#.#",
    "#.#.###.#.#.#.#",
    "#.#.....#.#.#.#",
    "#.............#",
    "###############"
]

class MazeEnv:
    def __init__(self, maze):
        self.maze = maze
        self.width = len(maze[0])
        self.height = len(maze)
        self.walls = set()
        self.cup_pos = None
        self.harry_pos = None
        self.death_eater_pos = None
        self.visited_states = {}
        self.episode_count = 1000
        self.add_walls()

    def add_walls(self):
        for y, row in enumerate(self.maze):
            for x, char in enumerate(row):
                if char == WALL:
                    self.walls.add((x, y))

    def reset(self):
        self.visited_states = {}
        empty_cells = [(x, y) for y in range(self.height) for x in range(self.width)
                      if (x, y) not in self.walls]
        
        # Fixed cup position for first 500 episodes to help model learn:
        if self.episode_count < 500:
            if not hasattr(self, 'fixed_cup_pos'):
                center_x, center_y = self.width//2, self.height//2
                potential = [(x,y) for x,y in empty_cells 
                           if abs(x-center_x)+abs(y-center_y) < 5]
                self.fixed_cup_pos = random.choice(potential)
            self.cup_pos = self.fixed_cup_pos
        else:
            self.cup_pos = random.choice(empty_cells)
        
        empty_cells.remove(self.cup_pos)
        self.harry_pos = random.choice(empty_cells)
        empty_cells.remove(self.harry_pos)
        
        self.death_eater_pos = random.choice(empty_cells)
        
        return self.get_state()

    def get_state(self):
        return (self.harry_pos, self.death_eater_pos, self.cup_pos)

    def is_valid_pos(self, pos):
        x, y = pos
        return 0 <= x < self.width and 0 <= y < self.height and pos not in self.walls

    def move_agent(self, pos, action):
        x, y = pos
        if action == 0:  # Up
            new_pos = (x, y-1)
        elif action == 1:  # Down
            new_pos = (x, y+1)
        elif action == 2:  # Left
            new_pos = (x-1, y)
        elif action == 3:  # Right
            new_pos = (x+1, y)
        else:
            new_pos = pos
        return new_pos if self.is_valid_pos(new_pos) else pos

    def death_eater_move(self):
        start = self.death_eater_pos
        goal = self.harry_pos
        if start == goal:
            return start
        
        queue = deque([start])
        visited = {start: None}
        while queue:
            current = queue.popleft()
            if current == goal:
                break
            for action in range(4):
                neighbor = self.move_agent(current, action)
                if neighbor not in visited:
                    visited[neighbor] = current
                    queue.append(neighbor)
        3
        if goal not in visited:
            return self.death_eater_pos
        
        step = goal
        while visited[step] != start:
            step = visited[step]
        return step


def m_distance(pos1, pos2):
    return abs(pos1[0]-pos2[0]) + abs(pos1[1]-pos2[1])

class QLearningAgent:
    def __init__(self, env, q_table=None):
        self.env = env
        self.q_table = q_table or {}
        self.epsilon = 0.01  # Lower exploration for trained model

    def load_q_table(self, filename):
        """Load saved Q-table from file"""
        with open(filename, 'rb') as f:
            self.q_table = pickle.load(f)
    
    def get_q(self, state, action):
        hx, hy = state[0]
        dx, dy = state[1]
        cx, cy = state[2]
        
        rel_x = hx - cx
        rel_y = hy - cy
        de_x = hx - dx
        de_y = hy - dy
        
        cup_dir = (1 if rel_x>0 else (-1 if rel_x<0 else 0), 
                   1 if rel_y>0 else (-1 if rel_y<0 else 0))
        de_dir = (1 if de_x>0 else (-1 if de_x<0 else 0), 
                  1 if de_y>0 else (-1 if de_y<0 else 0))
        
        return self.q_table.get((hx, hy, de_dir[0], de_dir[1], cup_dir[0], cup_dir[1], action), 0.0)
    
    def choose_action(self, state):
        if np.random.rand() < 0.01:  # 1% random exploration
            return np.random.choice(4)
        if np.random.rand() < self.epsilon:
            return np.random.choice(4)
        else:
            q_values = [self.get_q(state, a) for a in range(4)]
            max_q = max(q_values)
            return np.random.choice([a for a, q in enumerate(q_values) if q == max_q])

def step(env, action):
    prev_pos = env.harry_pos
    env.harry_pos = env.move_agent(env.harry_pos, action)
    env.death_eater_pos = env.death_eater_move()

    caught = (env.harry_pos == env.death_eater_pos)
    success = (env.harry_pos == env.cup_pos)
    reward = get_reward(env, env.harry_pos, env.cup_pos, caught, prev_pos)

    return env.get_state(), reward, (caught or success)

def get_reward(env, harry_pos, cup_pos, caught, prev_harry_pos=None):
    if harry_pos == cup_pos:
        return 200
    elif caught:
        return -150
    
    reward = -1
    
    # Revisit penalty
    if harry_pos in env.visited_states:
        reward -= 0.5
    env.visited_states[harry_pos] = env.visited_states.get(harry_pos, 0) + 1
    
    # Proximity reward
    dist_to_cup = m_distance(harry_pos, cup_pos)
    max_dist = env.width + env.height
    reward += 2 * (1 - dist_to_cup / max_dist)
    
    # Distance from DE reward
    if prev_harry_pos:
        prev_dist = m_distance(prev_harry_pos, env.death_eater_pos)
        curr_dist = m_distance(harry_pos, env.death_eater_pos)
        if curr_dist > prev_dist:
            reward += 0.5
            
    return reward


CELL_SIZE = 40
MARGIN = 2
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
BLUE = (0, 0, 255)
GOLD = (255, 215, 0)
BROWN = (139, 69, 19)
GREEN = (0, 255, 0)
INFO_HEIGHT = 100

pygame.init()
pygame.font.init()

def render_maze(self, episode=0, total_reward=0):

    self.screen_width = self.width * (CELL_SIZE + MARGIN) + MARGIN
    self.screen_height = self.height * (CELL_SIZE + MARGIN) + MARGIN + INFO_HEIGHT
    self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
    pygame.display.set_caption("The Goblet of Fire - Triwizard Maze")
    self.font = pygame.font.SysFont('Arial', 16)
    
    self.screen.fill(BLACK)
    
    #banjar zameen
    for y in range(self.height):
        for x in range(self.width):
            rect = pygame.Rect(
                x * (CELL_SIZE + MARGIN) + MARGIN,
                y * (CELL_SIZE + MARGIN) + MARGIN,
                CELL_SIZE,
                CELL_SIZE
            )
            if (x, y) in self.walls:
                pygame.draw.rect(self.screen, BROWN, rect)
            else:
                pygame.draw.rect(self.screen, WHITE, rect)
    
    #Cup
    if self.cup_pos:
        x, y = self.cup_pos
        cup_rect = pygame.Rect(
            x * (CELL_SIZE + MARGIN) + MARGIN,
            y * (CELL_SIZE + MARGIN) + MARGIN,
            CELL_SIZE,
            CELL_SIZE
        )
        pygame.draw.rect(self.screen, GOLD, cup_rect)
    
    #Harry
    if self.harry_pos:
        x, y = self.harry_pos
        harry_rect = pygame.Rect(
            x * (CELL_SIZE + MARGIN) + MARGIN,
            y * (CELL_SIZE + MARGIN) + MARGIN,
            CELL_SIZE,
            CELL_SIZE
        )
        pygame.draw.rect(self.screen, BLUE, harry_rect)
        # Drawing (H)
        pygame.draw.line(self.screen, WHITE, 
            (x * (CELL_SIZE + MARGIN) + MARGIN + 10, y * (CELL_SIZE + MARGIN) + MARGIN + CELL_SIZE//2),
            (x * (CELL_SIZE + MARGIN) + MARGIN + CELL_SIZE - 10, y * (CELL_SIZE + MARGIN) + MARGIN + CELL_SIZE//2), 2)
        pygame.draw.line(self.screen, WHITE,
            (x * (CELL_SIZE + MARGIN) + MARGIN + 10, y * (CELL_SIZE + MARGIN) + MARGIN + 10),
            (x * (CELL_SIZE + MARGIN) + MARGIN + 10, y * (CELL_SIZE + MARGIN) + MARGIN + CELL_SIZE - 10), 2)
        pygame.draw.line(self.screen, WHITE,
            (x * (CELL_SIZE + MARGIN) + MARGIN + CELL_SIZE - 10, y * (CELL_SIZE + MARGIN) + MARGIN + 10),
            (x * (CELL_SIZE + MARGIN) + MARGIN + CELL_SIZE - 10, y * (CELL_SIZE + MARGIN) + MARGIN + CELL_SIZE - 10), 2)
    
    #Death Eater
    if self.death_eater_pos:
        x, y = self.death_eater_pos
        de_rect = pygame.Rect(
            x * (CELL_SIZE + MARGIN) + MARGIN,
            y * (CELL_SIZE + MARGIN) + MARGIN,
            CELL_SIZE,
            CELL_SIZE
        )
        pygame.draw.rect(self.screen, RED, de_rect)
        center_x = x * (CELL_SIZE + MARGIN) + MARGIN + CELL_SIZE//2
        center_y = y * (CELL_SIZE + MARGIN) + MARGIN + CELL_SIZE//2
        radius = CELL_SIZE//3
        pygame.draw.circle(self.screen, BLACK, (center_x, center_y), radius, 2)
    
    # neeche ka info panel
    info_surface = pygame.Surface((self.screen_width, INFO_HEIGHT))
    info_surface.fill((200, 200, 200))  # Light gray
    episode_text = self.font.render(f"Episode: {episode}", True, BLACK)
    reward_text = self.font.render(f"Total Reward: {total_reward}", True, BLACK)
    instruction_text = self.font.render("Press SPACE for next episode, ESC to quit", True, BLACK)
    controls_text = self.font.render("S: Step | R: Run episode | F: Toggle speed", True, BLACK)
    
    info_surface.blit(episode_text, (10, 10))
    info_surface.blit(reward_text, (10, 30))
    info_surface.blit(instruction_text, (10, 50))
    info_surface.blit(controls_text, (10, 70))
    
    self.screen.blit(info_surface, (0, self.height * (CELL_SIZE + MARGIN) + MARGIN))
    #Updating display-
    pygame.display.flip()

MazeEnv.render = render_maze


# Pygame Visualization Code (simplified)
def run_trained_visualization():
    env = MazeEnv(sample_maze)
    env.episode_count = 1000  # Force post-training behavior
    agent = QLearningAgent(env)
    agent.load_q_table('q_table.pkl')  # Load saved weights

    running = True
    episode = 0
    total_reward = 0
    state = env.reset()
    done = False
    auto_run = False
    step_mode = False
    delay = 0.2  #between steps
    
    env.render(episode, total_reward)
    
    clock = pygame.time.Clock()
    
    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    running = False
                elif event.key == pygame.K_SPACE:
                    # Reset for new episode
                    state = env.reset()
                    total_reward = 0
                    done = False
                    episode += 1
                    env.render(episode, total_reward)
                    auto_run = False
                elif event.key == pygame.K_s:
                    # Step mode - take one step
                    if not done:
                        action = agent.choose_action(state)
                        next_state, reward, done = step(env, action)
                        state = next_state
                        total_reward += reward
                        env.render(episode, total_reward)
                elif event.key == pygame.K_r:
                    # Toggle auto run mode
                    auto_run = not auto_run
                elif event.key == pygame.K_f:
                    # Toggle speed
                    if delay > 0.05:
                        delay /= 2
                    else:
                        delay = 0.2
        
        # If in auto run mode, take steps automatically
        if auto_run and not done:
            action = agent.choose_action(state)
            next_state, reward, done = step(env, action)
            state = next_state
            total_reward += reward
            
            # Render the current state
            env.render(episode, total_reward)
            
            if done:
                if reward == 100:
                    outcome = "Harry found the Cup!"
                else:
                    outcome = "Harry was caught by the Death Eater!"
                print(f"Episode {episode} ended: {outcome} | Total Reward: {total_reward}")
                auto_run = False
            
            # Add a delay to visualize movements
            time.sleep(delay)
        
        # Control the frame rate
        clock.tick(60)
    
    pygame.quit()


if __name__ == "__main__":
    run_trained_visualization()


Episode 1 ended: Harry was caught by the Death Eater! | Total Reward: 203.52
Episode 2 ended: Harry was caught by the Death Eater! | Total Reward: 211.51999999999998
Episode 3 ended: Harry was caught by the Death Eater! | Total Reward: 211.02
Episode 4 ended: Harry was caught by the Death Eater! | Total Reward: 216.14
