# In [6]:
import tkinter as tk
from tkinter import messagebox
import random
import numpy as np

# Side length of the square maze, in cells
MAZE_SIZE = 15  # Reduced size for faster training
# Cell type codes stored in the maze grid
EMPTY = 0
WALL = 1
START = 2
END = 3
# Movement deltas, applied as (row, col) offsets and indexed by action number
DIRECTIONS = [(0, 1), (1, 0), (0, -1), (-1, 0)]  # right, down, left, up

# Reward values
REWARD_GOAL = 100  # reward for reaching the end cell
REWARD_WALL = -10  # penalty for moving into a wall
REWARD_STEP = -1   # cost of each move


class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy policy.

    States are integer row indices into ``q_table``; actions are integer
    column indices in ``range(len(actions))``.
    """

    def __init__(self, maze_size, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        """Create an agent whose Q-table is initialised to zero.

        Args:
            maze_size: Side length of the square maze; the table gets
                ``maze_size * maze_size`` state rows.
            actions: Sequence of available actions; the table gets one
                column per entry.
            alpha: Learning rate applied to each temporal-difference error.
            gamma: Discount factor applied to the best next-state value.
            epsilon: Probability of picking a random action when choosing.
        """
        self.q_table = np.zeros((maze_size * maze_size, len(actions)))
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.actions = actions

    def choose_action(self, state):
        """Pick an action index for *state* with an epsilon-greedy policy.

        Args:
            state: Row index of the current state in ``q_table``.

        Returns:
            An ``int`` in ``range(len(self.actions))``: a uniformly random
            index with probability ``epsilon``, otherwise the index of the
            highest Q-value for *state* (the first one on ties).
        """
        if np.random.rand() < self.epsilon:
            # Draw a column index rather than an element of ``self.actions``:
            # the result is used to index ``q_table``, so it must be a valid
            # column even when the action labels are not 0..n-1.
            return int(np.random.randint(len(self.actions)))
        return int(np.argmax(self.q_table[state]))

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update for an observed transition.

        Args:
            state: Row index of the state the action was taken in.
            action: Column index of the action that was taken.
            reward: Reward received for the transition.
            next_state: Row index of the resulting state.
        """
        best_next_value = np.max(self.q_table[next_state])
        td_target = reward + self.gamma * best_next_value
        td_error = td_target - self.q_table[state][action]
        self.q_table[state][action] += self.alpha * td_error


class MazeGame:
    """Tkinter maze game that can be played by hand or solved by Q-learning.

    The maze is a ``MAZE_SIZE`` x ``MAZE_SIZE`` grid of cell codes. Positions
    are ``(row, col)`` tuples and Q-table states are ``row * MAZE_SIZE + col``.
    """

    def __init__(self, root):
        """Build the maze, the canvas, the mode buttons and the status labels.

        Args:
            root: The Tk root window that hosts the game widgets.
        """
        self.root = root
        self.root.title("Maze Game")

        # Create the maze and cache the start/end positions
        self.maze = self.generate_maze_dfs(MAZE_SIZE)
        self.player_pos = self.find_start()
        self.end_pos = self.find_end()

        # Canvas sized so that every cell is a cell_size x cell_size square
        self.cell_size = 40
        self.canvas = tk.Canvas(root, width=MAZE_SIZE * self.cell_size, height=MAZE_SIZE * self.cell_size)
        self.canvas.pack()

        self.draw_maze()

        # Mode selection buttons
        self.mode_frame = tk.Frame(root)
        self.mode_frame.pack()

        self.player_button = tk.Button(self.mode_frame, text="Player's Game", command=self.start_player_mode)
        self.player_button.pack(side=tk.LEFT)

        self.ai_button = tk.Button(self.mode_frame, text="AI AutoGame", command=self.start_ai_mode)
        self.ai_button.pack(side=tk.LEFT)

        # Keyboard input is only acted on while in player mode (see key_press)
        self.root.bind("<KeyPress>", self.key_press)

        # Game state
        self.game_mode = None
        self.score = 0
        self.score_label = tk.Label(root, text=f"Score: {self.score}")
        self.score_label.pack()

        # AI evaluation: steps taken and reward accumulated during playback
        self.ai_steps = 0
        self.ai_total_reward = 0
        self.ai_evaluation_label = tk.Label(root, text="AI Evaluation: Not Evaluated")
        self.ai_evaluation_label.pack()

    def generate_maze_dfs(self, size):
        """Carve a random maze with a depth-first search and return it.

        Rooms sit on odd ``(row, col)`` coordinates and are connected by
        clearing the wall cell between two neighbouring rooms. The search
        uses an explicit stack instead of recursion so that large sizes
        cannot exceed the interpreter's recursion limit.

        Args:
            size: Side length of the square grid. An odd size is expected:
                the end cell is placed at ``(size - 2, size - 2)``, which is
                only a carved room when ``size`` is odd.

        Returns:
            A ``size`` x ``size`` list of lists of cell codes with ``START``
            at ``(1, 1)`` and ``END`` at ``(size - 2, size - 2)``.
        """
        maze = [[WALL] * size for _ in range(size)]

        def shuffled_directions():
            """Return an iterator over the four directions in random order."""
            options = DIRECTIONS[:]
            random.shuffle(options)
            return iter(options)

        maze[1][1] = EMPTY
        # Each stack entry keeps the iterator of directions still to try,
        # so backtracking resumes exactly where that room left off.
        stack = [(1, 1, shuffled_directions())]
        while stack:
            x, y, pending = stack[-1]
            for dx, dy in pending:
                nx, ny = x + dx * 2, y + dy * 2
                if 0 <= nx < size and 0 <= ny < size and maze[nx][ny] == WALL:
                    maze[x + dx][y + dy] = EMPTY  # knock out the wall in between
                    maze[nx][ny] = EMPTY
                    stack.append((nx, ny, shuffled_directions()))
                    break
            else:
                # No unvisited neighbour left: backtrack
                stack.pop()

        maze[1][1] = START
        maze[size - 2][size - 2] = END

        return maze

    def _find_cell(self, kind):
        """Return the first ``(row, col)`` holding *kind* in row-major order, or None."""
        for i, row in enumerate(self.maze):
            for j, cell in enumerate(row):
                if cell == kind:
                    return (i, j)
        return None

    def find_start(self):
        """Return the ``(row, col)`` of the start cell, or None if absent."""
        return self._find_cell(START)

    def find_end(self):
        """Return the ``(row, col)`` of the end cell, or None if absent."""
        return self._find_cell(END)

    def draw_maze(self):
        """Redraw every maze cell and the player marker on the canvas."""
        fills = {WALL: "black", START: "green", END: "red"}
        size = self.cell_size
        self.canvas.delete("all")
        for i in range(MAZE_SIZE):
            for j in range(MAZE_SIZE):
                # Columns map to canvas x, rows to canvas y
                x1, y1 = j * size, i * size
                colour = fills.get(self.maze[i][j], "white")
                self.canvas.create_rectangle(x1, y1, x1 + size, y1 + size, fill=colour)

        # Player marker: a circle inset by 5 px inside its cell
        if self.player_pos:
            row, col = self.player_pos
            x1, y1 = col * size, row * size
            self.canvas.create_oval(x1 + 5, y1 + 5, x1 + size - 5, y1 + size - 5, fill="blue")

    def _is_open(self, pos):
        """Return True when *pos* is inside the grid and is not a wall."""
        row, col = pos
        return 0 <= row < MAZE_SIZE and 0 <= col < MAZE_SIZE and self.maze[row][col] != WALL

    def get_reward(self, new_pos):
        """Return the reward for trying to occupy *new_pos*.

        Args:
            new_pos: Target ``(row, col)`` position.

        Returns:
            ``REWARD_GOAL`` for the end cell, ``REWARD_WALL`` for a wall or a
            position outside the grid, ``REWARD_STEP`` otherwise.
        """
        if new_pos == self.end_pos:
            return REWARD_GOAL
        if not self._is_open(new_pos):
            # Off-grid positions are treated as walls instead of letting a
            # negative index wrap around or a large one raise IndexError.
            return REWARD_WALL
        return REWARD_STEP

    def _step(self, state, action):
        """Apply action index *action* to *state*.

        Returns:
            ``(next_state, reward)``. A blocked move leaves the state
            unchanged and yields ``REWARD_WALL``.
        """
        dx, dy = DIRECTIONS[action]
        target = (state[0] + dx, state[1] + dy)
        if self._is_open(target):
            return target, self.get_reward(target)
        # The reward is based on the attempted cell, not on the cell the
        # agent stays in; otherwise the wall penalty would never be applied.
        return state, REWARD_WALL

    def update_score(self, score):
        """Add *score* to the running total and refresh the score label."""
        self.score += score
        self.score_label.config(text=f"Score: {self.score}")

    def evaluate_ai(self):
        """Show the AI's step count and accumulated reward in the label."""
        evaluation = f"AI Steps: {self.ai_steps}, Total Reward: {self.ai_total_reward}"
        self.ai_evaluation_label.config(text=f"AI Evaluation: {evaluation}")

    def start_player_mode(self):
        """Reset position and score, then enable keyboard play."""
        self.game_mode = "player"
        self.player_pos = self.find_start()
        self.score = 0
        self.update_score(0)
        self.draw_maze()
        messagebox.showinfo("Hint", "Player mode is activated! Use the arrow keys to move!")

    def start_ai_mode(self):
        """Reset the game state, train a Q-learning agent and let it play."""
        self.game_mode = "ai"
        self.player_pos = self.find_start()
        self.score = 0
        self.update_score(0)
        self.ai_steps = 0
        self.ai_total_reward = 0
        self.draw_maze()
        # Playback pumps the event loop with root.update(), so a click on
        # either button would re-enter this method or switch modes mid-run.
        self.player_button.config(state=tk.DISABLED)
        self.ai_button.config(state=tk.DISABLED)
        try:
            self.ai_train_q_learning()
        finally:
            self.player_button.config(state=tk.NORMAL)
            self.ai_button.config(state=tk.NORMAL)

    def key_press(self, event):
        """Move the player with the arrow keys while in player mode.

        Args:
            event: Tk key event; only ``event.keysym`` is read.
        """
        if self.game_mode != "player":
            return

        moves = {"Up": (-1, 0), "Down": (1, 0), "Left": (0, -1), "Right": (0, 1)}
        delta = moves.get(event.keysym)
        if delta is None:
            return

        target = (self.player_pos[0] + delta[0], self.player_pos[1] + delta[1])
        if not self._is_open(target):
            return

        self.update_score(self.get_reward(target))
        self.player_pos = target
        self.draw_maze()

        # Reaching the end cell finishes the game and disables movement
        if self.player_pos == self.end_pos:
            messagebox.showinfo("Congratulations", "You managed to get out of the maze！")
            self.game_mode = None

    def ai_train_q_learning(self):
        """Train a Q-learning agent on the current maze, then replay it.

        Action numbers 0-3 index ``DIRECTIONS`` (right, down, left, up).
        """
        agent = QLearningAgent(MAZE_SIZE, actions=[0, 1, 2, 3])
        episodes = 1000
        # Cap each episode so an unreachable end cell cannot hang the GUI
        max_steps = 50 * MAZE_SIZE * MAZE_SIZE
        start = self.find_start()
        for _ in range(episodes):
            state = start
            for _ in range(max_steps):
                state_index = state[0] * MAZE_SIZE + state[1]
                action = agent.choose_action(state_index)
                next_state, reward = self._step(state, action)
                next_index = next_state[0] * MAZE_SIZE + next_state[1]
                agent.update_q_table(state_index, action, reward, next_index)
                state = next_state
                if state == self.end_pos:
                    break
        messagebox.showinfo("Training Complete", "Q-learning training completed!")
        self.ai_play(agent)

    def ai_play(self, agent):
        """Animate the greedy policy of a trained *agent* through the maze.

        Args:
            agent: Object exposing a ``q_table`` indexed as
                ``q_table[row * MAZE_SIZE + col]`` with one value per action.
        """
        state = self.find_start()
        self.player_pos = state
        self.draw_maze()
        self.root.update()
        self.root.after(500)  # with no callback, after() just pauses

        visited = {state}
        while self.player_pos != self.end_pos:
            state_index = state[0] * MAZE_SIZE + state[1]
            action = int(np.argmax(agent.q_table[state_index]))
            next_state, reward = self._step(state, action)
            self.ai_steps += 1
            self.ai_total_reward += reward

            if next_state in visited:
                # The greedy policy is deterministic, so returning to a state
                # (or bumping into a wall) means it would loop forever.
                self.evaluate_ai()
                messagebox.showwarning("AI stuck", "The learned policy does not reach the exit.")
                return
            visited.add(next_state)

            self.player_pos = next_state
            self.draw_maze()
            self.root.update()
            self.root.after(200)  # interval per move (ms)
            state = next_state

        self.evaluate_ai()
        messagebox.showinfo("Congratulations", "AI made it!")


# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    root = tk.Tk()  # top-level application window
    game = MazeGame(root)  # builds the maze and all widgets inside root
    root.mainloop()  # hand control to the Tk event loop until the window closes