In [3]:
from Maze import Maze
from Agent import Agent
from MyQLearning import MyQLearning
from MyEGreedy import MyEGreedy

In [6]:
# Load the maze
file = "../data/easy_maze.txt"
maze = Maze(file)

# Set the reward at the bottom right to 10
maze.set_reward(maze.get_state(24, 14), 10)

# Create a robot at starting and reset location (0,0) (top left)
robot = Agent(0, 0)

# Parameters
alfa = 0.7
gamma = 0.9
epsilon = 0.1
max_episodes = 5000
max_steps = 2000
consecutive_episodes = 100
max_steps_threshold = 150  # Threshold for stopping criterion
rolling_avg_window = 200

# Initialize objects
selection = MyEGreedy()
learn = MyQLearning()

consecutive_episode_count = 0
final_episode_steps = 0
episode_lengths = []

for episode in range(max_episodes):
    state = robot.get_state(maze)
    steps = 0

    while steps < max_steps:
        action = selection.get_egreedy_action(robot, maze, learn, epsilon)
        state_next = robot.do_action(action, maze)
        r = maze.get_reward(state_next)
        possible_actions = maze.get_valid_actions(robot)

        learn.update_q(state, action, r, state_next, possible_actions, alfa, gamma)

        state = state_next
        steps += 1

        if state == maze.get_state(24, 14):  # reached goal
            robot.reset()
            break

    episode_lengths.append(steps)

    if episode >= rolling_avg_window:
        rolling_avg = sum(episode_lengths[-rolling_avg_window:]) / rolling_avg_window
        if rolling_avg <= max_steps_threshold:
            print(f"Stopping after {episode + 1} episodes")
            final_episode_steps = steps
            break

    if episode % 50 == 0:
        print(f"Episode {episode}: {steps} steps")

print("Optimal path:")
print(final_episode_steps)

Ready reading maze file ../data/easy_maze.txt
Made EGreedy
Episode 0: 1298 steps
Episode 50: 168 steps
Episode 100: 44 steps
Episode 150: 42 steps
Stopping after 201 episodes
Optimal path:
46


Ready reading maze file ../data/easy_maze.txt
Made EGreedy
Episode 0: 330 steps
Episode 50: 50 steps
Episode 100: 48 steps
Episode 150: 40 steps
Stopping after 201 episodes
Optimal path:
42
