In [13]:
import numpy as np

def _read_positive_int(prompt):
    """Prompt repeatedly until the user enters a whole number >= 1."""
    while True:
        raw = input(prompt).strip()
        try:
            value = int(raw)
        except ValueError:
            value = 0  # fall through to the shared error message below
        if value >= 1:
            return value
        print("Invalid input. Please enter a positive whole number.")


def create_maze():
    """Interactively read a maze from standard input.

    The user is asked for the number of rows and columns, then for each
    row as a string of ``0`` (path) and ``1`` (wall) characters. Spaces
    inside a row are ignored, so ``0 1 0`` and ``010`` are equivalent.
    Every prompt is repeated until the answer is valid, so a typo never
    aborts the session.

    Returns:
        A ``(rows, cols)`` integer numpy array holding 0 for open cells
        and 1 for walls.
    """
    rows = _read_positive_int("Enter the number of rows: ")
    cols = _read_positive_int("Enter the number of columns: ")
    maze = np.zeros((rows, cols), dtype=int)
    print("Enter the maze layout (0 for path, 1 for wall):")
    for row in range(rows):
        while True:
            row_str = input(f"Row {row}: ").strip().replace(" ", "")
            if len(row_str) == cols and all(c in "01" for c in row_str):
                maze[row] = [int(cell) for cell in row_str]
                break
            print(f"Invalid input. Please enter exactly {cols} numbers (0 or 1).")
    return maze

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    The value estimates live in ``q_table``, a ``(num_states, num_actions)``
    float array that starts out as all zeros.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.2):
        """Store the hyper-parameters and allocate a zeroed Q-table."""
        self.q_table = np.zeros((num_states, num_actions))
        self.num_states, self.num_actions = num_states, num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob

    def choose_action(self, state):
        """Return an action index for ``state``.

        With probability ``exploration_prob`` a uniformly random action is
        drawn; otherwise the action with the highest Q-value is returned
        (the first one on ties, as ``np.argmax`` does).
        """
        explore = np.random.rand() < self.exploration_prob
        if explore:
            return np.random.choice(self.num_actions)
        return np.argmax(self.q_table[state])

    def learn(self, state, action, reward, next_state):
        """Apply one Q-learning update for the observed transition.

        Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))
        """
        current = self.q_table[state, action]
        best_next = np.max(self.q_table[next_state])
        # Temporal-difference error: bootstrapped target minus current estimate.
        td_error = reward + self.discount_factor * best_next - current
        self.q_table[state, action] = current + self.learning_rate * td_error

# Show a sample session so the user knows what to type, then build the
# maze and the agent from their answers.
print(
    "Maze input example:",
    "5",
    "5",
    "00010",
    "01010",
    "01000",
    "01110",
    "00000",
    sep="\n",
)
maze = create_maze()
rows, cols = maze.shape
num_actions = 4  # up, down, left, right
num_states = rows * cols  # one state per cell, numbered row by row

# The agent starts in the top-left cell and must reach the bottom-right one.
initial_state, goal_state = 0, num_states - 1
agent = QLearningAgent(num_states, num_actions)

def train_agent(agent, maze, num_episodes=5000, start_state=0, goal_state=None,
                max_steps_per_episode=None):
    """Train ``agent`` on ``maze`` by running Q-learning episodes.

    Cells are numbered row by row (``state = row * cols + col``). Actions
    are 0 = up, 1 = down, 2 = left, 3 = right; any other action value
    leaves the agent where it is. A move that would leave the grid or
    enter a wall is rejected: the agent stays put and receives -10.
    A legal step costs -0.1 and reaching the goal pays +10.

    Args:
        agent: Object providing ``choose_action(state)`` and
            ``learn(state, action, reward, next_state)``.
        maze: 2-D array with 0 for open cells and 1 for walls.
        num_episodes: Number of episodes to run.
        start_state: State every episode begins in (top-left by default).
        goal_state: Terminal state; defaults to the bottom-right cell.
        max_steps_per_episode: Upper bound on steps in one episode, so an
            unreachable goal cannot hang training. Defaults to
            ``100 * rows * cols``.
    """
    # Take the geometry from the maze argument rather than module globals,
    # so the function works for whatever maze it is given.
    n_rows, n_cols = maze.shape
    if goal_state is None:
        goal_state = n_rows * n_cols - 1
    if max_steps_per_episode is None:
        max_steps_per_episode = 100 * n_rows * n_cols

    # (row delta, column delta) for each action index.
    moves = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    for _ in range(num_episodes):
        state = start_state
        steps = 0
        while state != goal_state and steps < max_steps_per_episode:
            steps += 1
            action = agent.choose_action(state)

            row, col = divmod(state, n_cols)
            d_row, d_col = moves.get(action, (0, 0))
            next_row, next_col = row + d_row, col + d_col

            # The bounds test comes first so the maze is never indexed with
            # an out-of-range (or negative, wrap-around) coordinate.
            is_valid_move = (
                0 <= next_row < n_rows
                and 0 <= next_col < n_cols
                and maze[next_row, next_col] == 0
            )
            if not is_valid_move:
                agent.learn(state, action, -10, state)
                continue

            next_state = next_row * n_cols + next_col
            reward = 10 if next_state == goal_state else -0.1
            agent.learn(state, action, reward, next_state)
            state = next_state

# Train the agent on the user-supplied maze, announcing start and finish.
print("\nTraining the agent...")
train_agent(agent, maze, num_episodes=5000)
print("Training complete.")

def test_agent(agent, maze, start_state=0, goal_state=None, max_steps=None):
    """Replay the agent's greedy policy through ``maze`` and print the result.

    Exploration is switched off for the replay and restored afterwards,
    even if an error occurs. Moves follow the same rules as training:
    0 = up, 1 = down, 2 = left, 3 = right, and a move that would leave the
    grid or enter a wall is illegal. Because the agent would stay put after
    an illegal move and then choose it again, the replay stops at that
    point and reports a failure.

    Args:
        agent: Object providing ``exploration_prob`` and
            ``choose_action(state)``.
        maze: 2-D array with 0 for open cells and 1 for walls.
        start_state: State the replay begins in (top-left by default).
        goal_state: Target state; defaults to the bottom-right cell.
        max_steps: Step limit for the replay. Defaults to the larger of
            100 and the number of cells.
    """
    n_rows, n_cols = maze.shape
    if goal_state is None:
        goal_state = n_rows * n_cols - 1
    if max_steps is None:
        max_steps = max(100, n_rows * n_cols)

    # (row delta, column delta) for each action index.
    moves = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    state = start_state
    path = [state]
    blocked = False

    print("\nTesting the agent's learned path:")
    original_exploration_prob = agent.exploration_prob
    agent.exploration_prob = 0
    try:
        for _ in range(max_steps):
            if state == goal_state:
                break
            action = agent.choose_action(state)

            row, col = divmod(state, n_cols)
            d_row, d_col = moves.get(action, (0, 0))
            next_row, next_col = row + d_row, col + d_col

            # Validate on (row, col) so a step off the right edge cannot
            # silently wrap onto the next row, and walls are respected.
            if not (
                0 <= next_row < n_rows
                and 0 <= next_col < n_cols
                and maze[next_row, next_col] == 0
            ):
                blocked = True
                break

            state = next_row * n_cols + next_col
            path.append(state)
    finally:
        agent.exploration_prob = original_exploration_prob

    if state == goal_state:
        print("\nAgent reached the goal!")
        print(f"Path taken: {path}")
    elif blocked:
        print(
            "\nAgent failed to reach the goal: its policy tries an illegal "
            f"move (wall or edge of the maze) from state {state}."
        )
    else:
        print("\nAgent failed to reach the goal within the maximum number of steps.")


# The name starts with "test_", so tell pytest not to collect this
# evaluation routine as a test case if the module is ever imported by one.
test_agent.__test__ = False

# Replay the trained agent with exploration disabled and print the outcome.
test_agent(agent, maze)

Maze input example:
5
5
00010
01010
01000
01110
00000


Enter the number of rows:  5
Enter the number of columns:  5


Enter the maze layout (0 for path, 1 for wall):


Row 0:  00010
Row 1:  01010
Row 2:  01000
Row 3:  01110
Row 4:  00000



Training the agent...
Training complete.

Testing the agent's learned path:

Agent reached the goal!
Path taken: [0, 1, 2, 7, 12, 13, 14, 19, 24]
