In [1]:
import numpy as np

def create_maze():
    """Interactively read a rectangular maze from standard input.

    Prompts for the number of rows and columns, then reads one line per
    row. Each line is a run of digits with no separators (e.g. ``010``);
    surrounding whitespace is stripped before the line is split into cells.

    Returns:
        A 2-D integer NumPy array with ``rows`` rows and ``cols`` columns.

    Raises:
        ValueError: If a dimension is not a positive integer, or if a row
            does not contain exactly ``cols`` characters. ``int()`` also
            raises ``ValueError`` when a dimension is not numeric.
    """
    rows = int(input("Enter number of rows: "))
    cols = int(input("Enter number of columns: "))
    if rows <= 0 or cols <= 0:
        raise ValueError(
            f"maze dimensions must be positive, got {rows} x {cols}"
        )

    print("Enter maze row by row (e.g. 010):")
    grid = []
    for row_number in range(1, rows + 1):
        cells = list(input().strip())
        # Enforce the declared width: otherwise a short or long row silently
        # changes the maze shape (or produces a ragged array).
        if len(cells) != cols:
            raise ValueError(
                f"row {row_number} has {len(cells)} cells, expected {cols}"
            )
        grid.append(cells)
    return np.array(grid, dtype=int)

# Read the maze from the user and derive the size of the state space.
maze = create_maze()
rows, cols = maze.shape
num_states = rows * cols
# States are numbered in row-major order, so the goal is the last cell.
goal_state = num_states - 1

# Q-table: one row per state, one column per action
# (0 = up, 1 = down, 2 = left, 3 = right).
q = np.zeros(shape=(num_states, 4))

# Q-learning hyperparameters.
lr = 0.7       # step size applied to each temporal-difference update
gamma = 0.8    # discount applied to the best next-state value
epsilon = 0.2  # probability of taking a random action while training

def train(episodes=200, max_steps=None):
    """Run tabular Q-learning on the module-level maze, updating ``q`` in place.

    Every episode starts in state 0 and ends when ``goal_state`` is reached
    or after ``max_steps`` moves. Actions are chosen epsilon-greedily from
    the current Q-table. A move onto an open cell costs -1 (or earns +10 if
    that cell is the goal); a move into a wall or off the grid leaves the
    agent in place and costs -5.

    Args:
        episodes: Number of training episodes to run.
        max_steps: Upper bound on moves per episode. Defaults to
            ``100 * num_states``. Without a bound, an episode never ends
            when the goal cell is a wall or cannot be reached from state 0.
    """
    if max_steps is None:
        max_steps = 100 * num_states

    # (row delta, column delta) indexed by action: up, down, left, right.
    moves = ((-1, 0), (1, 0), (0, -1), (0, 1))

    for _ in range(episodes):
        state = 0
        for _ in range(max_steps):
            if state == goal_state:
                break

            # Epsilon-greedy: explore with probability epsilon, else exploit.
            if np.random.rand() < epsilon:
                action = np.random.randint(4)
            else:
                action = np.argmax(q[state])

            r, c = divmod(state, cols)
            d_row, d_col = moves[action]
            new_r, new_c = r + d_row, c + d_col

            if 0 <= new_r < rows and 0 <= new_c < cols and maze[new_r, new_c] == 0:
                new_state = new_r * cols + new_c
                reward = 10 if new_state == goal_state else -1  # step cost + goal
            else:
                new_state, reward = state, -5  # wall penalty

            # Standard Q-learning temporal-difference update.
            q[state, action] += lr * (
                reward + gamma * np.max(q[new_state]) - q[state, action]
            )
            state = new_state

def test():
    """Follow the greedy policy from state 0 and print the resulting path.

    At each step the action with the highest Q-value for the current state
    is taken. The walk stops when the goal is reached, when the chosen move
    would hit a wall or leave the grid, or when the step budget runs out.
    The visited states are printed, followed by whether the goal was reached.
    """
    # (row delta, column delta) indexed by action: up, down, left, right.
    moves = ((-1, 0), (1, 0), (0, -1), (0, 1))

    path, state = [0], 0
    # The greedy policy is deterministic, so a walk that does not repeat a
    # state needs fewer than num_states moves; anything longer is a cycle.
    # A fixed cap of 50 wrongly failed mazes whose solution is longer.
    for _ in range(num_states):
        action = np.argmax(q[state])
        r, c = divmod(state, cols)
        d_row, d_col = moves[action]
        r, c = r + d_row, c + d_col
        if 0 <= r < rows and 0 <= c < cols and maze[r, c] == 0:
            state = r * cols + c
            path.append(state)
            if state == goal_state:
                break
        else:
            # The greedy action is blocked; it would be chosen again forever.
            break
    print("Agent path:", path)
    print("Reached goal!" if state == goal_state else "Failed.")

# Fit the Q-table first (default episode count), then replay the greedy policy.
train()  # updates the module-level q in place
test()   # prints the greedy path and whether it reached the goal


Enter number of rows:  3
Enter number of columns:  3


Enter maze row by row (e.g. 010):


 000
 100
 010


Agent path: [0, 1, 2, 5, 8]
Reached goal!
