<a href="https://colab.research.google.com/github/anumit-web/ML-Masterlist-2024/blob/main/Machine%20Learning/Reinforcement_Learning_Training_an_Agent_to_Play_a_Simple_Grid_Game.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning

In [None]:
import numpy as np
import random

# --- Environment layout ---
# A square grid; each cell is addressed by a pair of indices.
grid_size = 4
start = (0, 0)
goal = (2, 3)
obstacles = [(1, 1), (1, 2)]  # cells the agent can never occupy

# --- Moves ---
# Action index -> offset added to the current cell's index pair.
actions = dict(enumerate([
    (-1, 0),  # 0: up
    (1, 0),   # 1: down
    (0, -1),  # 2: left
    (0, 1),   # 3: right
]))

# --- Learning hyperparameters ---
alpha = 0.1     # learning rate
gamma = 0.9     # discount factor
epsilon = 0.1   # exploration rate
episodes = 1000

# --- Q-table ---
# One zero-initialised value per (cell, action) pair.
q_table = np.zeros((grid_size, grid_size, len(actions)))

def is_valid(state):
    """Return True when `state` is on the grid and is not an obstacle cell."""
    first, second = state
    # Reject anything outside the square [0, grid_size) x [0, grid_size).
    if not (0 <= first < grid_size):
        return False
    if not (0 <= second < grid_size):
        return False
    return state not in obstacles

def get_reward(state):
    """Score a cell: 10 for the goal, -10 for an obstacle, -1 otherwise."""
    # The goal check comes first, so it wins over any other classification.
    if state == goal:
        return 10
    return -10 if state in obstacles else -1

def choose_action(state):
    """Select an action index for `state` with an epsilon-greedy rule.

    With probability `epsilon` a uniformly random action key is returned;
    otherwise the index of the largest Q-value stored for `state` is
    returned (the lowest index wins ties, per `np.argmax`).
    """
    explore = random.uniform(0, 1) < epsilon
    if not explore:
        # Greedy branch: look up this cell's row of Q-values.
        cell_values = q_table[state[0], state[1]]
        return np.argmax(cell_values)
    return random.choice(list(actions))

def update_q_table(state, action, reward, next_state):
    """Apply one Q-learning update to the entry for (`state`, `action`).

    The entry moves a fraction `alpha` of the way towards
    `reward + gamma * max(Q[next_state])`. `q_table` is modified in place;
    nothing is returned.
    """
    # Best value obtainable from the successor cell.
    best_next_value = q_table[next_state[0], next_state[1]].max()
    td_error = reward + gamma * best_next_value - q_table[state[0], state[1], action]
    q_table[state[0], state[1], action] += alpha * td_error

# Training the agent: run `episodes` episodes, each starting at `start` and
# ending as soon as the agent stands on `goal`.
for episode in range(episodes):
    state = start

    while state != goal:
        action = choose_action(state)
        step_0, step_1 = actions[action]
        attempted = (state[0] + step_0, state[1] + step_1)

        # Score the cell the agent tried to enter BEFORE any bounce-back, so
        # that walking into an obstacle earns get_reward's -10 penalty.
        # Computing the reward after resetting next_state to the current cell
        # made that penalty unreachable (every blocked move scored -1).
        # Off-grid attempts are neither goal nor obstacle, so they still
        # score -1.
        reward = get_reward(attempted)

        # A blocked move (off-grid or obstacle) leaves the agent in place.
        next_state = attempted if is_valid(attempted) else state

        update_q_table(state, action, reward, next_state)
        state = next_state

# Show the result: a header line followed by the full Q-table array.
print("Learned Q-table:", q_table, sep="\n")
