# Active Reinforcement Learning

In [1]:
import random

# Define the environment
grid_size = 3
num_actions = 4
num_states = grid_size * grid_size
obstacles = [(1, 1)]

# Initialize Q-values
Q = {(x, y): [0] * num_actions for x in range(grid_size) for y in range(grid_size)}

# Function to visualize the grid
def visualize_grid(agent_state):
    for y in range(grid_size):
        for x in range(grid_size):
            if (x, y) == agent_state:
                print("A", end=" ")  # Represent the agent with 'A'
            elif (x, y) in obstacles:
                print("X", end=" ")  # Represent obstacles with 'X'
            else:
                print(".", end=" ")  # Empty space
        print()

# Active Reinforcement Learning with Output
def active_rl_with_output(max_iterations=100):
    state = (0, 0)

    def exploration_strategy(epsilon):
        return random.choice(range(num_actions)) if random.uniform(0, 1) < epsilon else max(range(num_actions), key=lambda a: Q[state][a])

    iteration = 0
    while state != (grid_size - 1, grid_size - 1) and iteration < max_iterations:
        epsilon = 0.1  # Exploration parameter
        action = exploration_strategy(epsilon)

        # Simulate environment transition
        next_x, next_y = state
        if action == 0:  # Up
            next_y = max(0, next_y - 1)
        elif action == 1:  # Down
            next_y = min(grid_size - 1, next_y + 1)
        elif action == 2:  # Left
            next_x = max(0, next_x - 1)
        elif action == 3:  # Right
            next_x = min(grid_size - 1, next_x + 1)

        # Check for obstacles
        if (next_x, next_y) not in obstacles:
            # Update Q-value (not implemented in this example)
            # Q[state][action] = ...
            state = (next_x, next_y)

        # Visualize the grid
        print(f"Iteration {iteration + 1}:")
        visualize_grid(state)

        # Print agent's state and selected action
        print(f"Selected action: {action}")

        iteration += 1

    if state == (grid_size - 1, grid_size - 1):
        print("Active RL reached the goal!")
    else:
        print("Active RL did not reach the goal within the specified number of iterations.")

# Run Active RL with Output with a maximum of 50 iterations
active_rl_with_output(max_iterations=2000)


Iteration 1:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 2:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 3:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 4:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 5:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 6:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 7:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 8:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 9:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 10:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 11:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 12:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 13:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 14:
. . . 
A X . 
. . . 
Selected action: 1
Iteration 15:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 16:
A . . 
. X . 
. . . 
Selected action: 2
Iteration 17:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 18:
A . . 
. X . 
. . . 
Selected action: 0
Iteration 19:
A . . 
. X . 
. . . 
Se