In [12]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.renderers.default = "browser"

# Q-table produced by the training run; printed once as a quick sanity check
q_values = np.load('trained_q_values.npy')
print(q_values)

# Side length of the square grid world
n = 5

# Blank n x n grid
grid = np.zeros((n, n))

# The drop-off cell is fixed at the bottom-right corner
target_location = (n - 1, n - 1)

# Draw a random package cell, resampling until it differs from the target
while True:
    package_location = (np.random.randint(n), np.random.randint(n))
    if package_location != target_location:
        break

# The agent starts on a uniformly random cell (it may coincide with the package)
agent_position = (np.random.randint(n), np.random.randint(n))

# Moves available to the agent; an action index is a position in this list
actions = ['up', 'right', 'down', 'left']
action_space = len(actions)

def get_next_location(agent_row, agent_col, action_index):
    """Return the (row, col) reached by applying one action from a cell.

    ``action_index`` selects an entry of the module-level ``actions`` list.
    A move that would cross the edge of the ``n`` x ``n`` grid is ignored
    and the original coordinates are returned unchanged.
    """
    move = actions[action_index]
    last = n - 1

    if move == 'up':
        if agent_row > 0:
            return agent_row - 1, agent_col
    elif move == 'right':
        if agent_col < last:
            return agent_row, agent_col + 1
    elif move == 'down':
        if agent_row < last:
            return agent_row + 1, agent_col
    elif move == 'left':
        if agent_col > 0:
            return agent_row, agent_col - 1

    # Blocked by a wall (or unrecognised action name): stay in place
    return agent_row, agent_col

# Greedy policy lookup: always pick the highest-valued action (no exploration)
def get_next_action(agent_row, agent_col, package_row, package_col, carrying):
    """Return the index of the best action for a state according to ``q_values``.

    Args:
        agent_row, agent_col: The agent's current cell.
        package_row, package_col: The package cell used as part of the state.
        carrying: Truthy when the agent holds the package. Accepts a bool or
            a 0/1 integer.

    Returns:
        The position of the largest Q-value among the entries stored for
        this state, as returned by ``np.argmax``.

    NOTE(review): assumes ``q_values`` is laid out as
    [agent_row, agent_col, package_row, package_col, carrying, action] --
    confirm against the training code.
    """
    # A Python bool inside a NumPy index tuple is treated as a boolean mask,
    # not as 0/1: ``False`` would select an empty array (argmax then raises)
    # and ``True`` would add an axis instead of picking the "carrying" slice.
    # Convert to a plain integer so it indexes that axis.
    state = (agent_row, agent_col, package_row, package_col, int(carrying))
    return np.argmax(q_values[state])

# Visualization helpers
def _marker_trace(location, name, **marker):
    """Return a single-point marker trace centred on grid cell ``location``.

    ``location`` is a (row, col) pair; the column maps to x and the row to y,
    each shifted by 0.5 so the marker sits in the middle of the cell.
    Extra keyword arguments become the marker style.
    """
    return go.Scatter(
        x=[location[1] + 0.5],
        y=[location[0] + 0.5],
        mode='markers',
        marker=dict(**marker),
        name=name,
    )


def visualize_movement(agent_positions, package_location, target_location):
    """Build one Plotly animation frame per position on the agent's path.

    Every frame holds four traces in a fixed order: grid outline, agent,
    package, target. Keeping the order fixed lets the animation update each
    trace in place.

    Args:
        agent_positions: Sequence of (row, col) cells visited by the agent.
        package_location: (row, col) cell where the package starts.
        target_location: (row, col) cell of the drop-off point.

    Returns:
        A list of ``go.Frame`` objects, one per entry of ``agent_positions``.
    """
    frames = []

    # Latched flag: once the agent has stood on the package cell the package
    # counts as picked up for every later frame. Comparing only the current
    # position would make the package reappear as soon as the agent moves on.
    picked_up = False

    for pos in agent_positions:
        picked_up = picked_up or pos == package_location

        # Outline of the n x n grid
        grid_trace = go.Scatter(
            x=[0, n, n, 0, 0],
            y=[0, 0, n, n, 0],
            mode='lines',
            line=dict(color='black'),
            showlegend=False
        )

        agent_trace = _marker_trace(pos, 'Agent', size=15, color='red')

        if picked_up:
            # Empty trace keeps the trace count constant while hiding the package
            package_trace = go.Scatter(x=[], y=[], mode='markers', name='Package (Picked Up)')
        else:
            package_trace = _marker_trace(
                package_location, 'Package', size=12, color='blue', symbol='square'
            )

        target_trace = _marker_trace(
            target_location, 'Target', size=12, color='green', symbol='star'
        )

        frames.append(go.Frame(data=[grid_trace, agent_trace, package_trace, target_trace]))

    return frames

# Roll out the learned greedy policy: first to the package, then to the target
def simulate_agent_movement(agent_position, package_location, target_location,
                            max_steps=1000):
    """Follow the greedy policy from the start cell to the package and on to the target.

    Args:
        agent_position: (row, col) starting cell of the agent.
        package_location: (row, col) cell of the package.
        target_location: (row, col) drop-off cell.
        max_steps: Upper bound on the total number of moves across both
            phases. Guards against a policy that cycles and never arrives.

    Returns:
        The list of visited (row, col) cells, starting with ``agent_position``.

    Raises:
        RuntimeError: If the current goal is not reached within ``max_steps`` moves.
    """
    path = [agent_position]
    current_position = agent_position
    steps_taken = 0

    # Two phases: reach the package without carrying (0), then reach the
    # target while carrying (1). The flag is passed as an integer so it can
    # be used directly as an array index by the policy lookup.
    # NOTE(review): the package's original cell is still passed as the
    # package part of the state while carrying -- confirm this matches how
    # the state was encoded during training.
    for goal, carrying in ((package_location, 0), (target_location, 1)):
        while current_position != goal:
            if steps_taken >= max_steps:
                raise RuntimeError(
                    f"Agent did not reach {goal} within {max_steps} steps; "
                    "the learned policy may be cycling."
                )
            best_action = get_next_action(
                current_position[0], current_position[1],
                package_location[0], package_location[1],
                carrying,
            )
            # get_next_location takes the row and column separately
            current_position = get_next_location(
                current_position[0], current_position[1], best_action
            )
            path.append(current_position)
            steps_taken += 1

    return path

# Roll out the policy and record every cell the agent visits
agent_positions = simulate_agent_movement(agent_position, package_location, target_location)

# Turn the visited cells into animation frames
frames = visualize_movement(agent_positions, package_location, target_location)

# Both axes share the same settings: a one-cell margin around the grid and
# no gridlines, zero line or tick labels
axis_style = dict(range=[-1, n+1], showgrid=False, zeroline=False, showticklabels=False)

# "Play" steps through the frames, 500 ms each, from the current frame
play_button = dict(
    label="Play",
    method="animate",
    args=[None, {"frame": {"duration":500, "redraw": True}, "fromcurrent": True}],
)

# "Pause" halts the running animation immediately
pause_button = dict(
    label="Pause",
    method="animate",
    args=[[None], {"frame": {"duration": 0, "redraw": False}, "mode": "immediate", "transition": {"duration": 0}}],
)

# The figure starts out showing the four traces of the first frame
fig = go.Figure(
    data=list(frames[0].data),
    layout=go.Layout(
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        title="Agent Movement Simulation",
        updatemenus=[dict(type="buttons", buttons=[play_button, pause_button])],
    ),
    frames=frames,
)

# Render the animation with the configured Plotly renderer
fig.show()

Path to Package: [(0, 1), (1, 1), (2, 1), (3, 1), (3, 2)]
Path to Agent: [(3, 2), (4, 2), (4, 3), (4, 4)]
