In [8]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pickle
import cv2
import time

# RGB triples (components in the 0-1 range) used by the plotting cells below
# to draw agent 1, agent 2 and agent 3 respectively.
BLUE = [0, 0.4470, 0.7410]
RED = [0.8500, 0.3250, 0.0980]
YELLOW = [0.929, 0.6940, 0.1250]

## Load data
This code loads the data as a list 'trajectories' and also makes a data frame 'df' for further analysis.

In [19]:
# Name of the test run to analyse; load_data reads
# ./test_policy/test_<run_name>/test_trajectory.pkl
run_name = 'fully-connected_03'

def load_data(run_name, base_dir='./test_policy'):
    """Load the test trajectories of a run and build a DataFrame from them.

    Reads ``<base_dir>/test_<run_name>/test_trajectory.pkl``. The pickle is
    expected to hold a list of trajectories, each trajectory being a list of
    rows (Python lists) laid out as::

        [episode, timestep,
         agent1_x, agent1_y, agent2_x, agent2_y, agent3_x, agent3_y,
         land1_x, land1_y, land2_x, land2_y, land3_x, land3_y,
         reward_1, reward_2, reward_3]

    Every row is extended in place with six derived values: the cumulative
    reward of each agent up to and including that row, followed by the
    distance from each agent to its closest landmark.

    Parameters
    ----------
    run_name : str
        Name of the run; selects the ``test_<run_name>`` sub-directory.
    base_dir : str, optional
        Directory that contains the ``test_<run_name>`` folders.

    Returns
    -------
    df : pandas.DataFrame
        One row per (episode, timestep) with the 23 columns listed below.
    trajectories : list
        The loaded trajectories, with the derived values appended to each row.
    """
    # NOTE: pickle.load can execute arbitrary code; only open files you trust.
    with open(f'{base_dir}/test_{run_name}/test_trajectory.pkl', 'rb') as f:
        trajectories = pickle.load(f)

    for trajectory in trajectories:
        # Running totals replace re-summing the whole prefix for every row.
        cumulative_rewards = [0, 0, 0]
        for row in trajectory:
            for agent in range(3):
                cumulative_rewards[agent] = cumulative_rewards[agent] + row[14 + agent]

            # Positions are read before the row is extended, so indices are stable.
            agent_positions = [np.array(row[2 + 2*a:4 + 2*a]) for a in range(3)]
            landmark_positions = [np.array(row[8 + 2*l:10 + 2*l]) for l in range(3)]
            closest_dists = [
                min(np.linalg.norm(agent_pos - land_pos) for land_pos in landmark_positions)
                for agent_pos in agent_positions
            ]

            # Column order must match the DataFrame below: cumulative rewards, then distances.
            row.extend(cumulative_rewards)
            row.extend(closest_dists)

    # Flatten the nested list: one record per (episode, timestep)
    flattened_data = [row for trajectory in trajectories for row in trajectory]
    # Convert to DataFrame
    df = pd.DataFrame(flattened_data, columns=[ 'episode', 'timestep',
                                                'agent1_x', 'agent1_y',
                                                'agent2_x', 'agent2_y',
                                                'agent3_x', 'agent3_y',
                                                'land1_x', 'land1_y',
                                                'land2_x', 'land2_y',
                                                'land3_x', 'land3_y',
                                                'reward_1', 'reward_2', 'reward_3',
                                                'agent1_cum_reward', 'agent2_cum_reward', 'agent3_cum_reward',
                                                'agent1_dist', 'agent2_dist', 'agent3_dist'])
    return df, trajectories

# Load the run once; the cells below reuse both `df` and `trajectories`.
df, trajectories = load_data(run_name)

## Mean Performance Analysis
This code runs the analysis on all the test episodes within the loaded run.

In [13]:
def plot_test_analysis(df, run_name):
    %matplotlib qt
    # Colors
    BLUE = [0, 0.4470, 0.7410]
    RED = [0.8500, 0.3250, 0.0980]
    YELLOW = [0.929, 0.6940, 0.1250]

    p = [BLUE, RED, YELLOW]
    sns.set_palette(p)


    fig, axs = plt.subplots(2,3, figsize=(15,8))

    fig.suptitle('Policy used: {}'.format(run_name), fontsize=16)


    sns.lineplot(ax=axs[0,0], x="timestep", y="agent1_dist", data=df, errorbar=("sd",1), sort=False, color=BLUE, label='agent 1')
    sns.lineplot(ax=axs[0,0], x="timestep", y="agent2_dist", data=df, errorbar=("sd",1), sort=False, color=RED, label='agent 2')
    sns.lineplot(ax=axs[0,0], x="timestep", y="agent3_dist", data=df, errorbar=("sd",1), sort=False, color=YELLOW, label='agent 3')
    axs[0,0].set_xlabel('timestep')
    axs[0,0].set_ylabel('distance to closest landmark')
    axs[0,0].grid()

    # plot the histogram of the mean of the distance to the closest landmark for each agent at the timestep 70
    # sns.violinplot(data=df, x="timestep", y="reward_1")
    melted_df = data=df[df['timestep']==70].melt(value_vars=['agent1_cum_reward', 'agent2_cum_reward', 'agent3_cum_reward'], var_name='agent_name', value_name='value')
    sns.histplot(ax=axs[0,1], data=melted_df, x='value', hue='agent_name', element='step', common_norm=False, bins=30)
    axs[0,1].set_xlabel('cumulative shared reward')
    axs[0,1].set_ylabel('number of episodes')
    axs[0,1].legend(['agent 1', 'agent 2', 'agent 3'])


    sns.lineplot(ax=axs[0,2], x="timestep", y="agent1_cum_reward", data=df, errorbar=("sd",1), sort=False, color=BLUE, label='agent 1')
    sns.lineplot(ax=axs[0,2], x="timestep", y="agent2_cum_reward", data=df, errorbar=("sd",1), sort=False, color=RED, label='agent 2')
    sns.lineplot(ax=axs[0,2], x="timestep", y="agent3_cum_reward", data=df, errorbar=("sd",1), sort=False, color=YELLOW, label='agent 3')
    axs[0,2].set_xlabel('timestep')
    axs[0,2].set_ylabel('cumulative shared reward')
    axs[0,2].grid()

    # plt.subplot(2,2,4)
    sns.lineplot(ax=axs[1,0], x="timestep", y="reward_1", data=df, errorbar=("sd",1), sort=False, color=BLUE, label='agent 1')
    sns.lineplot(ax=axs[1,0], x="timestep", y="reward_2", data=df, errorbar=("sd",1), sort=False, color=RED, label='agent 2')
    sns.lineplot(ax=axs[1,0], x="timestep", y="reward_3", data=df, errorbar=("sd",1), sort=False, color=YELLOW, label='agent 3')
    axs[1,0].set_xlabel('timestep')
    axs[1,0].set_ylabel('mean shared reward')
    axs[1,0].grid()


    # plot the histogram of the mean of the distance to the closest landmark for each agent at the timestep 70
    melted_df = data=df[df['timestep']==1].melt(value_vars=['agent1_dist', 'agent2_dist', 'agent3_dist'], var_name='agent_name', value_name='value')
    sns.histplot(ax=axs[1,1],data=melted_df, x='value', hue='agent_name', element='step', common_norm=False, bins=30)
    axs[1,1].set_xlabel('distance to closest landmark at the first timestep')
    axs[1,1].set_ylabel('number of episodes')
    axs[1,1].legend(['agent 1', 'agent 2', 'agent 3'])


    # plot the histogram of the mean of the distance to the closest landmark for each agent at the timestep 70
    melted_df = data=df[df['timestep']==70].melt(value_vars=['agent1_dist', 'agent2_dist', 'agent3_dist'], var_name='agent_name', value_name='value')
    sns.histplot(ax=axs[1,2], data=melted_df, x='value', hue='agent_name', element='step', common_norm=False, bins=30)
    axs[1,2].set_xlabel('distance to closest landmark at the last timestep')
    axs[1,2].set_ylabel('number of episodes')
    axs[1,2].legend(['agent 1', 'agent 2', 'agent 3'])

    fig.tight_layout()
    # fig.savefig(run_name+'_analysis.png', dpi=300)
    plt.show()

# Draw the summary figure for the run loaded above.
plot_test_analysis(df, run_name)

## Episodic Analysis
This code plots the details of a particular episode

In [10]:
%matplotlib qt

episode = 123

# Rows of the selected episode, filtered once and shared by the three panels
episode_df = df[df['episode']==episode]
agent_colors = [BLUE, RED, YELLOW]

fig, (ax_path, ax_reward, ax_dist) = plt.subplots(1, 3, figsize=(15,4.5))

# Panel 1: x,y path of every agent, drawn in row order (sort=False)
for k, color in enumerate(agent_colors, start=1):
    sns.lineplot(ax=ax_path, x="agent{}_x".format(k), y="agent{}_y".format(k), data=episode_df, sort=False, color=color)
# mark the location of landmarks
for k in (1, 2, 3):
    sns.scatterplot(ax=ax_path, x="land{}_x".format(k), y="land{}_y".format(k), data=episode_df, color='gray', s=300)
# make the axes square and limit both of them to -1.5 .. 1.5
ax_path.axis('square')
ax_path.set_xlim(-1.5,1.5)
ax_path.set_ylim(-1.5,1.5)
ax_path.set_xlabel('x')
ax_path.set_ylabel('y')

# Panel 2: reward of every agent over the timesteps of the episode
for k, color in enumerate(agent_colors, start=1):
    sns.lineplot(ax=ax_reward, x="timestep", y="reward_{}".format(k), data=episode_df, sort=False, color=color, label='agent{}'.format(k))
ax_reward.set_xlabel('timestep')
ax_reward.set_ylabel('shared reward')
ax_reward.set_title('Episode {}'.format(episode))

# Panel 3: distance to the closest landmark for each agent
for k, color in enumerate(agent_colors, start=1):
    sns.lineplot(ax=ax_dist, x="timestep", y="agent{}_dist".format(k), data=episode_df, sort=False, color=color, label='agent{}'.format(k))
ax_dist.set_xlabel('timestep')
ax_dist.set_ylabel('distance to closest landmark')

# Show once, after all panels are drawn; an earlier show() can split or block the figure.
plt.show()

## Plot the reward function

In [16]:
def reward_function(d):
    """Return exp(-d**2 / 0.1) for a distance ``d`` (scalar or NumPy array)."""
    squared_distance = d**2
    return np.exp(-squared_distance/.1)

def plot_reward_function(df, episode=1, timestep=1):
    """Plot the reward field implied by the landmarks of one episode.

    The landmark positions are read from the row of ``df`` with the given
    ``episode`` and ``timestep``. For every point of a 100x100 grid over
    [-1, 1] x [-1, 1], ``reward_function`` is applied to the distance to the
    closest landmark, and the result is drawn as a filled contour plot with
    labelled contour lines on top.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``episode``, ``timestep`` and ``land{1,2,3}_{x,y}`` columns.
    episode, timestep : int, optional
        Select the row the landmark positions are taken from.

    Raises
    ------
    ValueError
        If ``df`` has no row for the requested episode and timestep.
    """
    selected_rows = df[(df['timestep']==timestep) & (df['episode']==episode)]
    if selected_rows.empty:
        raise ValueError(f"no row for episode {episode} at timestep {timestep}")
    first_row = selected_rows.iloc[0]
    # (3, 2) array: one (x, y) pair per landmark
    landmark_locations = np.array(
        [[first_row[f'land{k}_x'], first_row[f'land{k}_y']] for k in (1, 2, 3)],
        dtype=float,
    )

    x = np.linspace(-1, 1, 100)
    y = np.linspace(-1, 1, 100)
    X, Y = np.meshgrid(x, y)

    # Distance from every grid point to every landmark in one broadcast:
    # (100, 100, 1, 2) - (3, 2) -> (100, 100, 3, 2), then the norm over the last axis.
    grid_points = np.stack([X, Y], axis=-1)
    dists = np.linalg.norm(grid_points[:, :, np.newaxis, :] - landmark_locations, axis=-1)
    Z = reward_function(dists.min(axis=-1))

    # plot the reward function as a filled contour map
    fig = plt.figure(figsize=(6,6))
    ax = plt.axes()
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('Reward Function')
    cs = ax.contourf(X, Y, Z, 50, cmap='cividis')
    cbar = fig.colorbar(cs)
    # make sure 0 and 1 are part of the colorbar ticks
    cbar.set_ticks([0, 0.25, 0.5, 0.75, 1])
    # plot contour lines with labels on top of the heatmap
    cs = ax.contour(X, Y, Z, [0.01,0.1,0.25,0.5,0.75,0.95], colors='white', linewidths=0.5)
    # add specific contour values
    ax.clabel(cs, inline=1, fontsize=8)
    ax.set_aspect('equal')
    plt.show()
# Draw the reward field for the loaded data.
plot_reward_function(df)

We can find the episode numbers of the worst-performing episodes and inspect their behavior.

In [17]:
# Five rows with the lowest agent-1 cumulative reward among the rows at timestep 70
df.loc[df['timestep'] == 70].sort_values('agent1_cum_reward').head(5)


Unnamed: 0,episode,timestep,agent1_x,agent1_y,agent2_x,agent2_y,agent3_x,agent3_y,land1_x,land1_y,...,land3_y,reward_1,reward_2,reward_3,agent1_cum_reward,agent2_cum_reward,agent3_cum_reward,agent1_dist,agent2_dist,agent3_dist
58379,834,70,3.71937,-2.282283,-0.22938,0.396079,0.719549,0.033549,0.5,0.0,...,-0.433013,1.395175e-68,1.395175e-68,1.395175e-68,2.300226e-10,2.300226e-10,2.300226e-10,3.946284,0.0423,0.222097
5319,76,70,4.470279,-1.84578,7.713436,-0.558476,-1.034868,-1.584986,0.5,0.0,...,-0.433013,9.4016e-320,9.4016e-320,9.4016e-320,6.666467e-10,6.666467e-10,6.666467e-10,4.378358,7.235023,1.393937
14209,203,70,4.072829,0.775028,6.683566,-1.780517,0.91588,-0.981435,0.5,0.0,...,-0.433013,1.554979e-243,1.554979e-243,1.554979e-243,7.240644e-10,7.240644e-10,7.240644e-10,3.655923,6.434806,1.065913
47529,679,70,4.398247,0.471654,6.204931,5.24414,0.940527,-0.545348,0.5,0.0,...,-0.433013,0.0,0.0,0.0,1.760432e-09,1.760432e-09,1.760432e-09,3.926676,7.749016,0.701048
5039,72,70,2.4487,-0.335249,1.154482,2.644794,0.475252,-0.664066,0.5,0.0,...,-0.433013,1.948412e-49,1.948412e-49,1.948412e-49,3.097751e-09,3.097751e-09,3.097751e-09,1.977328,2.620028,0.664527


## Behavior Replay Animation

Just adjust the episode number and observe the behavior

In [43]:
%matplotlib qt
import pygame

episode = 123
# Select the trajectory by the episode id stored in column 0 of its rows,
# not by list position: the two differ whenever episode ids do not start at 0,
# and the replay would then show a different episode than the caption says.
matching_trajectories = [t for t in trajectories if len(t) > 0 and t[0][0] == episode]
if not matching_trajectories:
    raise ValueError(f"episode {episode} not found in trajectories")
# drop the episode id so that every row starts with the timestep
trajectory_ = [t[1:] for t in matching_trajectories[0]]


pygame.init()

# Display settings
WIDTH, HEIGHT = 640, 480
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Multi-Agent and Landmarks Animation")

# Colors (0-255 range for pygame). The agent colors are kept inside
# AGENTS_COLORS rather than rebinding BLUE/RED/YELLOW, which the matplotlib
# cells use as 0-1 triples.
AGENTS_COLORS = [
    np.array([0, 0.4470*255, 0.7410*255]),
    np.array([0.8500*255, 0.3250*255, 0.0980*255]),
    np.array([0.929*255, 0.6940*255, 0.1250*255]),
]

WHITE = (255, 255, 255)
LANDMARK_COLORS = [(50, 50, 50), (50, 50, 50), (50, 50, 50)]
BLACK = (20, 20, 20)

# Frames captured during the replay (filled by capture_frame)
frames = []
# World coordinates are scaled by SCALE_FACTOR and shifted to the window center
SCALE_FACTOR = 200  # pixels per world unit
SCREEN_CENTER = (WIDTH // 2, HEIGHT // 2)

# Choose a font (using a default system font here)
font = pygame.font.SysFont("arial", 16)

def map_to_screen(pos):
    """Convert a world-space (x, y) pair into integer pixel coordinates.

    Each component is scaled by SCALE_FACTOR, offset by the matching
    SCREEN_CENTER component and truncated with int().
    """
    world_x, world_y = pos[0], pos[1]
    pixel_x = int(world_x * SCALE_FACTOR + SCREEN_CENTER[0])
    pixel_y = int(world_y * SCALE_FACTOR + SCREEN_CENTER[1])
    return pixel_x, pixel_y


def draw_entity(screen, x, y, color, size=.2*SCALE_FACTOR):
    """Draw a circle of radius ``size`` in ``color`` centred at pixel (x, y)."""
    center = (int(x), int(y))
    pygame.draw.circle(screen, color, center, size)



def display_text(text, x, y, color=BLACK):
    """Render ``text`` with the module font and blit it onto the screen at (x, y)."""
    rendered = font.render(text, True, color)
    top_left = (x, y)
    screen.blit(rendered, top_left)

def capture_frame(screen):
    """Copy the current contents of ``screen`` and append the copy to ``frames``."""
    snapshot = pygame.Surface(screen.get_size())
    origin = (0, 0)
    snapshot.blit(screen, origin)
    frames.append(snapshot)


def save_frames_to_video(frames, filename, fps=30):
    """Save captured pygame frames to an XVID-encoded video file.

    Parameters
    ----------
    frames : list of pygame.Surface
        Frames to write, all assumed to have the size of the first one.
    filename : str
        Output path passed to ``cv2.VideoWriter``.
    fps : int, optional
        Frame rate of the written video.

    Raises
    ------
    ValueError
        If ``frames`` is empty.
    """
    if not frames:
        raise ValueError("no frames to save")
    # Surface.get_size() returns (width, height), which is also the order
    # cv2.VideoWriter expects for its frame size.
    width, height = frames[0].get_size()
    size = (width, height)
    out = cv2.VideoWriter(filename, cv2.VideoWriter_fourcc(*'XVID'), fps, size)
    try:
        for frame in frames:
            # array3d is indexed [x, y, channel]; swap the first two axes to get
            # the row-major (height, width, channel) layout OpenCV works with.
            frame_rgb = pygame.surfarray.array3d(frame).transpose([1, 0, 2])
            frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
            out.write(frame_bgr)
    finally:
        # Release the writer even if a frame fails to convert or write.
        out.release()

def main():
    """Replay the episode stored in ``trajectory_`` in the pygame window.

    Each frame advances ``current_time`` by 0.1, linearly interpolates agent
    and landmark positions between the two rows that bracket that time, draws
    them together with the rewards, and stores a copy of the frame in
    ``frames``. The loop ends when the window is closed or when the time has
    passed the timestep of the last row; pygame is shut down afterwards.
    """
    if not trajectory_:
        # Nothing to replay.
        pygame.quit()
        return

    # Pause before the replay starts (presumably to leave time to bring the
    # window forward or start a screen recorder - confirm).
    time.sleep(5)
    clock = pygame.time.Clock()
    running = True
    current_time = 0
    # Timestep of the last row. Rows of trajectory_ start with the timestep
    # because the episode id was dropped when trajectory_ was built.
    end_time = trajectory_[-1][0]
    while running:
        screen.fill(WHITE)
        # Calculate the current positions of the agents and landmarks
        for i in range(len(trajectory_) - 1):

            cum_rew_1 = trajectory_[i][16]
            cum_rew_2 = trajectory_[i][17]
            cum_rew_3 = trajectory_[i][18]

            t0, *data0 = trajectory_[i]
            t1, *data1 = trajectory_[i + 1]

            if t0 <= current_time < t1:
                # Interpolation weight between row i (0) and row i + 1 (1)
                alpha = (current_time - t0) / (t1 - t0)

                # Drawing agents
                for j in range(3):
                    x0, y0, x1, y1 = data0[j * 2], data0[j * 2 + 1], data1[j * 2], data1[j * 2 + 1]
                    x = x0 * (1 - alpha) + x1 * alpha
                    y = y0 * (1 - alpha) + y1 * alpha
                    # Reward of agent j at row i, shown above the agent
                    r = data0[12 + j]
                    screen_x, screen_y = map_to_screen((x, y))
                    draw_entity(screen, screen_x, screen_y, AGENTS_COLORS[j])
                    display_text(f"{r:.4f}", screen_x-25, screen_y-60)

                # Drawing landmarks
                for j in range(3):
                    x0, y0, x1, y1 = data0[6 + j * 2], data0[7 + j * 2], data1[6 + j * 2], data1[7 + j * 2]
                    x = x0 * (1 - alpha) + x1 * alpha
                    y = y0 * (1 - alpha) + y1 * alpha
                    screen_x, screen_y = map_to_screen((x, y))
                    draw_entity(screen, screen_x, screen_y, LANDMARK_COLORS[j], size=.05*SCALE_FACTOR)

                display_text(f"agent 1 cumulative reward: {cum_rew_1:.2f}", 370, 10)
                display_text(f"agent 2 cumulative reward: {cum_rew_2:.2f}", 370, 30)
                display_text(f"agent 3 cumulative reward: {cum_rew_3:.2f}", 370, 50)
                display_text(f"episode: {episode}",10, 20)
                display_text(f"timestep: {i}", 120, 20)

        pygame.display.flip()
        capture_frame(screen)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        # Cap the loop at 60 frames per second
        clock.tick(60)
        # Update current time
        current_time += .1

        # Exit loop when trajectory ends
        if current_time > end_time:
            running = False

    pygame.quit()

# Start the replay; the guard keeps it from running if this code is imported
# as a module instead of being executed directly.
if __name__ == "__main__":
    main()
    # Uncomment to write the captured frames to an AVI file:
    # save_frames_to_video(frames,'episode_{}.avi'.format(episode))


pygame 2.5.2 (SDL 2.28.3, Python 3.9.18)
Hello from the pygame community. https://www.pygame.org/contribute.html


: 