In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pickle
import cv2
import time
import os
import sys
import gzip
import seaborn as sns

# Shared plot colors as RGB triples; agent_colors[k] is the color used for agent k+1.
BLUE, RED, YELLOW = (
    [0, 0.4470, 0.7410],
    [0.8500, 0.3250, 0.0980],
    [0.929, 0.6940, 0.1250],
)
agent_colors = [BLUE, RED, YELLOW]

## Load data

To load the data, the `saved_policy` directory must be in the same directory as this notebook.
The 2-agent results are evaluated over 3 training runs and the 3-agent results over 10 training runs; set `num_agents` and `num_train_runs` in the next cell accordingly.

In [75]:
# Experiment selector: which saved runs to load (number of agents and number of training runs).
num_train_runs = 10
num_agents = 3

# Rewards are not stored in the trajectory logs (this keeps the logs light), so they are
# recomputed here. This must mirror the reward function used in training: if the training
# reward function changes, change it here as well.
def reward_function(d):
    """Return the reward for a distance ``d``: ``exp(-d**2 / 0.1)``, floored at 0.01.

    ``d`` may be a scalar or a NumPy array; arrays are handled element-wise.
    """
    gaussian_term = np.exp(-(d ** 2) / .1)
    return np.maximum(gaussian_term, 0.01)

def euclidean_distance(x1, y1, x2, y2):
    """Return the straight-line distance between the points (x1, y1) and (x2, y2).

    Works element-wise when the coordinates are NumPy arrays.
    """
    dx = x1 - x2
    dy = y1 - y2
    return np.sqrt(dx ** 2 + dy ** 2)


# Experiment grid for each supported setup. These names are interpolated into the
# 'saved_policy' sub-directory names below, so they must match the names on disk.
if num_agents == 3:
    rsrn_types = ['WPM']
    agent_limitations = ['slow']
    networks = ['self-interested', 'fully-connected', 'authoritarian', 'collapsed authoritarian', 'tribal', 'collapsed tribal']
elif num_agents == 2:
    rsrn_types = ['WSM', 'Minmax', 'WPM']
    agent_limitations = ['normal', 'slow', 'stuck']
    networks = ['fully-connected']
else:
    # Fail early instead of hitting a NameError further down.
    raise ValueError(f'num_agents must be 2 or 3, got {num_agents}')

# Data loading: every logged row is extended with the settings of the run it came from.
# NOTE(security): pickle.load can execute arbitrary code -- only load logs you produced yourself.
rows = []
for i in range(num_train_runs):
    print(f'Run {i+1}')
    for rsrn_type in rsrn_types:
        for network in networks:
            for agent_limitation in agent_limitations:
                path = f'./saved_policy/{num_agents}-agent_{rsrn_type}_{network}_{agent_limitation}_{i+1}/'
                with gzip.open(path + 'test_trajectory.pkl.gz', 'rb') as f:
                    run = pickle.load(f)
                info = [rsrn_type, network, agent_limitation, i + 1]
                for trajectory in run:
                    rows.extend(row + info for row in trajectory)

# Column layout of one row: episode, timestep, (x, y) of every agent, (x, y) of every
# landmark, one reward per agent, then the four settings appended above. Generating the
# names keeps the 2-agent and 3-agent layouts in sync.
agent_ids = range(1, num_agents + 1)
column_names = (
    ['episode', 'timestep']
    + [f'agent_{k}_{axis}' for k in agent_ids for axis in ('x', 'y')]
    + [f'landmark_{k}_{axis}' for k in agent_ids for axis in ('x', 'y')]
    + [f'agent_{k}_reward' for k in agent_ids]
    + ['rsrn_type', 'network', 'agent_limitation', 'run_number']
)
df = pd.DataFrame(rows, columns=column_names)

# Directories on disk use 'Minmax'; the plots refer to this scalarization as 'MiniMax'.
df['rsrn_type'] = df['rsrn_type'].replace('Minmax', 'MiniMax')
del rows  # the row list is large and no longer needed once the DataFrame exists

# Distance from every agent to its closest landmark, and the individual reward it implies.
# The landmark coordinates do not depend on the agent, so they are extracted once.
landmark_xy = [(df[f'landmark_{j}_x'].values, df[f'landmark_{j}_y'].values) for j in agent_ids]
for agent_index in agent_ids:
    print(f'Calculating distances and rewards for agent {agent_index}...')
    agent_x = df[f'agent_{agent_index}_x'].values
    agent_y = df[f'agent_{agent_index}_y'].values

    # One row of distances per landmark; the minimum over landmarks is the closest one.
    distances = np.array([euclidean_distance(agent_x, agent_y, lx, ly) for lx, ly in landmark_xy])
    closest_distances = distances.min(axis=0)

    df[f'agent_{agent_index}_closest_landmark_distance'] = closest_distances
    df[f'agent_{agent_index}_indv_reward'] = reward_function(closest_distances)

# Running sum of each agent's logged reward within every individual episode.
grouping_columns = ['rsrn_type', 'network', 'agent_limitation', 'run_number', 'episode']
for agent_index in agent_ids:
    df[f'agent_{agent_index}_cum_reward'] = df.groupby(grouping_columns)[f'agent_{agent_index}_reward'].cumsum()

print('Done!')

Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Calculating distances and rewards for agent 1...
Calculating distances and rewards for agent 2...
Calculating distances and rewards for agent 3...
Done!


## 3-Agent Behavioral Analysis (Different Networks - Slow - WPM)

In [47]:
%matplotlib qt



# Metric passed to plot_episode below; it is the column suffix after 'agent_<k>_'.
variable_of_interest = 'indv_reward' # options: 'closest_landmark_distance', 'indv_reward'



# Same RGB triples as the color constants defined in the first cell.
BLUE = [0, 0.4470, 0.7410]
RED = [0.8500, 0.3250, 0.0980]
YELLOW = [0.929, 0.6940, 0.1250]
# Palette in agent order (agent 1, 2, 3); plot_episode below indexes it by agent.
p = [BLUE,RED, YELLOW]
sns.set_palette(p)
# Close figures left over from earlier cells before drawing new ones.
plt.close('all')
sns.set_style("whitegrid")
sns.set_context("paper")
def plot_episode(variable_of_interest, num_agents, rsrn_types, networks, agent_limitation='slow'):
    """Plot a per-timestep metric for every agent, one panel per relational network.

    For each agent the solid line is the median over all matching rows at each timestep,
    with a 50% percentile interval as the band; the faint dashed line is the mean.
    The data comes from the notebook-level ``df`` and the colors from ``agent_colors``.

    Parameters
    ----------
    variable_of_interest : str
        Column suffix to plot; the column read is ``agent_<k>_<variable_of_interest>``
        (e.g. 'indv_reward' or 'closest_landmark_distance').
    num_agents : int
        Number of agents; agents 1..num_agents are drawn. The last agent is labelled with
        ``agent_limitation``, the others with 'normal'.
    rsrn_types : list of str
        Values of the 'rsrn_type' column to include. All of them are drawn on the same panel.
    networks : list of str
        Values of the 'network' column, one panel each (at most six, on a 3x2 grid).
    agent_limitation : str
        Value of the 'agent_limitation' column to select.

    Raises
    ------
    ValueError
        If more networks are requested than there are panels.
    """
    n_rows, n_cols = 3, 2
    if len(networks) > n_rows * n_cols:
        raise ValueError(f'at most {n_rows * n_cols} networks can be plotted, got {len(networks)}')

    # Panel titles follow the network that is actually plotted, not its position in the list.
    network_names = {
        'self-interested': 'Self-interested',
        'fully-connected': 'Communitarian',
        'authoritarian': 'Authoritarian',
        'collapsed authoritarian': 'Collapsed Authoritarian',
        'tribal': 'Tribal',
        'collapsed tribal': 'Collapsed Tribal',
    }
    # The y-axis label follows the plotted metric; unknown metrics fall back to the suffix.
    y_labels = {
        'indv_reward': 'Individual Reward',
        'closest_landmark_distance': 'Dist. to Closest Landmark',
    }
    y_label = y_labels.get(variable_of_interest, variable_of_interest)

    sns.set(font_scale=1)
    sns.set_style("whitegrid")
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(6, 15))
    panels = axs.flat

    for j, network in enumerate(networks):
        ax = panels[j]
        for rsrn_type in rsrn_types:
            df_ep = df.loc[(df['rsrn_type'] == rsrn_type) & (df['network'] == network) & (df['agent_limitation'] == agent_limitation)]
            for agent_index in range(1, num_agents + 1):
                # Only the last agent carries the limitation.
                if agent_index == num_agents:
                    label = f'Agent {agent_index} ({agent_limitation})'
                else:
                    label = f'Agent {agent_index} (normal)'
                column = f'agent_{agent_index}_{variable_of_interest}'
                # agent_colors (not the cell-level palette `p`) is used for both lines:
                # other cells rebind `p`, which would break or recolor the median line.
                color = agent_colors[agent_index - 1]

                # Median with inter-quartile band.
                sns.lineplot(data=df_ep, x='timestep', y=column, ax=ax,
                             label=label,
                             errorbar=('pi', 50),
                             color=color,
                             estimator=np.median)
                # Mean, drawn faintly for comparison.
                sns.lineplot(data=df_ep, x='timestep', y=column, ax=ax,
                             legend=False, errorbar=None,
                             estimator=np.mean,
                             linestyle='--',
                             color=color,
                             alpha=0.3)

        panel_letter = chr(ord('a') + j)
        ax.set_title(f'({panel_letter}) {network_names.get(network, network)}', fontsize=14, loc='center', pad=5)

        ax.set_xlim([0, 69])
        ax.set_xlabel('Timestep', color='gray')
        ax.set_ylabel(y_label, color='gray')

        # A single legend, attached to the first panel, serves the whole figure.
        if j == 0:
            legend_title = f"{' / '.join(rsrn_types)} Scalarization"
            ax.legend(loc='upper left', bbox_to_anchor=(.8, 1.6), title=legend_title, title_fontsize=12)
        else:
            ax.legend().remove()

        ax.set_ylim([0, 1.3])
        # Gray ticks and tick labels, set on this axes explicitly.
        ax.tick_params(axis='x', colors='gray')
        ax.tick_params(axis='y', colors='gray')

    fig.subplots_adjust(hspace=0.375, wspace=0.37, bottom=0.1)
    plt.show()
# Draw the six-network comparison for the 3-agent / WPM / slow-agent setting.
plot_episode(
    variable_of_interest,
    num_agents=3,
    rsrn_types=['WPM'],
    networks=[
        'self-interested', 'fully-connected',
        'authoritarian', 'collapsed authoritarian',
        'tribal', 'collapsed tribal',
    ],
    agent_limitation='slow',
)


## 3-Agent Training Logs

In [140]:
%matplotlib qt
num_agents = 3
num_train_runs = 10
rsrn_types = ['WPM']
agent_limitations = ['slow']
networks=[  
            'self-interested',
            'fully-connected',
            'authoritarian',
            'collapsed authoritarian',
            'tribal',
            'collapsed tribal'
            ]
num_episodes = 500000
# Episodes averaged into one plotted point, and the resulting number of points per curve.
chunk_length = 1000
num_chunks = num_episodes // chunk_length

BLUE = [0, 0.4470, 0.7410]
RED = [0.8500, 0.3250, 0.0980]
YELLOW = [0.929, 0.6940, 0.1250]
p = [BLUE,RED, YELLOW]
sns.set_palette(p)
sns.set_style("whitegrid")
sns.set_context("paper")
plt.close('all')

# One figure column per logged quantity: (key in train_log, y-axis label).
panel_specs = [
    ('cum_individual_rewards', 'Individual Reward'),
    ('cum_shared_rewards', 'Relational Reward'),
    ('final_dis2landmark', 'Dist. to Landmark '),
]
# Row titles keyed by network, so they stay correct if `networks` is reordered or shortened.
row_titles = {
    'self-interested': '(a) Survivalist (or Self-interested)',
    'fully-connected': '(b) Communitarian (or Fully-connected)',
    'authoritarian': '(c) Authoritarian',
    'collapsed authoritarian': '(d) Collapsed Authoritarian',
    'tribal': '(e) Tribal',
    'collapsed tribal': '(f) Collapsed Tribal',
}
label_font_size = 8
fs = 11
chunk_index = np.arange(num_chunks)

# squeeze=False keeps `axs` two-dimensional even when a single network is plotted.
fig, axs = plt.subplots(len(networks), len(panel_specs), figsize=(7, 12), squeeze=False)

for n, network in enumerate(networks):
    # Per-run logs, each of shape (num_train_runs, num_episodes, num_agents).
    per_run = {key: np.zeros((num_train_runs, num_episodes, num_agents)) for key, _ in panel_specs}

    for i in range(num_train_runs):
        for rsrn_type in rsrn_types:
            for agent_limitation in agent_limitations:
                path = ('./saved_policy/' + str(num_agents) + '-agent_' + rsrn_type + '_' + network + '_' + agent_limitation + '_' + str(i+1) + '/')
                # NOTE(security): pickle.load can execute arbitrary code -- only load your own logs.
                with gzip.open(path+'train_log.pkl.gz', 'rb') as f:
                    run = pickle.load(f)
                for key in per_run:
                    per_run[key][i, :, :] = run[key]

    for col, (key, y_label) in enumerate(panel_specs):
        ax = axs[n, col]

        # Average over training runs, then summarize consecutive chunks of episodes.
        # The band is the standard deviation of the run-averaged curve inside each chunk.
        run_mean = per_run[key].mean(axis=0)
        chunks = run_mean[:num_chunks * chunk_length].reshape(num_chunks, chunk_length, num_agents)
        chunk_mean = chunks.mean(axis=1)
        chunk_std = chunks.std(axis=1)

        for agent in range(num_agents):
            ax.plot(chunk_index, chunk_mean[:, agent], lw=1, alpha=0.8, color=p[agent])
            ax.fill_between(chunk_index,
                            chunk_mean[:, agent] - chunk_std[:, agent],
                            chunk_mean[:, agent] + chunk_std[:, agent],
                            color=p[agent], alpha=0.2, linewidth=0.0)

        ax.set_ylabel(y_label, fontsize=label_font_size)
        ax.set_xlabel(f'Episode (x{chunk_length})', fontsize=label_font_size)
        ax.set_xlim([0, num_chunks])
        if key == 'final_dis2landmark':
            ax.set_ylim([0, 1.3])
        else:
            # Reference line at 70 -- presumably the maximum cumulative reward of a
            # 70-timestep episode (TODO confirm against the training setup).
            ax.axhline(y=70, color='r', linestyle='--')

    # The row title sits above the middle panel.
    axs[n, 1].set_title(row_titles.get(network, network), fontsize=fs, loc='center')

# set tick label font size to 8
for ax in axs.flat:
    ax.tick_params(axis='both', which='major', labelsize=8)
    ax.tick_params(axis='both', which='minor', labelsize=8)

fig.tight_layout()
plt.show()

## 2-Agent Behavioral Analysis (Different Scalarization - Fully-connected)

In [52]:

# Metric passed to plot_episode below; it is the column suffix after 'agent_<k>_'.
variable_of_interest = 'closest_landmark_distance' # options: 'closest_landmark_distance', 'indv_reward'


sns.set_style("whitegrid")
sns.set_context("paper")
sns.set_palette("colorblind")
# Close figures left over from earlier cells before drawing new ones.
plt.close('all')
def plot_episode(variable_of_interestm, num_agents, rsrn_types, agent_limitations, network='fully-connected'):
    """Plot a per-timestep metric for every agent on a scalarization x limitation grid.

    Rows are the scalarization types (``rsrn_types``) and columns are the limitation cases
    (``agent_limitations``). For each agent the solid line is the median at each timestep
    with a 50% percentile interval; the faint dashed line is the mean. The data comes from
    the notebook-level ``df`` and the colors from ``agent_colors``.

    NOTE: this definition shares its name with the 3-agent ``plot_episode`` defined earlier
    in the notebook; whichever cell ran last is the one in effect.

    Parameters
    ----------
    variable_of_interestm : str
        Column suffix to plot; the column read is ``agent_<k>_<suffix>``. The misspelled
        parameter name is kept so existing calls keep working.
    num_agents : int
        Number of agents; the last one is labelled with the limitation, the others 'normal'.
    rsrn_types : list of str
        Values of the 'rsrn_type' column, one subplot row each.
    agent_limitations : list of str
        Values of the 'agent_limitation' column, one subplot column each.
    network : str
        Value of the 'network' column to select. It used to be read from a global variable
        left behind by loops in other cells.
    """
    # Use the argument rather than whatever global `variable_of_interest` happens to hold.
    variable_of_interest = variable_of_interestm

    y_labels = {
        'closest_landmark_distance': 'Dist. to Closest Landmark',
        'indv_reward': 'Individual Reward',
    }

    sns.set(font_scale=1)
    sns.set_style("whitegrid")
    # The grid is indexed as axs[rsrn_type, agent_limitation]; squeeze=False keeps it 2-D.
    fig, axs = plt.subplots(len(rsrn_types), len(agent_limitations), figsize=(9, 9), squeeze=False)

    for j, agent_limitation in enumerate(agent_limitations):
        for i, rsrn_type in enumerate(rsrn_types):
            ax = axs[i, j]
            df_ep = df.loc[(df['rsrn_type'] == rsrn_type) & (df['network'] == network) & (df['agent_limitation'] == agent_limitation)]
            for agent_index in range(1, num_agents + 1):
                if agent_index == num_agents:
                    label = f'Agent {agent_index} ({agent_limitation})'
                else:
                    label = f'Agent {agent_index} (normal)'
                column = f'agent_{agent_index}_{variable_of_interest}'
                # agent_colors (not the cell-level `p`) is used for both lines: other cells
                # rebind `p`, which would break or recolor the median line.
                color = agent_colors[agent_index - 1]

                # Median with inter-quartile band.
                sns.lineplot(data=df_ep, x='timestep', y=column, ax=ax,
                             label=label,
                             errorbar=('pi', 50),
                             color=color,
                             estimator=np.median)
                # Mean, drawn faintly for comparison.
                sns.lineplot(data=df_ep, x='timestep', y=column, ax=ax,
                             legend=False, errorbar=None,
                             estimator=np.mean,
                             linestyle='--',
                             color=color,
                             alpha=0.3)

            ax.set_title(f'{rsrn_type}', fontsize=14, loc='center', pad=-20)

            ax.set_xlabel('Timestep', color='gray')
            if variable_of_interest in y_labels:
                ax.set_ylabel(y_labels[variable_of_interest], color='gray')

            ax.tick_params(axis='x', colors='gray')
            ax.tick_params(axis='y', colors='gray')

            # One legend per column, attached to the top row, names the case.
            if i == 0:
                ax.legend(loc='upper left', bbox_to_anchor=(0.1, 1.6), title=f'Case {j+1}', title_fontsize=12)
            else:
                ax.legend().remove()
            ax.set_xlim([0, 69])

    # Give every panel the same y-range: 1.3 times the upper limit of the first panel.
    ylim = axs[0, 0].get_ylim()[1] * 1.3
    for ax in axs.flat:
        ax.set_ylim([0, ylim])

    fig.subplots_adjust(top=0.86,
                        bottom=0.064,
                        left=0.081,
                        right=0.98,
                        hspace=0.285,
                        wspace=0.421)
    # NOTE(review): tight_layout runs last and recomputes the spacing set just above;
    # both calls are kept as in the original -- confirm which layout is intended.
    plt.tight_layout()
    plt.show()

# Draw the scalarization x limitation grid for the 2-agent experiments.
plot_episode(
    variable_of_interest,
    num_agents=2,
    rsrn_types=['WSM', 'MiniMax', 'WPM'],
    agent_limitations=['normal', 'slow', 'stuck'],
)


## 3-Agent Heatmap of First/Last Timesteps

In [77]:
def plot_normalized_heatmap_at_timestep(df, ax, agent_number, timestep_of_interest, setting_filters,
                                        num_runs=None, bins=(50, 50)):
    """Draw a heatmap of where one agent is at a given timestep, as a percentage of episodes.

    The agent's (x, y) positions at ``timestep_of_interest`` are binned on a regular grid
    covering [-1.2, 1.2] in both directions (positions outside the grid are ignored). Each
    cell shows ``100 * count / (unique episodes * number of runs)``; the color scale is
    fixed to 0-1 %.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'timestep', 'episode', ``agent_<n>_x``, ``agent_<n>_y`` and every
        column named in ``setting_filters``.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    agent_number : int
        1-based agent index used to build the position column names.
    timestep_of_interest : int
        Value of the 'timestep' column to select.
    setting_filters : dict
        ``{column: value}`` equality filters applied before binning.
    num_runs : int, optional
        Number of training runs in the selection. When omitted it is taken from the
        'run_number' column of the selected rows, falling back to 10 (the previously
        hard-coded value) if that column is missing.
    bins : tuple of int
        Number of bins along (x, y).

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the heatmap.
    """
    agent_x_col = f'agent_{agent_number}_x'
    agent_y_col = f'agent_{agent_number}_y'
    x_num_bins, y_num_bins = bins

    # Apply the setting filters, then keep only the timestep of interest.
    df_filtered = df
    for column, value in setting_filters.items():
        df_filtered = df_filtered[df_filtered[column] == value]
    df_filtered_at_timestep = df_filtered[df_filtered['timestep'] == timestep_of_interest]

    # Normalization constant: episodes per run times the number of runs actually selected.
    if num_runs is None:
        if 'run_number' in df_filtered_at_timestep.columns:
            num_runs = df_filtered_at_timestep['run_number'].nunique()
        else:
            num_runs = 10
    total_episodes = df_filtered_at_timestep['episode'].nunique() * num_runs

    # Bin edges from -1.2 to 1.2; labels=False makes pd.cut return the integer bin index
    # (NaN for positions that fall outside the grid).
    x_bin_edges = np.linspace(-1.2, 1.2, x_num_bins + 1)
    y_bin_edges = np.linspace(-1.2, 1.2, y_num_bins + 1)
    x_idx = pd.cut(df_filtered_at_timestep[agent_x_col], bins=x_bin_edges, labels=False)
    y_idx = pd.cut(df_filtered_at_timestep[agent_y_col], bins=y_bin_edges, labels=False)
    inside = (x_idx.notna() & y_idx.notna()).to_numpy()

    # Count samples per cell; rows of the grid are y bins and columns are x bins.
    counts = np.zeros((y_num_bins, x_num_bins))
    np.add.at(counts,
              (y_idx.to_numpy()[inside].astype(int), x_idx.to_numpy()[inside].astype(int)),
              1)

    # Percentage of episodes per cell; an empty selection yields an all-zero map.
    if total_episodes > 0:
        heatmap_data_normalized = counts / total_episodes * 100
    else:
        heatmap_data_normalized = counts

    ax = sns.heatmap(heatmap_data_normalized, cmap="turbo", ax=ax, vmin=0, vmax=1)

    # Label the axes with positions instead of bin numbers. Cell k spans [k, k + 1] in
    # heatmap coordinates, so the bin edges (and the +/-1.2 extremes) sit at 0..num_bins.
    num_ticks = 7
    ax.set_xticks(np.linspace(0, x_num_bins, num_ticks))
    ax.set_yticks(np.linspace(0, y_num_bins, num_ticks))
    ax.set_xticklabels(np.linspace(-1.2, 1.2, num_ticks).round(2))
    ax.set_yticklabels(np.linspace(-1.2, 1.2, num_ticks).round(2))

    # Label the colorbar and gray out its ticks.
    color_bar = ax.collections[0].colorbar
    color_bar.set_label('Percentage of Presence (%)', color='gray')
    color_bar.ax.tick_params(axis='y', colors='gray')

    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_title(f'Agent {agent_number} at Timestep {timestep_of_interest}')
    # Gray ticks and tick labels, set on this axes explicitly.
    ax.tick_params(axis='x', colors='gray')
    ax.tick_params(axis='y', colors='gray')
    ax.axis('equal')
    return ax

# One row per agent; left column is the first timestep, right column the last one.
plt.close('all')
sns.set_style("white")
fig, axs = plt.subplots(3, 2, figsize=(8, 12))
specific_setting = {
    'rsrn_type': 'WPM',
    'network': 'fully-connected',
    'agent_limitation': 'slow'
}
for agent_number in (1, 2, 3):
    for column, timestep in enumerate((0, 69)):
        plot_normalized_heatmap_at_timestep(
            df,
            ax=axs[agent_number - 1, column],
            agent_number=agent_number,
            timestep_of_interest=timestep,
            setting_filters=specific_setting,
        )

# Save the figure as PNG, SVG and PDF.
plt.tight_layout()
plt.savefig('3-agent_heatmap_v2.png', dpi=300)
plt.savefig('3-agent_heatmap_v2.svg')
plt.savefig('3-agent_heatmap_v2.pdf')
plt.show()

(50, 50)
(50, 50)
(50, 50)
(50, 50)
(50, 50)
(50, 50)


## 2-Agent Histogram of First/Last Timesteps

In [56]:
BLUE = [0, 0.4470, 0.7410]
RED = [0.8500, 0.3250, 0.0980]
YELLOW = [0.929, 0.6940, 0.1250]

# The hue order below lists agent 2 before agent 1, so RED comes first in this palette.
# It has its own name so the agent-ordered palette `p` used by other cells is not overwritten.
hist_palette = [RED, BLUE, YELLOW]
sns.set_palette(hist_palette)

plt.close('all')

agent_limitations = ['normal', 'slow', 'stuck']
rsrn_types = ['WSM', 'MiniMax', 'WPM']

distance_columns = ['agent_2_closest_landmark_distance', 'agent_1_closest_landmark_distance']
# (subplot column, timestep, x-axis caption of the bottom panel)
snapshots = [
    (0, 0, 'Closest landmark distance \n (at the first timestep)'),
    (1, 69, 'Closest landmark distance \n (at the last timestep)'),
]

# One figure per limitation case; rows are scalarization types.
for j, agent_limitation in enumerate(agent_limitations):
    fig, axs = plt.subplots(len(rsrn_types), 2, figsize=(8, 10), squeeze=False)
    for row, rsrn_type in enumerate(rsrn_types):
        for col, timestep, _ in snapshots:
            ax = axs[row, col]
            df_ = df[(df['rsrn_type'] == rsrn_type) &
                     (df['agent_limitation'] == agent_limitation) &
                     (df['timestep'] == timestep)]

            # Long format: one row per (sample, agent), holding that agent's closest-landmark distance.
            melted_df = df_.melt(value_vars=distance_columns, var_name='agent_name', value_name='value')
            melted_df = melted_df.sort_values(by=['agent_name'])
            sns.histplot(ax=ax, data=melted_df, x='value', hue='agent_name', element='step',
                         common_norm=False, bins=25, stat='percent', legend=False,
                         binrange=(0, 1.0), hue_order=distance_columns)

            ax.set_xlim([0, 1.0])
            ax.set_ylim([0, 100.1])
            ax.set_title(rsrn_type)
            ax.set_xlabel('')
            ax.set_ylabel('Percent', color='gray')
            ax.tick_params(axis='x', colors='gray')
            ax.tick_params(axis='y', colors='gray')

    # Only the bottom row carries x-axis captions.
    for col, _, caption in snapshots:
        axs[-1, col].set_xlabel(caption, color='gray')

    # place legend outside of the plot
    axs[0,0].legend(['agent 1 (normal)', f'agent 2 ({agent_limitation})'], loc='upper left', bbox_to_anchor=(0.8, 1.5), title=f'Case {j+1}', title_fontsize=12,)
    # increase space at the bottom
    fig.subplots_adjust(bottom=0.1, wspace=0.3, hspace=0.3)
    fig.savefig(f'hist_{agent_limitation}.svg', dpi=300)
# plt.show()

## 2-Agent Histogram of First/Last Timestep (fully connected)

In [141]:
BLUE = [0, 0.4470, 0.7410]
RED = [0.8500, 0.3250, 0.0980]
YELLOW = [0.929, 0.6940, 0.1250]

# The hue order below lists agent 2 before agent 1, so RED comes first in this palette.
# It has its own name so the agent-ordered palette `p` used by other cells is not overwritten.
hist_palette = [RED, BLUE, YELLOW]
sns.set_palette(hist_palette)


agent_limitations = ['normal', 'slow', 'stuck']
rsrn_types = ['WSM', 'MiniMax', 'WPM']

distance_columns = ['agent_2_closest_landmark_distance', 'agent_1_closest_landmark_distance']
# (subplot column, timestep, x-axis caption of the bottom panel)
snapshots = [
    (0, 0, 'Closest landmark distance \n (at the first timestep)'),
    (1, 69, 'Closest landmark distance \n (at the last timestep)'),
]

# One figure per limitation case; rows are scalarization types.
for j, agent_limitation in enumerate(agent_limitations):
    fig, axs = plt.subplots(len(rsrn_types), 2, figsize=(8, 10), squeeze=False)
    for row, rsrn_type in enumerate(rsrn_types):
        for col, timestep, _ in snapshots:
            ax = axs[row, col]
            df_ = df[(df['rsrn_type'] == rsrn_type) &
                     (df['agent_limitation'] == agent_limitation) &
                     (df['timestep'] == timestep)]

            # Long format: one row per (sample, agent), holding that agent's closest-landmark distance.
            melted_df = df_.melt(value_vars=distance_columns, var_name='agent_name', value_name='value')
            melted_df = melted_df.sort_values(by=['agent_name'])
            sns.histplot(ax=ax, data=melted_df, x='value', hue='agent_name', element='step',
                         common_norm=False, bins=25, stat='percent', legend=False,
                         binrange=(0, 1.0), hue_order=distance_columns)

            ax.set_xlim([0, 1.0])
            ax.set_ylim([0, 100.1])
            ax.set_title(rsrn_type)
            ax.set_xlabel('')

    # Only the bottom row carries x-axis captions.
    for col, _, caption in snapshots:
        axs[-1, col].set_xlabel(caption)

    # place legend outside of the plot
    axs[0,0].legend(['agent 1 (normal)', f'agent 2 ({agent_limitation})'], loc='upper left', bbox_to_anchor=(0.8, 1.5), title=f'Case {j+1}', title_fontsize=12,)
    # increase space at the bottom
    fig.subplots_adjust(bottom=0.1, wspace=0.3, hspace=0.3)
    fig.savefig(f'hist_{agent_limitation}.png', dpi=300)
# plt.show()


## Reward Function Shape

In [174]:
def reward_function(d):
    """Return the reward for a distance ``d``: ``exp(-d**2 / 0.1)``, floored at 0.01."""
    return np.maximum(np.exp(-(d**2)/.1),0.01)

def plot_reward_function(df, ax, num_agents=2, resolution=1000):
    """Draw the individual-reward landscape of an environment with evenly spaced landmarks.

    ``num_agents`` landmarks are placed evenly on a circle of radius 0.5 around the origin.
    For every point of a ``resolution`` x ``resolution`` grid over [-1.2, 1.2]^2 the reward
    of the distance to the closest landmark is computed and drawn as filled contours with
    labelled white contour lines on top. A colorbar is added for the 3-agent panel only.

    Parameters
    ----------
    df : object
        Unused; kept so existing calls keep working.
    ax : matplotlib.axes.Axes
        Axes to draw on. The colorbar is attached through ``ax.figure``.
    num_agents : int
        Number of agents, which is also the number of landmarks. Must be at least 1.
    resolution : int
        Number of grid points per axis.

    Raises
    ------
    ValueError
        If ``num_agents`` is smaller than 1.
    """
    if num_agents < 1:
        raise ValueError(f'num_agents must be at least 1, got {num_agents}')

    x = np.linspace(-1.2, 1.2, resolution)
    y = np.linspace(-1.2, 1.2, resolution)
    X, Y = np.meshgrid(x, y)

    # Landmarks distributed evenly on a circle of radius 0.5.
    angles = 2*np.pi/num_agents*np.arange(num_agents)
    landmark_locations = 0.5 * np.column_stack([np.cos(angles), np.sin(angles)])

    # Distance from every grid point to each landmark, computed on whole arrays; the
    # minimum over landmarks is the distance to the closest one.
    distances = np.stack([np.sqrt((X - lx) ** 2 + (Y - ly) ** 2) for lx, ly in landmark_locations])
    Z = reward_function(distances.min(axis=0))

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    # Panel letters exist only for the two published panels; other counts get no prefix.
    panel_prefix = {2: '(a) ', 3: '(b) '}.get(num_agents, '')
    ax.set_title(f'{panel_prefix}{num_agents}-Agent-{num_agents}-Landmark Environment')

    cs = ax.contourf(X, Y, Z, 50, cmap='cividis')
    if num_agents == 3:
        # Attach the colorbar to the figure that owns `ax` instead of a global `fig`.
        cbar = ax.figure.colorbar(cs, ax=ax)
        cbar.set_label('Individual Reward')
        # Make sure 0 and 1 are part of the colorbar ticks.
        cbar.set_ticks([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])

    # Labelled contour lines on top of the filled contours.
    cs = ax.contour(X, Y, Z, [0.01, 0.1, 0.25, 0.5, 0.75, 0.95], colors='white', linewidths=0.5)
    ax.clabel(cs, inline=1, fontsize=8)
    ax.set_aspect('equal', 'box')

    # Seven evenly spaced ticks across the plotted range.
    num_ticks = 7
    ax.set_xticks(np.linspace(-1.2, 1.2, num_ticks))
    ax.set_yticks(np.linspace(-1.2, 1.2, num_ticks))

plt.close('all')
fig, axs = plt.subplots(1,2,figsize=(9.5,4))
# plot_reward_function never reads its first argument, so None is passed instead of `df`.
# This lets the cell run without first loading the saved trajectories.
plot_reward_function(None, axs[0], num_agents=2)
plot_reward_function(None, axs[1], num_agents=3)
# tight layout
fig.tight_layout()
# save figure to file
fig.savefig('reward_function.png', dpi=300)
plt.show()

## Displaying the trajectory of any given episode

In [366]:
import numpy as np
import matplotlib.pyplot as plt
import random

# Parameters
num_agents = 2
r_l = 0.05  # Landmark radius
r_a = 0.2   # Agent radius

# Landmark locations: evenly spaced on a circle of radius 0.5 around the origin.
landmark_locations = []
for i in range(num_agents):
    landmark_locations.append([np.cos(2*np.pi/num_agents*i), np.sin(2*np.pi/num_agents*i)])
landmark_locations = 0.5 * np.array(landmark_locations)

# Agent locations (hand-picked example positions, one row per agent)
agent_locations = np.array([[0.56, 0.5], [-0.0, -0.0]])

# Plotting
fig, ax = plt.subplots()

# Draw landmarks. Only the first one gets a legend label; the index decides this, because
# an identity check (`is`) against a row of a NumPy array never succeeds -- every indexing
# operation creates a new view object.
for index, location in enumerate(landmark_locations):
    landmark = plt.Circle(location, r_l, color='gray', label='Landmark' if index == 0 else '_nolegend_')
    ax.add_patch(landmark)

# Draw agents, colored by agent index; again only the first one is labelled.
for index, location in enumerate(agent_locations):
    agent = plt.Circle(location, r_a, color=agent_colors[index], label='Agent' if index == 0 else '_nolegend_')
    ax.add_patch(agent)

# Set limits and aspect
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.set_aspect('equal')

# Legend: keep one entry per distinct label.
handles, labels = ax.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
ax.legend(by_label.values(), by_label.keys())

# Show plot
plt.show()
