## PPO performance analysis and deep dive

**Questions**
- What kind of scenes are hard to learn?
    - Scene properties: number of agents, number of intersections, etc.


**Procedure**
- Take a trained PPO model
- Evaluate model:
    - Single-agent mode (use the policy to control a single agent; the rest are expert-controlled)
    - Multi-agent mode (use policy to control all agents in the scene)

In [1]:
# Dependencies
import glob
import logging
import os
import warnings

import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from IPython.display import Image, display

from evaluation.policy_evaluation import evaluate_policy
from networks.perm_eq_late_fusion import LateFusionNet, LateFusionPolicy
from utils.config import load_config_nb
from utils.plot import plot_agent_trajectory
from utils.sb3.reg_ppo import RegularizedPPO

sns.set('notebook', font_scale=1.1, rc={'figure.figsize': (10, 5)})
sns.set_style('ticks', rc={'figure.facecolor': 'none', 'axes.facecolor': 'none'})
%config InlineBackend.figure_format = 'svg'
warnings.filterwarnings("ignore")
plt.set_loglevel('WARNING')

### Configurations 

In [68]:
# --- Evaluation settings -------------------------------------------------
MAX_FILES = 1_000
DETERMINISTIC = True
SELECT_FROM = 1_000          # passed as select_from_k_scenes to evaluate_policy
NUM_EVAL_EPISODES = 1_000    # passed as num_episodes to evaluate_policy
METRICS = ['goal_rate', 'off_road', 'veh_veh_collision']

# --- Dataset locations ---------------------------------------------------
TRAIN_DATA_PATH = '../data_full/train'
TEST_DATA_PATH = '../data_full/valid'

# --- Config objects (loaded in the same order as listed) -----------------
env_config, exp_config, video_config, model_config = (
    load_config_nb(config_name)
    for config_name in ("env_config", "exp_config", "video_config", "model_config")
)

# Override the number of files on the environment config with MAX_FILES.
env_config.num_files = MAX_FILES

# Logging level name; this cell only stores it and does not apply it.
LOGGING_LEVEL = "INFO"

# Scenes on which to evaluate the models; sorted so the file order is fixed.
# NOTE(review): the listing reads env_config.data_path, not TRAIN_DATA_PATH --
# confirm the two point at the same directory.
train_file_paths = glob.glob(f"{env_config.data_path}/tfrecord*")
train_eval_files = sorted(map(os.path.basename, train_file_paths))

### Helper functions 

In [69]:
def create_video(image_array, filename, *, fps=4, loop=500):
    """Write a sequence of frames to ``filename`` using an imageio writer.

    Args:
        image_array: Iterable of frames. Each frame has its axes (0, 1, 2)
            moved to positions (2, 0, 1) before being appended (presumably
            channel-first -> channel-last; confirm against the frame source).
        filename: Output path handed to ``imageio.get_writer``.
        fps: Frames per second; the writer receives ``duration=1_000 / fps``.
        loop: Forwarded unchanged to ``imageio.get_writer``.
    """
    frame_duration = 1_000 / fps
    with imageio.get_writer(filename, duration=frame_duration, loop=loop) as out:
        for frame in image_array:
            reordered = np.moveaxis(frame, (0, 1, 2), (2, 0, 1))
            out.append_data(reordered)


def display_gif(filename, width=500, height=500):
    """Show a GIF file inline in the notebook output.

    Uses ``Image`` and ``display`` from ``IPython.display``.

    Args:
        filename: Path of the GIF file to read.
        width: Width forwarded to ``Image``.
        height: Height forwarded to ``Image``.
    """
    # Read through a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(filename, "rb") as gif_file:
        gif_bytes = gif_file.read()
    display(Image(data=gif_bytes, format="gif", width=width, height=height))

### Load trained PPO policy

In [70]:
# Checkpoint directory, keyed by MAX_FILES.
RL_BASE_PATH = "../models/hr_rl/S{}".format(MAX_FILES)
# NOTE(review): the policy name hardcodes "S1000"; keep it in sync with
# MAX_FILES if that constant changes.
POLICY_NAME = 'policy_L0.0_S1000_I606'
rl_policy = RegularizedPPO.load("/".join((RL_BASE_PATH, POLICY_NAME)))

INFO:root:No regularization weight specified, using default PPO.


## 1. Aggregate performance

### 1.1 Single-agent mode | **train**

In [71]:
# Single-agent evaluation (controlled_agents=1) on the training data.
# The 'Class' column labels these rows for the later groupby.
df_res_sa_train = evaluate_policy(
    policy=rl_policy,
    mode='policy',
    env_config=env_config,
    data_path=TRAIN_DATA_PATH,
    controlled_agents=1,
    select_from_k_scenes=SELECT_FROM,
    num_episodes=NUM_EVAL_EPISODES,
).assign(Class='SA_train')

100%|██████████| 1000/1000 [01:10<00:00, 14.10it/s]


In [72]:
# Mean of each metric over all rows, scaled by 100 (percent).
100 * df_res_sa_train[METRICS].mean()

goal_rate            73.8
off_road              4.4
veh_veh_collision    23.6
dtype: float64

### 1.2 Single-agent mode | **test**

In [73]:
# Single-agent evaluation (controlled_agents=1) on the held-out data.
# The 'Class' column labels these rows for the later groupby.
df_res_sa_test = evaluate_policy(
    policy=rl_policy,
    mode='policy',
    env_config=env_config,
    data_path=TEST_DATA_PATH,
    controlled_agents=1,
    select_from_k_scenes=SELECT_FROM,
    num_episodes=NUM_EVAL_EPISODES,
).assign(Class='SA_test')

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [01:08<00:00, 14.68it/s]


In [74]:
# Mean of each metric over all rows, scaled by 100 (percent).
100 * df_res_sa_test[METRICS].mean()

goal_rate            74.7
off_road              3.9
veh_veh_collision    22.0
dtype: float64

### 1.3 Multi-agent mode | **train**

In [75]:
# Multi-agent evaluation (controlled_agents=20) on the training data.
# The 'Class' column labels these rows for the later groupby.
df_res_ma_train = evaluate_policy(
    policy=rl_policy,
    mode='policy',
    env_config=env_config,
    data_path=TRAIN_DATA_PATH,
    controlled_agents=20,
    select_from_k_scenes=SELECT_FROM,
    num_episodes=NUM_EVAL_EPISODES,
).assign(Class='MA_train')

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [03:51<00:00,  4.32it/s]


In [76]:
# Mean of each metric over all rows, scaled by 100 (percent).
100 * df_res_ma_train[METRICS].mean()

goal_rate            83.030303
off_road              4.646465
veh_veh_collision    13.246753
dtype: float64

### 1.4 Multi-agent mode | **test**

In [77]:
# Multi-agent evaluation (controlled_agents=20) on the held-out data.
# The 'Class' column labels these rows for the later groupby.
df_res_ma_test = evaluate_policy(
    policy=rl_policy,
    mode='policy',
    env_config=env_config,
    data_path=TEST_DATA_PATH,
    controlled_agents=20,
    select_from_k_scenes=SELECT_FROM,
    num_episodes=NUM_EVAL_EPISODES,
).assign(Class='MA_test')

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [03:44<00:00,  4.45it/s]


In [78]:
# Mean of each metric over all rows, scaled by 100 (percent).
100 * df_res_ma_test[METRICS].mean()

goal_rate            84.811432
off_road              4.110194
veh_veh_collision    11.785504
dtype: float64

## 2. Properties of hard scenes

In [81]:
# Stack the four labelled result frames (SA/MA x train/test) row-wise.
df_all = pd.concat(
    objs=(df_res_sa_train, df_res_sa_test, df_res_ma_train, df_res_ma_test),
    axis=0,
)

In [82]:
# Mean of each metric per evaluation class (fractions, not percentages).
(
    df_all
    .groupby(by='Class')[METRICS]
    .mean()
)

Unnamed: 0_level_0,goal_rate,off_road,veh_veh_collision
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MA_test,0.848114,0.041102,0.117855
MA_train,0.830303,0.046465,0.132468
SA_test,0.747,0.039,0.22
SA_train,0.738,0.044,0.236


In [91]:
# Per-scene totals of each metric, ordered from highest to lowest summed
# goal_rate.
# NOTE(review): these are sums, so the ranking also depends on how many rows
# each scene contributed; a per-scene mean may be a better difficulty measure.
(
    df_all
    .groupby(by='scene_id')[METRICS]
    .sum()
    .sort_values(by='goal_rate', ascending=False)
)

Unnamed: 0_level_0,goal_rate,off_road,veh_veh_collision
scene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
tfrecord-00000-of-00150_170.json,74.0,2.0,0.0
tfrecord-00004-of-00150_57.json,59.0,4.0,9.0
tfrecord-00003-of-00150_203.json,58.0,0.0,5.0
tfrecord-00011-of-00150_69.json,57.0,0.0,6.0
tfrecord-00010-of-00150_84.json,56.0,0.0,8.0
...,...,...,...
tfrecord-00002-of-00150_268.json,0.0,0.0,2.0
tfrecord-00002-of-00150_68.json,0.0,0.0,3.0
tfrecord-00132-of-01000_254.json,0.0,0.0,2.0
tfrecord-00011-of-00150_250.json,0.0,0.0,4.0
