In [1]:
# Dependencies
import pandas as pd
import seaborn as sns
import numpy as np
from IPython.display import HTML, Image
import warnings
from nocturne.envs.base_env import BaseEnv
from pathlib import Path
from pyvirtualdisplay import Display
import pickle
import imageio
import pandas as pd
import seaborn as sns
import logging
import matplotlib.pyplot as plt
import torch
from utils.config import load_config_nb
from utils.sb3.reg_ppo import RegularizedPPO
from utils.policies import load_policy

sns.set('notebook', font_scale=1.1, rc={'figure.figsize': (8, 3)})
sns.set_style('ticks', rc={'figure.facecolor': 'none', 'axes.facecolor': 'none'})
%config InlineBackend.figure_format = 'svg'
warnings.filterwarnings("ignore")
plt.set_loglevel('WARNING')

### Configurations

In [2]:
# Trained policies
model_config = load_config_nb('models_main_paper')

# Environment settings
env_config = load_config_nb('env_config')
env_config.data_path = '../data/train_no_tl'

# Single or multi-agent
env_config.max_num_vehicles = 1

# Videos
VIDEO_PATH = f'../evaluation/videos/intersecting_paths'
TRAJ_PATH = f'../evaluation/figures/trajectories'

### Trained models

In [3]:
model_config['used_human_policy'][0]

Box({'name': 'human_policy_D651_S500_02_18_20_05_AV_ONLY', 'agent': 'BC', 'train_agent': '-', 'num_scenes': 200, 'wandb_run': '-'})

In [4]:
# IL policy
human_policy = load_policy(
    data_path=f'../{model_config.bc_models_dir}',
    file_name=f'{model_config["used_human_policy"][0].name}',
)


# RL policies
best_ppo = model_config.best_overall_models[0].name
best_hr_ppo = model_config.best_overall_models[2].name

ppo_policy = RegularizedPPO.load(
    f'../{model_config.hr_ppo_models_dir_self_play}/{best_ppo}'
)

hr_ppo_policy = RegularizedPPO.load(
    f'../{model_config.hr_ppo_models_dir_self_play}/{best_hr_ppo}'
)

logging.info(f'\n \n Using PPO policy: {best_ppo} and HR-PPO policy: {best_hr_ppo}\n')

INFO:root:No regularization weight specified, using default PPO.
INFO:root:Using regularization loss: None
INFO:root:No regularization weight specified, using default PPO.
INFO:root:Using regularization loss: None
INFO:root:
 
 Using PPO policy: policy_L0.0_S200_I3000.zip and HR-PPO policy: policy_L0.07_S100_I3065.zip



### Helper functions

In [5]:
def create_video(image_array, filename, *, fps=5, loop=500):
    with imageio.get_writer(filename, duration=1_000 / fps, loop=loop) as writer:
        for img in image_array:
            img = np.moveaxis(img, [0, 1, 2], [2, 0, 1])
            writer.append_data(img)

def display_gif(filename, width=500, height=500):
    display(
        Image(
            data=open(filename, "rb").read(), format="gif", width=width, height=height
        )
    )

### Load dataframe to select scenarios from

In [6]:
df = pd.read_csv('../evaluation/results/df_paper_agg_performance_03_07_14_02_200train_scenes_10_000_test_final.csv')
df_scenes = df[['scene_id', 'num_total_vehs', 'veh_int_paths', 'tot_int_paths', 'goal_rate', 'off_road', 'veh_veh_collision', 'Dataset']]

In [7]:
# Settings
NUM_INTERSECTING_PATHS = 1
GOAL_RATE = 1
MAX_TOTAL_VEHICLES = 5

df_scenes = df_scenes[df_scenes['Dataset'] == 'Train']
df_scenes = df_scenes[(df_scenes['veh_int_paths'] == NUM_INTERSECTING_PATHS) & (df_scenes['goal_rate'] == GOAL_RATE & (df_scenes['num_total_vehs'] < MAX_TOTAL_VEHICLES))]

In [8]:
df_scenes.head()

Unnamed: 0,scene_id,num_total_vehs,veh_int_paths,tot_int_paths,goal_rate,off_road,veh_veh_collision,Dataset
112,tfrecord-00073-of-01000_439.json,2,1.0,1.0,1.0,0.0,0.0,Train
113,tfrecord-00073-of-01000_439.json,2,1.0,1.0,1.0,0.0,0.0,Train
139,tfrecord-00031-of-01000_17.json,11,1.0,2.0,0.0,0.0,1.0,Train
200,tfrecord-00031-of-01000_10.json,4,1.0,2.0,1.0,0.0,0.0,Train
204,tfrecord-00067-of-01000_85.json,4,1.0,1.0,1.0,0.0,0.0,Train


### Render

In [9]:
# Sample a random scene
random_scene = df_scenes.sample(n=1) 
display(random_scene)
scene_name = str(random_scene.scene_id.values[0])

Unnamed: 0,scene_id,num_total_vehs,veh_int_paths,tot_int_paths,goal_rate,off_road,veh_veh_collision,Dataset
30646,tfrecord-00000-of-01000_470.json,6,1.0,2.0,0.0,0.0,1.0,Train


In [10]:
MODEL = "PPO"

In [None]:
if MODEL == "PPO":
    policy = ppo_policy

elif MODEL == "HR-PPO": 
    policy = hr_ppo_policy

env = BaseEnv(env_config)

obs_dict = env.reset(filename=scene_name)
frames = []

for time_step in range(env_config.episode_length):
    
    action_dict = {}
    for agent_id in obs_dict:
        # Get observation
        obs = torch.from_numpy(obs_dict[agent_id]).unsqueeze(dim=0)

        # Get action
        action, _ = policy.predict(obs, deterministic=True)
        action_dict[agent_id] = int(action)

    # Step
    obs_dict, rew_dict, done_dict, info_dict = env.step(action_dict)
    
    if done_dict['__all__']:
        last_frame = env.scenario.getImage(
                img_width=1000,
                img_height=1000,
                draw_target_positions=True,
                padding=50.0,
                sources=[env.controlled_vehicles[0]],
                view_width=100,
                view_height=100,
                rotate_with_source=False,
            )
        
        print(info_dict[agent_id])
        break 