# Behavioural Planning for Autonomous Highway Driving

We plan a trajectory using the _Optimistic Planning for Deterministic systems_ ([OPD](https://hal.inria.fr/hal-00830182)) algorithm.

## Setup

We first import useful modules for the environment, agent, and visualization.

In [3]:
# Environment
!pip install git+https://github.com/eleurent/highway-env#egg=highway-env
import gym
import highway_env

# Agent
!pip install git+https://github.com/eleurent/rl-agents#egg=rl-agents
from rl_agents.agents.common.factory import agent_factory

# Visualisation
import sys
from tqdm.notebook import trange
!git clone https://github.com/eleurent/highway-env.git
!pip install gym pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg
sys.path.insert(0, './highway-env/scripts/')
from utils import record_videos, show_videos, capture_intermediate_frames


fatal: destination path 'highway-env' already exists and is not an empty directory.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
python-opengl is already the newest version (3.1.0+dfsg-1).
ffmpeg is already the newest version (7:3.4.8-0ubuntu0.2).
xvfb is already the newest version (2:1.19.6-1ubuntu4.7).
0 upgraded, 0 newly installed, 0 to remove and 14 not upgraded.


## Run an episode

In [None]:
# Make environment
# record_videos / capture_intermediate_frames / show_videos are the helpers
# imported from highway-env's scripts/utils.py in the setup cell.
env = gym.make("merge-v0")
env = record_videos(env)
obs, done = env.reset(), False
capture_intermediate_frames(env)

# Make agent
agent_config = {
    "__class__": "<class 'rl_agents.agents.tree_search.deterministic.DeterministicPlannerAgent'>",
    "env_preprocessors": [{"method": "simplify"}],
    "budget": 50,
    "gamma": 0.7,
}
agent = agent_factory(env, agent_config)

# Run episode
# The config printed below has no "duration" entry for this environment, so
# the loop uses a fixed step budget rather than env.unwrapped.config["duration"].
N_STEPS = 40
episode_reward = 0.0
print(env.unwrapped.config)
try:
    for step in trange(N_STEPS, desc="Running..."):
        action = agent.act(obs)
        obs, reward, done, info = env.step(action)
        episode_reward += reward
        is_success = info.get('is_success', False)
        if done or is_success:
            print("Reward:", episode_reward, "Success?", is_success)
            # Start a new episode within the same step budget.
            episode_reward = 0.0
            obs = env.reset()
            # NOTE(review): if the recording wrapper swaps its recorder on
            # reset, capture_intermediate_frames(env) may need to be called
            # again here - confirm against scripts/utils.py.
finally:
    # Always close the environment, even if the agent or the simulator
    # raises mid-episode.
    env.close()
show_videos()

{'observation': {'type': 'TimeToCollision'}, 'action': {'type': 'DiscreteMetaAction'}, 'simulation_frequency': 15, 'policy_frequency': 1, 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle', 'screen_width': 600, 'screen_height': 150, 'centering_position': [0.3, 0.5], 'scaling': 5.5, 'show_trajectories': False, 'render_agent': True, 'offscreen_rendering': False, 'manual_control': False, 'real_time_rendering': False}


HBox(children=(FloatProgress(value=0.0, description='Running...', max=40.0, style=ProgressStyle(description_wi…