Install SUMO

In [None]:
# Set up SUMO, pinned to version 1.10.0.
# Refresh the apt package index before installing the system library below.
!apt-get update --fix-missing
# -y answers the confirmation prompt so the cell runs unattended.
!apt-get install -y libspatialindex-dev
%pip install --upgrade pip wheel
# SUMO itself comes from its PyPI distribution, pinned to an exact version.
%pip install eclipse-sumo==1.10.0
# NOTE(review): this path is hard-coded to Python 3.7's dist-packages directory;
# it presumably has to match the kernel's Python version -- confirm before
# running on a different runtime.
%env SUMO_HOME=/usr/local/lib/python3.7/dist-packages/sumo

Install SMARTS

In [None]:
# Install SMARTS from a fresh clone of the `develop` branch.
%cd ~
# Remove any previous checkout so the clone below always starts clean.
!rm -rf /content/SMARTS
!git clone https://github.com/huawei-noah/SMARTS /content/SMARTS
!cd /content/SMARTS && git checkout 'develop' && git pull
# %pip (rather than a bare `pip` in a subshell) installs into the running
# kernel's environment; the quotes keep the shell from treating
# [camera-obs] as a glob pattern.
%pip install "/content/SMARTS[camera-obs]"
# Put the checkout on the kernel's own import path. Piping this snippet into a
# separate `python` process would only change that short-lived process's
# sys.path and leave the kernel untouched.
import sys

if "/content/SMARTS/" not in sys.path:
    sys.path.insert(0, "/content/SMARTS/")

Install Stable Baselines3

In [None]:
# Use %pip (as the SUMO setup cell does) so the package is installed into the
# environment of the running kernel rather than whatever `pip` is on PATH.
%pip install stable_baselines3

Build the scenarios

In [None]:
# Build the scenario(s) under /content/SMARTS/scenarios/loop with the `scl`
# command-line tool.
# NOTE(review): `--clean` presumably discards previously generated build
# artifacts first -- confirm against the `scl scenario` help text.
!scl scenario build-all --clean /content/SMARTS/scenarios/loop

Restart the runtime (Ctrl+M .) so that the newly installed dependency versions take effect, then continue from the next cell:

In [None]:
# A runtime restart starts a new kernel process, so the SUMO_HOME value set
# with %env earlier is gone and has to be set again here.
# NOTE(review): the path is hard-coded to Python 3.7's dist-packages directory;
# it presumably has to match the kernel's Python version -- confirm.
%env SUMO_HOME=/usr/local/lib/python3.7/dist-packages/sumo
# Work from the SB3 example directory.
# NOTE(review): the `sb3.env.*` imports in the following cell presumably
# resolve relative to this directory -- confirm.
%cd /content/SMARTS/examples/sb3

Define the training routine (create the environment, train PPO, and evaluate)

In [None]:
import gym

from smarts.core import agent as smarts_agent
from smarts.core import agent_interface as smarts_agent_interface
from smarts.core import controllers as smarts_controllers
from smarts.env import hiway_env as smarts_hiway_env
import smarts.env.wrappers.rgb_image as smarts_rgb_image
import smarts.env.wrappers.single_agent as smarts_single_agent
import sb3.env.reward as reward
import sb3.env.action as action

from stable_baselines3 import PPO
from stable_baselines3.common import monitor
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy

def main(
    scenarios,
    headless,
    seed,
    sim_name,
    total_timesteps=500000,
    n_eval_episodes=10,
):
    """Train a PPO agent on a SMARTS HiWayEnv and report rewards before/after.

    Builds a single-agent ("Agent-007") environment that observes 64x64 RGB
    images and uses the continuous action space, evaluates an untrained PPO
    ``CnnPolicy`` model, trains it, evaluates it again and prints both
    results. The environment is always closed before returning, including
    when any step raises.

    Args:
        scenarios: Scenario paths forwarded to ``HiWayEnv(scenarios=...)``.
        headless: Forwarded to ``HiWayEnv(headless=...)``.
        seed: Forwarded to ``HiWayEnv(seed=...)``.
        sim_name: Forwarded to ``HiWayEnv(sim_name=...)``.
        total_timesteps: Forwarded to ``model.learn``. Defaults to 500000,
            the value that used to be hard-coded.
        n_eval_episodes: Forwarded to ``evaluate_policy`` for both the
            pre-training and the post-training evaluation. Defaults to 10.

    Returns:
        None. The mean and standard deviation of the episode reward before
        and after training are printed.
    """
    vehicle_interface = smarts_agent_interface.AgentInterface(
        max_episode_steps=300,
        rgb=smarts_agent_interface.RGB(
            width=64,
            height=64,
            resolution=1,
        ),
        action=smarts_controllers.ActionSpaceType.Continuous,
        # Only collisions and leaving the road end an episode early.
        done_criteria=smarts_agent_interface.DoneCriteria(
            collision=True,
            off_road=True,
            off_route=False,
            on_shoulder=False,
            wrong_way=False,
            not_moving=False,
        ),
    )

    agent_specs = {
        "Agent-007": smarts_agent.AgentSpec(
            interface=vehicle_interface,
            agent_builder=None,
        )
    }

    env = smarts_hiway_env.HiWayEnv(
        scenarios=scenarios,
        agent_specs=agent_specs,
        headless=headless,
        visdom=False,
        seed=seed,
        sim_name=sim_name,
    )

    # From here on `env` always names the outermost wrapper built so far, so
    # the `finally` clause can release the simulation no matter which step
    # fails. (Gym-style wrappers are expected to forward close() to the
    # environment they wrap.)
    try:
        # Wrap env with ActionWrapper
        env = action.Action(env=env)
        # Wrap env with RewardWrapper
        env = reward.Reward(env=env)
        # Wrap env with RGBImage wrapper to only get rgb images in observation
        env = smarts_rgb_image.RGBImage(env=env, num_stack=1)
        # Wrap env with SingleAgent wrapper to be Gym compliant
        env = smarts_single_agent.SingleAgent(env=env)
        env = monitor.Monitor(env=env)
        check_env(env, warn=True)

        # create the model
        model = PPO("CnnPolicy", env, verbose=1, n_steps=50, batch_size=50)

        # evaluate at the beginning, to get a baseline for the untrained policy
        before_mean_reward, before_std_reward = evaluate_policy(
            model, env, n_eval_episodes=n_eval_episodes, deterministic=True
        )
        model.learn(total_timesteps=total_timesteps)

        # evaluate after training
        mean_reward, std_reward = evaluate_policy(
            model, env, n_eval_episodes=n_eval_episodes, deterministic=True
        )
    finally:
        env.close()

    print(
        f"before_mean_reward:{before_mean_reward:.2f} +/- {before_std_reward:.2f}"
    )
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Run the example

In [None]:
import os

# Allow off-screen rendering by selecting the EGL platform through the
# PYOPENGL_PLATFORM environment variable.
os.environ.update({"PYOPENGL_PLATFORM": "egl"})

# Train and evaluate on the loop scenario without opening a display window.
main(
    sim_name="SB3-PPO",
    scenarios=["/content/SMARTS/scenarios/loop"],
    seed=42,
    headless=True,
)