In [1]:
# Import the scenarios package under a short alias so its contents can be inspected.
import flow.scenarios as scenarios

# Print the names the package exports via __all__ (the available scenario classes).
print(scenarios.__all__)

['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'MiniCityScenario', 'TriangleMergeScenario']


In [2]:
# name of the scenario class to use (the triangle merge network); this name
# is one of the entries of flow.scenarios.__all__ printed above
scenario_name = "TriangleMergeScenario"

In [3]:
# input parameter classes to the scenario class
from flow.core.params import NetParams, InitialConfig
from flow.core.params import InFlows

# Inflows of "human" vehicles, given as (source edge, vehicles per hour).
# Every inflow departs at speed 10 on a randomly chosen lane.
_INFLOW_SPECS = [
    ("inflow_highway_2", 2000),
    ("inflow_merge_2", 500),
]

inflow = InFlows()
for _edge, _rate in _INFLOW_SPECS:
    inflow.add(
        veh_type="human",
        edge=_edge,
        vehs_per_hour=_rate,
        departSpeed=10,
        departLane="random",
    )

# Scenario-specific network parameters.
additional_net_params = {
    # length of the merge edge
    "merge_length": 100,
    # length of the highway leading to the merge
    "pre_merge_length": 150,
    # length of the highway past the merge
    "post_merge_length": 100,
    # number of lanes in the merge
    "merge_lanes": 2,
    # number of lanes in the highway
    "highway_lanes": 5,
    # max speed limit of the network
    "speed_limit": 30,
}

# Network parameters: the inflows defined above plus the scenario-specific
# values; internal links are kept (no_internal_links=False).
net_params = NetParams(
    inflows=inflow,
    no_internal_links=False,
    additional_params=additional_net_params,
)

# name of the scenario (reused later as the experiment tag)
name = "training_triangle"

# initial placement of vehicles: random spacing with a perturbation of 1
initial_config = InitialConfig(spacing="random", perturbation=1)


In [5]:
from flow.controllers import ContinuousRouter, IDMController
from flow.core.params import SumoCarFollowingParams, SumoLaneChangeParams
from flow.core.params import VehicleParams

# Container collecting every vehicle type used in the experiment.
vehicles = VehicleParams()

# SUMO-side behaviour of the "human" vehicle type.
_human_car_following = SumoCarFollowingParams(speed_mode="obey_safe_speed")
_human_lane_change = SumoLaneChangeParams(lane_change_mode="strategic")

# Register the "human" type: IDM acceleration, continuous routing.
# num_vehicles=0 places none of them at start; the inflows defined earlier
# use veh_type="human".
vehicles.add(
    veh_id="human",
    num_vehicles=0,
    acceleration_controller=(IDMController, {}),
    routing_controller=(ContinuousRouter, {}),
    car_following_params=_human_car_following,
    lane_change_params=_human_lane_change,
)


In [6]:
from flow.controllers import RLController

In [7]:
# Register one vehicle of type "rl": its acceleration controller is
# RLController and it uses the same continuous router as the human type.
vehicles.add(
    veh_id="rl",
    num_vehicles=1,
    acceleration_controller=(RLController, {}),
    routing_controller=(ContinuousRouter, {}),
)

In [8]:
from flow.core.params import SumoParams

# Simulator settings: step size of 0.1, rendering disabled.
sumo_params = SumoParams(render=False, sim_step=0.1)

In [9]:
from flow.core.params import EnvParams

# Define horizon as a variable to ensure consistent use across notebook
HORIZON = 100

env_params = EnvParams(
    # length of one rollout
    horizon=HORIZON,

    additional_params={
        # maximum acceleration of autonomous vehicles
        "max_accel": 1,
        # maximum deceleration of autonomous vehicles
        "max_decel": 1,
        # desired velocity for the vehicles in the network. The environment
        # selected in this notebook ("AccelEnv") lists this key among its
        # required additional parameters and fails with a KeyError at
        # construction when it is missing; the value is a tunable choice.
        "target_velocity": 20,
        # whether vehicles are sorted by position each step; required by
        # newer AccelEnv versions, harmlessly ignored by older ones
        "sort_vehicles": False,
        # bounds on ring road lengths; carried over from a ring-road example.
        # NOTE(review): presumably unused by AccelEnv on a merge network --
        # kept only for backward compatibility, confirm before removing.
        "ring_length": [220, 270],
    },
)

In [10]:
# Import the environments package under a short alias so its contents can be inspected.
import flow.envs as flowenvs

# Print the names the package exports via __all__ (the available environment classes).
print(flowenvs.__all__)

['Env', 'AccelEnv', 'LaneChangeAccelEnv', 'LaneChangeAccelPOEnv', 'GreenWaveTestEnv', 'GreenWaveTestEnv', 'WaveAttenuationMergePOEnv', 'BottleneckEnv', 'BottleNeckAccelEnv', 'WaveAttenuationEnv', 'WaveAttenuationPOEnv', 'TrafficLightGridEnv', 'PO_TrafficLightGridEnv', 'DesiredVelocityEnv', 'TestEnv', 'BayBridgeEnv']


In [11]:
# name of the environment class to train in; "AccelEnv" is one of the
# entries of flow.envs.__all__ printed above
env_name = "AccelEnv"

In [12]:
# Bundle everything defined so far into the flow_params dictionary.
# The keys must be spelled exactly as below.
flow_params = {
    # experiment name
    "exp_tag": name,
    # name of the flow environment class the experiment runs in
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": vehicles,
    # (optional) parameters affecting how vehicles are positioned upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

In [13]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.


In [14]:
# number of CPUs handed to Ray (the worker count below is derived from it)
N_CPUS = 2
# number of rollouts collected per training iteration
N_ROLLOUTS = 1

# Start Ray with the CPU budget above and output redirection enabled.
ray.init(num_cpus=N_CPUS, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-07-08_10-44-49_6871/logs.
Waiting for redis server at 127.0.0.1:14576 to respond...
Waiting for redis server at 127.0.0.1:64960 to respond...
Starting the Plasma object store with 6.871947672999999 GB memory using /tmp.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=be3ecf2c750b3d25ac765cd1bfc0a6f100ab78cf59427e93



{'node_ip_address': '10.105.138.56',
 'object_store_addresses': ['/tmp/ray/session_2019-07-08_10-44-49_6871/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-07-08_10-44-49_6871/sockets/raylet'],
 'redis_address': '10.105.138.56:14576',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=be3ecf2c750b3d25ac765cd1bfc0a6f100ab78cf59427e93'}

In [15]:
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or to a user-defined trainable
# function or class registered in the tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults: a shallow .copy() shares the nested "model" and
# "env_config" dicts with the agent class, so the in-place edits below would
# otherwise leak into the class-level default configuration.
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS - 1  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [16, 16]})  # size of hidden layers in network
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97  # lambda parameter used with GAE
# SGD minibatch size, capped so it never exceeds the train batch size
config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# save the flow params for replay
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)  # generating a string version of flow_params
config['env_config']['flow_params'] = flow_json  # adding the flow_params to config dict
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym
register_env(gym_name, create_env)

In [None]:
# Specification of the single experiment handed to Ray Tune.
_experiment_spec = {
    # algorithm to train with
    "run": alg_run,
    # name under which the Flow environment was registered
    "env": gym_name,
    # copy of the algorithm configuration assembled above
    "config": dict(config),
    # number of iterations between checkpoints
    "checkpoint_freq": 1,
    # also generate a checkpoint at the end
    "checkpoint_at_end": True,
    # value passed to Tune as max_failures (a very generous retry budget)
    "max_failures": 999,
    # stopping conditions: number of training iterations to stop after
    "stop": {"training_iteration": 1},
}

# Run the experiment, keyed by the experiment tag stored in flow_params.
trials = run_experiments({flow_params["exp_tag"]: _experiment_spec})