In [1]:
# the TestEnv environment is used to simply simulate the network
from flow.envs import TestEnv

# the Experiment class is used for running simulations
from flow.core.experiment import Experiment

# the base network class
from flow.networks import Network
from flow.networks import InductionNet
from flow.envs import Env

# all other imports are standard
from flow.core.params import VehicleParams
from flow.core.params import NetParams
from flow.core.params import InitialConfig
from flow.core.params import EnvParams
from flow.core.params import TrafficLightParams

# Default parameters shared by the rest of the notebook: the rollout
# horizon, plus stock initial-configuration and environment parameters.
HORIZON = 10000
initial_config = InitialConfig()
env_params = EnvParams(horizon=HORIZON)

In [2]:
import os

# Directory containing the SUMO scenario files used by this notebook.
# Set the LUST_DIR environment variable to point at a different location;
# the previously hard-coded path is kept as the fallback so existing setups
# keep working unchanged.
LuST_dir = os.environ.get("LUST_DIR", "/home/valentin/flow/personal_tests")

In [3]:
from flow.core.params import SumoParams

# SUMO simulator settings: rendering switched off, sim_step set to 1.
sim_params = SumoParams(sim_step=1, render=False)

In [4]:
import os

# Template entries and the file (inside LuST_dir) that backs each of them.
_template_files = {
    # network geometry features
    "net": "lemgo.net.xml",
    # features associated with the properties of drivers
    "vtype": "vtypes.add.xml",
    # features associated with the routes vehicles take
    "rou": "lemgo2.rou.xml",
    # additional file (presumably detector definitions -- TODO confirm)
    "det": "lemgo.add.xml",
}

# Network parameters built from the template, with every file resolved to a
# full path under LuST_dir.
net_params = NetParams(
    template={
        key: os.path.join(LuST_dir, filename)
        for key, filename in _template_files.items()
    }
)

## Create custom Traffic lights:

In [5]:
# Container for the custom traffic-light programs defined below.
tf_logic = TrafficLightParams(baseline=False)

# Junction ids that the phase tables below were written for.
nodes = ["27403692", "27431768", "27153964", "27153945"]

# Phase tables: one dict per signal phase. Every phase has a "duration" and
# a "state" string; some phases additionally carry "minDur"/"maxDur" bounds.
params0 = [
    {"duration": "42", "minDur": "8", "maxDur": "50", "state": "GGGgrr"},
    {"duration": "3", "state": "yyyyrr"},
    {"duration": "42", "minDur": "8", "maxDur": "50", "state": "GrrrGG"},
    {"duration": "3", "state": "yrrryy"},
]
# Same content as params0; not referenced by any add() call in this cell.
params1 = [
    {"duration": "42", "minDur": "8", "maxDur": "50", "state": "GGGgrr"},
    {"duration": "3", "state": "yyyyrr"},
    {"duration": "42", "minDur": "8", "maxDur": "50", "state": "GrrrGG"},
    {"duration": "3", "state": "yrrryy"},
]
params2 = [
    {"duration": "33", "minDur": "8", "maxDur": "40", "state": "rrrGGgrrrGGg"},
    {"duration": "3", "state": "rrryygrrryyg"},
    {"duration": "6", "minDur": "5", "maxDur": "10", "state": "rrrrrGrrrrrG"},
    {"duration": "3", "state": "rrrrryrrrrry"},
    {"duration": "33", "minDur": "8", "maxDur": "40", "state": "GGgrrrGGgrrr"},
    {"duration": "3", "state": "yygrrryygrrr"},
    {"duration": "6", "minDur": "5", "maxDur": "10", "state": "rrGrrrrrGrrr"},
    {"duration": "3", "state": "rryrrrrryrrr"},
]
params3 = [
    {"duration": "42", "minDur": "8", "maxDur": "50", "state": "GgrrGG"},
    {"duration": "3", "state": "yyrryy"},
    {"duration": "42", "minDur": "8", "maxDur": "50", "state": "rrGGGr"},
    {"duration": "3", "state": "rryyyr"},
]

# Keyword arguments common to every add() call.
_tls_options = dict(
    tls_type="actuated",
    programID="1",
    offset=None,
    maxGap=4.0,
    detectorGap=0.9,
    showDetectors=True,
)

# Register one program per junction, in the same order as before.
# nodes[1] gets no custom program in this cell.
for _node, _phases in (
    (nodes[0], params0),
    (nodes[2], params2),
    (nodes[3], params3),
):
    tf_logic.add(node_id=_node, phases=_phases, **_tls_options)

#### 3.2.3 Running the Modified Simulation

Finally, the fully imported simulation can be run as follows. 

**Warning**: the network takes time to initialize while the departure positions and times of the vehicles are specified.

In [6]:
# Build the network from the imported template and attach the custom
# traffic-light programs.
network = InductionNet(
    name="template",
    vehicles=VehicleParams(),
    net_params=net_params,
    traffic_lights=tf_logic,
)

# Wrap the network in TestEnv, which is used to simply simulate the network.
env = TestEnv(
    network=network,
    sim_params=sim_params,
    env_params=env_params,
)

# Run the experiment with arguments (1, 100): one run and, presumably,
# 100 simulation steps -- confirm against the installed Flow version's
# Experiment.run signature.
exp = Experiment(env=env)
_ = exp.run(1, 100)

Round 0, return: 0
Average, std return: 0.0, 0.0
Average, std speed: 14.806430067688154, 0.0


In [7]:
# This is the custom environment.
# It needs to be imported in order to work properly in Flow.
from flow.envs import SimpleEnv
env_name = SimpleEnv

In [8]:
# Creating flow_params. Make sure the dictionary keys are as specified.
# NOTE(review): the custom traffic-light programs in tf_logic are not passed
# on here, so they may not apply to the training runs -- confirm whether a
# traffic-light entry is expected by make_create_env and intended here.
flow_params = {
    # name of the experiment
    "exp_tag": "first_exp",
    # environment class the experiment is running on
    "env_name": env_name,
    # network class the experiment uses
    "network": InductionNet,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sim_params,
    # environment related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and
    # the network's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": VehicleParams(),
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

Instructions for updating:
non-resource variables are not supported in the long term


In [10]:
# number of CPUs handed to Ray (the PPO config uses N_CPUS - 1 workers)
N_CPUS = 2
# number of rollouts per training iteration
N_ROLLOUTS = 1

# ignore_reinit_error=True keeps this cell re-runnable in a live kernel:
# without it, a second call to ray.init() raises because Ray is already
# initialised.
ray.init(num_cpus=N_CPUS, ignore_reinit_error=True)

2019-12-13 19:06:36,693	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-12-13_19-06-36_693393_22893/logs.
2019-12-13 19:06:36,809	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:46228 to respond...
2019-12-13 19:06:36,947	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:28389 to respond...
2019-12-13 19:06:36,958	INFO services.py:809 -- Starting Redis shard with 1.65 GB max memory.
2019-12-13 19:06:37,014	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-12-13_19-06-36_693393_22893/logs.
2019-12-13 19:06:37,019	INFO services.py:1475 -- Starting the Plasma object store with 2.48 GB memory using /dev/shm.


{'node_ip_address': '192.168.101.101',
 'redis_address': '192.168.101.101:46228',
 'object_store_address': '/tmp/ray/session_2019-12-13_19-06-36_693393_22893/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2019-12-13_19-06-36_693393_22893/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2019-12-13_19-06-36_693393_22893'}

In [11]:
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or a user-defined trainable function
# or class registered in the Tune registry.
alg_run = "PPO"

BATCH_SIZE = 10000

agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults: a shallow .copy() shares the nested "model" and
# "env_config" dicts with the agent class, so the in-place updates below
# would silently modify the class-level defaults (and accumulate across
# re-runs of this cell).
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS - 1  # number of parallel workers
config["train_batch_size"] = BATCH_SIZE  # batch size
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [16, 16]})  # size of hidden layers in network
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97  # GAE lambda parameter
config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # SGD minibatch, capped at the batch size
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon
config["vf_clip_param"] = 50000.0  # value-function clipping parameter

# save the flow params for replay
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)  # generating a string version of flow_params
config['env_config']['flow_params'] = flow_json  # adding the flow_params to config dict
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym
register_env(gym_name, create_env)

In [12]:
## The algorithm or model to train. This may refer to the name of a
## built-in algorithm (e.g. RLlib's DQN or PPO), or a user-defined
## trainable function or class registered in the Tune registry.
#alg_run = "A3C"
#
#BATCH_SIZE = HORIZON * N_ROLLOUTS
#
#agent_cls = get_agent_class(alg_run)
#config = agent_cls._default_config.copy()
#config["num_workers"] = N_CPUS - 1  # number of parallel workers
#config["train_batch_size"] = BATCH_SIZE  # batch size
#config["sample_batch_size"] = BATCH_SIZE  # batch size
#config["gamma"] = 0.999  # discount rate
#config["model"].update({"fcnet_hiddens": [16, 16]})  # size of hidden layers in network
#config["horizon"] = HORIZON  # rollout horizon
#
## save the flow params for replay
#flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
#                       indent=4)  # generating a string version of flow_params
#config['env_config']['flow_params'] = flow_json  # adding the flow_params to config dict
#config['env_config']['run'] = alg_run
#
## Call the utility function make_create_env to be able to 
## register the Flow env for this experiment
#create_env, gym_name = make_create_env(params=flow_params, version=0)
#
## Register as rllib env with Gym
#register_env(gym_name, create_env)

In [13]:
# Specification of the single experiment handed to Tune.
_experiment_spec = {
    "run": alg_run,
    "env": gym_name,
    # shallow copy of the trainer configuration
    "config": dict(config),
    # number of iterations between checkpoints
    "checkpoint_freq": 2,
    # generate a checkpoint at the end
    "checkpoint_at_end": True,
    "max_failures": 999,
    # stopping conditions: number of iterations to stop after
    "stop": {"training_iteration": 10},
}

# Launch training; the experiment is keyed by its tag from flow_params.
trials = run_experiments({flow_params["exp_tag"]: _experiment_spec})

2019-12-13 19:06:37,298	INFO trial_runner.py:176 -- Starting a new experiment.
2019-12-13 19:06:37,354	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/2 CPUs, 0/0 GPUs
Memory usage on this node: 4.7/8.3 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/0 GPUs
Memory usage on this node: 4.7/8.3 GB
Result logdir: /home/valentin/ray_results/first_exp
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_SimpleEnv-v0_0:	RUNNING

[2m[36m(pid=22932)[0m Instructions for updating:
[2m[36m(pid=22932)[0m non-resource variables are not supported in the long term
[2m[36m(pid=22932)[0m 2019-12-13 19:06:41,120	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=22932)[0m 2019-12-13 19:06:41.121958: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=22932)[0m 2019-12-13 19:06:41.145928: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Freque

[2m[36m(pid=22933)[0m Instructions for updating:
[2m[36m(pid=22933)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=22933)[0m Instructions for updating:
[2m[36m(pid=22933)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=22933)[0m 2019-12-13 19:06:46,703	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=22933)[0m 2019-12-13 19:06:46,774	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((27,), dtype=int64, min=0.0, max=0.0, mean=0.0)}}
[2m[36m(pid=22933)[0m 2019-12-13 19:06:46,774	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=22933)[0m 2019-12-13 19:06:46,774	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((27,), dtype=int64, min=0.0, max=0.0, mean=0.0)
[2m[36m(pid=22933)[0m 2019-12-13 19:06:46,774	INFO sampler.py:407 -- Filtered obs: np.ndarray((27,), dtype=int64, min=0.0, max=0.0, mean=0.0)
[2m[36m(pid=22933)[0m 2019-1

[2m[36m(pid=22932)[0m 2019-12-13 19:08:11,282	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc1/kernel:0' shape=(27, 16) dtype=float32_ref>
[2m[36m(pid=22932)[0m 2019-12-13 19:08:11,283	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc1/bias:0' shape=(16,) dtype=float32_ref>
[2m[36m(pid=22932)[0m 2019-12-13 19:08:11,283	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc2/kernel:0' shape=(16, 16) dtype=float32_ref>
[2m[36m(pid=22932)[0m 2019-12-13 19:08:11,283	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc2/bias:0' shape=(16,) dtype=float32_ref>
[2m[36m(pid=22932)[0m 2019-12-13 19:08:11,283	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc_out/kernel:0' shape=(16, 8) dtype=float32_ref>
[2m[36m(pid=22932)[0m 2019-12-13 19:08:11,283	INFO tf_policy.py:355 -- Optimizin

Result for PPO_SimpleEnv-v0_0:
  custom_metrics: {}
  date: 2019-12-13_19-08-11
  done: false
  episode_len_mean: 10000.0
  episode_reward_max: -59706.0
  episode_reward_mean: -59706.0
  episode_reward_min: -59706.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: caf8702f3c0d49668e97103562fdc3e8
  hostname: valentin-Aspire-V3-372
  info:
    grad_time_ms: 412.991
    learner:
      default_policy:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 4.999999873689376e-05
        entropy: 5.674520492553711
        entropy_coeff: 0.0
        kl: 3.298496449133381e-05
        policy_loss: -0.0002572936937212944
        total_loss: 48126.49609375
        vf_explained_var: 2.086162567138672e-06
        vf_loss: 48126.625
    load_time_ms: 77.85
    num_steps_sampled: 10000
    num_steps_trained: 10000
    sample_time_ms: 85431.206
    update_time_ms: 911.824
  iterations_since_restore: 1
  node_ip: 192.168.101.101
  num_healthy_workers: 1
  off_policy_estimator: {}
  perf:





2019-12-13 19:08:28,001	ERROR worker.py:1616 -- print_logs: Error 111 connecting to 192.168.101.101:46228. Connection refused.
2019-12-13 19:08:28,008	ERROR worker.py:1716 -- listen_error_messages_raylet: Error 111 connecting to 192.168.101.101:46228. Connection refused.
2019-12-13 19:08:28,015	ERROR import_thread.py:89 -- ImportThread: Error 111 connecting to 192.168.101.101:46228. Connection refused.


KeyboardInterrupt: 