# Train Traffic Lights Agents

Utilise les fonctions de @Binetruy

- crée un network à partir d'un fichier .osm et des trajectoires de véhicules
- ajoute un flux de voiture sur les routes
- personnalise un Environnement pour le RL
- intègre l'environnement pour RLlib et exécute la simulation


In [1]:
from flow.core.params import VehicleParams
from flow.core.params import NetParams, SumoCarFollowingParams
from flow.core.params import InitialConfig
from flow.core.params import EnvParams
from flow.core.params import SumoParams
from flow.controllers import RLController, IDMController
from flow.networks.IssyOSMNetwork import IssyOSMNetwork
from flow.core.params import InFlows
from collections import OrderedDict
import json
import ray
from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from flow.core.params import VehicleParams, SumoCarFollowingParams

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Importation du network d'Issy

On vérifie si IssyOSMNetwork est bien importé.

In [2]:
# Sanity check: a successful import shows the custom Issy network module is
# reachable; its default parameters are echoed for inspection.
from flow.networks.IssyOSMNetwork import ADDITIONAL_NET_PARAMS, EDGES_DISTRIBUTION

print(ADDITIONAL_NET_PARAMS, EDGES_DISTRIBUTION, sep="\n")

{'speed_limit': 50}
['-100822066', '4794817', '4783299#0', '155558218']


## Ajoute les flux de voiture

`IDMController` : The Intelligent Driver Model is a car-following model specifying vehicle dynamics by a differential equation for acceleration $\dot{v}$.

`RLController` : a trainable autonomous vehicle whose actions are dictated by an RL agent.

In [3]:
vehicles = VehicleParams()

# (vehicle type id, acceleration controller class, initial vehicle count)
for _veh_id, _controller, _count in (
    ("human", IDMController, 10),
    ("rl", RLController, 2),
):
    vehicles.add(
        _veh_id,
        acceleration_controller=(_controller, {}),
        num_vehicles=_count,
    )

- `vehs_per_hour`: nombre de véhicules par heure, uniformément espacés. Par exemple, comme il y a $60 \times 60 = 3600$ secondes dans une heure, la valeur $\frac{3600}{5}=720$ fait entrer un véhicule dans le network toutes les $5$ secondes.

- `probability`: c'est la probabilité qu'un véhicule entre dans le network à chaque seconde. Par exemple, si on la fixe à $0.2$, alors à chaque seconde de la simulation un véhicule a une probabilité de $\frac{1}{5}$ d'entrer dans le network.

- `period`: C'est le temps en secondes entre 2 véhicules qui sont insérés. Par exemple, le fixer à $5$ ferait rentrer des véhicules dans le network toutes les $5$ secondes (ce qui équivaut à mettre `vehs_per_hour` à $720$).

<font color='red'>
$\rightarrow$ Exactement 1 seul de ces 3 paramètres doit être configuré !
</font>

In [4]:
inflow = InFlows()

# One row per inflow, in insertion order:
# (vehicle type, entry edge, insertion probability, extra keyword arguments).
_INFLOW_TABLE = (
    ("human", "4794817",    0.3,  {}),
    ("human", "4783299#0",  0.2,  {}),
    ("human", "-100822066", 0.25, {}),
    ("rl",    "-100822066", 0.05, {"color": "blue"}),
    ("human", "155558218",  0.2,  {}),
)

# Every inflow shares the same departure speed and lane policy.
for _veh_type, _edge, _probability, _extra in _INFLOW_TABLE:
    inflow.add(
        veh_type=_veh_type,
        edge=_edge,
        probability=_probability,
        depart_speed=7,
        depart_lane="random",
        **_extra,
    )

## Personnalise un Environnement pour le RL

plus de méthodes sur : http://berkeleyflow.readthedocs.io/en/latest/

In [5]:
from flow.envs.IssyEnv import IssyEnv1

## Lance une simulation avec Training RLlib

Pour qu'un environnement puisse être entrainé, l'environnement doit être accessible via l'importation à partir de flow.envs. 


<font color='red'>
Copier l'environnement créé dans un fichier .py, puis importer cet environnement dans `flow/envs/__init__.py`.
Mettre le chemin absolu du fichier .osm .
</font> 

In [6]:
# Possible actions: for each traffic-light id, the list of light-state strings
# the agent may choose from.
# Built from a list of pairs rather than a dict literal so the key order is
# explicit and does not depend on dict ordering (only guaranteed by the
# language from Python 3.7 on).
action_spec = OrderedDict([
    ("30677963", ["GGGGrrrGGGG", "rrrrGGGrrrr"]),
    ("30763263", ["GGGGGGGGGG", "rrrrrrrrrr"]),
    ("30677810", ["GGrr", "rrGG"]),
])

In [7]:
# Shared settings read by the parameter and RLlib setup cells below.
# Episode length; passed to EnvParams and to the RLlib config.
horizon  = 1000
# Simulation step handed to SumoParams and to the environment parameters.
SIM_STEP = 0.1
# Passed to the environment as "beta".
# NOTE(review): equals the 10 human + 2 rl vehicles added above; keep in sync.
n_veh    = 12
# train_batch_size is computed as horizon * rollouts.
rollouts = 10
# Number of RLlib workers; ray.init is given n_cpus + 1 CPUs.
n_cpus   = 3
# Used as the RLlib 'gamma' setting.
discount_rate = 0.999

In [8]:
# --- Simulator (SUMO) settings ---
sumo_params = SumoParams(
    sim_step=SIM_STEP,
    render=False,
    restart_instance=True,
)

# --- Environment settings ---
# NOTE(review): "algorithm" is fixed to "DQN" here regardless of which
# setup_*_exp function is used afterwards; confirm how the environment
# consumes it before training with PPO.
ADDITIONAL_ENV_PARAMS = dict(
    beta=n_veh,
    action_spec=action_spec,
    algorithm="DQN",
    tl_constraint_min=100,
    tl_constraint_max=600,
    sim_step=SIM_STEP,
)
env_params = EnvParams(
    additional_params=ADDITIONAL_ENV_PARAMS,
    horizon=horizon,
    warmup_steps=1,
)

# --- Network settings (absolute path to the .osm file) ---
path_file = '/home/lino/Documents/DQN_CIL4SYS/DQN_CIL4SYS/notebooks/issy.osm'
net_params = NetParams(inflows=inflow, osm_path=path_file)

# --- Network class ---
network = IssyOSMNetwork

# --- Initial vehicle placement ---
initial_config = InitialConfig(edges_distribution=EDGES_DISTRIBUTION)

# Everything Flow needs to build the experiment, gathered in one mapping.
flow_params = {
    "exp_tag": "ISSY_traffic",
    "env_name": IssyEnv1,
    "network": IssyOSMNetwork,
    "simulator": 'traci',
    "sim": sumo_params,
    "env": env_params,
    "net": net_params,
    "veh": vehicles,
    "initial": initial_config,
}

# Setup RLlib library

Configures RLlib DQN algorithm to be used to train the RL model.

In [9]:
def setup_DQN_exp():
    """Build the RLlib DQN configuration and register the Flow environment.

    Starts from the default configuration of RLlib's DQN agent, overrides it
    with the notebook-level settings (``n_cpus``, ``horizon``, ``rollouts``,
    ``discount_rate``), stores the JSON-serialized ``flow_params`` in
    ``env_config`` and registers the environment with RLlib.

    Returns:
        tuple: ``(alg_run, gym_name, config)`` -- the algorithm name
        (``'DQN'``), the name under which the environment was registered,
        and the RLlib configuration dict.
    """
    # Imported locally so the function does not rely on an extra file-level
    # import.
    from copy import deepcopy

    alg_run   = 'DQN'
    agent_cls = get_agent_class(alg_run)
    # Deep copy: the nested 'model' and 'env_config' dicts are modified below,
    # and a shallow ``.copy()`` would write those changes into the agent
    # class's shared default configuration.
    config    = deepcopy(agent_cls._default_config)
    config['num_workers']      = n_cpus
    config['train_batch_size'] = horizon * rollouts
    config['gamma']            = discount_rate
    config['clip_actions']     = False  # FIXME(ev) temporary ray bug
    config['horizon']          = horizon
    config["hiddens"]          = [256]
    config['model'].update({'fcnet_hiddens': [32, 32]})

    # Save the flow params for replay.
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as an RLlib environment.
    register_env(gym_name, create_env)

    return alg_run, gym_name, config

Configures RLlib PPO algorithm to be used to train the RL model.

See: https://ray.readthedocs.io/en/latest/rllib-algorithms.html#proximal-policy-optimization-ppo

In [10]:
def setup_PPO_exp():
    """Build the RLlib PPO configuration and register the Flow environment.

    Starts from the default configuration of RLlib's PPO agent, overrides it
    with the notebook-level settings (``n_cpus``, ``horizon``, ``rollouts``,
    ``discount_rate``) and the PPO-specific values set below, stores the
    JSON-serialized ``flow_params`` in ``env_config`` and registers the
    environment with RLlib.

    Returns:
        tuple: ``(alg_run, gym_name, config)`` -- the algorithm name
        (``'PPO'``), the name under which the environment was registered,
        and the RLlib configuration dict.
    """
    # Imported locally so the function does not rely on an extra file-level
    # import.
    from copy import deepcopy

    alg_run   = 'PPO'
    agent_cls = get_agent_class(alg_run)
    # Deep copy: the nested 'model' and 'env_config' dicts are modified below,
    # and a shallow ``.copy()`` would write those changes into the agent
    # class's shared default configuration.
    config    = deepcopy(agent_cls._default_config)
    config['num_workers']      = n_cpus
    config['train_batch_size'] = horizon * rollouts
    config['gamma']            = discount_rate
    config['use_gae']          = True
    config['lambda']           = 0.97
    config['kl_target']        = 0.02
    config['num_sgd_iter']     = 10
    config['clip_actions']     = False  # FIXME(ev) temporary ray bug
    config['horizon']          = horizon
    config['model'].update({'fcnet_hiddens': [32, 32]})

    # Save the flow params for replay.
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as an RLlib environment.
    register_env(gym_name, create_env)

    return alg_run, gym_name, config

# Run Experiment

In [11]:
# Build the DQN configuration and register the Flow environment with RLlib.
alg_run, gym_name, config = setup_DQN_exp()

# Start Ray with one CPU more than the number of RLlib workers.
# NOTE(review): the extra CPU is presumably for the trainer process -- confirm.
ray.init(num_cpus=n_cpus + 1)

2020-04-14 15:40:06,519	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-04-14_15-40-06_518906_10943/logs.
2020-04-14 15:40:06,631	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:26528 to respond...
2020-04-14 15:40:06,750	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:20306 to respond...
2020-04-14 15:40:06,755	INFO services.py:809 -- Starting Redis shard with 2.93 GB max memory.
2020-04-14 15:40:06,783	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-04-14_15-40-06_518906_10943/logs.
2020-04-14 15:40:06,785	INFO services.py:1475 -- Starting the Plasma object store with 4.4 GB memory using /dev/shm.


{'node_ip_address': '192.168.0.48',
 'redis_address': '192.168.0.48:26528',
 'object_store_address': '/tmp/ray/session_2020-04-14_15-40-06_518906_10943/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-04-14_15-40-06_518906_10943/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2020-04-14_15-40-06_518906_10943'}

In [None]:
# Experiment specification handed to Ray Tune: which algorithm to run, on
# which registered environment, with which configuration, plus the
# checkpointing, failure and stopping settings.
exp_tag = dict(
    run=alg_run,
    env=gym_name,
    config=dict(config),
    checkpoint_freq=20,
    checkpoint_at_end=True,
    max_failures=999,
    stop=dict(training_iteration=1000),
)

# The experiment is keyed by the Flow experiment tag.
trials = run_experiments({
    flow_params["exp_tag"]: exp_tag,
})

2020-04-14 15:40:06,972	INFO trial_runner.py:176 -- Starting a new experiment.
2020-04-14 15:40:07,033	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs
Memory usage on this node: 3.5/14.7 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 0/0 GPUs
Memory usage on this node: 3.5/14.7 GB
Result logdir: /home/lino/ray_results/ISSY_traffic
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - DQN_IssyEnv1-v0_0:	RUNNING

[2m[36m(pid=10978)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=10978)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=10978)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=10978)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=10978)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=10978)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=10978)[0m Success.
[2m[36m(pid=10978)[0m 2020-04-14 15:40:11,068	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU

[2m[36m(pid=10977)[0m Success.
[2m[36m(pid=10979)[0m Success.
[2m[36m(pid=10980)[0m Success.
[2m[36m(pid=10980)[0m 2020-04-14 15:40:16,041	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10980)[0m 2020-04-14 15:40:16.054844: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10977)[0m 2020-04-14 15:40:16,039	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10977)[0m 2020-04-14 15:40:16.052861: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10979)[0m 2020-04-14 15:40:16,093	INFO rollout_worker.py:319 -- Creating policy evaluation worker 3 on CPU (please ignore any CUDA init errors)
[2m

[2m[36m(pid=10977)[0m Success.
[2m[36m(pid=10980)[0m Success.
[2m[36m(pid=10979)[0m Success.
[2m[36m(pid=10977)[0m 2020-04-14 15:40:18,835	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((112,), dtype=float64, min=0.0, max=2637.506, mean=292.907)}}
[2m[36m(pid=10977)[0m 2020-04-14 15:40:18,835	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=10977)[0m 2020-04-14 15:40:18,836	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((112,), dtype=float64, min=0.0, max=2637.506, mean=292.907)
[2m[36m(pid=10977)[0m 2020-04-14 15:40:18,836	INFO sampler.py:407 -- Filtered obs: np.ndarray((112,), dtype=float64, min=0.0, max=2637.506, mean=292.907)
[2m[36m(pid=10977)[0m 2020-04-14 15:40:18,837	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=10977)[0m 
[2m[36m(pid=10977)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=10977)[0m                                   'env_id': 0,
[2m

Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_15-40-26
  done: false
  episode_len_mean: .nan
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: b3ea59ed448945129499dc5e0ad92044
  hostname: lino-iMac
  info:
    grad_time_ms: .nan
    learner: {}
    max_exploration: 1.0
    min_exploration: 1.0
    num_steps_sampled: 1008
    num_steps_trained: 0
    num_target_updates: 2
    opt_peak_throughput: 0.0
    opt_samples: .nan
    replay_time_ms: .nan
    sample_time_ms: 107.9
    update_time_ms: 5.513
  iterations_since_restore: 1
  node_ip: 192.168.0.48
  num_healthy_workers: 3
  off_policy_estimator: {}
  perf:
    cpu_util_percent: 64.92857142857142
    ram_util_percent: 29.090476190476195
  pid: 10978
  policy_reward_mean: {}
  sampler_perf: {}
  time_since_restore: 14.440845966339111
  time_this_iter_s: 14.440845966339111
  time_total_s: 14.440845966339111
  timestamp: 

[2m[36m(pid=10978)[0m 2020-04-14 15:40:28,803	INFO rollout_worker.py:597 -- Training output:
[2m[36m(pid=10978)[0m 
[2m[36m(pid=10978)[0m { 'default_policy': { 'learner_stats': { 'cur_lr': 0.0005000000237487257,
[2m[36m(pid=10978)[0m                                          'max_q': 0.39389974,
[2m[36m(pid=10978)[0m                                          'mean_q': -0.35938427,
[2m[36m(pid=10978)[0m                                          'mean_td_error': -0.32220873,
[2m[36m(pid=10978)[0m                                          'min_q': -1.2152717,
[2m[36m(pid=10978)[0m                                          'model': {}},
[2m[36m(pid=10978)[0m                       'td_error': np.ndarray((10000,), dtype=float32, min=-1.285, max=0.483, mean=-0.322)}}
[2m[36m(pid=10978)[0m 
Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_15-44-17
  done: false
  episode_len_mean: .nan
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episod

[2m[36m(pid=10979)[0m Success.
[2m[36m(pid=10977)[0m Success.
[2m[36m(pid=10980)[0m Success.
Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_15-48-04
  done: false
  episode_len_mean: 1000.0
  episode_reward_max: 16.41499999999998
  episode_reward_mean: 0.9746666666666733
  episode_reward_min: -15.152999999999954
  episodes_this_iter: 3
  episodes_total: 3
  experiment_id: b3ea59ed448945129499dc5e0ad92044
  hostname: lino-iMac
  info:
    grad_time_ms: 1151.316
    learner:
      default_policy:
        cur_lr: 0.0005000000237487257
        max_q: 0.4704775810241699
        mean_q: -0.23645468056201935
        mean_td_error: 0.016084536910057068
        min_q: -0.642932653427124
        model: {}
    max_exploration: 0.802432
    min_exploration: 0.802432
    num_steps_sampled: 3024
    num_steps_trained: 1680000
    num_target_updates: 6
    opt_peak_throughput: 8685.716
    opt_samples: 10000.0
    replay_time_ms: 1362.96
    sample_time_ms: 294.482
   

Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_15-51-44
  done: false
  episode_len_mean: 1000.0
  episode_reward_max: 16.41499999999998
  episode_reward_mean: 0.9746666666666733
  episode_reward_min: -15.152999999999954
  episodes_this_iter: 0
  episodes_total: 3
  experiment_id: b3ea59ed448945129499dc5e0ad92044
  hostname: lino-iMac
  info:
    grad_time_ms: 1143.536
    learner:
      default_policy:
        cur_lr: 0.0005000000237487257
        max_q: 0.568102240562439
        mean_q: -0.0936928316950798
        mean_td_error: 0.009166549891233444
        min_q: -0.4940423369407654
        model: {}
    max_exploration: 0.703648
    min_exploration: 0.703648
    num_steps_sampled: 4032
    num_steps_trained: 2520000
    num_target_updates: 8
    opt_peak_throughput: 8744.803
    opt_samples: 10000.0
    replay_time_ms: 1359.73
    sample_time_ms: 115.019
    update_time_ms: 5.569
  iterations_since_restore: 4
  node_ip: 192.168.0.48
  num_healthy_workers: 3
 

Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_15-55-32
  done: false
  episode_len_mean: 867.5
  episode_reward_max: 16.41499999999998
  episode_reward_mean: -2.7812499999999964
  episode_reward_min: -15.152999999999954
  episodes_this_iter: 1
  episodes_total: 4
  experiment_id: b3ea59ed448945129499dc5e0ad92044
  hostname: lino-iMac
  info:
    grad_time_ms: 1201.25
    learner:
      default_policy:
        cur_lr: 0.0005000000237487257
        max_q: 0.689331591129303
        mean_q: -0.0057802628725767136
        mean_td_error: -0.0013812436955049634
        min_q: -0.37617671489715576
        model: {}
    max_exploration: 0.6048640000000001
    min_exploration: 0.6048640000000001
    num_steps_sampled: 5040
    num_steps_trained: 3360000
    num_target_updates: 10
    opt_peak_throughput: 8324.659
    opt_samples: 10000.0
    replay_time_ms: 1409.383
    sample_time_ms: 176.05
    update_time_ms: 5.458
  iterations_since_restore: 5
  node_ip: 192.168.0.48


Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_15-59-24
  done: false
  episode_len_mean: 911.6666666666666
  episode_reward_max: 23.909999999999982
  episode_reward_mean: 5.966166666666659
  episode_reward_min: -15.152999999999954
  episodes_this_iter: 2
  episodes_total: 6
  experiment_id: b3ea59ed448945129499dc5e0ad92044
  hostname: lino-iMac
  info:
    grad_time_ms: 1172.887
    learner:
      default_policy:
        cur_lr: 0.0005000000237487257
        max_q: 0.9865819811820984
        mean_q: 0.08346664160490036
        mean_td_error: -0.014274551533162594
        min_q: -0.2908143401145935
        model: {}
    max_exploration: 0.5060800000000001
    min_exploration: 0.5060800000000001
    num_steps_sampled: 6048
    num_steps_trained: 4200000
    num_target_updates: 12
    opt_peak_throughput: 8525.968
    opt_samples: 10000.0
    replay_time_ms: 1404.662
    sample_time_ms: 301.996
    update_time_ms: 5.669
  iterations_since_restore: 6
  node_ip: 192.

[2m[36m(pid=10980)[0m Success.
Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_16-06-56
  done: false
  episode_len_mean: 924.2857142857143
  episode_reward_max: 23.909999999999982
  episode_reward_mean: 3.0272857142857044
  episode_reward_min: -15.152999999999954
  episodes_this_iter: 1
  episodes_total: 7
  experiment_id: b3ea59ed448945129499dc5e0ad92044
  hostname: lino-iMac
  info:
    grad_time_ms: 1171.673
    learner:
      default_policy:
        cur_lr: 0.0005000000237487257
        max_q: 1.0474700927734375
        mean_q: 0.23862913250923157
        mean_td_error: 0.010609126649796963
        min_q: -0.2211528718471527
        model: {}
    max_exploration: 0.308512
    min_exploration: 0.308512
    num_steps_sampled: 8064
    num_steps_trained: 5880000
    num_target_updates: 16
    opt_peak_throughput: 8534.808
    opt_samples: 10000.0
    replay_time_ms: 1409.854
    sample_time_ms: 153.703
    update_time_ms: 6.145
  iterations_since_restore: 8
 

[2m[36m(pid=10979)[0m Success.
[2m[36m(pid=10977)[0m Success.
Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_16-10-42
  done: false
  episode_len_mean: 941.1111111111111
  episode_reward_max: 23.909999999999982
  episode_reward_mean: 3.738111111111108
  episode_reward_min: -15.152999999999954
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: b3ea59ed448945129499dc5e0ad92044
  hostname: lino-iMac
  info:
    grad_time_ms: 1135.237
    learner:
      default_policy:
        cur_lr: 0.0005000000237487257
        max_q: 1.1324269771575928
        mean_q: 0.3515506982803345
        mean_td_error: 0.01290928479284048
        min_q: -0.2059687376022339
        model: {}
    max_exploration: 0.20972800000000003
    min_exploration: 0.20972800000000003
    num_steps_sampled: 9072
    num_steps_trained: 6720000
    num_target_updates: 18
    opt_peak_throughput: 8808.734
    opt_samples: 10000.0
    replay_time_ms: 1363.637
    sample_time_ms: 267.877
    

Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_16-14-23
  done: false
  episode_len_mean: 941.1111111111111
  episode_reward_max: 23.909999999999982
  episode_reward_mean: 3.7381111111111087
  episode_reward_min: -15.152999999999954
  episodes_this_iter: 0
  episodes_total: 9
  experiment_id: b3ea59ed448945129499dc5e0ad92044
  hostname: lino-iMac
  info:
    grad_time_ms: 1133.888
    learner:
      default_policy:
        cur_lr: 0.0005000000237487257
        max_q: 1.2864742279052734
        mean_q: 0.43044713139533997
        mean_td_error: 0.017527569085359573
        min_q: -0.21733804047107697
        model: {}
    max_exploration: 0.11094400000000004
    min_exploration: 0.11094400000000004
    num_steps_sampled: 10080
    num_steps_trained: 7560000
    num_target_updates: 20
    opt_peak_throughput: 8819.214
    opt_samples: 10000.0
    replay_time_ms: 1377.797
    sample_time_ms: 145.093
    update_time_ms: 5.773
  iterations_since_restore: 10
  node_ip:

Result for DQN_IssyEnv1-v0_0:
  custom_metrics: {}
  date: 2020-04-14_16-18-08
  done: false
  episode_len_mean: 947.0
  episode_reward_max: 23.909999999999982
  episode_reward_mean: 2.320499999999995
  episode_reward_min: -15.152999999999954
  episodes_this_iter: 1
  episodes_total: 10
  experiment_id: b3ea59ed448945129499dc5e0ad92044
  hostname: lino-iMac
  info:
    grad_time_ms: 1134.956
    learner:
      default_policy:
        cur_lr: 0.0005000000237487257
        max_q: 1.403146743774414
        mean_q: 0.5150392055511475
        mean_td_error: 0.01647764816880226
        min_q: -0.23451370000839233
        model: {}
    max_exploration: 0.020000000000000018
    min_exploration: 0.020000000000000018
    num_steps_sampled: 11088
    num_steps_trained: 8400000
    num_target_updates: 22
    opt_peak_throughput: 8810.911
    opt_samples: 10000.0
    replay_time_ms: 1383.193
    sample_time_ms: 154.746
    update_time_ms: 5.909
  iterations_since_restore: 11
  node_ip: 192.168.0.48