# Train Vehicles Agents


- crée un network à partir d'un fichier .osm et des trajectoires de véhicules
- ajoute un flux de voitures sur les routes
- personnalise un environnement pour le RL
- intègre l'environnement dans RLlib et exécute la simulation


In [1]:
from flow.core.params import VehicleParams
from flow.core.params import NetParams, SumoCarFollowingParams
from flow.core.params import InitialConfig
from flow.core.params import EnvParams
from flow.core.params import SumoParams
from flow.controllers import RLController, IDMController
from flow.networks.IssyOSMNetwork import IssyOSMNetwork
from flow.core.params import InFlows

## Importation du network d'Issy

On vérifie si IssyOSMNetwork est bien importé.

In [2]:
from flow.networks.IssyOSMNetwork import ADDITIONAL_NET_PARAMS, EDGES_DISTRIBUTION

# Sanity check: show both constants exported by the Issy network module, one per line.
print(ADDITIONAL_NET_PARAMS, EDGES_DISTRIBUTION, sep="\n")

{'speed_limit': 50}
['-100822066', '4794817', '4783299#0', '155558218']


## Ajoute les flux de voiture

`IDMController` : The Intelligent Driver Model is a car-following model specifying vehicle dynamics by a differential equation for acceleration $\dot{v}$.

`RLController` : a trainable autonomous vehicle whose actions are dictated by an RL agent.

In [3]:
import MinicityRouter

In [4]:
# Vehicle population: (type id, acceleration controller class, vehicle count).
vehicles = VehicleParams()
for veh_id, controller_cls, count in (
    ("human", IDMController, 20),
    ("rl", RLController, 5),
):
    # Each vehicle type gets its own empty controller-parameter dict.
    vehicles.add(veh_id,
                 acceleration_controller=(controller_cls, {}),
                 num_vehicles=count)

- `vehs_per_hour`: nombre de véhicules par heure, uniformément espacés. Par exemple, comme il y a $60 \times 60 = 3600$ secondes dans une heure, fixer ce paramètre à $\frac{3600}{5}=720$ fait entrer un véhicule dans le network toutes les $5$ secondes.

- `probability`: c'est la probabilité qu'un véhicule entre dans le network à chaque seconde. Par exemple, si on la fixe à $0.2$, alors à chaque seconde de la simulation un véhicule aura une probabilité de $\frac{1}{5}$ d'entrer dans le network.

- `period`: C'est le temps en secondes entre 2 véhicules qui sont insérés. Par exemple, le fixer à $5$ ferait rentrer des véhicules dans le network toutes les $5$ secondes (ce qui équivaut à mettre `vehs_per_hour` à $720$).

<font color='red'>
$\rightarrow$ Exactement 1 seul de ces 3 paramètres doit être configuré !
</font>

In [5]:
inflow = InFlows()

# Settings shared by every entry edge; only the edge itself varies.
human_inflow_settings = dict(veh_type="human",
                             probability=0.3,
                             depart_lane="best",
                             name="human",
                             depart_speed=7)

# Register one identical human-driven inflow per edge of the distribution.
for edge in EDGES_DISTRIBUTION:
    inflow.add(edge=edge, **human_inflow_settings)

## Lance une simulation avec Training RLlib

Pour qu'un environnement puisse être entraîné, il doit être accessible via une importation à partir de flow.envs.


<font color='red'>
Copier l'environnement créé dans un fichier .py, puis importer cet environnement dans `flow.envs.__init__.py`.
Indiquer le chemin absolu du fichier .osm.
</font> 

In [6]:
import json, ray
from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from flow.core.params import VehicleParams, SumoCarFollowingParams

# Alternative kept for reference: Flow's AccelEnv together with its parameter set.
#from flow.envs import AccelEnv as myEnv
#ADDITIONAL_ENV_PARAMS = {"target_velocity": 20,"sort_vehicles": False,"max_accel": 1,"max_decel": 1}
# Environment class actually used for training, imported from the `myenv` module.
from myenv import myEnv
# Extra parameters handed to EnvParams(additional_params=...) further down;
# presumably the acceleration/deceleration bounds read by myEnv — confirm against its code.
ADDITIONAL_ENV_PARAMS = {"max_accel": 2, "max_decel": 2}

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [7]:
# Horizon shared by the Flow EnvParams and the RLlib config; it also scales the
# training batch size (presumably the number of steps per rollout — confirm).
HORIZON = 2000

In [8]:
# Training budget: rollouts collected per training iteration and number of
# parallel rollout workers.
N_ROLLOUTS = 10
N_CPUS = 2

# Absolute path to the OpenStreetMap file the network is built from.
path_file  = '/home/lino/Documents/DQN_CIL4SYS/DQN_CIL4SYS/notebooks/issy.osm'

# Simulator settings.
sumo_params = SumoParams(restart_instance=True, render=False, sim_step=0.1)

# Environment settings.
env_params = EnvParams(horizon=HORIZON, additional_params=ADDITIONAL_ENV_PARAMS)

# Network settings: the OSM source plus the inflows defined earlier.
net_params = NetParams(osm_path=path_file, inflows=inflow)

# Network class used by the experiment.
network = IssyOSMNetwork

# Edges on which the initial vehicles are placed.
initial_config = InitialConfig(edges_distribution=EDGES_DISTRIBUTION)

# Everything Flow needs to build the experiment, gathered in a single dict.
flow_params = {
    "exp_tag": "ISSY_RL_train",
    "env_name": myEnv,
    "network": network,
    "simulator": 'traci',  # simulator that is used by the experiment
    "sim": sumo_params,
    "env": env_params,
    "net": net_params,
    "veh": vehicles,
    "initial": initial_config,
}

def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Builds a PPO configuration from the agent's default config, stores the
    JSON-serialized ``flow_params`` in its ``env_config`` (for replay) and
    registers the Flow environment with RLlib.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    # Function-scope import so the cell stays self-contained.
    from copy import deepcopy

    alg_run   = "PPO"
    agent_cls = get_agent_class(alg_run)
    # Deep copy on purpose: the nested "model" and "env_config" dicts are
    # mutated below, and a shallow copy would write those changes into the
    # class-level default config shared by every trainer in this process.
    config    = deepcopy(agent_cls._default_config)
    config["num_workers"]      = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"]            = 0.999  # discount rate
    config["use_gae"]          = True
    config["lambda"]           = 0.97
    config["kl_target"]        = 0.02
    config["num_sgd_iter"]     = 10
    config['clip_actions']     = False  # FIXME(ev) temporary ray bug
    config["horizon"]          = HORIZON
    config["model"].update({"fcnet_hiddens": [3, 3]})

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)

    return alg_run, gym_name, config


# Build the algorithm name, the registered env name and the trainer config.
alg_run, gym_name, config = setup_exps()

# One CPU per rollout worker plus one more (presumably for the trainer/driver — confirm).
ray.init(num_cpus=N_CPUS + 1)

# Tune experiment specification; `config` is passed as a shallow copy.
exp_tag = dict(
    run=alg_run,
    env=gym_name,
    config=dict(config),
    checkpoint_freq=500,
    checkpoint_at_end=True,
    max_failures=5,
    stop=dict(training_iteration=5),
)

# Launch the experiment under the tag stored in flow_params.
trials = run_experiments({flow_params["exp_tag"]: exp_tag}, verbose=0)

2020-04-13 18:46:58,143	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-04-13_18-46-58_142775_10269/logs.
2020-04-13 18:46:58,254	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:51713 to respond...
2020-04-13 18:46:58,374	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:64799 to respond...
2020-04-13 18:46:58,380	INFO services.py:809 -- Starting Redis shard with 2.93 GB max memory.
2020-04-13 18:46:58,407	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-04-13_18-46-58_142775_10269/logs.
2020-04-13 18:46:58,410	INFO services.py:1475 -- Starting the Plasma object store with 4.4 GB memory using /dev/shm.
2020-04-13 18:46:58,555	INFO trial_runner.py:176 -- Starting a new experiment.
2020-04-13 18:46:58,597	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


[2m[36m(pid=10303)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=10303)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=10303)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=10303)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=10303)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=10303)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=10303)[0m Success.
[2m[36m(pid=10303)[0m 2020-04-13 18:47:02,801	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10303)[0m 2020-04-13 18:47:02.802167: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10303)[0m 2020-04-13 18:47:02,979	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid

[2m[36m(pid=10304)[0m Success.
[2m[36m(pid=10302)[0m Success.
[2m[36m(pid=10304)[0m 2020-04-13 18:47:08,306	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10304)[0m 2020-04-13 18:47:08.320345: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10302)[0m 2020-04-13 18:47:08,420	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10302)[0m 2020-04-13 18:47:08.436405: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10302)[0m 2020-04-13 18:47:08,632	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10302)[0m 
[2m[36m(pid=10302)[0m { 'action_prob': <tf

[2m[36m(pid=10302)[0m Success.
[2m[36m(pid=10302)[0m 2020-04-13 18:47:11,228	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)}}
[2m[36m(pid=10302)[0m 2020-04-13 18:47:11,228	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=10302)[0m 2020-04-13 18:47:11,228	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10302)[0m 2020-04-13 18:47:11,229	INFO sampler.py:407 -- Filtered obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10302)[0m 2020-04-13 18:47:11,230	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=10302)[0m 
[2m[36m(pid=10302)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=10302)[0m                                   'env_id': 0,
[2m[36m(pid=10302)[0m                                   'info': None,
[2m

2020-04-13 18:47:11,575	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10303, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray

[2m[36m(pid=10303)[0m 2020-04-13 18:47:11,571	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=10304)[0m 2020-04-13 18:47:11,549	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 5) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 10) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 50) dtype=float32>: [array([9.21815129e+00, 9.21815129e+00, 3.50118362e+01, 3.50118362e+01,
[2m[36m(pid=10304)[0m        6.08925075e+01, 6.08925075e+01, 3.23526545e+02, 3.49319345e+02,
[2m[36m(pid=10304)[0m        3.75112145e+02, 4.00904951e+02, 4.26698636e+02, 4.52579307e

[2m[36m(pid=10473)[0m Success.
[2m[36m(pid=10473)[0m 2020-04-13 18:47:16,076	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10473)[0m 2020-04-13 18:47:16.077344: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10473)[0m 2020-04-13 18:47:16,246	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10473)[0m 
[2m[36m(pid=10473)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10473)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 5) dtype=float32>,
[2m[36m(pid=10473)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10473)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 10) dtype=float32

[2m[36m(pid=10472)[0m Success.
[2m[36m(pid=10471)[0m Success.
[2m[36m(pid=10472)[0m 2020-04-13 18:47:21,371	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10472)[0m 2020-04-13 18:47:21.383456: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10471)[0m 2020-04-13 18:47:21,457	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10471)[0m 2020-04-13 18:47:21.471704: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10472)[0m 2020-04-13 18:47:21,565	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10472)[0m 
[2m[36m(pid=10472)[0m { 'action_prob': <tf

[2m[36m(pid=10472)[0m 2020-04-13 18:47:24,073	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)}}
[2m[36m(pid=10472)[0m 2020-04-13 18:47:24,074	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=10472)[0m 2020-04-13 18:47:24,074	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10472)[0m 2020-04-13 18:47:24,074	INFO sampler.py:407 -- Filtered obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10472)[0m 2020-04-13 18:47:24,075	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=10472)[0m 
[2m[36m(pid=10472)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=10472)[0m                                   'env_id': 0,
[2m[36m(pid=10472)[0m                                   'info': None,
[2m[36m(pid=10472)[0m               

2020-04-13 18:47:24,653	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10473, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray

[2m[36m(pid=10473)[0m 2020-04-13 18:47:24,650	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=10471)[0m 2020-04-13 18:47:24,645	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 5) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 10) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 50) dtype=float32>: [array([ 1.29034455e+01,  1.29034455e+01,  3.87087252e+01,  3.87087252e+01,
[2m[36m(pid=10471)[0m        -1.00100000e+03, -1.00100000e+03,  3.27211710e+02,  3.53004510e+02,
[2m[36m(pid=10471)[0m         3.78797311e+02,  4.04590189e+02,  4.30387711e+02, 

[2m[36m(pid=10630)[0m Success.
[2m[36m(pid=10630)[0m 2020-04-13 18:47:29,106	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10630)[0m 2020-04-13 18:47:29.107540: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10630)[0m 2020-04-13 18:47:29,268	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10630)[0m 
[2m[36m(pid=10630)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10630)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 5) dtype=float32>,
[2m[36m(pid=10630)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10630)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 10) dtype=float32

[2m[36m(pid=10629)[0m Success.
[2m[36m(pid=10628)[0m Success.
[2m[36m(pid=10629)[0m 2020-04-13 18:47:34,302	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10629)[0m 2020-04-13 18:47:34.317072: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10628)[0m 2020-04-13 18:47:34,376	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10628)[0m 2020-04-13 18:47:34.390899: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10629)[0m 2020-04-13 18:47:34,526	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10629)[0m 
[2m[36m(pid=10629)[0m { 'action_prob': <tf

[2m[36m(pid=10629)[0m 2020-04-13 18:47:37,062	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)}}
[2m[36m(pid=10629)[0m 2020-04-13 18:47:37,062	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=10629)[0m 2020-04-13 18:47:37,062	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10629)[0m 2020-04-13 18:47:37,063	INFO sampler.py:407 -- Filtered obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10629)[0m 2020-04-13 18:47:37,064	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=10629)[0m 
[2m[36m(pid=10629)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=10629)[0m                                   'env_id': 0,
[2m[36m(pid=10629)[0m                                   'info': None,
[2m[36m(pid=10629)[0m               

2020-04-13 18:47:37,471	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10630, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray

[2m[36m(pid=10630)[0m 2020-04-13 18:47:37,468	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=10629)[0m 2020-04-13 18:47:37,464	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 5) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 10) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 50) dtype=float32>: [array([9.21815129e+00, 9.21815129e+00, 3.50118362e+01, 3.50118362e+01,
[2m[36m(pid=10629)[0m        6.08925075e+01, 6.08925075e+01, 3.23526545e+02, 3.49319345e+02,
[2m[36m(pid=10629)[0m        3.75112145e+02, 4.00904951e+02, 4.26698636e+02, 4.52579307e

[2m[36m(pid=10784)[0m Success.
[2m[36m(pid=10784)[0m 2020-04-13 18:47:41,996	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10784)[0m 2020-04-13 18:47:41.997275: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10784)[0m 2020-04-13 18:47:42,153	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10784)[0m 
[2m[36m(pid=10784)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10784)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 5) dtype=float32>,
[2m[36m(pid=10784)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10784)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 10) dtype=float32

[2m[36m(pid=10787)[0m Success.
[2m[36m(pid=10786)[0m Success.
[2m[36m(pid=10787)[0m 2020-04-13 18:47:47,194	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10787)[0m 2020-04-13 18:47:47.207395: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10786)[0m 2020-04-13 18:47:47,224	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10786)[0m 2020-04-13 18:47:47.237690: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10786)[0m 2020-04-13 18:47:47,399	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10786)[0m 
[2m[36m(pid=10786)[0m { 'action_prob': <tf

[2m[36m(pid=10786)[0m 2020-04-13 18:47:49,905	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)}}
[2m[36m(pid=10786)[0m 2020-04-13 18:47:49,906	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=10786)[0m 2020-04-13 18:47:49,906	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10786)[0m 2020-04-13 18:47:49,906	INFO sampler.py:407 -- Filtered obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10786)[0m 2020-04-13 18:47:49,907	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=10786)[0m 
[2m[36m(pid=10786)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=10786)[0m                                   'env_id': 0,
[2m[36m(pid=10786)[0m                                   'info': None,
[2m[36m(pid=10786)[0m               

2020-04-13 18:47:50,265	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10784, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray

[2m[36m(pid=10784)[0m 2020-04-13 18:47:50,262	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=10787)[0m 2020-04-13 18:47:50,255	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 5) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 10) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 50) dtype=float32>: [array([9.21815129e+00, 9.21815129e+00, 3.50118362e+01, 3.50118362e+01,
[2m[36m(pid=10787)[0m        6.08925075e+01, 6.08925075e+01, 3.23526545e+02, 3.49319345e+02,
[2m[36m(pid=10787)[0m        3.75112145e+02, 4.00904951e+02, 4.26698636e+02, 4.52579307e

[2m[36m(pid=10939)[0m Success.
[2m[36m(pid=10939)[0m 2020-04-13 18:47:54,711	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10939)[0m 2020-04-13 18:47:54.712965: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10939)[0m 2020-04-13 18:47:54,879	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10939)[0m 
[2m[36m(pid=10939)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10939)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 5) dtype=float32>,
[2m[36m(pid=10939)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10939)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 10) dtype=float32

[2m[36m(pid=10940)[0m Success.
[2m[36m(pid=10941)[0m Success.
[2m[36m(pid=10940)[0m 2020-04-13 18:47:59,916	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10940)[0m 2020-04-13 18:47:59.930123: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10941)[0m 2020-04-13 18:47:59,946	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10941)[0m 2020-04-13 18:47:59.959917: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=10940)[0m 2020-04-13 18:48:00,141	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10940)[0m 
[2m[36m(pid=10940)[0m { 'action_prob': <tf

[2m[36m(pid=10941)[0m Success.
[2m[36m(pid=10940)[0m Success.
[2m[36m(pid=10940)[0m 2020-04-13 18:48:02,767	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)}}
[2m[36m(pid=10940)[0m 2020-04-13 18:48:02,767	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=10940)[0m 2020-04-13 18:48:02,768	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10940)[0m 2020-04-13 18:48:02,768	INFO sampler.py:407 -- Filtered obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=10940)[0m 2020-04-13 18:48:02,769	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=10940)[0m 
[2m[36m(pid=10940)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=10940)[0m                                   'env_id': 0,
[2m[36m(pid=10940)[0m                    

2020-04-13 18:48:03,232	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10939, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray

[2m[36m(pid=10939)[0m 2020-04-13 18:48:03,229	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=10940)[0m 2020-04-13 18:48:03,231	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 5) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 10) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 50) dtype=float32>: [array([ 1.07543734e+01,  1.07543734e+01,  3.65496296e+01,  3.65496296e+01,
[2m[36m(pid=10940)[0m        -1.00100000e+03, -1.00100000e+03,  3.25062750e+02,  3.50855550e+02,
[2m[36m(pid=10940)[0m         3.76648351e+02,  4.02441173e+02,  4.28235985e+02, 

[2m[36m(pid=11097)[0m Success.
[2m[36m(pid=11097)[0m 2020-04-13 18:48:07,718	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11097)[0m 2020-04-13 18:48:07.719254: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=11097)[0m 2020-04-13 18:48:07,892	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=11097)[0m 
[2m[36m(pid=11097)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=11097)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 5) dtype=float32>,
[2m[36m(pid=11097)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=11097)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 10) dtype=float32

[2m[36m(pid=11139)[0m Success.
[2m[36m(pid=11138)[0m Success.
[2m[36m(pid=11139)[0m 2020-04-13 18:48:13,928	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11139)[0m 2020-04-13 18:48:13.941238: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=11138)[0m 2020-04-13 18:48:14,040	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11138)[0m 2020-04-13 18:48:14.053774: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=11138)[0m 2020-04-13 18:48:14,232	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=11138)[0m 
[2m[36m(pid=11138)[0m { 'action_prob': <tf

[2m[36m(pid=11138)[0m Success.
[2m[36m(pid=11138)[0m 2020-04-13 18:48:16,840	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)}}
[2m[36m(pid=11138)[0m 2020-04-13 18:48:16,840	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=11138)[0m 2020-04-13 18:48:16,841	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=11138)[0m 2020-04-13 18:48:16,841	INFO sampler.py:407 -- Filtered obs: np.ndarray((50,), dtype=float64, min=0.0, max=1997.571, mean=490.27)
[2m[36m(pid=11138)[0m 2020-04-13 18:48:16,842	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=11138)[0m 
[2m[36m(pid=11138)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=11138)[0m                                   'env_id': 0,
[2m[36m(pid=11138)[0m                                   'info': None,
[2m

2020-04-13 18:48:17,263	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=11097, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray

[2m[36m(pid=11097)[0m 2020-04-13 18:48:17,259	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=11138)[0m 2020-04-13 18:48:17,254	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 5) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 10) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 50) dtype=float32>: [array([9.21815129e+00, 9.21815129e+00, 3.50118362e+01, 3.50118362e+01,
[2m[36m(pid=11138)[0m        6.08925075e+01, 6.08925075e+01, 3.23526545e+02, 3.49319345e+02,
[2m[36m(pid=11138)[0m        3.75112145e+02, 4.00904951e+02, 4.26698636e+02, 4.52579307e

TuneError: ('Trials did not complete', [PPO_myEnv-v0_0])

2020-04-13 18:48:27,594	ERROR worker.py:1716 -- listen_error_messages_raylet: Error 111 connecting to 192.168.0.48:51713. Connection refused.
2020-04-13 18:48:27,596	ERROR worker.py:1616 -- print_logs: Error 111 connecting to 192.168.0.48:51713. Connection refused.
2020-04-13 18:48:27,598	ERROR import_thread.py:89 -- ImportThread: Error 111 connecting to 192.168.0.48:51713. Connection refused.
