# Train Vehicles Agents


- crée un network à partir d'un fichier .osm et des trajectoires de véhicules
- ajoute un flux de voitures sur les routes
- personnalise un environnement pour le RL
- intègre l'environnement dans RLlib et exécute la simulation


In [1]:
from flow.core.params import VehicleParams
from flow.core.params import NetParams, SumoCarFollowingParams,SumoLaneChangeParams
from flow.core.params import InitialConfig
from flow.core.params import EnvParams
from flow.core.params import SumoParams
from flow.controllers import RLController, IDMController
from flow.networks.IssyOSMNetwork import IssyOSMNetwork
from flow.core.params import InFlows

## Importation du network d'Issy

On vérifie si IssyOSMNetwork est bien importé.

In [2]:
from flow.networks.IssyOSMNetwork import ADDITIONAL_NET_PARAMS, EDGES_DISTRIBUTION

# Sanity check: show the network defaults and the start edges, one per line.
print(ADDITIONAL_NET_PARAMS, EDGES_DISTRIBUTION, sep="\n")

{'speed_limit': 50}
['-100822066', '4794817', '4783299#0', '155558218']


## Ajoute les flux de voiture

`IDMController` : The Intelligent Driver Model is a car-following model specifying vehicle dynamics by a differential equation for acceleration $\dot{v}$.

`RLController` : a trainable autonomous vehicle whose actions are dictated by an RL agent.

In [3]:
import MinicityRouter

In [4]:
# Vehicle types used in the simulation. Both types get the same SUMO
# car-following ("right_of_way") and lane-change (mode 2722) settings and
# differ only in their acceleration controller.
# NOTE(review): no `num_vehicles` is passed, so each type gets whatever count
# `VehicleParams.add` defaults to -- confirm that "rl" vehicles really exist
# at reset, since the logged observations have shape (0,).
vehicles = VehicleParams()
for veh_type, accel_controller in (("human", IDMController),
                                   ("rl", RLController)):
    # Fresh parameter objects for every type, exactly as with separate calls.
    vehicles.add(
        veh_type,
        acceleration_controller=(accel_controller, {}),
        car_following_params=SumoCarFollowingParams(speed_mode="right_of_way"),
        lane_change_params=SumoLaneChangeParams(lane_change_mode=2722),
    )

- `vehs_per_hour`: nombre de véhicules par heure, uniformément espacés. Par exemple, comme il y a $60 \times 60 = 3600$ secondes dans une heure, la valeur $\frac{3600}{5}=720$ fait entrer un véhicule dans le network toutes les $5$ secondes.

- `probability`: c'est la probabilité qu'un véhicule entre dans le network à chaque seconde. Par exemple, si on la fixe à $0.2$, alors à chaque seconde de la simulation un véhicule aura une probabilité de $\frac{1}{5}$ d'entrer dans le network.

- `period`: C'est le temps en secondes entre 2 véhicules qui sont insérés. Par exemple, le fixer à $5$ ferait rentrer des véhicules dans le network toutes les $5$ secondes (ce qui équivaut à mettre `vehs_per_hour` à $720$).

<font color='red'>
$\rightarrow$ Exactement 1 seul de ces 3 paramètres doit être configuré !
</font>

## Lance une simulation avec Training RLlib

Pour qu'un environnement puisse être entraîné, il doit être importable depuis `flow.envs`.


<font color='red'>
Copier l'environnement créé dans un fichier .py, puis l'importer dans `flow.envs.__init__.py`.
Mettre le chemin absolu du fichier .osm.
</font> 

In [5]:
import json, ray
from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from flow.core.params import VehicleParams, SumoCarFollowingParams

# Disabled alternative: use an AccelEnv from flow.envs (with its own
# parameter set) instead of the custom environment imported below.
#from flow.envs import AccelEnv as myEnv
#ADDITIONAL_ENV_PARAMS = {"target_velocity": 20,"sort_vehicles": False,"max_accel": 1,"max_decel": 1}
# Custom environment, imported from the local `myenv` module.
from myenv import myEnv
# Extra environment parameters, passed to EnvParams(additional_params=...).
ADDITIONAL_ENV_PARAMS = {"max_accel": 2, "max_decel": 2}

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [6]:
# Horizon value shared by EnvParams(horizon=...), the RLlib "horizon" setting
# and the train batch size (HORIZON * N_ROLLOUTS).
HORIZON = 2000

In [7]:
# --- Training budget ---
N_ROLLOUTS = 10  # rollouts per training iteration
N_CPUS = 2       # parallel workers

# --- Simulator (SUMO) settings ---
sumo_params = SumoParams(
    sim_step=0.2,
    print_warnings=False,
    render=False,
    restart_instance=True,
    overtake_right=True,
)

# --- Environment settings ---
env_params = EnvParams(
    horizon=HORIZON,
    additional_params=ADDITIONAL_ENV_PARAMS,
)

# --- Network settings ---
# Absolute path to the OSM extract; adjust it to the local checkout.
path_file = "/home/lino/Documents/DQN_CIL4SYS/DQN_CIL4SYS/notebooks/issy.osm"
net_params = NetParams(osm_path=path_file)  # no inflows are passed here

# Network class used to build the network
network = IssyOSMNetwork

# --- Initial placement: vehicles start on the listed edges ---
initial_config = InitialConfig(edges_distribution=EDGES_DISTRIBUTION)

# Everything Flow needs to create the experiment, gathered in one mapping.
flow_params = {
    "exp_tag": "ISSY_RL_trial01",
    "env_name": myEnv,
    "network": network,
    "simulator": "traci",  # simulator that is used by the experiment
    "sim": sumo_params,
    "env": env_params,
    "net": net_params,
    "veh": vehicles,
    "initial": initial_config,
}

def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Starts from the default configuration of the chosen algorithm, overrides
    the training hyperparameters, stores the JSON-serialized ``flow_params``
    in ``env_config`` (for replay) and registers the environment creator
    under its gym name.

    The default configuration is deep-copied: a shallow ``.copy()`` would
    share the nested ``model`` and ``env_config`` dictionaries with the agent
    class, so the updates below would modify the class-level defaults in
    place (and accumulate every time this cell is re-run).

    Reads the module-level ``N_CPUS``, ``N_ROLLOUTS``, ``HORIZON`` and
    ``flow_params``.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    # Local import keeps this cell self-contained.
    from copy import deepcopy

    alg_run = "PPO"
    agent_cls = get_agent_class(alg_run)

    # Independent copy: nested dicts must not alias the class defaults.
    config = deepcopy(agent_cls._default_config)
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config["clip_actions"] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON
    config["model"].update({"fcnet_hiddens": [3, 3]})

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config["env_config"]["flow_params"] = flow_json
    config["env_config"]["run"] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)

    return alg_run, gym_name, config


# Build the RLlib configuration and register the environment.
alg_run, gym_name, config = setup_exps()

# Start Ray with one CPU more than the number of rollout workers.
ray.init(num_cpus=N_CPUS + 1)

# Tune experiment specification: what to run, on which env, and when to stop.
exp_tag = dict(
    run=alg_run,
    env=gym_name,
    config=dict(config),
    checkpoint_freq=500,
    checkpoint_at_end=True,
    max_failures=5,
    stop=dict(training_iteration=5),
)

# Launch the experiment under the tag stored in flow_params.
trials = run_experiments({flow_params["exp_tag"]: exp_tag}, verbose=0)

2020-05-09 17:56:24,837	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-05-09_17-56-24_836698_3536/logs.
2020-05-09 17:56:25,119	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:54752 to respond...
2020-05-09 17:56:25,387	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:22151 to respond...
2020-05-09 17:56:25,392	INFO services.py:809 -- Starting Redis shard with 2.93 GB max memory.
2020-05-09 17:56:25,436	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-05-09_17-56-24_836698_3536/logs.
2020-05-09 17:56:25,440	INFO services.py:1475 -- Starting the Plasma object store with 4.4 GB memory using /dev/shm.
2020-05-09 17:56:26,217	INFO trial_runner.py:176 -- Starting a new experiment.
2020-05-09 17:56:26,286	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


[2m[36m(pid=3572)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=3572)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=3572)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=3572)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=3572)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=3572)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=3572)[0m Success.
[2m[36m(pid=3572)[0m 2020-05-09 17:56:31,615	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=3572)[0m 2020-05-09 17:56:31.633368: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=3572)[0m   out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
[2m[36m(pid=3572)[0m 2020-05-09 17:56:32,170	INFO dynami

[2m[36m(pid=3573)[0m Success.
[2m[36m(pid=3574)[0m Success.
[2m[36m(pid=3572)[0m 2020-05-09 17:56:36.604832: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3572)[0m 2020-05-09 17:56:36.604891: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3572)[0m 2020-05-09 17:56:36.604916: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=3572)[0m 2020-05-09 17:56:36.722543: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3572)[0m 2020-05-09 17:56:36.722612: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3572)[0m 2020-05-09 17:56:36.722632: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=3572)[0m 2020-05-09 17:56:36.756268: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0

[2m[36m(pid=3573)[0m Success.
[2m[36m(pid=3574)[0m Success.
[2m[36m(pid=3574)[0m 2020-05-09 17:56:39,785	INFO sampler.py:304 -- Raw obs from env: {0: {'agent0': np.ndarray((0,), dtype=float64)}}
[2m[36m(pid=3574)[0m 2020-05-09 17:56:39,785	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=3574)[0m 2020-05-09 17:56:39,786	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=3574)[0m 2020-05-09 17:56:39,786	INFO sampler.py:407 -- Filtered obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=3574)[0m 2020-05-09 17:56:39,787	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=3574)[0m 
[2m[36m(pid=3574)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=3574)[0m                                   'env_id': 0,
[2m[36m(pid=3574)[0m                                   'info': None,
[2m[36m(pid=3574)[0m                                   'obs': np.ndarray((0,), dtype=float64

2020-05-09 17:56:40,075	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=3572, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/

[2m[36m(pid=3572)[0m 2020-05-09 17:56:40,038	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=3574)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=3573)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=3741)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=3741)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=3741)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=3741)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=3741)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=3741)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=3741)[0m Success.
[2m[36m(pid=3741)[0m 2020-05-09 17:56:44,369	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=3741)[0m 202

[2m[36m(pid=3783)[0m Success.
[2m[36m(pid=3782)[0m Success.
[2m[36m(pid=3741)[0m 2020-05-09 17:56:49.511057: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3741)[0m 2020-05-09 17:56:49.511105: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3741)[0m 2020-05-09 17:56:49.511127: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=3741)[0m 2020-05-09 17:56:49.625369: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3741)[0m 2020-05-09 17:56:49.625437: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3741)[0m 2020-05-09 17:56:49.625457: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=3741)[0m 2020-05-09 17:56:49.653340: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0

[2m[36m(pid=3783)[0m Success.
[2m[36m(pid=3782)[0m Success.


2020-05-09 17:56:53,218	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=3741, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/

[2m[36m(pid=3783)[0m 2020-05-09 17:56:53,124	INFO sampler.py:304 -- Raw obs from env: {0: {'agent0': np.ndarray((0,), dtype=float64)}}
[2m[36m(pid=3783)[0m 2020-05-09 17:56:53,125	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=3783)[0m 2020-05-09 17:56:53,125	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=3783)[0m 2020-05-09 17:56:53,126	INFO sampler.py:407 -- Filtered obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=3783)[0m 2020-05-09 17:56:53,127	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=3783)[0m 
[2m[36m(pid=3783)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=3783)[0m                                   'env_id': 0,
[2m[36m(pid=3783)[0m                                   'info': None,
[2m[36m(pid=3783)[0m                                   'obs': np.ndarray((0,), dtype=float64),
[2m[36m(pid=3783)[0m                                   'prev

[2m[36m(pid=3788)[0m Success.
[2m[36m(pid=3788)[0m 2020-05-09 17:56:56,870	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=3788)[0m 2020-05-09 17:56:56.871515: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=3788)[0m   out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
[2m[36m(pid=3788)[0m 2020-05-09 17:56:57,016	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=3788)[0m 
[2m[36m(pid=3788)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=3788)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 0) dtype=float32>,
[2m[36m(pid=3788)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=3788)[0m   'behaviour_lo

[2m[36m(pid=3939)[0m Success.
[2m[36m(pid=3940)[0m Success.
[2m[36m(pid=3788)[0m 2020-05-09 17:57:01.525251: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3788)[0m 2020-05-09 17:57:01.525313: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3788)[0m 2020-05-09 17:57:01.525341: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=3788)[0m 2020-05-09 17:57:01.637459: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3788)[0m 2020-05-09 17:57:01.637517: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3788)[0m 2020-05-09 17:57:01.637541: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=3788)[0m 2020-05-09 17:57:01.665647: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0

[2m[36m(pid=3939)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=3940)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=3939)[0m 2020-05-09 17:57:03.689813: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3939)[0m 2020-05-09 17:57:03.689873: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3939)[0m 2020-05-09 17:57:03.689888: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3939)[0m 2020-05-09 17:57:03.689898: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3940)[0m 2020-05-09 17:57:03.741720: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3940)[0m 2020-05-09 17:57:03.741839: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes

2020-05-09 17:57:05,051	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=3788, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/

[2m[36m(pid=3939)[0m 2020-05-09 17:57:04,967	INFO sampler.py:304 -- Raw obs from env: {0: {'agent0': np.ndarray((0,), dtype=float64)}}
[2m[36m(pid=3939)[0m 2020-05-09 17:57:04,967	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=3939)[0m 2020-05-09 17:57:04,968	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=3939)[0m 2020-05-09 17:57:04,968	INFO sampler.py:407 -- Filtered obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=3939)[0m 2020-05-09 17:57:04,969	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=3939)[0m 
[2m[36m(pid=3939)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=3939)[0m                                   'env_id': 0,
[2m[36m(pid=3939)[0m                                   'info': None,
[2m[36m(pid=3939)[0m                                   'obs': np.ndarray((0,), dtype=float64),
[2m[36m(pid=3939)[0m                                   'prev

[2m[36m(pid=3946)[0m Success.
[2m[36m(pid=3946)[0m 2020-05-09 17:57:08,907	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=3946)[0m 2020-05-09 17:57:08.908434: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=3946)[0m   out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
[2m[36m(pid=3946)[0m 2020-05-09 17:57:09,065	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=3946)[0m 
[2m[36m(pid=3946)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=3946)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 0) dtype=float32>,
[2m[36m(pid=3946)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=3946)[0m   'behaviour_lo

[2m[36m(pid=4095)[0m Success.
[2m[36m(pid=4094)[0m Success.
[2m[36m(pid=3946)[0m 2020-05-09 17:57:13.513154: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3946)[0m 2020-05-09 17:57:13.513201: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3946)[0m 2020-05-09 17:57:13.513234: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=3946)[0m 2020-05-09 17:57:13.626723: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=3946)[0m 2020-05-09 17:57:13.626779: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=3946)[0m 2020-05-09 17:57:13.626803: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=3946)[0m 2020-05-09 17:57:13.654447: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0

[2m[36m(pid=4095)[0m Success.
[2m[36m(pid=4094)[0m Success.


2020-05-09 17:57:16,981	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=3946, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/

[2m[36m(pid=4095)[0m 2020-05-09 17:57:16,903	INFO sampler.py:304 -- Raw obs from env: {0: {'agent0': np.ndarray((0,), dtype=float64)}}
[2m[36m(pid=4095)[0m 2020-05-09 17:57:16,903	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=4095)[0m 2020-05-09 17:57:16,904	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=4095)[0m 2020-05-09 17:57:16,904	INFO sampler.py:407 -- Filtered obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=4095)[0m 2020-05-09 17:57:16,905	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=4095)[0m 
[2m[36m(pid=4095)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=4095)[0m                                   'env_id': 0,
[2m[36m(pid=4095)[0m                                   'info': None,
[2m[36m(pid=4095)[0m                                   'obs': np.ndarray((0,), dtype=float64),
[2m[36m(pid=4095)[0m                                   'prev

[2m[36m(pid=4100)[0m Success.
[2m[36m(pid=4100)[0m 2020-05-09 17:57:20,686	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4100)[0m 2020-05-09 17:57:20.687481: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=4100)[0m   out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
[2m[36m(pid=4100)[0m 2020-05-09 17:57:20.808945: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4100)[0m 2020-05-09 17:57:20.808980: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4100)[0m 2020-05-09 17:57:20.825862: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4100)[0m 2020-05-09 17:57:20,834	INFO dynamic_tf_policy.py:324 -- Initializing loss function 

[2m[36m(pid=4250)[0m Success.
[2m[36m(pid=4249)[0m Success.
[2m[36m(pid=4100)[0m 2020-05-09 17:57:25.399649: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4100)[0m 2020-05-09 17:57:25.399705: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4100)[0m 2020-05-09 17:57:25.399726: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4100)[0m 2020-05-09 17:57:25.510345: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4100)[0m 2020-05-09 17:57:25.510401: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4100)[0m 2020-05-09 17:57:25.510420: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4100)[0m 2020-05-09 17:57:25.538210: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0

[2m[36m(pid=4249)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=4250)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=4249)[0m 2020-05-09 17:57:27.493283: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4249)[0m 2020-05-09 17:57:27.493322: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4249)[0m 2020-05-09 17:57:27.493341: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4249)[0m 2020-05-09 17:57:27.493350: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4249)[0m 2020-05-09 17:57:27.493360: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4249)[0m 2020-05-09 17:57:27.493367: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes

[2m[36m(pid=4250)[0m Success.
[2m[36m(pid=4249)[0m Success.


2020-05-09 17:57:28,839	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=4100, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/

[2m[36m(pid=4249)[0m 2020-05-09 17:57:28,768	INFO sampler.py:304 -- Raw obs from env: {0: {'agent0': np.ndarray((0,), dtype=float64)}}
[2m[36m(pid=4249)[0m 2020-05-09 17:57:28,768	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=4249)[0m 2020-05-09 17:57:28,769	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=4249)[0m 2020-05-09 17:57:28,769	INFO sampler.py:407 -- Filtered obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=4249)[0m 2020-05-09 17:57:28,770	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=4249)[0m 
[2m[36m(pid=4249)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=4249)[0m                                   'env_id': 0,
[2m[36m(pid=4249)[0m                                   'info': None,
[2m[36m(pid=4249)[0m                                   'obs': np.ndarray((0,), dtype=float64),
[2m[36m(pid=4249)[0m                                   'prev

[2m[36m(pid=4255)[0m Success.
[2m[36m(pid=4255)[0m 2020-05-09 17:57:32,387	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4255)[0m 2020-05-09 17:57:32.388642: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
[2m[36m(pid=4255)[0m   out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
[2m[36m(pid=4255)[0m 2020-05-09 17:57:32,536	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=4255)[0m 
[2m[36m(pid=4255)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=4255)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 0) dtype=float32>,
[2m[36m(pid=4255)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=4255)[0m   'behaviour_lo

[2m[36m(pid=4405)[0m Success.
[2m[36m(pid=4404)[0m Success.
[2m[36m(pid=4255)[0m 2020-05-09 17:57:37.307121: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4255)[0m 2020-05-09 17:57:37.307179: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4255)[0m 2020-05-09 17:57:37.307204: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4255)[0m 2020-05-09 17:57:37.422562: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4255)[0m 2020-05-09 17:57:37.422628: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4255)[0m 2020-05-09 17:57:37.422658: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4255)[0m 2020-05-09 17:57:37.451219: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0

[2m[36m(pid=4405)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=4404)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=4405)[0m 2020-05-09 17:57:39.486133: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4405)[0m 2020-05-09 17:57:39.486175: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4405)[0m 2020-05-09 17:57:39.486196: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4405)[0m 2020-05-09 17:57:39.486207: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4405)[0m 2020-05-09 17:57:39.486218: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4405)[0m 2020-05-09 17:57:39.486227: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes

2020-05-09 17:57:40,876	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=4255, host=lino-iMac)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/lino/anaconda3/envs/flow/lib/python3.6/site-packages/ray/

[2m[36m(pid=4405)[0m 2020-05-09 17:57:40,797	INFO sampler.py:304 -- Raw obs from env: {0: {'agent0': np.ndarray((0,), dtype=float64)}}
[2m[36m(pid=4405)[0m 2020-05-09 17:57:40,797	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=4405)[0m 2020-05-09 17:57:40,798	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=4405)[0m 2020-05-09 17:57:40,798	INFO sampler.py:407 -- Filtered obs: np.ndarray((0,), dtype=float64)
[2m[36m(pid=4405)[0m 2020-05-09 17:57:40,799	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=4405)[0m 
[2m[36m(pid=4405)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=4405)[0m                                   'env_id': 0,
[2m[36m(pid=4405)[0m                                   'info': None,
[2m[36m(pid=4405)[0m                                   'obs': np.ndarray((0,), dtype=float64),
[2m[36m(pid=4405)[0m                                   'prev

TuneError: ('Trials did not complete', [PPO_myEnv-v0_0])