# Networks from OpenStreetMap

- import networks from OpenStreetMap.
- integrate it and run it in Flow.


In [1]:
# the TestEnv environment is used to simply simulate the network
from flow.envs import TestEnv

# the Experiment class is used for running simulations
from flow.core.experiment import Experiment

# all other imports are standard
from flow.core.params import VehicleParams
from flow.core.params import NetParams, SumoCarFollowingParams
from flow.core.params import InitialConfig
from flow.core.params import EnvParams
from flow.core.params import SumoParams
from flow.controllers import RLController, IDMController
from flow.networks import Network
from flow.core.params import InFlows

## 1. Ajouter un flux de voiture

`IDMController` : The Intelligent Driver Model is a car-following model specifying vehicle dynamics by a differential equation for acceleration $\dot{v}$.

`RLController` : a trainable autuonomous vehicle whose actions are dictated by an RL agent. 

In [2]:
vehicles = VehicleParams()
vehicles.add("human",acceleration_controller=(IDMController, {}))
vehicles.add("rl",acceleration_controller=(RLController, {}),num_vehicles=2)

- `vehs_per_hour`: nombre de vehicule par heure, uniformément espacés. Par exemple, comme il y a $60 \times 60 = 3600$ secondes dans une heure, le parametre $\frac{3600}{5}=720$ va faire rentrer des vehicules dans le network toutes les $5$ secondes.

- `probability`: c'est la probabilité qu'un véhicule entre dans le network toutes les secondes. Par exemple, si on la fixe à $0.2$, alors chaque seconde de la simulation un véhicule aura $\frac{1}{5}$ chance d'entrer dans le network

- `period`: C'est le temps en secondes entre 2 véhicules qui sont insérés. Par exemple, le fixer à $5$ ferait rentrer des véhicules dans le network toutes les $5$ secondes (ce qui équivaut à mettre `vehs_per_hour` à $720$).

<font color='red'>
$\rightarrow$ Exactement 1 seul de ces 3 paramètres doit être configurer !
</font>

In [3]:
inflow = InFlows()

inflow.add(veh_type      = "human",
           edge          = "4794817",
           probability   = 0.3, 
           depart_speed  = 7,
           depart_lane   = "random")

inflow.add(veh_type      = "human",
           edge          = "4783299#0",
           probability   = 0.2,
           depart_speed  = 7,
           depart_lane   = "random")

inflow.add(veh_type       = "human",
           edge           = "-100822066",
           probability    = 0.25,
           depart_speed   = 7,
           depart_lane    = "random")

inflow.add(veh_type       = "rl",
           edge           = "-100822066",
           probability    = 0.05,
           depart_speed   = 7,
           depart_lane    = "random",
           color          = "blue")

inflow.add(veh_type       = "human",
           edge          = "155558218",
           probability   = 0.2,
           depart_speed  = 7,
           depart_lane   = "random")

## 2. Créer des itinéraires

In [4]:
# Specifie les noms des edges du network dont les vehicules peuvent être originaire
EDGES_DISTRIBUTION = [
    "-100822066",
    "4794817",
    "4783299#0",
    "155558218",
]

In [5]:
# créer une nouvelle classe Network pour spécifier les itinéraires possibles
class IssyOSMNetwork(Network):

    def specify_routes(self, net_params):
        return {
            "-100822066": [ #N
                "-100822066",
                "-352962858#1",
                "-352962858#0",
                "-4786940#1",
                 "-4786940#0",
            ],
            
            "4794817" : [ #Loop
                "4794817",
                "4786972#0",
                "4786972#1",
                "4786972#2",
                "4786965#1",
                "4786965#2",
                "4786965#3",
                "4795729",
                "-352962858#1",
                "4795742#0",
                "4795742#1",
                "4786965#3",
                "4786965#4",
                "4786965#5",
            ],
            
            "4783299#0": [    #E
                "4783299#0",
                "4783299#1",
                "4783299#2",
                "4783299#3",
                "4783299#4",
                "4783299#5",
                "4783299#6",
                "4786940#0",
                "4786940#1",
                "352962858#0",
                "352962858#1",
                "100822066",
            ],
            
            "155558218": [
                "155558218",
                "4786940#1",
                "352962858#0",
                "352962858#1",
                "100822066",
            ],     
        }

# Personnaliser un Environnement pour le RL

More accessor objects and methods can be found within the Flow documentation at: http://berkeleyflow.readthedocs.io/en/latest/

In [6]:
# import the base environment class
from flow.envs import Env
from gym.spaces.box import Box
from gym.spaces import Tuple
import numpy as np

## definition de la classe environnement 

In [7]:
class myEnv(Env):
    pass

## fonction action_space
2 actions possibles pour chaque véhicule RL : +1 acceleration ou -1 acceleration

In [8]:
class myEnv(myEnv): # update my environment class

    @property
    def action_space(self):
        num_actions = self.initial_vehicles.num_rl_vehicles
        accel_ub = self.env_params.additional_params["max_accel"]
        accel_lb = - abs(self.env_params.additional_params["max_decel"])

        return Box(low=accel_lb,
                   high=accel_ub,
                   shape=(num_actions,))

## observation_space
2 valeurs observé pour chaque véhicule: sa **position** et sa **vitesse**. En conséquence, nous avons besoin d'un espace d'observation qui est *deux fois plus grand que le nombre de véhicules* dans le network.

In [9]:
class myEnv(myEnv):  # update my environment class

    @property
    def observation_space(self):
        return Box(
            low=0,
            high=float("inf"),
            shape=(2*self.initial_vehicles.num_vehicles,))

## apply_rl_actions
`apply_rl_actions` : transforme les commandes de l'agent RL en actions réelles du simulateur.  

Pour notre exemple, l'agent RL peut spécifier que les accélérations des véhicules RL avec la fonction **apply_acceleration**

In [10]:
class myEnv(myEnv):  # update my environment class

    def _apply_rl_actions(self, rl_actions):
        # the names of all autonomous (RL) vehicles in the network
        rl_ids = self.k.vehicle.get_rl_ids()

        # use the base environment method to convert actions into accelerations for the rl vehicles
        self.k.vehicle.apply_acceleration(rl_ids, rl_actions)

## get_state

`get_state` : extrait des features de l'environnement et fournit ensuite des entrées à la stratégie fournie par l'agent RL. 

In [11]:
class myEnv(myEnv):  # update my environment class

    def get_state(self, **kwargs):
        # the get_ids() method is used to get the names of all vehicles in the network
        ids = self.k.vehicle.get_ids()

        # we use the get_absolute_position method to get the positions of all vehicles
        pos = [self.k.vehicle.get_x_by_id(veh_id) for veh_id in ids]

        # we use the get_speed method to get the velocities of all vehicles
        vel = [self.k.vehicle.get_speed(veh_id) for veh_id in ids]

        # the speeds and positions are concatenated to produce the state
        return np.concatenate((pos, vel))

## compute_reward

`compute_reward` : renvoie la récompense associée à un état donné. 

Ici, la fonction de récompense est la **vitesse moyenne de tous les véhicules actuellement sur le réseau**.

In [12]:
class myEnv(myEnv):  # update my environment class

    def compute_reward(self, rl_actions, **kwargs):
        # the get_ids() method is used to get the names of all vehicles in the network
        ids = self.k.vehicle.get_ids()

        # we next get a list of the speeds of all vehicles in the network
        speeds = self.k.vehicle.get_speed(ids)

        # finally, we return the average of all these speeds as the reward
        return np.mean(speeds)

# Testing 

In [13]:
HORIZON = 200

In [14]:
from flow.core.experiment import Experiment
from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams
from flow.core.params import VehicleParams

```python
# SUMO PARAM
sumo_params = SumoParams(sim_step=0.1, render=True)

# create VEHICLE
vehicles = VehicleParams()
vehicles.add(veh_id="human",
             acceleration_controller=(IDMController, {}),
             num_vehicles=22)

# ENVIRONMENT PARAM
env_params = EnvParams(additional_params=ADDITIONAL_ENV_PARAMS, horizon=HORIZON)

# NETWORK PARAM
additional_net_params = ADDITIONAL_NET_PARAMS.copy()
net_params = NetParams(additional_params=additional_net_params, inflows=inflow, osm_path='issy.osm')

# NETWORK
network = IssyOSMNetwork

# INITIAL CONFIG
initial_config = InitialConfig(edges_distribution=EDGES_DISTRIBUTION) #spacing="random",

# dictionnaire FLOW_PARAM
flow_params = dict( exp_tag  = 'ISSY_RL_test',
                    env_name = myEnv,  # using my new environment for the simulation
                    network  = network,
                    simulator= 'traci',
                    sim      = sumo_params,
                    env      = env_params,
                    net      = net_params,
                    veh      = vehicles,
                    initial  = initial_config)

# create EXPERIMENT with class created
exp = Experiment(flow_params)

# RUN SIMULATION SUMO
_ = exp.run(1)
```

# Training

Pour qu'un environnement puisse être entrainé, l'environnement doit être accessible via l'importation à partir de flow.envs. 


<font color='red'>
Copier l'environnement créé dans un fichier .py et on importe l'environnement dans `flow.envs.__init__.py`.
Mettre le chemin absolu du fichier .osm .
</font> 

In [16]:
# NOTE: only runs if the above procedure have been performed
#from flow.envs import AccelEnv as myEnv
#ADDITIONAL_ENV_PARAMS = {"target_velocity": 20,"sort_vehicles": False,"max_accel": 1,"max_decel": 1}
from myenv import myEnv as myEnv
ADDITIONAL_ENV_PARAMS = {"max_accel": 1, "max_decel": 1}

In [17]:
import json
import ray
from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.networks.ring import ADDITIONAL_NET_PARAMS
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from flow.core.params import VehicleParams, SumoCarFollowingParams


# number of rollouts per training iteration
N_ROLLOUTS = 10
# number of parallel workers
N_CPUS = 2

# SUMO PARAM
sumo_params = SumoParams(sim_step=0.1, render=False, restart_instance=True)

# ENVIRONMENT PARAM
env_params = EnvParams(additional_params=ADDITIONAL_ENV_PARAMS, horizon=HORIZON)

# NETWORK PARAM
additional_net_params = ADDITIONAL_NET_PARAMS.copy()
path_file  = '/home/julien/projet_CIL4SYS/issy.osm'
net_params = NetParams(inflows=inflow, osm_path=path_file) 

# NETWORK
network = IssyOSMNetwork

# INITIAL CONFIG
initial_config = InitialConfig(edges_distribution=EDGES_DISTRIBUTION)

vehicles = VehicleParams()
vehicles.add(veh_id="human", acceleration_controller=(IDMController, {}), num_vehicles=5)

flow_params = dict( exp_tag   = "ISSY_RL_train", 
                    env_name  = myEnv,  
                    network   = IssyOSMNetwork,
                    simulator = 'traci', # simulator that is used by the experiment
                    sim       = sumo_params,
                    env       = env_params,
                    net       = net_params,
                    veh       = vehicles,
                    initial   = initial_config)

def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run   = "PPO"
    agent_cls = get_agent_class(alg_run)
    config    = agent_cls._default_config.copy()
    config["num_workers"]      = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"]            = 0.999  # discount rate
    config["use_gae"]          = True
    config["lambda"]           = 0.97
    config["kl_target"]        = 0.02
    config["num_sgd_iter"]     = 10
    config['clip_actions']     = False  # FIXME(ev) temporary ray bug
    config["horizon"]          = HORIZON
    config["model"].update({"fcnet_hiddens": [3, 3]})
    
    # save the flow params for replay
    flow_json = json.dumps( flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    
    return alg_run, gym_name, config


alg_run, gym_name, config = setup_exps()

ray.init(num_cpus=N_CPUS + 1)

trials = run_experiments({
    flow_params["exp_tag"]: {
        "run": alg_run,
        "env": gym_name,
        "config": {**config},
        "checkpoint_freq": 500,
        "checkpoint_at_end": True,
        "max_failures": 999,
        "stop": {"training_iteration": 5},
    }
})

2020-03-24 11:58:53,433	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-03-24_11-58-53_432989_4325/logs.
2020-03-24 11:58:53,558	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:16702 to respond...
2020-03-24 11:58:53,696	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:21691 to respond...
2020-03-24 11:58:53,704	INFO services.py:809 -- Starting Redis shard with 2.09 GB max memory.
2020-03-24 11:58:53,773	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-03-24_11-58-53_432989_4325/logs.
2020-03-24 11:58:53,787	INFO services.py:1475 -- Starting the Plasma object store with 3.14 GB memory using /dev/shm.
2020-03-24 11:58:54,040	INFO trial_runner.py:176 -- Starting a new experiment.
2020-03-24 11:58:54,203	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/0 GPUs
Memory usage on this node: 1.9/10.5 GB





== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 1.9/10.5 GB
Result logdir: /home/julien/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_myEnv-v0_0:	RUNNING

[2m[36m(pid=4360)[0m Success.
[2m[36m(pid=4360)[0m 2020-03-24 11:59:04,325	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4360)[0m 2020-03-24 11:59:04.328616: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4360)[0m 2020-03-24 11:59:05,358	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=4360)[0m 
[2m[36m(pid=4360)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=4360)[0m   'actions': <tf.Tensor 'default_policy/actions:0' s

[2m[36m(pid=4430)[0m Success.
[2m[36m(pid=4361)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=4430)[0m 2020-03-24 11:59:26,261	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4360)[0m 2020-03-24 11:59:26,387	INFO trainable.py:105 -- _setup took 25.026 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=4430)[0m 2020-03-24 11:59:26.373405: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4430)[0m 2020-03-24 11:59:27.374307: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4430)[0m 2020-03-24 11:59:27.374357: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4430

2020-03-24 11:59:31,455	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=4360, host=julien-Ubuntu)
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/si

[2m[36m(pid=4361)[0m 2020-03-24 11:59:31.329626: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4361)[0m 2020-03-24 11:59:31.329677: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4361)[0m 2020-03-24 11:59:31.329693: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4361)[0m 2020-03-24 11:59:31.329704: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4361)[0m 2020-03-24 11:59:31.329728: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4361)[0m 2020-03-24 11:59:31.329748: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4361)[0m 2020-03-24 11:59:31,331	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=4361)[0m 
[2m[36m(pid=4361)[0m { 'default_policy': ( np.ndarray((1, 0), d



== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 2.6/10.5 GB
Result logdir: /home/julien/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_myEnv-v0_0:	RUNNING, 1 failures: /home/julien/ray_results/ISSY_RL_train/PPO_myEnv-v0_0_2020-03-24_11-58-54d79qpqmw/error_2020-03-24_11-59-31.txt

[2m[36m(pid=4430)[0m Success.
[2m[36m(pid=4430)[0m 2020-03-24 11:59:33.810008: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4430)[0m 2020-03-24 11:59:33.810073: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4430)[0m 2020-03-24 11:59:33.810090: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4430)[0m 2020-03-24 11:59:33.810103: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4430)[0m 2020-03-24 11:59:33.

[2m[36m(pid=4552)[0m Success.
[2m[36m(pid=4552)[0m 2020-03-24 11:59:40,873	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4552)[0m 2020-03-24 11:59:40.875747: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4552)[0m 2020-03-24 11:59:41,213	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=4552)[0m 
[2m[36m(pid=4552)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=4552)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 0) dtype=float32>,
[2m[36m(pid=4552)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=4552)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 0) dtype=float32>,
[

[2m[36m(pid=4625)[0m 2020-03-24 11:59:59,886	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4625)[0m 2020-03-24 11:59:59.946634: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4552)[0m 2020-03-24 12:00:00.516751: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4552)[0m 2020-03-24 12:00:00.516797: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4552)[0m 2020-03-24 12:00:00.516824: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4552)[0m 2020-03-24 12:00:00.788715: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4552)[0m 2020-03-24 12:00:00.788770: W tensorflow/core/common_runtime/allocato

[2m[36m(pid=4625)[0m Success.
[2m[36m(pid=4625)[0m 2020-03-24 12:00:08,805	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((10,), dtype=float64, min=0.0, max=734.822, mean=148.747)}}
[2m[36m(pid=4625)[0m 2020-03-24 12:00:08,806	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=4625)[0m 2020-03-24 12:00:08,811	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((10,), dtype=float64, min=0.0, max=734.822, mean=148.747)
[2m[36m(pid=4625)[0m 2020-03-24 12:00:08,812	INFO sampler.py:407 -- Filtered obs: np.ndarray((10,), dtype=float64, min=0.0, max=734.822, mean=148.747)
[2m[36m(pid=4625)[0m 2020-03-24 12:00:08,819	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=4625)[0m 
[2m[36m(pid=4625)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=4625)[0m                                   'env_id': 0,
[2m[36m(pid=4625)[0m                                   'info': None,
[2m[36m(pid=4

2020-03-24 12:00:09,740	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=4552, host=julien-Ubuntu)
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/home/julien/anaconda3/envs/flow/lib/python3.6/si

[2m[36m(pid=4626)[0m 2020-03-24 12:00:09.555531: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4626)[0m 2020-03-24 12:00:09.555572: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4626)[0m 2020-03-24 12:00:09.558630: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4626)[0m 2020-03-24 12:00:09.558677: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4626)[0m 2020-03-24 12:00:09.558703: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4626)[0m 2020-03-24 12:00:09.558718: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4626)[0m 2020-03-24 12:00:09.558742: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4626)[0m 2020-03-24 12:00:09.558766: E tens

2020-03-24 12:00:09,820	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


[2m[36m(pid=4625)[0m 2020-03-24 12:00:09.907175: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:09.907291: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:09.907319: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:09.907332: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:09.907355: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4625)[0m 2020-03-24 12:00:09.907373: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4625)[0m 2020-03-24 12:00:09,908	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=4625)[0m 
[2m[36m(pid=4625)[0m { 'default_policy': ( np.ndarray((1, 0), d

[2m[36m(pid=4625)[0m 2020-03-24 12:00:10.113442: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:10.117565: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:10.131014: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:10.131058: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:10.131077: E tensorflow/core/common_runtime/bfc_allocator.cc:246] tried to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:10.131093: W tensorflow/core/common_runtime/allocator_retry.cc:32] Request to allocate 0 bytes
[2m[36m(pid=4625)[0m 2020-03-24 12:00:10.131126: E tensorflow/core/common_runtime/bfc_allocator.cc:381] tried to deallocate nullptr
[2m[36m(pid=4625)[0m 2020-03-24 12:00:10.131150: E tens

[2m[36m(pid=4786)[0m 2020-03-24 12:00:40,823	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4786)[0m 2020-03-24 12:00:40.850099: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4787)[0m 2020-03-24 12:00:41,126	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4787)[0m 2020-03-24 12:00:41.289000: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4633)[0m 2020-03-24 12:00:41,887	INFO trainable.py:105 -- _setup took 26.022 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=4786)[0m 2020-03-24 12:00:41,921

2020-03-24 12:00:43,735	ERROR worker.py:1616 -- print_logs: Error 111 connecting to 10.0.2.15:16702. Connection refused.
2020-03-24 12:00:43,738	ERROR import_thread.py:89 -- ImportThread: Error 111 connecting to 10.0.2.15:16702. Connection refused.
2020-03-24 12:00:43,740	ERROR worker.py:1716 -- listen_error_messages_raylet: Error 111 connecting to 10.0.2.15:16702. Connection refused.


KeyboardInterrupt: 

# /!\ Voir tutoriel 10 pour controle des feux