# Networks from OpenStreetMap

- import networks from OpenStreetMap.
- integrate it and run it in Flow.


In [1]:
# the TestEnv environment is used to simply simulate the network
from flow.envs import TestEnv

# the Experiment class is used for running simulations
from flow.core.experiment import Experiment

# all other imports are standard
from flow.core.params import VehicleParams
from flow.core.params import NetParams, SumoCarFollowingParams
from flow.core.params import InitialConfig
from flow.core.params import EnvParams
from flow.core.params import SumoParams

from flow.networks import Network

## 1. Ajouter un flux de voitures

In [2]:
from flow.core.params import InFlows

# One entry per inflow, in the order they are registered. Every inflow spawns
# "human" vehicles; the remaining keys are forwarded unchanged to InFlows.add.
INFLOW_SPECS = [
    # fixed hourly rate, fixed departure speed
    dict(edge="4794817",
         vehs_per_hour=100,
         depart_speed=10,
         color="green"),
    # fixed hourly rate, random lane and speed
    dict(edge="4783299#0",
         vehs_per_hour=100,
         depart_lane="random",
         depart_speed="random",
         color="red"),
    # probabilistic insertion, limited to 30 vehicles
    dict(edge="-100822066",
         probability=0.1,
         depart_lane=1,       # left lane
         depart_speed="max",
         begin=60,            # 1 minute
         number=30,
         color="white"),
    # periodic insertion, random lane and speed
    dict(edge="155558218",
         period=2,
         depart_lane="random",
         depart_speed="random",
         color="white"),
]

inflow = InFlows()
for inflow_spec in INFLOW_SPECS:
    inflow.add(veh_type="human", **inflow_spec)

## 2. Créer des itinéraires

In [3]:
# Names of the network edges on which vehicles may originate
# (passed to InitialConfig as `edges_distribution` further down).
EDGES_DISTRIBUTION = [
    "-100822066",
    "4794817",
    "4783299#0",
    "155558218",
]

In [4]:
# Network subclass that declares the routes vehicles may follow
class IssyOSMNetwork(Network):
    """Network whose routes are hard-coded per starting edge."""

    def specify_routes(self, net_params):
        """Return the route followed from each starting edge.

        Parameters
        ----------
        net_params : object
            Accepted for interface compatibility; not used here.

        Returns
        -------
        dict
            Maps a starting edge id to the ordered list of edge ids making up
            the route. Each route begins with its own key.
        """
        # route labelled "N" by the original author
        north_route = [
            "-100822066",
            "-352962858#1",
            "-352962858#0",
            "-4786940#1",
            "-4786940#0",
        ]

        # route labelled "Loop": edge "4786965#3" is listed twice
        loop_route = [
            "4794817",
            "4786972#0",
            "4786972#1",
            "4786972#2",
            "4786965#1",
            "4786965#2",
            "4786965#3",
            "4795729",
            "-352962858#1",
            "4795742#0",
            "4795742#1",
            "4786965#3",
            "4786965#4",
            "4786965#5",
        ]

        # route labelled "E"
        east_route = [
            "4783299#0",
            "4783299#1",
            "4783299#2",
            "4783299#3",
            "4783299#4",
            "4783299#5",
            "4783299#6",
            "4786940#0",
            "4786940#1",
            "352962858#0",
            "352962858#1",
            "100822066",
        ]

        # unlabelled route; shares its last four edges with the "E" route
        fourth_route = [
            "155558218",
            "4786940#1",
            "352962858#0",
            "352962858#1",
            "100822066",
        ]

        return {
            "-100822066": north_route,
            "4794817": loop_route,
            "4783299#0": east_route,
            "155558218": fourth_route,
        }

# Personnaliser un Environnement pour le RL

More accessor objects and methods can be found within the Flow documentation at: http://berkeleyflow.readthedocs.io/en/latest/

In [5]:
# import the base environment class
from flow.envs import Env
from gym.spaces.box import Box
from gym.spaces import Tuple
import numpy as np

## definition de la classe environnement 

In [6]:
class myEnv(Env):
    """Empty environment skeleton; later cells subclass it to add methods."""

## fonction action_space
Une action continue par véhicule RL : une accélération comprise entre `-max_decel` et `max_accel` (ici entre -1 et +1).

In [7]:
class myEnv(myEnv):  # extend the environment class built so far

    @property
    def action_space(self):
        """Continuous action space with one acceleration per RL vehicle.

        Returns
        -------
        Box
            Shape ``(num_rl_vehicles,)``; each entry is bounded below by
            ``-abs(max_decel)`` and above by ``max_accel``, both read from
            ``env_params.additional_params``.
        """
        n_rl = self.initial_vehicles.num_rl_vehicles
        extra = self.env_params.additional_params
        upper = extra["max_accel"]
        # abs() makes the lower bound negative whatever the sign of max_decel
        lower = -abs(extra["max_decel"])

        return Box(low=lower, high=upper, shape=(n_rl,))

## observation_space
2 valeurs observées pour chaque véhicule : sa **position** et sa **vitesse**. En conséquence, nous avons besoin d'un espace d'observation *deux fois plus grand que le nombre de véhicules* dans le network.

In [8]:
class myEnv(myEnv):  # extend the environment class built so far

    @property
    def observation_space(self):
        """Observation space holding two values per initial vehicle.

        Returns
        -------
        Box
            Shape ``(2 * num_vehicles,)`` with entries in ``[0, inf)``, where
            ``num_vehicles`` comes from ``self.initial_vehicles``.
        """
        n_obs = 2 * self.initial_vehicles.num_vehicles
        return Box(low=0, high=float("inf"), shape=(n_obs,))

## apply_rl_actions
`apply_rl_actions` : transforme les commandes de l'agent RL en actions réelles du simulateur.  

Pour notre exemple, l'agent RL ne peut spécifier que les accélérations des véhicules RL, appliquées avec la fonction **apply_acceleration**.

In [9]:
class myEnv(myEnv):  # extend the environment class built so far

    def _apply_rl_actions(self, rl_actions):
        """Apply the agent's actions as accelerations of the RL vehicles.

        Parameters
        ----------
        rl_actions : array_like
            Forwarded as-is to the vehicle kernel's ``apply_acceleration``,
            paired with the ids returned by ``get_rl_ids``.
        """
        vehicle_kernel = self.k.vehicle
        # ids of all autonomous (RL) vehicles, then their commanded accelerations
        vehicle_kernel.apply_acceleration(vehicle_kernel.get_rl_ids(), rl_actions)

## get_state

`get_state` : extrait des features de l'environnement et fournit ensuite des entrées à la stratégie fournie par l'agent RL. 

In [10]:
class myEnv(myEnv):  # extend the environment class built so far

    def get_state(self, **kwargs):
        """Return a fixed-length vector of vehicle positions and speeds.

        The vector has ``2 * self.initial_vehicles.num_vehicles`` entries:
        first the positions, then the speeds. This is the size declared by
        ``observation_space``. The number of vehicles reported by the kernel
        can differ from the initial count (inflows are configured for this
        network), so the vehicle list is truncated to the declared size and
        any unused slots are left at zero.

        Returns
        -------
        numpy.ndarray
            Positions followed by speeds, in the order ``get_ids`` returns
            the vehicles.
        """
        num_slots = self.initial_vehicles.num_vehicles

        # get_ids() gives the names of all vehicles in the network; keep at
        # most as many as the observation space has room for
        ids = list(self.k.vehicle.get_ids())[:num_slots]

        pos = np.zeros(num_slots)
        vel = np.zeros(num_slots)
        for slot, veh_id in enumerate(ids):
            # get_x_by_id gives the position of a vehicle
            pos[slot] = self.k.vehicle.get_x_by_id(veh_id)
            # get_speed gives the velocity of a vehicle
            vel[slot] = self.k.vehicle.get_speed(veh_id)

        # the positions and speeds are concatenated to produce the state
        return np.concatenate((pos, vel))

## compute_reward

`compute_reward` : renvoie la récompense associée à un état donné. 

Ici, la fonction de récompense est la **vitesse moyenne de tous les véhicules actuellement sur le réseau**.

In [11]:
class myEnv(myEnv):  # extend the environment class built so far

    def compute_reward(self, rl_actions, **kwargs):
        """Return the mean speed of all vehicles currently in the network.

        Parameters
        ----------
        rl_actions : array_like
            Not used by this reward.

        Returns
        -------
        float
            Average of the speeds reported by the vehicle kernel, or 0.0 when
            no vehicle is present (avoids the NaN and RuntimeWarning that
            ``np.mean`` produces on an empty sequence).
        """
        # get_ids() gives the names of all vehicles in the network
        ids = self.k.vehicle.get_ids()
        if len(ids) == 0:
            return 0.0

        # speeds of all those vehicles, averaged to form the reward
        speeds = self.k.vehicle.get_speed(ids)
        return np.mean(speeds)

# Testing 

In [12]:
# Horizon given to EnvParams and to the RLlib config; also used to size the
# training batch (HORIZON * N_ROLLOUTS).
HORIZON = 1000

In [13]:
from flow.controllers import IDMController, ContinuousRouter
from flow.core.experiment import Experiment
from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams
from flow.core.params import VehicleParams
from flow.networks.ring import RingNetwork, ADDITIONAL_NET_PARAMS

# Acceleration bounds read by myEnv.action_space:
# high = max_accel, low = -abs(max_decel).
ADDITIONAL_ENV_PARAMS = {"max_accel": 1, "max_decel": 1}

In [14]:
from flow.controllers import IDMController, ContinuousRouter
from flow.core.experiment import Experiment
from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams
from flow.core.params import VehicleParams
from flow.networks.ring import RingNetwork, ADDITIONAL_NET_PARAMS

ADDITIONAL_ENV_PARAMS = {"max_accel": 1, "max_decel": 1}

# Simulator settings.
# NOTE(review): the recorded output of this cell shows SUMO closing the TraCI
# connection at start-up; the training cell uses render=False. Confirm that
# rendering is usable in the environment where this notebook runs.
sumo_params = SumoParams(
    render=True,
    sim_step=0.1,
)

# Vehicles: 22 "human" vehicles driven by the IDM controller.
vehicles = VehicleParams()
vehicles.add(
    veh_id="human",
    num_vehicles=22,
    acceleration_controller=(IDMController, {}),
)

# Environment settings.
env_params = EnvParams(
    horizon=HORIZON,
    additional_params=ADDITIONAL_ENV_PARAMS,
)

# Network settings.
# NOTE(review): the OSM file is referenced by a relative path here, while the
# training cell uses '/app/notebooks/networks/issy.osm'; verify that it
# resolves from the notebook's working directory.
additional_net_params = ADDITIONAL_NET_PARAMS.copy()
net_params = NetParams(
    osm_path='issy.osm',
    inflows=inflow,
    additional_params=additional_net_params,
)

# Network class.
network = IssyOSMNetwork

# Initial vehicle placement, restricted to the listed edges.
initial_config = InitialConfig(edges_distribution=EDGES_DISTRIBUTION)

# Everything the experiment needs, gathered in one dictionary.
flow_params = {
    'exp_tag': 'ISSY_RL_test',
    'env_name': myEnv,  # the custom environment defined above
    'network': network,
    'simulator': 'traci',
    'sim': sumo_params,
    'env': env_params,
    'net': net_params,
    'veh': vehicles,
    'initial': initial_config,
}

# Build the experiment and run it once.
exp = Experiment(flow_params)
_ = exp.run(1)


Error during start: Traceback (most recent call last):
  File "/app/flow/flow/core/kernel/simulation/traci.py", line 158, in start_simulation
    traci_connection.setOrder(0)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/traci/connection.py", line 348, in setOrder
    self._sendExact()
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/traci/connection.py", line 99, in _sendExact
    raise FatalTraCIError("connection closed by SUMO")
traci.exceptions.FatalTraCIError: connection closed by SUMO



KeyboardInterrupt: 

# Training

Pour qu'un environnement puisse être entraîné, il doit être importable à partir de `flow.envs`. <font color='red'> On copie alors l'environnement créé dans un fichier .py et on importe cet environnement dans `flow/envs/__init__.py`. </font> 

In [None]:
# NOTE: only runs if the above procedure have been performed
from flow.envs import myEnv

In [14]:
from myEnv import MyEnv as myEnv

In [None]:
# (incomplete import statement removed: a bare "from" is a SyntaxError and
# stops the notebook when all cells are run)

In [15]:
import json
import ray
from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

# from flow.networks.ring import RingNetwork, ADDITIONAL_NET_PARAMS
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from flow.core.params import VehicleParams, SumoCarFollowingParams
from flow.controllers import RLController, IDMController, ContinuousRouter


# rollouts collected per training iteration
N_ROLLOUTS = 20
# parallel workers
N_CPUS = 2


# Simulator settings: no rendering, restart_instance enabled.
sumo_params = SumoParams(
    render=False,
    restart_instance=True,
    sim_step=0.1,
)

# Vehicles: 1 RL-controlled vehicle and 21 "human" vehicles driven by IDM.
vehicles = VehicleParams()
vehicles.add(
    veh_id="rl",
    num_vehicles=1,
    acceleration_controller=(RLController, {}),
)
vehicles.add(
    veh_id="human",
    num_vehicles=21,
    acceleration_controller=(IDMController, {}),
)

# Environment settings.
env_params = EnvParams(
    horizon=HORIZON,
    additional_params=ADDITIONAL_ENV_PARAMS,
)

# Network settings.
additional_net_params = ADDITIONAL_NET_PARAMS.copy()
net_params = NetParams(
    osm_path='/app/notebooks/networks/issy.osm',
    inflows=inflow,
    additional_params=additional_net_params,
)

# Network class.
network = IssyOSMNetwork

# Initial vehicle placement, restricted to the listed edges.
initial_config = InitialConfig(edges_distribution=EDGES_DISTRIBUTION)

# Everything the training run needs, gathered in one dictionary.
flow_params = {
    "exp_tag": "ISSY_RL_train",
    "env_name": myEnv,
    "network": network,
    "simulator": 'traci',  # simulator that is used by the experiment
    "sim": sumo_params,
    "env": env_params,
    "net": net_params,
    "veh": vehicles,
    "initial": initial_config,
}

def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Builds a PPO configuration from the agent class defaults, stores the
    JSON-serialized ``flow_params`` in it for replay, and registers the Flow
    environment with RLlib.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    # local import so this cell does not depend on an edit to the import cell
    from copy import deepcopy

    alg_run = "PPO"
    agent_cls = get_agent_class(alg_run)

    # Deep copy: the nested "model" and "env_config" dicts are modified below.
    # A shallow copy would share them with the agent class's default config
    # and leak these edits into it.
    config = deepcopy(agent_cls._default_config)
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [3, 3]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)

    return alg_run, gym_name, config


alg_run, gym_name, config = setup_exps()

# ignore_reinit_error: re-running this cell in the same kernel would otherwise
# fail because Ray is already initialized.
ray.init(num_cpus=N_CPUS + 1,
         object_store_memory=1000000000,
         ignore_reinit_error=True)

trials = run_experiments({
    flow_params["exp_tag"]: {
        "run": alg_run,
        "env": gym_name,
        "config": {
            **config
        },
        "checkpoint_freq": 20,
        "checkpoint_at_end": True,
        # NOTE(review): the recorded output shows the trial being restarted
        # after each identical failure; consider a much lower value while
        # debugging so the first error is not buried.
        "max_failures": 999,
        "stop": {
            "training_iteration": 4,
        },
    }
})

2020-03-23 15:23:57,976	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-03-23_15-23-57_975317_4627/logs.
2020-03-23 15:23:58,100	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:41309 to respond...
2020-03-23 15:23:58,250	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:28026 to respond...
2020-03-23 15:23:58,261	INFO services.py:809 -- Starting Redis shard with 0.21 GB max memory.
2020-03-23 15:23:58,355	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-03-23_15-23-57_975317_4627/logs.
2020-03-23 15:23:58,372	INFO services.py:1475 -- Starting the Plasma object store with 1.0 GB memory using /tmp.
2020-03-23 15:23:58,701	INFO trial_runner.py:176 -- Starting a new experiment.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.5/1.0 GB



2020-03-23 15:23:59,005	ERROR log_sync.py:28 -- Log sync requires rsync to be installed.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.5/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING

[2m[36m(pid=4662)[0m Success.
[2m[36m(pid=4662)[0m 2020-03-23 15:24:11,938	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4662)[0m 2020-03-23 15:24:11.940332: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4662)[0m 2020-03-23 15:24:12,461	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=4662)[0m 
[2m[36m(pid=4662)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=4662)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?,

[2m[36m(pid=4720)[0m 2020-03-23 15:24:52,156	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4720)[0m 2020-03-23 15:24:52.313649: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4716)[0m 2020-03-23 15:24:53,313	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=4716)[0m 
[2m[36m(pid=4716)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=4716)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=4716)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=4716)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=4716)[0m   'dones': 

2020-03-23 15:25:10,515	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=4662, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=4662)[0m 2020-03-23 15:25:10,320	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=4716)[0m 2020-03-23 15:25:10,289	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.17427231e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=4716)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=4716)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:25:10,763	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 1 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-25-10.txt

[2m[36m(pid=4830)[0m Success.
[2m[36m(pid=4830)[0m 2020-03-23 15:25:25,289	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4830)[0m 2020-03-23 15:25:25.290918: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4830)[0m 2020-03-23 15:25:25,806	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=4830)[0m 
[2m[36m(pid=4830)[0m { 'action_prob': <tf.Tensor 'default_policy/action_p

[2m[36m(pid=4875)[0m Success.
[2m[36m(pid=4830)[0m 2020-03-23 15:25:59,880	INFO trainable.py:105 -- _setup took 38.385 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=4875)[0m 2020-03-23 15:26:00,830	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=4875)[0m 2020-03-23 15:26:00.956261: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=4871)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=4875)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=4871)[0m 2020-03-23 15:26:09,224	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=4871)[0m Success.
[2m[36m(pid=4875)[0m Success.
[2m[36m(

2020-03-23 15:26:12,287	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=4830, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=4830)[0m 2020-03-23 15:26:12,270	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=4871)[0m 2020-03-23 15:26:12,255	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.33396801e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=4871)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=4871)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:26:12,393	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 2 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-26-12.txt

[2m[36m(pid=4875)[0m 2020-03-23 15:26:15,943	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.56470910e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=4875)[0m    

[2m[36m(pid=5026)[0m Success.
[2m[36m(pid=5026)[0m 2020-03-23 15:26:59,647	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=5026)[0m 2020-03-23 15:26:59.777047: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=5030)[0m Success.
[2m[36m(pid=5026)[0m 2020-03-23 15:27:01,545	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=5026)[0m 
[2m[36m(pid=5026)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5026)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=5026)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5026)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0

[2m[36m(pid=5026)[0m 2020-03-23 15:27:14,825	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=5026)[0m 2020-03-23 15:27:14,827	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=5026)[0m 2020-03-23 15:27:14,832	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5026)[0m 2020-03-23 15:27:14,834	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5026)[0m 2020-03-23 15:27:14,842	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=5026)[0m 
[2m[36m(pid=5026)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=5026)[0m                                   'env_id': 0,
[2m[36m(pid=5026)[0m                                   'info': None,
[2m[36m(pid=5026)[0m                      

2020-03-23 15:27:15,612	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=4983, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=4983)[0m 2020-03-23 15:27:15,579	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=5026)[0m 2020-03-23 15:27:15,562	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.35346353e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5026)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=5026)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:27:15,703	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 3 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-27-15.txt

[2m[36m(pid=5030)[0m 2020-03-23 15:27:19,593	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.06799605e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5030)[0m    

[2m[36m(pid=5181)[0m Success.
[2m[36m(pid=5181)[0m 2020-03-23 15:28:06,420	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=5181)[0m 2020-03-23 15:28:06.547638: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=5185)[0m Success.
[2m[36m(pid=5181)[0m 2020-03-23 15:28:08,454	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=5181)[0m 
[2m[36m(pid=5181)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5181)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=5181)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5181)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0

[2m[36m(pid=5185)[0m Success.
[2m[36m(pid=5181)[0m 2020-03-23 15:28:21,027	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=5181)[0m 2020-03-23 15:28:21,028	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=5181)[0m 2020-03-23 15:28:21,028	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5181)[0m 2020-03-23 15:28:21,029	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5181)[0m 2020-03-23 15:28:21,033	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=5181)[0m 
[2m[36m(pid=5181)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=5181)[0m                                   'env_id': 0,
[2m[36m(pid=5181)[0m                                   'info': None,
[2m[36m(pi

2020-03-23 15:28:21,741	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=5138, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=5181)[0m 2020-03-23 15:28:21,685	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.04917064e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5181)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=5181)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=5181)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=5181)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pid=518

2020-03-23 15:28:21,885	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 4 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-28-21.txt

[2m[36m(pid=5185)[0m 2020-03-23 15:28:25,761	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.15131448e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5185)[0m    

[2m[36m(pid=5336)[0m Success.
[2m[36m(pid=5336)[0m 2020-03-23 15:29:08,416	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=5336)[0m 2020-03-23 15:29:08.547108: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=5340)[0m Success.
[2m[36m(pid=5336)[0m 2020-03-23 15:29:10,577	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=5336)[0m 
[2m[36m(pid=5336)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5336)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=5336)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5336)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0

[2m[36m(pid=5336)[0m 2020-03-23 15:29:22,198	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=5336)[0m 2020-03-23 15:29:22,198	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=5336)[0m 2020-03-23 15:29:22,199	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5336)[0m 2020-03-23 15:29:22,200	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5336)[0m 2020-03-23 15:29:22,202	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=5336)[0m 
[2m[36m(pid=5336)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=5336)[0m                                   'env_id': 0,
[2m[36m(pid=5336)[0m                                   'info': None,
[2m[36m(pid=5336)[0m                      

2020-03-23 15:29:22,815	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=5293, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=5336)[0m 2020-03-23 15:29:22,777	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.06286494e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5336)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=5336)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=5336)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=5336)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pid=533

2020-03-23 15:29:22,922	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 5 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-29-22.txt

[2m[36m(pid=5340)[0m 2020-03-23 15:29:26,624	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.05380613e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5340)[0m    

2020-03-23 15:29:36,128	ERROR worker.py:1654 -- Possible unhandled error from worker: [36mray_RolloutWorker:sample()[39m (pid=5340, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 48, in get
    self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 94, in run_timeline
    fetches = sess.run(ops, feed_dict=feed_dict)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
    run_metadata_ptr)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1111, in _run
    str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (1, 46) for Tensor 'default_policy/observation:0', which has shape '(?, 44)'

During handling of the above exception, another exception occurred:

[36mray_RolloutWorker:sample()[39m (pid=5340, h

[2m[36m(pid=5448)[0m Success.
[2m[36m(pid=5448)[0m 2020-03-23 15:29:40,433	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=5448)[0m 2020-03-23 15:29:40.435983: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=5448)[0m 2020-03-23 15:29:40,898	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=5448)[0m 
[2m[36m(pid=5448)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5448)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=5448)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5448)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=5448)[0m 2020-03-23 15:30:15,209	INFO trainable.py:105 -- _setup took 38.315 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=5495)[0m 2020-03-23 15:30:15,913	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=5495)[0m 2020-03-23 15:30:16.051042: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=5491)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=5495)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=5491)[0m 2020-03-23 15:30:23,477	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=5491)[0m Success.
[2m[36m(pid=5491)[0m 2020-03-23 15:30:26,205	INFO sampler.py:304 -- Raw o

2020-03-23 15:30:27,105	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=5448, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 6 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-30-27.txt

[2m[36m(pid=5495)[0m 2020-03-23 15:30:30,951	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.04230178e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5495)[0m    

[2m[36m(pid=5646)[0m Success.
[2m[36m(pid=5646)[0m 2020-03-23 15:31:15,055	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=5646)[0m 2020-03-23 15:31:15.182934: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=5646)[0m 2020-03-23 15:31:17,355	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=5646)[0m 
[2m[36m(pid=5646)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5646)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=5646)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5646)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=5650)[0m Success.
[2m[36m(pid=5646)[0m 2020-03-23 15:31:29,761	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=5646)[0m 
[2m[36m(pid=5646)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=-1.206, max=-1.206, mean=-1.206),
[2m[36m(pid=5646)[0m                       [],
[2m[36m(pid=5646)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.193, max=0.193, mean=0.193),
[2m[36m(pid=5646)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.001, max=0.0, mean=-0.0),
[2m[36m(pid=5646)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=-0.002, max=-0.002, mean=-0.002)})}
[2m[36m(pid=5646)[0m 


2020-03-23 15:31:30,177	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=5603, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=5646)[0m 2020-03-23 15:31:30,112	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.40333753e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5646)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=5646)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=5646)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=5646)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pid=564

2020-03-23 15:31:30,314	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 7 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-31-30.txt

[2m[36m(pid=5650)[0m 2020-03-23 15:31:34,237	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.09973610e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5650)[0m    

[2m[36m(pid=5801)[0m Success.
[2m[36m(pid=5801)[0m 2020-03-23 15:32:15,978	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=5801)[0m 2020-03-23 15:32:16.104244: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=5805)[0m Success.
[2m[36m(pid=5801)[0m 2020-03-23 15:32:17,779	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=5801)[0m 
[2m[36m(pid=5801)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5801)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=5801)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5801)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0

[2m[36m(pid=5805)[0m Success.
[2m[36m(pid=5801)[0m 2020-03-23 15:32:28,980	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=5801)[0m 2020-03-23 15:32:28,981	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=5801)[0m 2020-03-23 15:32:28,982	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5801)[0m 2020-03-23 15:32:28,983	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5801)[0m 2020-03-23 15:32:28,986	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=5801)[0m 
[2m[36m(pid=5801)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=5801)[0m                                   'env_id': 0,
[2m[36m(pid=5801)[0m                                   'info': None,
[2m[36m(pi

2020-03-23 15:32:29,598	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=5758, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=5758)[0m 2020-03-23 15:32:29,561	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=5801)[0m 2020-03-23 15:32:29,552	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.39035192e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5801)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=5801)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 8 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-32-29.txt

[2m[36m(pid=5805)[0m 2020-03-23 15:32:33,345	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.12636348e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5805)[0m    

[2m[36m(pid=5956)[0m Success.
[2m[36m(pid=5956)[0m 2020-03-23 15:33:16,410	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=5956)[0m 2020-03-23 15:33:16.531237: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=5956)[0m 2020-03-23 15:33:18,188	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=5956)[0m 
[2m[36m(pid=5956)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5956)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=5956)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=5956)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=5960)[0m Success.
[2m[36m(pid=5956)[0m 2020-03-23 15:33:29,012	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=5956)[0m 2020-03-23 15:33:29,012	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=5956)[0m 2020-03-23 15:33:29,013	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5956)[0m 2020-03-23 15:33:29,017	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=5956)[0m 2020-03-23 15:33:29,020	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=5956)[0m 
[2m[36m(pid=5956)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=5956)[0m                                   'env_id': 0,
[2m[36m(pid=5956)[0m                                   'info': None,
[2m[36m(pi

2020-03-23 15:33:29,587	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=5913, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=5913)[0m 2020-03-23 15:33:29,528	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=5956)[0m 2020-03-23 15:33:29,520	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.36828562e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5956)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=5956)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 9 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-33-29.txt

[2m[36m(pid=5960)[0m 2020-03-23 15:33:33,173	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.23659346e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=5960)[0m    

[2m[36m(pid=6111)[0m Success.
[2m[36m(pid=6111)[0m 2020-03-23 15:34:12,896	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=6111)[0m 2020-03-23 15:34:13.014682: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=6115)[0m Success.
[2m[36m(pid=6111)[0m 2020-03-23 15:34:14,606	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=6111)[0m 
[2m[36m(pid=6111)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6111)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=6111)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6111)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0

[2m[36m(pid=6111)[0m 2020-03-23 15:34:25,743	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=6111)[0m 2020-03-23 15:34:25,747	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=6111)[0m 2020-03-23 15:34:25,748	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6111)[0m 2020-03-23 15:34:25,748	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6111)[0m 2020-03-23 15:34:25,751	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=6111)[0m 
[2m[36m(pid=6111)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=6111)[0m                                   'env_id': 0,
[2m[36m(pid=6111)[0m                                   'info': None,
[2m[36m(pid=6111)[0m                      

2020-03-23 15:34:26,341	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=6068, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=6068)[0m 2020-03-23 15:34:26,290	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=6111)[0m 2020-03-23 15:34:26,280	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.05174126e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6111)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=6111)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 10 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-34-26.txt

[2m[36m(pid=6115)[0m 2020-03-23 15:34:29,888	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.36073031e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6115)[0m   

[2m[36m(pid=6266)[0m Success.
[2m[36m(pid=6266)[0m 2020-03-23 15:35:12,519	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=6266)[0m 2020-03-23 15:35:12.630228: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=6266)[0m 2020-03-23 15:35:14,235	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=6266)[0m 
[2m[36m(pid=6266)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6266)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=6266)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6266)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=6266)[0m 2020-03-23 15:35:25,583	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=6266)[0m 2020-03-23 15:35:25,584	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=6266)[0m 2020-03-23 15:35:25,584	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6266)[0m 2020-03-23 15:35:25,585	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6266)[0m 2020-03-23 15:35:25,586	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=6266)[0m 
[2m[36m(pid=6266)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=6266)[0m                                   'env_id': 0,
[2m[36m(pid=6266)[0m                                   'info': None,
[2m[36m(pid=6266)[0m                      

2020-03-23 15:35:26,214	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=6223, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=6223)[0m 2020-03-23 15:35:26,149	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=6266)[0m 2020-03-23 15:35:26,136	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.06436423e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6266)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=6266)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 11 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-35-26.txt

[2m[36m(pid=6270)[0m 2020-03-23 15:35:29,809	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.11950289e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6270)[0m   

[2m[36m(pid=6421)[0m Success.
[2m[36m(pid=6421)[0m 2020-03-23 15:36:11,273	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=6421)[0m 2020-03-23 15:36:11.412045: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=6421)[0m 2020-03-23 15:36:13,297	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=6421)[0m 
[2m[36m(pid=6421)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6421)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=6421)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6421)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=6421)[0m 2020-03-23 15:36:25,327	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=6421)[0m 2020-03-23 15:36:25,328	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=6421)[0m 2020-03-23 15:36:25,328	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6421)[0m 2020-03-23 15:36:25,329	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6421)[0m 2020-03-23 15:36:25,331	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=6421)[0m 
[2m[36m(pid=6421)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=6421)[0m                                   'env_id': 0,
[2m[36m(pid=6421)[0m                                   'info': None,
[2m[36m(pid=6421)[0m                      

2020-03-23 15:36:26,058	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=6378, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=6378)[0m 2020-03-23 15:36:25,973	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=6421)[0m 2020-03-23 15:36:25,961	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.14297457e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6421)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=6421)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 12 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-36-26.txt

[2m[36m(pid=6425)[0m 2020-03-23 15:36:29,519	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.07099371e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6425)[0m   

[2m[36m(pid=6576)[0m Success.
[2m[36m(pid=6576)[0m 2020-03-23 15:37:12,659	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=6576)[0m 2020-03-23 15:37:12.766611: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=6576)[0m 2020-03-23 15:37:14,404	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=6576)[0m 
[2m[36m(pid=6576)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6576)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=6576)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6576)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=6576)[0m 2020-03-23 15:37:25,884	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=6576)[0m 2020-03-23 15:37:25,884	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=6576)[0m 2020-03-23 15:37:25,885	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6576)[0m 2020-03-23 15:37:25,885	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6576)[0m 2020-03-23 15:37:25,887	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=6576)[0m 
[2m[36m(pid=6576)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=6576)[0m                                   'env_id': 0,
[2m[36m(pid=6576)[0m                                   'info': None,
[2m[36m(pid=6576)[0m                      

2020-03-23 15:37:26,543	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=6533, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=6533)[0m 2020-03-23 15:37:26,470	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=6576)[0m 2020-03-23 15:37:26,456	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.07582733e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6576)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=6576)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 13 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-37-26.txt

[2m[36m(pid=6580)[0m 2020-03-23 15:37:30,016	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.27018543e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6580)[0m   

[2m[36m(pid=6731)[0m Success.
[2m[36m(pid=6731)[0m 2020-03-23 15:38:12,839	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=6731)[0m 2020-03-23 15:38:12.964107: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=6735)[0m Success.
[2m[36m(pid=6731)[0m 2020-03-23 15:38:14,702	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=6731)[0m 
[2m[36m(pid=6731)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6731)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=6731)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6731)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0

[2m[36m(pid=6731)[0m 2020-03-23 15:38:25,935	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=6731)[0m 2020-03-23 15:38:25,936	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=6731)[0m 2020-03-23 15:38:25,936	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6731)[0m 2020-03-23 15:38:25,936	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6731)[0m 2020-03-23 15:38:25,939	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=6731)[0m 
[2m[36m(pid=6731)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=6731)[0m                                   'env_id': 0,
[2m[36m(pid=6731)[0m                                   'info': None,
[2m[36m(pid=6731)[0m                      

2020-03-23 15:38:26,465	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=6688, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=6688)[0m 2020-03-23 15:38:26,434	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=6731)[0m 2020-03-23 15:38:26,425	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.03502100e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6731)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=6731)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:38:26,592	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 14 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-38-26.txt

[2m[36m(pid=6735)[0m 2020-03-23 15:38:29,948	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.37578868e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6735)[0m   

[2m[36m(pid=6886)[0m Success.
[2m[36m(pid=6886)[0m 2020-03-23 15:39:12,915	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=6886)[0m 2020-03-23 15:39:13.056792: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=6890)[0m Success.
[2m[36m(pid=6886)[0m 2020-03-23 15:39:14,827	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=6886)[0m 
[2m[36m(pid=6886)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6886)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=6886)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=6886)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0

[2m[36m(pid=6886)[0m 2020-03-23 15:39:25,713	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=6886)[0m 2020-03-23 15:39:25,713	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=6886)[0m 2020-03-23 15:39:25,713	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6886)[0m 2020-03-23 15:39:25,714	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=6886)[0m 2020-03-23 15:39:25,715	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=6886)[0m 
[2m[36m(pid=6886)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=6886)[0m                                   'env_id': 0,
[2m[36m(pid=6886)[0m                                   'info': None,
[2m[36m(pid=6886)[0m                      

2020-03-23 15:39:26,367	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=6843, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=6843)[0m 2020-03-23 15:39:26,325	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=6886)[0m 2020-03-23 15:39:26,297	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.53521605e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6886)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=6886)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:39:26,478	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.



[2m[36m(pid=6886)[0m     fetches = sess.run(ops, feed_dict=feed_dict)
[2m[36m(pid=6886)[0m   File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
[2m[36m(pid=6886)[0m     run_metadata_ptr)
[2m[36m(pid=6886)[0m   File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1111, in _run
[2m[36m(pid=6886)[0m     str(subfeed_t.get_shape())))
[2m[36m(pid=6886)[0m ValueError: Cannot feed value of shape (1, 46) for Tensor 'default_policy/observation:0', which has shape '(?, 44)'




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 15 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-39-26.txt

[2m[36m(pid=6890)[0m 2020-03-23 15:39:30,027	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.12215463e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=6890)[0m   

[2m[36m(pid=7041)[0m Success.
[2m[36m(pid=7041)[0m 2020-03-23 15:40:10,693	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=7041)[0m 2020-03-23 15:40:10.836756: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=7045)[0m Success.
[2m[36m(pid=7041)[0m 2020-03-23 15:40:12,474	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=7041)[0m 
[2m[36m(pid=7041)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7041)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=7041)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7041)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0

[2m[36m(pid=7041)[0m 2020-03-23 15:40:24,028	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=7041)[0m 
[2m[36m(pid=7041)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=-0.202, max=-0.202, mean=-0.202),
[2m[36m(pid=7041)[0m                       [],
[2m[36m(pid=7041)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.391, max=0.391, mean=0.391),
[2m[36m(pid=7041)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.005, max=0.002, mean=-0.002),
[2m[36m(pid=7041)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=-0.008, max=-0.008, mean=-0.008)})}
[2m[36m(pid=7041)[0m 


2020-03-23 15:40:24,363	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=6998, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=6998)[0m 2020-03-23 15:40:24,343	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=7041)[0m 2020-03-23 15:40:24,317	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.07137453e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7041)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=7041)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 16 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-40-24.txt

[2m[36m(pid=7045)[0m 2020-03-23 15:40:28,091	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.08753635e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7045)[0m   

[2m[36m(pid=7196)[0m Success.
[2m[36m(pid=7196)[0m 2020-03-23 15:41:13,065	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=7196)[0m 2020-03-23 15:41:13.197003: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=7196)[0m 2020-03-23 15:41:14,945	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=7196)[0m 
[2m[36m(pid=7196)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7196)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=7196)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7196)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=7200)[0m Success.
[2m[36m(pid=7196)[0m 2020-03-23 15:41:27,424	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=7196)[0m 2020-03-23 15:41:27,432	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=7196)[0m 2020-03-23 15:41:27,436	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=7196)[0m 2020-03-23 15:41:27,436	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=7196)[0m 2020-03-23 15:41:27,444	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=7196)[0m 
[2m[36m(pid=7196)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=7196)[0m                                   'env_id': 0,
[2m[36m(pid=7196)[0m                                   'info': None,
[2m[36m(pi

2020-03-23 15:41:28,070	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=7153, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=7153)[0m 2020-03-23 15:41:27,999	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=7196)[0m 2020-03-23 15:41:27,982	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.18639501e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7196)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=7196)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 17 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-41-28.txt

[2m[36m(pid=7200)[0m 2020-03-23 15:41:32,001	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.10937750e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7200)[0m   

[2m[36m(pid=7351)[0m Success.
[2m[36m(pid=7351)[0m 2020-03-23 15:42:11,882	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=7351)[0m 2020-03-23 15:42:12.003797: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=7351)[0m 2020-03-23 15:42:13,583	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=7351)[0m 
[2m[36m(pid=7351)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7351)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=7351)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7351)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=7355)[0m Success.
[2m[36m(pid=7351)[0m 2020-03-23 15:42:26,745	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=7351)[0m 
[2m[36m(pid=7351)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=-0.659, max=-0.659, mean=-0.659),
[2m[36m(pid=7351)[0m                       [],
[2m[36m(pid=7351)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.32, max=0.32, mean=0.32),
[2m[36m(pid=7351)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.0, max=0.006, mean=0.003),
[2m[36m(pid=7351)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=-0.001, max=-0.001, mean=-0.001)})}
[2m[36m(pid=7351)[0m 


2020-03-23 15:42:27,152	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=7308, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=7351)[0m 2020-03-23 15:42:27,108	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.24412224e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7351)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=7351)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=7351)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=7351)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pid=735

2020-03-23 15:42:27,271	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 18 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-42-27.txt

[2m[36m(pid=7355)[0m 2020-03-23 15:42:30,993	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.06480579e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7355)[0m   

[2m[36m(pid=7506)[0m Success.
[2m[36m(pid=7506)[0m 2020-03-23 15:43:13,432	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=7506)[0m 2020-03-23 15:43:13.552766: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=7506)[0m 2020-03-23 15:43:15,279	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=7506)[0m 
[2m[36m(pid=7506)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7506)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=7506)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7506)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=7510)[0m Success.
[2m[36m(pid=7506)[0m 2020-03-23 15:43:28,191	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=7506)[0m 
[2m[36m(pid=7506)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=-0.69, max=-0.69, mean=-0.69),
[2m[36m(pid=7506)[0m                       [],
[2m[36m(pid=7506)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.312, max=0.312, mean=0.312),
[2m[36m(pid=7506)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=0.007, max=0.007, mean=0.007),
[2m[36m(pid=7506)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=-0.006, max=-0.006, mean=-0.006)})}
[2m[36m(pid=7506)[0m 


2020-03-23 15:43:28,589	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=7463, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=7463)[0m 2020-03-23 15:43:28,540	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=7506)[0m 2020-03-23 15:43:28,531	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.12826667e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7506)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=7506)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:43:28,760	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 19 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-43-28.txt

[2m[36m(pid=7510)[0m 2020-03-23 15:43:33,810	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.12928251e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7510)[0m   

2020-03-23 15:43:38,843	ERROR worker.py:1654 -- Possible unhandled error from worker: [36mray_RolloutWorker:sample()[39m (pid=7510, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 48, in get
    self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 94, in run_timeline
    fetches = sess.run(ops, feed_dict=feed_dict)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
    run_metadata_ptr)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1111, in _run
    str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (1, 46) for Tensor 'default_policy/observation:0', which has shape '(?, 44)'

During handling of the above exception, another exception occurred:

[36mray_RolloutWorker:sample()[39m (pid=7510, h

[2m[36m(pid=7618)[0m Success.
[2m[36m(pid=7618)[0m 2020-03-23 15:43:44,533	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=7618)[0m 2020-03-23 15:43:44.534966: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=7618)[0m 2020-03-23 15:43:45,033	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=7618)[0m 
[2m[36m(pid=7618)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7618)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=7618)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7618)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=7665)[0m Success.
[2m[36m(pid=7618)[0m 2020-03-23 15:44:25,302	INFO trainable.py:105 -- _setup took 44.315 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=7661)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=7665)[0m 2020-03-23 15:44:25,908	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=7665)[0m 2020-03-23 15:44:26.025948: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=7665)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=7661)[0m 2020-03-23 15:44:32,932	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=7661)[0m Success.
[2m[36m(pid=7661)[0m 2020-03-23 15:44:35

2020-03-23 15:44:36,581	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=7618, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=7618)[0m 2020-03-23 15:44:36,528	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=7661)[0m 2020-03-23 15:44:36,520	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.16071305e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7661)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=7661)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 20 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-44-36.txt

[2m[36m(pid=7665)[0m 2020-03-23 15:44:40,449	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.12547832e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7665)[0m   

[2m[36m(pid=7816)[0m Success.
[2m[36m(pid=7816)[0m 2020-03-23 15:45:23,372	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=7816)[0m 2020-03-23 15:45:23.502289: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=7820)[0m Success.
[2m[36m(pid=7816)[0m 2020-03-23 15:45:25,685	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=7816)[0m 
[2m[36m(pid=7816)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7816)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=7816)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7816)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0

[2m[36m(pid=7816)[0m 2020-03-23 15:45:39,500	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=7816)[0m 2020-03-23 15:45:39,506	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=7816)[0m 2020-03-23 15:45:39,506	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=7816)[0m 2020-03-23 15:45:39,507	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=7816)[0m 2020-03-23 15:45:39,509	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=7816)[0m 
[2m[36m(pid=7816)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=7816)[0m                                   'env_id': 0,
[2m[36m(pid=7816)[0m                                   'info': None,
[2m[36m(pid=7816)[0m                      

2020-03-23 15:45:40,045	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=7773, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=7773)[0m 2020-03-23 15:45:40,027	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=7816)[0m 2020-03-23 15:45:40,018	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.38517618e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7816)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=7816)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:45:40,144	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 21 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-45-40.txt

[2m[36m(pid=7820)[0m 2020-03-23 15:45:44,027	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.05017691e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7820)[0m   

[2m[36m(pid=7971)[0m Success.
[2m[36m(pid=7971)[0m 2020-03-23 15:46:32,169	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=7971)[0m 2020-03-23 15:46:32.318268: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=7971)[0m 2020-03-23 15:46:34,781	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=7971)[0m 
[2m[36m(pid=7971)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7971)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=7971)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=7971)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=7971)[0m 2020-03-23 15:46:48,342	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=7971)[0m 
[2m[36m(pid=7971)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=-0.58, max=-0.58, mean=-0.58),
[2m[36m(pid=7971)[0m                       [],
[2m[36m(pid=7971)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.337, max=0.337, mean=0.337),
[2m[36m(pid=7971)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.008, max=0.007, mean=-0.001),
[2m[36m(pid=7971)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=-0.005, max=-0.005, mean=-0.005)})}
[2m[36m(pid=7971)[0m 


2020-03-23 15:46:48,768	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=7928, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=7928)[0m 2020-03-23 15:46:48,746	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=7971)[0m 2020-03-23 15:46:48,729	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.01860406e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7971)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=7971)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 22 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-46-48.txt

[2m[36m(pid=7975)[0m 2020-03-23 15:46:52,607	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.09034845e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=7975)[0m   

[2m[36m(pid=8126)[0m Success.
[2m[36m(pid=8126)[0m 2020-03-23 15:47:39,034	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=8126)[0m 2020-03-23 15:47:39.160323: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=8126)[0m 2020-03-23 15:47:40,798	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=8126)[0m 
[2m[36m(pid=8126)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8126)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=8126)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8126)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=8126)[0m 2020-03-23 15:47:52,294	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=8126)[0m 2020-03-23 15:47:52,294	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=8126)[0m 2020-03-23 15:47:52,295	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=8126)[0m 2020-03-23 15:47:52,295	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=8126)[0m 2020-03-23 15:47:52,297	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=8126)[0m 
[2m[36m(pid=8126)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=8126)[0m                                   'env_id': 0,
[2m[36m(pid=8126)[0m                                   'info': None,
[2m[36m(pid=8126)[0m                      

2020-03-23 15:47:52,868	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=8083, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=8083)[0m 2020-03-23 15:47:52,852	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=8126)[0m 2020-03-23 15:47:52,839	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.27752530e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8126)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=8126)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 23 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-47-52.txt

[2m[36m(pid=8130)[0m 2020-03-23 15:47:56,355	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.18932872e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8130)[0m   

[2m[36m(pid=8281)[0m Success.
[2m[36m(pid=8281)[0m 2020-03-23 15:48:37,098	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=8281)[0m 2020-03-23 15:48:37.231405: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=8281)[0m 2020-03-23 15:48:38,885	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=8281)[0m 
[2m[36m(pid=8281)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8281)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=8281)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8281)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=8285)[0m Success.
[2m[36m(pid=8281)[0m 2020-03-23 15:48:51,797	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=8281)[0m 2020-03-23 15:48:51,798	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=8281)[0m 2020-03-23 15:48:51,798	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=8281)[0m 2020-03-23 15:48:51,799	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=8281)[0m 2020-03-23 15:48:51,805	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=8281)[0m 
[2m[36m(pid=8281)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=8281)[0m                                   'env_id': 0,
[2m[36m(pid=8281)[0m                                   'info': None,
[2m[36m(pi

2020-03-23 15:48:52,493	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=8238, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=8281)[0m 2020-03-23 15:48:52,401	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.12436218e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8281)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=8281)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=8281)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=8281)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pid=828



== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 24 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-48-52.txt

[2m[36m(pid=8285)[0m 2020-03-23 15:48:56,811	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.40087142e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8285)[0m   

[2m[36m(pid=8436)[0m Success.
[2m[36m(pid=8436)[0m 2020-03-23 15:49:43,020	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=8436)[0m 2020-03-23 15:49:43.150212: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=8436)[0m 2020-03-23 15:49:45,306	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=8436)[0m 
[2m[36m(pid=8436)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8436)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=8436)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8436)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=8440)[0m Success.
[2m[36m(pid=8436)[0m 2020-03-23 15:50:00,089	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=8436)[0m 
[2m[36m(pid=8436)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=1.386, max=1.386, mean=1.386),
[2m[36m(pid=8436)[0m                       [],
[2m[36m(pid=8436)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.154, max=0.154, mean=0.154),
[2m[36m(pid=8436)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.001, max=0.005, mean=0.002),
[2m[36m(pid=8436)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=-0.002, max=-0.002, mean=-0.002)})}
[2m[36m(pid=8436)[0m 


2020-03-23 15:50:00,486	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=8393, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=8393)[0m 2020-03-23 15:50:00,429	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=8436)[0m 2020-03-23 15:50:00,419	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.06400062e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8436)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=8436)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 25 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-50-00.txt

[2m[36m(pid=8440)[0m 2020-03-23 15:50:04,733	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.02705721e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8440)[0m   

[2m[36m(pid=8591)[0m Success.
[2m[36m(pid=8591)[0m 2020-03-23 15:50:49,300	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=8591)[0m 2020-03-23 15:50:49.464411: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=8591)[0m 2020-03-23 15:50:51,316	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=8591)[0m 
[2m[36m(pid=8591)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8591)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=8591)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8591)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=8591)[0m 2020-03-23 15:51:04,180	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=8591)[0m 
[2m[36m(pid=8591)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=1.764, max=1.764, mean=1.764),
[2m[36m(pid=8591)[0m                       [],
[2m[36m(pid=8591)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.085, max=0.085, mean=0.085),
[2m[36m(pid=8591)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.001, max=0.004, mean=0.002),
[2m[36m(pid=8591)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=0.006, max=0.006, mean=0.006)})}
[2m[36m(pid=8591)[0m 


2020-03-23 15:51:04,671	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=8548, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=8591)[0m 2020-03-23 15:51:04,588	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.15245307e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8591)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=8591)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=8591)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=8591)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pid=859

2020-03-23 15:51:04,774	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 26 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-51-04.txt

[2m[36m(pid=8595)[0m 2020-03-23 15:51:09,461	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.30750823e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8595)[0m   

[2m[36m(pid=8746)[0m Success.
[2m[36m(pid=8746)[0m 2020-03-23 15:51:53,607	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=8746)[0m 2020-03-23 15:51:53.777948: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=8746)[0m 2020-03-23 15:51:56,069	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=8746)[0m 
[2m[36m(pid=8746)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8746)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=8746)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8746)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=8750)[0m Success.
[2m[36m(pid=8746)[0m 2020-03-23 15:52:10,511	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=8746)[0m 2020-03-23 15:52:10,511	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=8746)[0m 2020-03-23 15:52:10,512	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=8746)[0m 2020-03-23 15:52:10,513	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=8746)[0m 2020-03-23 15:52:10,515	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=8746)[0m 
[2m[36m(pid=8746)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=8746)[0m                                   'env_id': 0,
[2m[36m(pid=8746)[0m                                   'info': None,
[2m[36m(pi

2020-03-23 15:52:11,131	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=8703, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=8703)[0m 2020-03-23 15:52:11,091	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=8746)[0m 2020-03-23 15:52:11,081	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.19702108e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8746)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=8746)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 27 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-52-11.txt

[2m[36m(pid=8750)[0m 2020-03-23 15:52:15,026	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.20846685e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8750)[0m   

[2m[36m(pid=8901)[0m Success.
[2m[36m(pid=8901)[0m 2020-03-23 15:53:00,601	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=8901)[0m 2020-03-23 15:53:00.730951: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=8901)[0m 2020-03-23 15:53:03,150	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=8901)[0m 
[2m[36m(pid=8901)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8901)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=8901)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=8901)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=8905)[0m Success.
[2m[36m(pid=8901)[0m 2020-03-23 15:53:17,068	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=8901)[0m 
[2m[36m(pid=8901)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=1.243, max=1.243, mean=1.243),
[2m[36m(pid=8901)[0m                       [],
[2m[36m(pid=8901)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.184, max=0.184, mean=0.184),
[2m[36m(pid=8901)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.001, max=-0.001, mean=-0.001),
[2m[36m(pid=8901)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=0.009, max=0.009, mean=0.009)})}
[2m[36m(pid=8901)[0m 


2020-03-23 15:53:17,498	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=8858, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=8858)[0m 2020-03-23 15:53:17,459	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=8901)[0m 2020-03-23 15:53:17,440	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.26874750e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8901)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=8901)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:53:17,622	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 28 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-53-17.txt

[2m[36m(pid=8905)[0m 2020-03-23 15:53:21,739	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.02122485e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=8905)[0m   

[2m[36m(pid=9056)[0m Success.
[2m[36m(pid=9056)[0m 2020-03-23 15:54:07,505	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=9056)[0m 2020-03-23 15:54:07.640934: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=9056)[0m 2020-03-23 15:54:09,633	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=9056)[0m 
[2m[36m(pid=9056)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9056)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=9056)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9056)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=9060)[0m Success.
[2m[36m(pid=9056)[0m 2020-03-23 15:54:23,782	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=9056)[0m 
[2m[36m(pid=9056)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=0.463, max=0.463, mean=0.463),
[2m[36m(pid=9056)[0m                       [],
[2m[36m(pid=9056)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.361, max=0.361, mean=0.361),
[2m[36m(pid=9056)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.007, max=0.005, mean=-0.001),
[2m[36m(pid=9056)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=0.002, max=0.002, mean=0.002)})}
[2m[36m(pid=9056)[0m 


2020-03-23 15:54:24,208	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=9013, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=9013)[0m 2020-03-23 15:54:24,148	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=9056)[0m 2020-03-23 15:54:24,138	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.12262719e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9056)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=9056)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 29 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-54-24.txt

[2m[36m(pid=9060)[0m 2020-03-23 15:54:28,110	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.15895837e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9060)[0m   

[2m[36m(pid=9211)[0m Success.
[2m[36m(pid=9211)[0m 2020-03-23 15:55:15,838	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=9211)[0m 2020-03-23 15:55:15.989763: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=9211)[0m 2020-03-23 15:55:17,762	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=9211)[0m 
[2m[36m(pid=9211)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9211)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=9211)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9211)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=9215)[0m Success.
[2m[36m(pid=9211)[0m 2020-03-23 15:55:32,019	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=9211)[0m 
[2m[36m(pid=9211)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=0.128, max=0.128, mean=0.128),
[2m[36m(pid=9211)[0m                       [],
[2m[36m(pid=9211)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.394, max=0.394, mean=0.394),
[2m[36m(pid=9211)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.012, max=0.002, mean=-0.005),
[2m[36m(pid=9211)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=-0.011, max=-0.011, mean=-0.011)})}
[2m[36m(pid=9211)[0m 


2020-03-23 15:55:32,483	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=9168, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=9168)[0m 2020-03-23 15:55:32,434	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=9211)[0m 2020-03-23 15:55:32,411	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.14665031e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9211)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=9211)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:55:32,603	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 30 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-55-32.txt

[2m[36m(pid=9215)[0m 2020-03-23 15:55:36,547	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.19120888e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9215)[0m   

[2m[36m(pid=9366)[0m Success.
[2m[36m(pid=9366)[0m 2020-03-23 15:56:23,901	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=9366)[0m 2020-03-23 15:56:24.034499: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=9366)[0m 2020-03-23 15:56:25,955	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=9366)[0m 
[2m[36m(pid=9366)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9366)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=9366)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9366)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=9370)[0m Success.
[2m[36m(pid=9366)[0m 2020-03-23 15:56:38,582	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=9366)[0m 2020-03-23 15:56:38,588	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=9366)[0m 2020-03-23 15:56:38,593	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=9366)[0m 2020-03-23 15:56:38,593	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=9366)[0m 2020-03-23 15:56:38,605	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=9366)[0m 
[2m[36m(pid=9366)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=9366)[0m                                   'env_id': 0,
[2m[36m(pid=9366)[0m                                   'info': None,
[2m[36m(pi

2020-03-23 15:56:39,299	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=9323, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=9323)[0m 2020-03-23 15:56:39,282	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=9366)[0m 2020-03-23 15:56:39,273	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.12453429e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9366)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=9366)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 31 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-56-39.txt

[2m[36m(pid=9370)[0m 2020-03-23 15:56:43,494	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.64126701e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9370)[0m   

[2m[36m(pid=9521)[0m Success.
[2m[36m(pid=9521)[0m 2020-03-23 15:57:31,688	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=9521)[0m 2020-03-23 15:57:31.810680: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=9521)[0m 2020-03-23 15:57:33,889	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=9521)[0m 
[2m[36m(pid=9521)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9521)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=9521)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9521)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=9525)[0m Success.
[2m[36m(pid=9521)[0m 2020-03-23 15:57:47,971	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=9521)[0m 
[2m[36m(pid=9521)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=1.004, max=1.004, mean=1.004),
[2m[36m(pid=9521)[0m                       [],
[2m[36m(pid=9521)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.24, max=0.24, mean=0.24),
[2m[36m(pid=9521)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.012, max=-0.003, mean=-0.007),
[2m[36m(pid=9521)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=0.005, max=0.005, mean=0.005)})}
[2m[36m(pid=9521)[0m 


2020-03-23 15:57:48,481	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=9478, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=9478)[0m 2020-03-23 15:57:48,448	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=9521)[0m 2020-03-23 15:57:48,433	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.07246656e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9521)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=9521)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,


2020-03-23 15:57:48,582	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.



[2m[36m(pid=9521)[0m        1.38613757e+00, 1.38613757e+00, 1.38613757e+00, 1.36916417e+00,
[2m[36m(pid=9521)[0m        1.36834513e+00, 1.36834513e+00, 1.36834513e+00, 1.36834513e+00,
[2m[36m(pid=9521)[0m        1.36834513e+00, 1.36834513e+00, 1.36834518e+00, 1.36838931e+00,
[2m[36m(pid=9521)[0m        1.38613757e+00, 1.87778272e-01])], <tf.Tensor 'default_policy/action:0' shape=(?, 1) dtype=float32>: [array([0.42911097], dtype=float32)], <tf.Tensor 'default_policy/prev_reward:0' shape=(?,) dtype=float32>: [1.2646320263163011], <tf.Tensor 'default_policy/PlaceholderWithDefault:0' shape=() dtype=bool>: False}
[2m[36m(pid=9521)[0m Traceback (most recent call last):
[2m[36m(pid=9521)[0m   File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 48, in get
[2m[36m(pid=9521)[0m     self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
[2m[36m(pid=9521)[0m   File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_



== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.8/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 32 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-57-48.txt

[2m[36m(pid=9525)[0m 2020-03-23 15:57:52,934	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.07837139e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9525)[0m   

[2m[36m(pid=9676)[0m Success.
[2m[36m(pid=9676)[0m 2020-03-23 15:58:46,166	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=9676)[0m 2020-03-23 15:58:46.310716: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=9676)[0m 2020-03-23 15:58:48,457	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=9676)[0m 
[2m[36m(pid=9676)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9676)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=9676)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9676)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=9676)[0m 2020-03-23 15:59:05,630	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=9676)[0m 
[2m[36m(pid=9676)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=2.067, max=2.067, mean=2.067),
[2m[36m(pid=9676)[0m                       [],
[2m[36m(pid=9676)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.047, max=0.047, mean=0.047),
[2m[36m(pid=9676)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.002, max=0.004, mean=0.001),
[2m[36m(pid=9676)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=0.009, max=0.009, mean=0.009)})}
[2m[36m(pid=9676)[0m 
[2m[36m(pid=9680)[0m Success.


2020-03-23 15:59:06,699	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=9633, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.

[2m[36m(pid=9633)[0m 2020-03-23 15:59:06,662	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=9676)[0m 2020-03-23 15:59:06,634	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.08207737e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9676)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=9676)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 33 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_15-59-06.txt

[2m[36m(pid=9680)[0m 2020-03-23 15:59:11,833	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.45396683e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9680)[0m   

2020-03-23 15:59:17,683	ERROR worker.py:1654 -- Possible unhandled error from worker: [36mray_RolloutWorker:sample()[39m (pid=9680, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 48, in get
    self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 94, in run_timeline
    fetches = sess.run(ops, feed_dict=feed_dict)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
    run_metadata_ptr)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1111, in _run
    str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (1, 46) for Tensor 'default_policy/observation:0', which has shape '(?, 44)'

During handling of the above exception, another exception occurred:

[36mray_RolloutWorker:sample()[39m (pid=9680, h

[2m[36m(pid=9788)[0m Success.
[2m[36m(pid=9788)[0m 2020-03-23 15:59:26,584	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=9788)[0m 2020-03-23 15:59:26.586251: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=9788)[0m 2020-03-23 15:59:27,242	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=9788)[0m 
[2m[36m(pid=9788)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9788)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=9788)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9788)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[

[2m[36m(pid=9835)[0m Success.
[2m[36m(pid=9788)[0m 2020-03-23 16:00:09,802	INFO trainable.py:105 -- _setup took 47.771 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=9835)[0m 2020-03-23 16:00:10,327	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=9835)[0m 2020-03-23 16:00:10.483128: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=9831)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=9835)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=9831)[0m 2020-03-23 16:00:19,662	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=9831)[0m Success.
[2m[36m(pid=9831)[0m 2020-03-23 16:00:22

2020-03-23 16:00:24,349	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=9788, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.



2020-03-23 16:00:24,454	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


[2m[36m(pid=9831)[0m 2020-03-23 16:00:24,312	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.08925910e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9831)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=9831)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=9831)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=9831)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pid=983

[2m[36m(pid=9943)[0m 2020-03-23 16:00:43,488	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=9943)[0m 2020-03-23 16:00:43.503735: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=9943)[0m 2020-03-23 16:00:44,828	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=9943)[0m 
[2m[36m(pid=9943)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9943)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=9943)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=9943)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=9943)[0m   'dones': 

[2m[36m(pid=9986)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=9990)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=9986)[0m 2020-03-23 16:01:39,588	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=9986)[0m Success.
[2m[36m(pid=9986)[0m 2020-03-23 16:01:42,809	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=9986)[0m 2020-03-23 16:01:42,810	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=9986)[0m 2020-03-23 16:01:42,818	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=9986)[0m 2020-03-23 16:01:42,818	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=9986)[0m 2020-03-23 16:01:42,826	INFO

2020-03-23 16:01:44,060	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=9943, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self.



2020-03-23 16:01:44,172	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


[2m[36m(pid=9943)[0m 2020-03-23 16:01:44,036	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=9986)[0m 2020-03-23 16:01:44,015	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.10223992e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9986)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=9986)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 35 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-01-44.txt

[2m[36m(pid=9990)[0m 2020-03-23 16:01:48,642	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.07688602e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=9990)[0m   

[2m[36m(pid=10141)[0m Success.
[2m[36m(pid=10141)[0m 2020-03-23 16:02:45,887	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10141)[0m 2020-03-23 16:02:46.079228: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=10141)[0m 2020-03-23 16:02:48,578	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10141)[0m 
[2m[36m(pid=10141)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10141)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=10141)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10141)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=flo

[2m[36m(pid=10141)[0m 2020-03-23 16:03:04,832	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=10141)[0m 
[2m[36m(pid=10141)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=0.274, max=0.274, mean=0.274),
[2m[36m(pid=10141)[0m                       [],
[2m[36m(pid=10141)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.381, max=0.381, mean=0.381),
[2m[36m(pid=10141)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.007, max=0.006, mean=-0.001),
[2m[36m(pid=10141)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=0.002, max=0.002, mean=0.002)})}
[2m[36m(pid=10141)[0m 
[2m[36m(pid=10145)[0m Success.


2020-03-23 16:03:05,577	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10098, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self



2020-03-23 16:03:05,706	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


[2m[36m(pid=10141)[0m 2020-03-23 16:03:05,510	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.22849678e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10141)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=10141)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=10141)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=10141)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pi



== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 36 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-03-05.txt

[2m[36m(pid=10145)[0m 2020-03-23 16:03:10,467	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.43688484e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10145)[0m 

[2m[36m(pid=10296)[0m Success.
[2m[36m(pid=10296)[0m 2020-03-23 16:04:03,694	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10296)[0m 2020-03-23 16:04:03.879418: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=10296)[0m 2020-03-23 16:04:06,125	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10296)[0m 
[2m[36m(pid=10296)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10296)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=10296)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10296)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=flo

[2m[36m(pid=10296)[0m 2020-03-23 16:04:23,928	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=10296)[0m 
[2m[36m(pid=10296)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=1.098, max=1.098, mean=1.098),
[2m[36m(pid=10296)[0m                       [],
[2m[36m(pid=10296)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.217, max=0.217, mean=0.217),
[2m[36m(pid=10296)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.006, max=-0.004, mean=-0.005),
[2m[36m(pid=10296)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=-0.003, max=-0.003, mean=-0.003)})}
[2m[36m(pid=10296)[0m 
[2m[36m(pid=10300)[0m Success.


2020-03-23 16:04:24,942	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10253, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=10253)[0m 2020-03-23 16:04:24,925	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=10296)[0m 2020-03-23 16:04:24,858	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.07086036e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10296)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=10296)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+

2020-03-23 16:04:35,943	ERROR worker.py:1654 -- Possible unhandled error from worker: [36mray_RolloutWorker:sample()[39m (pid=10300, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 48, in get
    self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 94, in run_timeline
    fetches = sess.run(ops, feed_dict=feed_dict)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
    run_metadata_ptr)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1111, in _run
    str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (1, 46) for Tensor 'default_policy/observation:0', which has shape '(?, 44)'

During handling of the above exception, another exception occurred:

[36mray_RolloutWorker:sample()[39m (pid=10300,

[2m[36m(pid=10408)[0m Success.
[2m[36m(pid=10408)[0m 2020-03-23 16:04:46,610	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10408)[0m 2020-03-23 16:04:46.616473: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=10408)[0m 2020-03-23 16:04:47,452	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10408)[0m 
[2m[36m(pid=10408)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10408)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=10408)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10408)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=flo

[2m[36m(pid=10455)[0m Success.
[2m[36m(pid=10408)[0m 2020-03-23 16:05:34,565	INFO trainable.py:105 -- _setup took 54.365 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=10455)[0m 2020-03-23 16:05:34,658	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10455)[0m 2020-03-23 16:05:34.775081: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=10451)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=10455)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=10451)[0m 2020-03-23 16:05:47,436	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=10451)[0m Success.
[2m[36m(pid=10451)[0m 2020-03-23

2020-03-23 16:05:54,228	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10408, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=10408)[0m 2020-03-23 16:05:54,182	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=10451)[0m 2020-03-23 16:05:54,146	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.11677300e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10451)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=10451)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+



== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 38 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-05-54.txt

[2m[36m(pid=10455)[0m 2020-03-23 16:05:58,913	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.39395888e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10455)[0m 

[2m[36m(pid=10606)[0m Success.
[2m[36m(pid=10606)[0m 2020-03-23 16:07:08,805	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10606)[0m 2020-03-23 16:07:08.947745: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=10563)[0m 2020-03-23 16:07:09,508	INFO trainable.py:105 -- _setup took 57.707 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=10610)[0m Success.
[2m[36m(pid=10606)[0m 2020-03-23 16:07:10,513	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10606)[0m 
[2m[36m(pid=10606)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10606)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=

[2m[36m(pid=10606)[0m 2020-03-23 16:07:28,540	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=10606)[0m 2020-03-23 16:07:28,564	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=10606)[0m 2020-03-23 16:07:28,565	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=10606)[0m 2020-03-23 16:07:28,565	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=10606)[0m 2020-03-23 16:07:28,589	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=10606)[0m 
[2m[36m(pid=10606)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=10606)[0m                                   'env_id': 0,
[2m[36m(pid=10606)[0m                                   'info': None,
[2m[36m(pid=10606)[0m            

2020-03-23 16:07:31,755	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10563, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=10606)[0m 2020-03-23 16:07:31,467	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.15840790e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10606)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=10606)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=10606)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=10606)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pi

2020-03-23 16:07:32,771	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 39 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-07-31.txt

[2m[36m(pid=10720)[0m Success.
[2m[36m(pid=10720)[0m 2020-03-23 16:08:04,655	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10720)[0m 2020-03-23 16:08:04.657844: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=10720)[0m 2020-03-23 16:08:05,379	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10720)[0m 
[2m[36m(pid=10720)[0m { 'action_prob': <tf.Tensor 'default_policy/a

[2m[36m(pid=10761)[0m 2020-03-23 16:09:01,131	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10761)[0m 2020-03-23 16:09:01.275542: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=10765)[0m 2020-03-23 16:09:02,636	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10765)[0m 2020-03-23 16:09:02.777280: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=10761)[0m 2020-03-23 16:09:02,873	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10761)[0m 
[2m[36m(pid=10761)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=fl



2020-03-23 16:09:18,474	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10720, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=10720)[0m 2020-03-23 16:09:18,388	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.
[2m[36m(pid=10761)[0m 2020-03-23 16:09:18,277	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.09412886e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10761)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=10761)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+

2020-03-23 16:09:18,806	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 40 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-09-18.txt

[2m[36m(pid=10765)[0m 2020-03-23 16:09:20,356	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.06438013e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10765)[0m 

[2m[36m(pid=10916)[0m Success.
[2m[36m(pid=10916)[0m 2020-03-23 16:10:22,872	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=10916)[0m 2020-03-23 16:10:23.030045: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=10916)[0m 2020-03-23 16:10:25,282	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=10916)[0m 
[2m[36m(pid=10916)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10916)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=10916)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=10916)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=flo

[2m[36m(pid=10916)[0m 2020-03-23 16:10:41,068	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=10916)[0m 2020-03-23 16:10:41,069	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=10916)[0m 2020-03-23 16:10:41,069	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=10916)[0m 2020-03-23 16:10:41,072	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=10916)[0m 2020-03-23 16:10:41,080	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=10916)[0m 
[2m[36m(pid=10916)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=10916)[0m                                   'env_id': 0,
[2m[36m(pid=10916)[0m                                   'info': None,
[2m[36m(pid=10916)[0m            

2020-03-23 16:10:42,512	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=10875, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=10916)[0m 2020-03-23 16:10:42,372	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.01634303e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10916)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=10916)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=10916)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=10916)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pi



== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 41 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-10-42.txt

[2m[36m(pid=10920)[0m 2020-03-23 16:10:46,936	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.07469086e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=10920)[0m 

[2m[36m(pid=11071)[0m Success.
[2m[36m(pid=11071)[0m 2020-03-23 16:12:00,101	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11071)[0m 2020-03-23 16:12:00.290872: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=11075)[0m Success.
[2m[36m(pid=11071)[0m 2020-03-23 16:12:03,681	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=11071)[0m 
[2m[36m(pid=11071)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=11071)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=11071)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=11071)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviou

[2m[36m(pid=11071)[0m 2020-03-23 16:12:25,412	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)}}
[2m[36m(pid=11071)[0m 2020-03-23 16:12:25,413	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=11071)[0m 2020-03-23 16:12:25,413	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=11071)[0m 2020-03-23 16:12:25,414	INFO sampler.py:407 -- Filtered obs: np.ndarray((44,), dtype=float64, min=0.0, max=2001.046, mean=512.057)
[2m[36m(pid=11071)[0m 2020-03-23 16:12:25,421	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=11071)[0m 
[2m[36m(pid=11071)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=11071)[0m                                   'env_id': 0,
[2m[36m(pid=11071)[0m                                   'info': None,
[2m[36m(pid=11071)[0m            

2020-03-23 16:12:29,598	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=11028, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=11028)[0m 2020-03-23 16:12:29,392	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.


2020-03-23 16:12:31,829	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 42 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-12-29.txt

[2m[36m(pid=11185)[0m Success.
[2m[36m(pid=11185)[0m 2020-03-23 16:13:45,970	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11185)[0m 2020-03-23 16:13:45.973983: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=11185)[0m 2020-03-23 16:13:46,838	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=11185)[0m 
[2m[36m(pid=11185)[0m { 'action_prob': <tf.Tensor 'default_policy/a

[2m[36m(pid=11230)[0m 2020-03-23 16:14:39,849	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11226)[0m 2020-03-23 16:14:39,858	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11226)[0m 2020-03-23 16:14:39.981049: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=11230)[0m 2020-03-23 16:14:39.973714: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=11226)[0m 2020-03-23 16:14:41,390	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=11226)[0m 
[2m[36m(pid=11226)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=fl



2020-03-23 16:14:58,686	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=11185, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=11226)[0m 2020-03-23 16:14:58,303	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.42667406e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=11226)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=11226)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=11226)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=11226)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pi

2020-03-23 16:14:59,098	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 43 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-14-58.txt

[2m[36m(pid=11340)[0m Success.
[2m[36m(pid=11340)[0m 2020-03-23 16:16:38,097	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11340)[0m 2020-03-23 16:16:38.099423: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=11340)[0m 2020-03-23 16:16:38,843	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=11340)[0m 
[2m[36m(pid=11340)[0m { 'action_prob': <tf.Tensor 'default_policy/a

[2m[36m(pid=11381)[0m 2020-03-23 16:17:24,991	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=11381)[0m 
[2m[36m(pid=11381)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=11381)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=11381)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=11381)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=11381)[0m   'dones': <tf.Tensor 'default_policy/dones:0' shape=(?,) dtype=bool>,
[2m[36m(pid=11381)[0m   'new_obs': <tf.Tensor 'default_policy/new_obs:0' shape=(?, 44) dtype=float32>,
[2m[36m(pid=11381)[0m   'obs': <tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>,
[2m[36m(pid=11381)[0m   'prev_actions': <tf.Tensor 'default_policy/action:0' shape=(?, 1

2020-03-23 16:17:40,809	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=11340, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=11381)[0m 2020-03-23 16:17:40,541	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.05235337e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=11381)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=11381)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=11381)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=11381)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pi

2020-03-23 16:17:40,921	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 44 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-17-40.txt

[2m[36m(pid=11385)[0m 2020-03-23 16:17:43,393	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.08049381e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=11385)[0m 

[2m[36m(pid=11540)[0m Success.
[2m[36m(pid=11536)[0m Success.
[2m[36m(pid=11495)[0m 2020-03-23 16:19:25,260	INFO trainable.py:105 -- _setup took 62.783 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=11536)[0m 2020-03-23 16:19:27,233	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11536)[0m 2020-03-23 16:19:27.381378: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=11540)[0m 2020-03-23 16:19:27,313	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11540)[0m 2020-03-23 16:19:27.468511: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to us

[2m[36m(pid=11536)[0m 2020-03-23 16:19:46,361	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=11536)[0m 
[2m[36m(pid=11536)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=-0.736, max=-0.736, mean=-0.736),
[2m[36m(pid=11536)[0m                       [],
[2m[36m(pid=11536)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.304, max=0.304, mean=0.304),
[2m[36m(pid=11536)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.003, max=0.005, mean=0.001),
[2m[36m(pid=11536)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=0.009, max=0.009, mean=0.009)})}
[2m[36m(pid=11536)[0m 


2020-03-23 16:19:47,861	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=11495, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=11540)[0m 2020-03-23 16:19:47,575	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.08216928e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=11540)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=11540)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=11540)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=11540)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pi

2020-03-23 16:19:47,974	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 45 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-19-47.txt

[2m[36m(pid=11650)[0m Success.
[2m[36m(pid=11650)[0m 2020-03-23 16:20:13,307	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11650)[0m 2020-03-23 16:20:13.309428: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=11650)[0m 2020-03-23 16:20:14,318	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=11650)[0m 
[2m[36m(pid=11650)[0m { 'action_prob': <tf.Tensor 'default_policy/a

[2m[36m(pid=11695)[0m Success.
[2m[36m(pid=11650)[0m 2020-03-23 16:21:01,565	INFO trainable.py:105 -- _setup took 54.115 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=11695)[0m 2020-03-23 16:21:01,844	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11695)[0m 2020-03-23 16:21:01.986750: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=11691)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=11695)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=11691)[0m 2020-03-23 16:21:13,304	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=11691)[0m Success.
[2m[36m(pid=11691)[0m 2020-03-23



2020-03-23 16:21:19,947	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=11650, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=11691)[0m 2020-03-23 16:21:19,851	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.25268292e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=11691)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=11691)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=11691)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=11691)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pi



[2m[36m(pid=11650)[0m 2020-03-23 16:21:19,912	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.




== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 46 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-21-19.txt

[2m[36m(pid=11695)[0m 2020-03-23 16:21:23,843	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.08155661e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=11695)[0m 

[2m[36m(pid=11846)[0m Success.
[2m[36m(pid=11846)[0m 2020-03-23 16:22:30,124	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=11846)[0m 2020-03-23 16:22:30.296089: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=11846)[0m 2020-03-23 16:22:32,478	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=11846)[0m 
[2m[36m(pid=11846)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=11846)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=11846)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=11846)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=flo

[2m[36m(pid=11850)[0m Success.
[2m[36m(pid=11846)[0m 2020-03-23 16:22:53,089	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=11846)[0m 
[2m[36m(pid=11846)[0m { 'default_policy': ( np.ndarray((1, 1), dtype=float32, min=-1.276, max=-1.276, mean=-1.276),
[2m[36m(pid=11846)[0m                       [],
[2m[36m(pid=11846)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.179, max=0.179, mean=0.179),
[2m[36m(pid=11846)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.008, max=0.005, mean=-0.001),
[2m[36m(pid=11846)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=0.005, max=0.005, mean=0.005)})}
[2m[36m(pid=11846)[0m 


2020-03-23 16:22:53,669	ERROR trial_runner.py:550 -- Error processing event.
Traceback (most recent call last):
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 342, in fetch_result
    result = ray.get(trial_future[0])
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 2247, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=11805, host=ccc9c1a7c0dc)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 369, in train
    raise e
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 358, in train
    result = Trainable.train(self)
  File "/opt/conda/envs/flow/lib/python3.6/site-packages/ray/tune/trainable.py", line 171, in train
    result = self

[2m[36m(pid=11805)[0m 2020-03-23 16:22:53,652	INFO trainer.py:366 -- Worker crashed during call to train(). To attempt to continue training without the failed worker, set `'ignore_worker_failures': True`.


2020-03-23 16:22:53,810	INFO trial_runner.py:587 -- Attempting to recover trial state from last checkpoint.


[2m[36m(pid=11846)[0m 2020-03-23 16:22:53,629	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.07305681e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=11846)[0m        3.05569667e+02, 3.35611040e+02, 3.64643263e+02, 3.93675485e+02,
[2m[36m(pid=11846)[0m        4.22707721e+02, 4.51750715e+02, 7.30122937e+02, 7.30122937e+02,
[2m[36m(pid=11846)[0m        1.74471888e+03, 1.77381613e+03, 1.80234836e+03, 1.83088058e+03,
[2m[36m(pid=11846)[0m        1.85941280e+03, 1.88794502e+03, 1.91647725e+03, 1.94500947e+03,
[2m[36m(pi



== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/0 GPUs
Memory usage on this node: 0.9/1.0 GB
Result logdir: /root/ray_results/ISSY_RL_train
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_MyEnv-v0_0:	RUNNING, 47 failures: /root/ray_results/ISSY_RL_train/PPO_MyEnv-v0_0_2020-03-23_15-23-5844w02ijm/error_2020-03-23_16-22-53.txt

[2m[36m(pid=11850)[0m 2020-03-23 16:22:58,075	ERROR tf_run_builder.py:51 -- Error fetching: [<tf.Tensor 'default_policy/add:0' shape=(?, 1) dtype=float32>, {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'vf_preds': <tf.Tensor 'default_policy/value_function/Reshape:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/default_model_1/fc_net/fc_out/BiasAdd:0' shape=(?, 2) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 44) dtype=float32>: [array([5.23774632e+00, 6.02907553e+00, 3.53496037e+01, 3.53496037e+01,
[2m[36m(pid=11850)[0m 

[2m[36m(pid=12001)[0m Success.
[2m[36m(pid=12001)[0m 2020-03-23 16:23:51,123	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=12001)[0m 2020-03-23 16:23:51.264994: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
[2m[36m(pid=12001)[0m 2020-03-23 16:23:53,277	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=12001)[0m 
[2m[36m(pid=12001)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=12001)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 1) dtype=float32>,
[2m[36m(pid=12001)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=12001)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=flo

2020-03-23 16:24:14,219	ERROR worker.py:1716 -- listen_error_messages_raylet: Error 111 connecting to 172.17.0.2:41309. Connection refused.
2020-03-23 16:24:14,121	ERROR worker.py:1616 -- print_logs: Error 111 connecting to 172.17.0.2:41309. Connection refused.
2020-03-23 16:24:14,081	ERROR import_thread.py:89 -- ImportThread: Error 111 connecting to 172.17.0.2:41309. Connection refused.

KeyboardInterrupt



# /!\ Voir le tutoriel 10 pour le contrôle des feux