In [1]:
import rospy
import gym, ray
import numpy as np
import os
import torch.nn as nn

from ray import tune
from ray.rllib.agents import ppo
from ray.tune.registry import register_env
from scouting_gym.tasks.scouting_discrete_task import ScoutingDiscreteTask

from ray.tune import grid_search
from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.models.torch.visionnet import VisionNetwork
import ray.rllib.agents.ppo as ppo
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.dqn import DQNTrainer
from ray.rllib.models.tf.misc import normc_initializer
from ray.tune.logger import pretty_print
from matplotlib import pyplot as plt
import tensorflow as tf

Instructions for updating:
non-resource variables are not supported in the long term
None


In [2]:
# Build the scouting environment through Gym's registry and show the
# observation space it advertises.
# NOTE(review): the 'Scouting-v0' id is presumably registered as a side effect
# of importing `scouting_gym` in the imports cell -- confirm.
env = gym.make("Scouting-v0")

observation_space = env.observation_space
print(observation_space)

[ERROR] [1610548342.794446, 0.000000]: NOT Initialising Simulation Physics Parameters
[WARN] [1610548342.796945, 0.000000]: Start Init ControllersConnection
[WARN] [1610548342.797597, 0.000000]: END Init ControllersConnection


Box(0.0, 10.0, (500,), float32)


In [3]:
# Sanity check: take a few steps with a fixed action and print the value range
# of the last observation, to compare against the declared observation space.
PROBE_ACTION = 1   # fixed discrete action sent on every probe step
PROBE_STEPS = 10   # upper bound on the number of steps taken

obs = env.reset()
for _step in range(PROBE_STEPS):
    obs, reward, done, info = env.step(PROBE_ACTION)
    if done:
        # Stepping an environment after `done` is undefined under the Gym
        # API, so stop probing once the episode has ended.
        break
print(obs.min())
print(obs.max())

1.1092607
6.8552985


In [4]:
class CustomModel(TFModelV2):
    """Thin TFModelV2 wrapper that hands all work to RLlib's fc-net.

    The wrapped `FullyConnectedNetwork` is built with exactly the arguments
    this model receives, and its variables are registered on this model.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Create the wrapped fully connected network.

        All five arguments are forwarded unchanged, first to the TFModelV2
        base constructor and then to `FullyConnectedNetwork`.
        """
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)
        wrapped = FullyConnectedNetwork(
            obs_space, action_space, num_outputs, model_config, name)
        self.model = wrapped
        # Expose the wrapped network's variables through this model.
        self.register_variables(wrapped.variables())

    def forward(self, input_dict, state, seq_lens):
        """Return whatever the wrapped network's `forward` returns."""
        delegate = self.model
        return delegate.forward(input_dict, state, seq_lens)

    def value_function(self):
        """Return the wrapped network's value-function output."""
        delegate = self.model
        return delegate.value_function()

class MyKerasModel(TFModelV2):
    """Keras policy/value network for policy gradient algorithms.

    Layout: observations -> Dense(256, relu) -> Dense(128, relu), followed by
    two linear heads that both read the second hidden layer:
    "my_out" (`num_outputs` units) and "value_out" (1 unit).
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the Keras graph and register its variables on this model."""
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)

        def dense(units, layer_name, activation, init_std):
            # Every layer here is a Dense layer with a normc kernel
            # initializer; only size, name, activation and std differ.
            return tf.keras.layers.Dense(
                units,
                name=layer_name,
                activation=activation,
                kernel_initializer=normc_initializer(init_std))

        self.inputs = tf.keras.layers.Input(
            shape=obs_space.shape, name="observations")

        hidden_1 = dense(256, "Dense1", tf.nn.relu, 1.0)(self.inputs)
        hidden_2 = dense(128, "Dense2", tf.nn.relu, 1.0)(hidden_1)

        # Both heads are linear (no activation) and branch off hidden_2.
        policy_head = dense(num_outputs, "my_out", None, 0.01)(hidden_2)
        value_head = dense(1, "value_out", None, 0.01)(hidden_2)

        self.base_model = tf.keras.Model(
            self.inputs, [policy_head, value_head])
        self.register_variables(self.base_model.variables)

    def forward(self, input_dict, state, seq_lens):
        """Run the network on `input_dict["obs"]`.

        Returns the "my_out" head together with the unchanged `state`. The
        "value_out" head is cached on the instance for `value_function`.
        """
        heads = self.base_model(input_dict["obs"])
        model_out, self._value_out = heads
        return model_out, state

    def value_function(self):
        """Return the value head cached by the last `forward`, flattened to 1-D."""
        flat_values = tf.reshape(self._value_out, [-1])
        return flat_values

    def metrics(self):
        """Return a single constant metric, "foo" = 42.0."""
        constant_metric = tf.constant(42.0)
        return {"foo": constant_metric}

In [5]:
# Start the local Ray runtime. `ignore_reinit_error=True` keeps this cell safe
# to re-run in a live kernel; a bare `ray.init()` raises if Ray is already
# initialised in this process.
ray.init(ignore_reinit_error=True)

# Make CustomModel selectable from a trainer config under the key "my_model".
ModelCatalog.register_custom_model(
    "my_model", CustomModel)

config = {
    # The environment class is passed directly; no string id is registered
    # with Tune in the visible cells.
    "env": ScoutingDiscreteTask,
    # NOTE(review): "corridor_length" looks carried over from an RLlib
    # example -- confirm that ScoutingDiscreteTask actually reads it.
    "env_config": {
        "corridor_length": 5,
    },
    # Use GPUs iff the `RLLIB_NUM_GPUS` env var is set to > 0.
    "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
    "model": {
        "custom_model": "my_model",
    },
    "num_workers": 1,  # parallelism
}

# Stopping criteria in the format used by `tune.run(stop=...)`.
# NOTE(review): none of the visible cells reads `stop`; the manual training
# loop below uses its own fixed iteration count.
stop = {
    "training_iteration": 500,
    "timesteps_total": 1000000
}

2021-01-13 15:32:26,032	INFO services.py:1171 -- View the Ray dashboard at http://127.0.0.1:8265


In [6]:
# Build a DQN trainer from the config above and fetch its default policy.
trainer = DQNTrainer(config=config)
policy = trainer.get_policy()

# Keras `Model.summary()` prints the table itself and returns None, so wrapping
# it in print() only adds a stray "None" line to the output.
# `policy.model.model` is the network wrapped by the custom model.
# NOTE(review): this assumes the wrapped network exposes a Keras `base_model`.
policy.model.model.base_model.summary()

2021-01-13 15:32:28,557	INFO trainer.py:591 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2021-01-13 15:32:28,558	INFO trainer.py:616 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=2410893)[0m Instructions for updating:
[2m[36m(pid=2410893)[0m non-resource variables are not supported in the long term
[2m[36m(pid=2410893)[0m [ERROR] [1610548351.316829, 0.000000]: NOT Initialising Simulation Physics Parameters
[2m[36m(pid=2410893)[0m [WARN] [1610548351.320070, 0.000000]: Start Init ControllersConnection
[2m[36m(pid=2410893)[0m [WARN] [1610548351.320991, 0.000000]: END Init ControllersConnection


[2m[36m(pid=2410893)[0m None
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 500)]        0                                            
__________________________________________________________________________________________________
fc_1 (Dense)                    (None, 256)          128256      observations[0][0]               
__________________________________________________________________________________________________
fc_out (Dense)                  (None, 256)          65792       fc_1[0][0]                       
__________________________________________________________________________________________________
value_out (Dense)               (None, 1)            257         fc_1[0][0]                       
Total params: 194,305
Trainable params: 194,305
Non-trainable 

In [7]:
# Manual training loop: one `trainer.train()` call per iteration, with a
# periodic checkpoint and a less frequent full metrics dump.
NUM_ITERATIONS = 1000
CHECKPOINT_INTERVAL = 5   # save on iterations 0, 5, 10, ...
REPORT_INTERVAL = 20      # pretty-print metrics on iterations 0, 20, 40, ...

results = []
for i in range(NUM_ITERATIONS):
    result = trainer.train()
    results.append(result)

    save_now = i % CHECKPOINT_INTERVAL == 0
    report_now = i % REPORT_INTERVAL == 0

    if save_now:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)

    if report_now:
        print(pretty_print(result))

[2m[36m(pid=2410893)[0m Instructions for updating:
[2m[36m(pid=2410893)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.
checkpoint saved at /home/dschori/ray_results/DQN_ScoutingDiscreteTask_2021-01-13_15-32-28bt4ptmxt/checkpoint_1/checkpoint-1
custom_metrics: {}
date: 2021-01-13_15-33-06
done: false
episode_len_mean: 135.83333333333334
episode_reward_max: -195.95
episode_reward_mean: -203.59166666666667
episode_reward_min: -208.29999999999998
episodes_this_iter: 6
episodes_total: 6
experiment_id: 774e2ab249634f6690119605a078a679
hostname: workstation
info:
  last_target_update_ts: 1000
  learner:
    default_policy:
      cur_lr: 0.0005000000237487257
      max_q: 1.1383541822433472
      mean_q: 0.27207210659980774
      mean_td_error: -0.45600634813308716
      min_q: -1.2729443311691284
      model: {}
  num_steps_sampled: 1000
  num_steps_trained: 32
  num_target_updates: 1
iterations_since_restore: 1
node_ip: 192.168.178.60
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 68.51333333333

TypeError: object of type 'NoneType' has no len()