# WasteNet

## Setup

In [None]:
# Colab
!rm -r smart-cities-drl
!git clone https://github.com/eescriba/smart-cities-drl
!cd smart-cities-drl/ && pip install -e .

import sys
sys.path.insert(0,'./smart-cities-drl/src/')

In [25]:
# Local: uncomment the lines below to install from a local checkout instead of cloning
# !pip install -e ..
# import sys
# sys.path.insert(0,'../src/')

Obtaining file:///home/teto/workspace/master/smart-cities-drl
Installing collected packages: smart-cities-drl
  Attempting uninstall: smart-cities-drl
    Found existing installation: smart-cities-drl 0.1.0
    Uninstalling smart-cities-drl-0.1.0:
      Successfully uninstalled smart-cities-drl-0.1.0
  Running setup.py develop for smart-cities-drl
Successfully installed smart-cities-drl-0.1.0


In [26]:
import json
import shutil
import random

import gym
import ray
import ray.rllib.agents.ppo as ppo
from ray.tune import run, sample_from
from ray.tune.registry import register_env
from ray.tune.schedulers import PopulationBasedTraining
import tensorflow as tf

from wastenet.env import WasteNetEnv

In [32]:
def _make_wastenet_env(env_config):
    """Environment factory handed to Ray: build a WasteNetEnv from the given config."""
    return WasteNetEnv(env_config)


# Expose the environment to RLlib under a string id.
register_env("WasteNet-v0", _make_wastenet_env)

# Count the GPUs TensorFlow can see; later cells pass this count on to Ray.
gpu_devices = tf.config.list_physical_devices('GPU')
num_gpus = len(gpu_devices)
print("Num GPUs Available: ", num_gpus)

Num GPUs Available:  0


In [33]:
# Restart Ray so that re-running this cell starts from a clean runtime.
ray.shutdown()
# Tell Ray how many GPUs were detected above; ignore_reinit_error makes a
# repeated init call non-fatal. The returned connection info is the cell output.
ray.init(ignore_reinit_error=True, num_gpus=num_gpus)

2021-05-23 18:01:13,196	INFO services.py:1267 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


{'node_ip_address': '192.168.100.180',
 'raylet_ip_address': '192.168.100.180',
 'redis_address': '192.168.100.180:6379',
 'object_store_address': '/tmp/ray/session_2021-05-23_18-01-10_132184_46580/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-05-23_18-01-10_132184_46580/sockets/raylet',
 'webui_url': '127.0.0.1:8265',
 'session_dir': '/tmp/ray/session_2021-05-23_18-01-10_132184_46580',
 'metrics_export_port': 47135,
 'node_id': '54ad67fc224d4ee2d2b27f9b809a3100bbd8caf4ff2177a84199c3f2'}

## Proximal Policy Optimization (PPO)

In [None]:
# Train a PPO agent with RLlib's default hyperparameters, saving a checkpoint
# after every iteration and recording per-iteration episode statistics.
ray.shutdown()
ray.init(ignore_reinit_error=True, num_gpus=num_gpus)

# Start from an empty checkpoint directory; ignore_errors covers the first
# run, when the directory does not exist yet.
CHECKPOINT_ROOT = "tmp/ppo/wastenet"
shutil.rmtree(CHECKPOINT_ROOT, ignore_errors=True)

config = ppo.DEFAULT_CONFIG.copy()
# Use the GPU count detected in the setup cell instead of a hard-coded 1, so
# the same cell works on CPU-only machines and on GPU runtimes alike.
config["num_gpus"] = num_gpus

agent = ppo.PPOTrainer(config, env=WasteNetEnv)

N_ITER = 1000
# Keys of the training result that are kept for each iteration.
EPISODE_METRICS = (
    'episode_reward_min',
    'episode_reward_mean',
    'episode_reward_max',
    'episode_len_mean',
)

results = []       # full result dict of every training iteration
episode_data = []  # compact per-iteration statistics (dicts)
episode_json = []  # the same statistics serialized as JSON strings

for n in range(N_ITER):
    result = agent.train()
    results.append(result)

    episode = {'n': n, **{key: result[key] for key in EPISODE_METRICS}}
    episode_data.append(episode)
    episode_json.append(json.dumps(episode))

    # agent.save returns the path of the checkpoint it just wrote.
    file_name = agent.save(CHECKPOINT_ROOT)

    print(
        f'{n+1:3d}: Min/Mean/Max reward: '
        f'{episode["episode_reward_min"]:8.4f}/'
        f'{episode["episode_reward_mean"]:8.4f}/'
        f'{episode["episode_reward_max"]:8.4f}, '
        f'len mean: {episode["episode_len_mean"]:8.4f}. '
        f'Checkpoint saved to {file_name}'
    )

In [None]:
# Inspect the network behind the trained policy.
policy = agent.get_policy()
model = policy.model
# Keras' summary() prints the table itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the table.
model.base_model.summary()

In [None]:
# Roll the trained agent out by hand for a fixed number of steps, printing
# every transition and the cumulative reward of each finished episode.

# The notebook never created an `env` instance, so build one here.
# NOTE(review): assumes WasteNetEnv accepts the same env_config dict that the
# trainer was configured with (the registration lambda calls WasteNetEnv(config)).
env = WasteNetEnv(config.get("env_config", {}))

obs = env.reset()
done = False
sum_reward = 0
n_step = 1000
for step in range(n_step):
    action = agent.compute_action(obs)
    print(action)
    # Feed the *new* observation into the next compute_action call; keeping
    # the first observation would make the agent act on stale state forever.
    obs, reward, done, info = env.step(action)
    print(obs, reward, done, info)
    sum_reward += reward
    if done:
        print("cumulative reward", sum_reward)
        # Begin the next episode from the observation returned by reset().
        obs = env.reset()
        sum_reward = 0

## Tuned PPO

In [21]:
# Population Based Training (PBT)

# Post-process the perturbed config so that it is still valid (used by PBT).
def explore(config):
    """Repair a perturbed PPO config in place so it stays trainable.

    Args:
        config: dict holding at least "train_batch_size",
            "sgd_minibatch_size" and "num_sgd_iter".

    Returns:
        The same dict object, with "train_batch_size" raised to at least
        twice "sgd_minibatch_size" and "num_sgd_iter" raised to at least 1.
    """
    # Enough timesteps must be collected to fill two SGD minibatches.
    smallest_batch = config["sgd_minibatch_size"] * 2
    config["train_batch_size"] = max(config["train_batch_size"], smallest_batch)
    # At least one SGD pass has to run per training batch.
    config["num_sgd_iter"] = max(config["num_sgd_iter"], 1)
    return config

# Hyperparameters PBT may mutate: callables are resampled on demand, while a
# list restricts the parameter to the listed values.
_ppo_mutations = {
    "lambda": lambda: random.uniform(0.7, 1.0),
    "clip_param": lambda: random.uniform(0.01, 0.5),
    "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
    "num_sgd_iter": lambda: random.randint(1, 30),
    "sgd_minibatch_size": lambda: random.randint(128, 16384),
    "train_batch_size": lambda: random.randint(2000, 160000),
}

# Scheduler that maximizes mean episode reward; perturbations are spaced by
# 120 units of "time_total_s", and every perturbed config goes through
# explore() before it is used.
pbt = PopulationBasedTraining(
    metric="episode_reward_mean",
    mode="max",
    time_attr="time_total_s",
    perturbation_interval=120,
    resample_probability=0.25,
    hyperparam_mutations=_ppo_mutations,
    custom_explore_fn=explore,
)

In [22]:
# Launch 8 PPO trials on WasteNetEnv under the PBT scheduler. The initial
# hyperparameters of each trial are sampled independently below.
# NOTE(review): no `stop` criterion is passed, so the trials presumably keep
# running until the cell is interrupted -- consider adding one.
analysis = run(
    'PPO',
    name="wastenet_ppo",
    scheduler=pbt,
    num_samples=8,
    config={
        "env": WasteNetEnv,
        "seed": 123,
        "num_gpus": num_gpus,
        "num_workers": 1,
        "observation_filter": "MeanStdFilter",
        "model": {
            # "fcnet_hiddens": [32, 32],  # optional smaller network
            "free_log_std": True
        },
        "num_sgd_iter": 10,
        "sgd_minibatch_size": 128,
        "lambda": sample_from(lambda spec: random.uniform(0.9, 1.0)),
        "clip_param": sample_from(lambda spec: random.uniform(0.1, 0.5)),
        # Bounds written low-to-high; the sampled range is the same as before.
        "lr": sample_from(lambda spec: random.uniform(1e-5, 1e-3)),
        "train_batch_size": sample_from(
            lambda spec: random.randint(1000, 60000)),
    },
)

# metric/mode were given to the scheduler, not to run(), so the analysis
# object has no default metric; name them explicitly when asking for the best
# config instead of relying on the `best_config` property.
best_config = analysis.get_best_config(metric="episode_reward_mean", mode="max")
print("best hyperparameters: ", best_config)

Trial name,status,loc,clip_param,lambda,lr,train_batch_size
PPO_WasteNetEnv_9c2b2_00000,RUNNING,,0.207877,0.931832,0.000220299,58269
PPO_WasteNetEnv_9c2b2_00001,PENDING,,0.282321,0.952828,0.000286907,7605
PPO_WasteNetEnv_9c2b2_00002,PENDING,,0.441161,0.96429,9.79392e-05,11860
PPO_WasteNetEnv_9c2b2_00003,PENDING,,0.269488,0.952489,0.000584088,42266
PPO_WasteNetEnv_9c2b2_00004,PENDING,,0.248345,0.943345,0.000561721,40877
PPO_WasteNetEnv_9c2b2_00005,PENDING,,0.444519,0.944545,0.000637151,14377
PPO_WasteNetEnv_9c2b2_00006,PENDING,,0.45855,0.952304,0.000755521,19818
PPO_WasteNetEnv_9c2b2_00007,PENDING,,0.232987,0.922463,0.000356169,58339


[2m[36m(pid=51864)[0m Instructions for updating:
[2m[36m(pid=51864)[0m non-resource variables are not supported in the long term
[2m[36m(pid=51864)[0m Instructions for updating:
[2m[36m(pid=51864)[0m non-resource variables are not supported in the long term
[2m[36m(pid=51864)[0m Instructions for updating:
[2m[36m(pid=51864)[0m non-resource variables are not supported in the long term
[2m[36m(pid=51864)[0m Instructions for updating:
[2m[36m(pid=51864)[0m non-resource variables are not supported in the long term
[2m[36m(pid=51864)[0m Instructions for updating:
[2m[36m(pid=51864)[0m non-resource variables are not supported in the long term
[2m[36m(pid=51864)[0m Instructions for updating:
[2m[36m(pid=51864)[0m non-resource variables are not supported in the long term
[2m[36m(pid=51864)[0m 2021-05-23 17:36:19,350	INFO trainer.py:669 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=51864)[0m 2021-05-23 17

Trial name,status,loc,clip_param,lambda,lr,train_batch_size
PPO_WasteNetEnv_9c2b2_00000,RUNNING,,0.207877,0.931832,0.000220299,58269
PPO_WasteNetEnv_9c2b2_00001,PENDING,,0.282321,0.952828,0.000286907,7605
PPO_WasteNetEnv_9c2b2_00002,PENDING,,0.441161,0.96429,9.79392e-05,11860
PPO_WasteNetEnv_9c2b2_00003,PENDING,,0.269488,0.952489,0.000584088,42266
PPO_WasteNetEnv_9c2b2_00004,PENDING,,0.248345,0.943345,0.000561721,40877
PPO_WasteNetEnv_9c2b2_00005,PENDING,,0.444519,0.944545,0.000637151,14377
PPO_WasteNetEnv_9c2b2_00006,PENDING,,0.45855,0.952304,0.000755521,19818
PPO_WasteNetEnv_9c2b2_00007,PENDING,,0.232987,0.922463,0.000356169,58339


ce/master/smart-cities-drl/venv/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 516, in __init__
[2m[36m(pid=51864)[0m     super().__init__(config, logger_creator)
[2m[36m(pid=51864)[0m   File "/home/teto/workspace/master/smart-cities-drl/venv/lib/python3.8/site-packages/ray/tune/trainable.py", line 98, in __init__
[2m[36m(pid=51864)[0m     self.setup(copy.deepcopy(self.config))
[2m[36m(pid=51864)[0m   File "/home/teto/workspace/master/smart-cities-drl/venv/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 707, in setup
[2m[36m(pid=51864)[0m     self._init(self.config, self.env_creator)
[2m[36m(pid=51864)[0m   File "/home/teto/workspace/master/smart-cities-drl/venv/lib/python3.8/site-packages/ray/rllib/agents/trainer_template.py", line 148, in _init
[2m[36m(pid=51864)[0m     self.workers = self._make_workers(
[2m[36m(pid=51864)[0m   File "/home/teto/workspace/master/smart-cities-drl/venv/lib/python3.8/site-packages/ray/rllib/agents/tr

Trial name,status,loc,clip_param,lambda,lr,train_batch_size
PPO_WasteNetEnv_9c2b2_00001,PENDING,,0.282321,0.952828,0.000286907,7605
PPO_WasteNetEnv_9c2b2_00002,PENDING,,0.441161,0.96429,9.79392e-05,11860
PPO_WasteNetEnv_9c2b2_00003,PENDING,,0.269488,0.952489,0.000584088,42266
PPO_WasteNetEnv_9c2b2_00004,PENDING,,0.248345,0.943345,0.000561721,40877
PPO_WasteNetEnv_9c2b2_00005,PENDING,,0.444519,0.944545,0.000637151,14377
PPO_WasteNetEnv_9c2b2_00006,PENDING,,0.45855,0.952304,0.000755521,19818
PPO_WasteNetEnv_9c2b2_00007,PENDING,,0.232987,0.922463,0.000356169,58339
PPO_WasteNetEnv_9c2b2_00000,ERROR,,0.207877,0.931832,0.000220299,58269

Trial name,# failures,error file
PPO_WasteNetEnv_9c2b2_00000,1,"/home/teto/ray_results/prueba_ppo/PPO_WasteNetEnv_9c2b2_00000_0_clip_param=0.20788,lambda=0.93183,lr=0.0002203,train_batch_size=58269_2021-05-23_17-36-15/error.txt"


ecute_task
[2m[36m(pid=51865)[0m   File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
[2m[36m(pid=51865)[0m   File "/home/teto/workspace/master/smart-cities-drl/venv/lib/python3.8/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
[2m[36m(pid=51865)[0m     return method(__ray_actor, *args, **kwargs)
[2m[36m(pid=51865)[0m   File "/home/teto/workspace/master/smart-cities-drl/venv/lib/python3.8/site-packages/ray/rllib/agents/trainer_template.py", line 121, in __init__
[2m[36m(pid=51865)[0m     Trainer.__init__(self, config, env, logger_creator)
[2m[36m(pid=51865)[0m   File "/home/teto/workspace/master/smart-cities-drl/venv/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 516, in __init__
[2m[36m(pid=51865)[0m     super().__init__(config, logger_creator)
[2m[36m(pid=51865)[0m   File "/home/teto/workspace/master/smart-cities-drl/venv/lib/python3.8/site-packages/ray/tune/trainable.py", 

KeyboardInterrupt: 

## Evaluation

In [None]:
# Template command: replace `~/ray_results/...` with the path of a real
# checkpoint before running.
# NOTE(review): `--config best_config` passes the literal text "best_config",
# not the Python variable; presumably the CLI expects a JSON string -- confirm
# with `rllib rollout --help`.
# NOTE(review): "WasteNet-v0" is registered inside this notebook's kernel;
# verify that the id is also known to the separate CLI process.
!rllib rollout ~/ray_results/... --run PPO --env WasteNet-v0 --config best_config --steps 10000

In [14]:
# Serve TensorBoard over the Ray results directory. The command keeps running
# (and the cell stays busy) until it is interrupted.
!tensorboard --logdir=$HOME/ray_results/

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.4.1 at http://localhost:6006/ (Press CTRL+C to quit)
^C
