# WasteNet

## Setup

In [None]:
# Colab
!rm -r smart-cities-drl
!git clone https://github.com/eescriba/smart-cities-drl
!cd smart-cities-drl/ && pip install -e .

import sys
sys.path.insert(0,'./smart-cities-drl/src/')

# Local
# !pip install -e ..
# import sys
# sys.path.insert(0,'../src/')

In [5]:
import json
import shutil
import random

import gym
import ray
from ray.rllib.agents.ppo import DEFAULT_CONFIG
from ray.tune import run, sample_from
from core.rl import PPOAgent
from wastenet.env import WasteNetEnv

## Proximal Policy Optimization (PPO)

### Tune hyperparameters

In [None]:
# GPUs handed to the trainer. This name was read below without ever being
# assigned in the notebook, which raises NameError on a fresh kernel.
# 0 keeps everything on CPU; set it to 1 on a GPU runtime.
num_gpus = 0

# One seed for both the trainer config and Python's RNG, so the random.* draws
# below are repeatable as long as they are evaluated in this process.
SEED = 123
random.seed(SEED)

# Base configuration for the search: fixed settings plus randomly sampled
# starting values for the hyperparameters being tuned.
tune_config = {
    "env": WasteNetEnv,
    "seed": SEED,
    "num_gpus": num_gpus,
    "num_workers": 1,
    "observation_filter": "MeanStdFilter",
    "model": {
        "free_log_std": True
    },
    "num_sgd_iter": 10,
    "sgd_minibatch_size": 128,
    "lambda": sample_from(lambda spec: random.uniform(0.9, 1.0)),
    "clip_param": sample_from(lambda spec: random.uniform(0.1, 0.5)),
    # Bounds written low-to-high; the sampled range is the same as before.
    "lr": sample_from(lambda spec: random.uniform(1e-5, 1e-3)),
    "train_batch_size": sample_from(
        lambda spec: random.randint(1000, 60000))
}

# Search space used when a trial's hyperparameters are perturbed: callables are
# resampled, the list gives the discrete choices for the learning rate.
# NOTE(review): "sgd_minibatch_size" can be drawn larger than
# "train_batch_size" (up to 16384 vs. as low as 2000); confirm the scheduler or
# the trainer copes with that combination.
hyperparam_mutations = {
    "lambda": lambda: random.uniform(0.7, 1.0),
    "clip_param": lambda: random.uniform(0.01, 0.5),
    "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
    "num_sgd_iter": lambda: random.randint(1, 30),
    "sgd_minibatch_size": lambda: random.randint(128, 16384),
    "train_batch_size": lambda: random.randint(2000, 160000),
}

# NOTE(review): no stop criteria are set; confirm how the tuning run is meant
# to terminate.
stop_criteria = {}

# NOTE(review): `PbtOptimizer` is not imported by the notebook's import cell;
# it must be brought into scope before this cell can run.
pbt = PbtOptimizer(hyperparam_mutations)
ppo = PPOAgent("wastenet_ppo_tune", tune_config, WasteNetEnv, {})

In [None]:
# Run the hyperparameter search with the config, stop criteria and scheduler
# built in the previous cell.
analysis = ppo.tune(tune_config, stop_criteria, scheduler=pbt)
# Keep the best configuration reported by the returned analysis object.
# NOTE(review): which metric defines "best" is decided inside `ppo.tune`; confirm.
best_config = analysis.best_config
print("Best hyperparameters found: ", best_config)

In [None]:
# Hard-coded training configuration. Running this cell rebinds `best_config`,
# replacing whatever the tuning cell stored under that name, so training can be
# started without repeating the search.
# NOTE(review): the values are presumably the outcome of an earlier tuning run; confirm.
best_config = {
    # Observation preprocessing and model options.
    "observation_filter": "MeanStdFilter",
    "model": {
        "free_log_std": True,
    },
    # SGD schedule.
    "num_sgd_iter": 10,
    "sgd_minibatch_size": 128,
    # PPO hyperparameters.
    "lambda": 0.731396,
    "clip_param": 0.317651,
    "lr": 5e-5,
    "train_batch_size": 18812,
}

### Training

In [6]:
# Build a PPO agent named "wastenet_ppo_train" from `best_config` and train it
# for 1000 iterations. This rebinds `ppo`, replacing any agent created earlier.
# NOTE(review): the meaning of the trailing `{}` argument is defined by PPOAgent; confirm.
ppo = PPOAgent("wastenet_ppo_train", best_config, WasteNetEnv, {})
ppo.train(num_iter=1000)

In [None]:
# Inspect the network behind the current agent's policy.
policy = ppo.agent.get_policy()
model = policy.model
# Assumes `base_model` is a Keras model: `summary()` prints the layer table
# itself and returns None, so wrapping it in print() only appended a stray
# "None" line to the output.
model.base_model.summary()

## Evaluation

In [None]:
# Build an agent named "wastenet_ppo_test" from `best_config` and run it for
# 1000 evaluation episodes.
# NOTE(review): this constructs a new PPOAgent rather than reusing the one that
# was trained above; confirm that PPOAgent/test restores the trained weights,
# otherwise an untrained policy is being evaluated.
ppo = PPOAgent("wastenet_ppo_test", best_config, WasteNetEnv, {})
ppo.test(num_episodes=1000)

In [None]:
# Load the TensorBoard notebook extension and open it on the results directory.
# NOTE(review): the log directory is a hard-coded Colab path, and "wastenet_ppo"
# does not exactly match any experiment name used in this notebook
# ("wastenet_ppo_tune", "wastenet_ppo_train", "wastenet_ppo_test"); confirm
# where PPOAgent writes its results.
%load_ext tensorboard 
%tensorboard --logdir="/content/ray_results/wastenet_ppo"

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.4.1 at http://localhost:6006/ (Press CTRL+C to quit)
^C
