# Testing compatibility of `ddls` with `rllib`

In [1]:
# Load the autoreload extension; later cells call `%autoreload` before running.
%load_ext autoreload
# Project-local (ddls) environment, device, distribution, model and plotting helpers.
from ddls.environments.job_placing.job_placing_all_nodes_environment import JobPlacingAllNodesEnvironment
from ddls.devices.processors.gpus.A100 import A100
from ddls.distributions.uniform import Uniform
from ddls.dgl_tests.rllib_model_test import GNNPolicy
from ddls.plotting.plotting import plot_line

import ray
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print
# Shut down Ray and then initialise it again.
# NOTE(review): presumably this makes the cell safe to re-run in a live kernel
# where Ray was already initialised -- confirm.
ray.shutdown()
ray.init()

from ray.rllib.models import ModelCatalog
from ray.rllib.agents import ppo


# Config loading, notebook display, stats collection and plotting utilities.
from omegaconf import OmegaConf
from IPython.display import display
from collections import defaultdict
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


### Register the custom environment with `ray`

In [2]:
def _make_job_placing_env(env_config):
    """Create a JobPlacingAllNodesEnvironment, unpacking `env_config` as keyword arguments."""
    return JobPlacingAllNodesEnvironment(**env_config)


# Make the environment creator available to ray under this name.
register_env('job_placing_all_nodes_environment', _make_job_placing_env)

### Register the custom model with `rllib`

In [3]:
# Register GNNPolicy under the name 'my_model'; the 'custom_model' entry of the
# rllib model config defined later in this notebook refers to this name.
ModelCatalog.register_custom_model('my_model', GNNPolicy)

### Load `rllib` config

In [4]:
# # load config
# config = OmegaConf.load('configs/ddls_job_placing_rllib.yaml')
# print(OmegaConf.to_yaml(config))

# # convert config to dict so that it is compatible with rllib
# config = OmegaConf.to_container(config, resolve=False)
# print(config)

In [7]:
%autoreload
node_config = {'type_1':
                  {
                      'num_nodes': 16,
                      'workers_config': 
                          [
                              {
                               'num_workers': 4,
#                                'worker': A100
                               'worker': 'ddls.devices.processors.gpus.A100.A100'
                              }
                          ]
                  }
              }

topology_config = {'type':
                      'torus',
                   'kwargs':
                      {
                          'x_dims': 4,
                          'y_dims': 4
                      }
                  }

jobs_config = {'path_to_files': '/scratch/datasets/ddls/jobs/tensorflow_synthetic_graphs/valid',
               'job_interarrival_time_dist': Uniform(min_val=1, max_val=1000),
               'max_files': 1,
               'job_sampling_mode': 'remove'}


env_config = {'node_config': node_config,
              'topology_config': topology_config,
              'jobs_config': jobs_config,
              'reward_function': 'mean_job_completion_time'}


model_config = {
        'in_features_node':5,
        'in_features_edge':1,
        'out_features_msg':8,
        'out_features_hidden':16,
        'out_features':4,
        'in_features_graph':130,
        'out_features_graph':4,
        'num_layers':1,
        'aggregator_type':'mean'
    }



rllib_config = {
    
    'seed': 0,
    
    'env': 'job_placing_all_nodes_environment',
    
    'env_config': env_config,
    
    'batch_mode': 'complete_episodes',
    'train_batch_size': 1, # 128
    'sgd_minibatch_size': 1, # 128
    
    'model':{
            'fcnet_hiddens':[8],
            'fcnet_activation':'relu',
            'custom_model':'my_model',
            'custom_model_config': model_config
        },
    
    'framework': 'torch'
    
    }

# print(OmegaConf.to_yaml(rllib_config))
print(rllib_config)

{'seed': 0, 'env': 'job_placing_all_nodes_environment', 'env_config': {'node_config': {'type_1': {'num_nodes': 16, 'workers_config': [{'num_workers': 4, 'worker': 'ddls.devices.processors.gpus.A100.A100'}]}}, 'topology_config': {'type': 'torus', 'kwargs': {'x_dims': 4, 'y_dims': 4}}, 'jobs_config': {'path_to_files': '/scratch/datasets/ddls/jobs/tensorflow_synthetic_graphs/valid', 'job_interarrival_time_dist': <ddls.distributions.uniform.Uniform object at 0x7fe3d06440d0>, 'max_files': 1, 'job_sampling_mode': 'remove'}, 'reward_function': 'mean_job_completion_time'}, 'batch_mode': 'complete_episodes', 'train_batch_size': 1, 'sgd_minibatch_size': 1, 'model': {'fcnet_hiddens': [8], 'fcnet_activation': 'relu', 'custom_model': 'my_model', 'custom_model_config': {'in_features_node': 5, 'in_features_edge': 1, 'out_features_msg': 8, 'out_features_hidden': 16, 'out_features': 4, 'in_features_graph': 130, 'out_features_graph': 4, 'num_layers': 1, 'aggregator_type': 'mean'}}, 'framework': 'torch'}

### Initialise an `rllib` epoch loop

In [8]:
%autoreload

# load default PPO config and update with custom config params
ppo_config = ppo.DEFAULT_CONFIG.copy()
ppo_config.update(rllib_config)
print(f'Config:\n{ppo_config}')

# initialise rllib trainer
epoch_loop = ppo.PPOTrainer(config=ppo_config)
print('\nInitialised trainer.')



Config:
{'num_workers': 2, 'num_envs_per_worker': 1, 'create_env_on_driver': False, 'rollout_fragment_length': 200, 'batch_mode': 'complete_episodes', 'gamma': 0.99, 'lr': 5e-05, 'train_batch_size': 1, 'model': {'fcnet_hiddens': [8], 'fcnet_activation': 'relu', 'custom_model': 'my_model', 'custom_model_config': {'in_features_node': 5, 'in_features_edge': 1, 'out_features_msg': 8, 'out_features_hidden': 16, 'out_features': 4, 'in_features_graph': 130, 'out_features_graph': 4, 'num_layers': 1, 'aggregator_type': 'mean'}}, 'optimizer': {}, 'horizon': None, 'soft_horizon': False, 'no_done_at_end': False, 'env': 'job_placing_all_nodes_environment', 'observation_space': None, 'action_space': None, 'env_config': {'node_config': {'type_1': {'num_nodes': 16, 'workers_config': [{'num_workers': 4, 'worker': 'ddls.devices.processors.gpus.A100.A100'}]}}, 'topology_config': {'type': 'torus', 'kwargs': {'x_dims': 4, 'y_dims': 4}}, 'jobs_config': {'path_to_files': '/scratch/datasets/ddls/jobs/tensorfl

[2m[36m(RolloutWorker pid=2044987)[0m 2022-04-14 16:22:13,907	ERROR worker.py:430 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=2044987, ip=128.40.41.23, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f98b4c902e0>)
[2m[36m(RolloutWorker pid=2044987)[0m   File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 586, in __init__
[2m[36m(RolloutWorker pid=2044987)[0m     self._build_policy_map(
[2m[36m(RolloutWorker pid=2044987)[0m   File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1577, in _build_policy_map
[2m[36m(RolloutWorker pid=2044987)[0m     self.policy_map.create_policy(name, orig_cls, obs_space, act_space,
[2m[36m(RolloutWorker pid=2044987)[0m   File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/poli

RayActorError: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=2044934, ip=128.40.41.23, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f170fb422e0>)
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 586, in __init__
    self._build_policy_map(
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1577, in _build_policy_map
    self.policy_map.create_policy(name, orig_cls, obs_space, act_space,
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/policy/policy_map.py", line 143, in create_policy
    self[policy_id] = class_(observation_space, action_space,
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/agents/ppo/ppo_torch_policy.py", line 50, in __init__
    self._initialize_loss_from_dummy_batch()
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/policy/policy.py", line 832, in _initialize_loss_from_dummy_batch
    self.compute_actions_from_input_dict(
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 294, in compute_actions_from_input_dict
    return self._compute_action_helper(input_dict, state_batches,
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
    return func(self, *a, **k)
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 934, in _compute_action_helper
    dist_inputs, state_out = self.model(input_dict, state_batches,
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/models/modelv2.py", line 243, in __call__
    res = self.forward(restored, state or [], seq_lens)
  File "/home/zciccwf/phd_project/projects/ddls/ddls/dgl_tests/rllib_model_test.py", line 150, in forward
    graph.edata['z'] = torch.Tensor(edge_features)
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/dgl/view.py", line 198, in __setitem__
    self._graph._set_e_repr(self._etid, self._edges, {key : val})
  File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/dgl/heterograph.py", line 4220, in _set_e_repr
    raise DGLError('Expect number of features to match number of edges.'
dgl._ffi.base.DGLError: Expect number of features to match number of edges. Got 13120 and 145920 instead.

[2m[36m(RolloutWorker pid=2044934)[0m 2022-04-14 16:22:13,988	ERROR worker.py:430 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=2044934, ip=128.40.41.23, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f170fb422e0>)
[2m[36m(RolloutWorker pid=2044934)[0m   File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 586, in __init__
[2m[36m(RolloutWorker pid=2044934)[0m     self._build_policy_map(
[2m[36m(RolloutWorker pid=2044934)[0m   File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1577, in _build_policy_map
[2m[36m(RolloutWorker pid=2044934)[0m     self.policy_map.create_policy(name, orig_cls, obs_space, act_space,
[2m[36m(RolloutWorker pid=2044934)[0m   File "/scratch/zciccwf/py36/envs/ddls/lib/python3.9/site-packages/ray/rllib/poli

### Run `rllib` on the `ddls` environment

In [None]:
%autoreload

agent_name = 'PPO'
num_epochs = 50
rl_training_stats = defaultdict(lambda: [])
for epoch in range(num_epochs):
    print(f'\n------- Epoch {epoch+1} of {num_epochs} -------')
    result = epoch_loop.train()
    
    # print epoch data
    print(pretty_print(result))
    
    # save epoch data
    for key, val in result['hist_stats'].items():
        rl_training_stats[key].extend(val)
    for _ in range(len(val)):
        rl_training_stats['seed'].append(result['config']['seed'])
        rl_training_stats['agent'].append(agent_name)
        rl_training_stats['epoch'].append(epoch)
        
# display(pd.DataFrame(rl_training_stats))

In [None]:
x = 'epoch'
scaling_factor = 1
# Ordered tuple rather than a set: iteration order of a set of strings can
# change between kernel restarts (string hash randomisation), which made the
# order of the figures non-reproducible.
metrics_to_plot = ('episode_reward', 'episode_lengths')

# One figure per metric, plotted against the epoch and grouped by agent.
for metric in metrics_to_plot:
    print(f'Plotting metric {metric}')
    fig = plt.figure()
    # A fresh DataFrame is built for every call, as in the original, because
    # it is not visible here whether plot_line modifies its input.
    fig = plot_line(pd.DataFrame(rl_training_stats),
                    x=x,
                    y=metric,
                    hue='agent',
                    xlabel=x,
                    ylabel=metric,
                    err_style='band', # 'band' 'bars'
                    ci=68, # 95 68
                    scaling_factor=scaling_factor,
                    show_fig=False)
#     plt.axhline(y=np.mean(random_baseline_stats[metric]), linestyle='--', color='#a84a32', label='Random')
    # Legend is placed above the axes, centred, in two columns.
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.3), ncol=2)
    plt.show()