# CPR appropriation

## Prerequisites

In [31]:
%%capture
!pip install src/gym_cpr_grid
!pip install -r requirements.txt

In [81]:
import numpy as np
import gym
import matplotlib.pyplot as plt
from IPython import display
from ray import tune
from ray.tune import JupyterNotebookReporter
from ray.rllib.models import ModelCatalog
from ray.rllib.agents.dqn import DQNTrainer
from ray.tune.logger import DEFAULT_LOGGERS
from ray.tune.integration.wandb import WandbLogger

from src import models, metrics

# Enable the %tensorboard magic used in the DQN section to embed TensorBoard.
%load_ext tensorboard
# Mode 2 of autoreload re-imports edited modules before each cell runs, so
# changes to the local `src` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Random policy

In [None]:
# Step the environment for `env.max_steps` steps with randomly sampled actions
# for every agent, redrawing the rendered grid in place after each step.
env = gym.make(
    'gym_cpr_grid:CPRGridEnv-v0',
    n_agents=3,
    grid_width=5,
    grid_height=7,
)
observations = env.reset()
fig, ax, img = env.plot(env.render('rgb_array'))
for _ in range(env.max_steps):
    # Show the current frame first; it stays on screen while the step is computed.
    display.display(plt.gcf())
    # Sample one action per agent, keyed by the agent's index.
    action_dict = dict(
        (agent_id, env.action_space.sample())
        for agent_id in range(env.n_agents)
    )
    observations, rewards, dones, infos = env.step(action_dict)
    # wait=True defers the clear until new output replaces it, avoiding flicker.
    display.clear_output(wait=True)
    # Update the existing image artist instead of creating a new figure.
    img.set_data(env.render(mode='rgb_array'))
env.close()

## DQN

In [61]:
# Embed TensorBoard, reading event files from ~/ray_results
# (presumably where tune.run writes its trial logs by default; confirm if local_dir is changed).
%tensorboard --logdir "~/ray_results"

In [62]:
# Register the project's network under the name "fcn" so the trainer config
# can select it through "custom_model": "fcn".
ModelCatalog.register_custom_model("fcn", models.FCNetwork)

In [86]:
# Columns shown in the live progress table: each custom metric key is mapped to
# a short header (U, E, S, P). The keys are presumably produced by the
# metrics.SocialOutcomeMetrics callbacks -- verify against src/metrics.
social_outcome_columns = {
    "custom_metrics/efficiency_mean": "U",
    "custom_metrics/equality_mean": "E",
    "custom_metrics/sustainability_mean": "S",
    "custom_metrics/peace_mean": "P",
}

# overwrite=True makes the reporter replace the previous table in the cell
# output rather than appending a new one on every update.
reporter = JupyterNotebookReporter(
    overwrite=True,
    metric_columns=social_outcome_columns,
)

In [90]:
# Environment parameters, shared by the probe environment below and by the
# environments RLlib builds for its rollout workers.
env_config = {
    "n_agents": 3,
    "grid_width": 5,
    "grid_height": 7,
}

# Read the step limit from a throwaway environment created in this cell instead
# of from the `env` global left behind by the random-policy cell. Relying on
# that global made this cell raise a NameError on a fresh kernel
# (Restart & Run All) unless the random-policy cell had been executed first.
probe_env = gym.make("gym_cpr_grid:CPRGridEnv-v0", **env_config)
max_steps = probe_env.max_steps
probe_env.close()

# Train DQN on the CPR grid environment with the custom "fcn" model, reporting
# the social-outcome metrics live and logging to both TensorBoard and W&B.
# NOTE(review): no `stop` criterion is passed, so training presumably continues
# until it is interrupted manually -- consider adding one for reproducible runs.
experiment_analysis = tune.run(
    DQNTrainer,
    config={
        "env": "gym_cpr_grid:CPRGridEnv-v0",
        "env_config": env_config,
        "num_workers": 1,
        "framework": "torch",
        "model": {
            "custom_model": "fcn",
            "fcnet_hiddens": [32, 32],
            "fcnet_activation": "relu",
        },
        "exploration_config": {
            "type": "EpsilonGreedy",
            "initial_epsilon": 1.0,
            "final_epsilon": 0.1,
            # Anneal epsilon over as many timesteps as the environment's step limit.
            "epsilon_timesteps": max_steps,
        },
        "callbacks": metrics.SocialOutcomeMetrics,
        "logger_config": {
            "wandb": {
                "project": "cpr-appropriation",
                # The API key is read from a local file so that it is not
                # hardcoded in the notebook.
                "api_key_file": "./wandb_api_key_file",
                "log_config": True,
                "sync_tensorboard": True,
            }
        },
    },
    progress_reporter=reporter,
    loggers=DEFAULT_LOGGERS + (WandbLogger,),
)

Trial name,status,loc,U,E,S,P
DQN_gym_cpr_grid:CPRGridEnv-v0_e4f3f_00000,RUNNING,192.168.1.110:62411,0.528453,0.958059,254.142,0


[2m[36m(pid=62411)[0m 2021-08-17 11:47:02,047	ERROR worker.py:421 -- SystemExit was raised from the worker
[2m[36m(pid=62411)[0m Traceback (most recent call last):
[2m[36m(pid=62411)[0m   File "python/ray/_raylet.pyx", line 632, in ray._raylet.task_execution_handler
[2m[36m(pid=62411)[0m   File "python/ray/_raylet.pyx", line 486, in ray._raylet.execute_task
[2m[36m(pid=62411)[0m   File "python/ray/_raylet.pyx", line 523, in ray._raylet.execute_task
[2m[36m(pid=62411)[0m   File "python/ray/_raylet.pyx", line 530, in ray._raylet.execute_task
[2m[36m(pid=62411)[0m   File "python/ray/_raylet.pyx", line 534, in ray._raylet.execute_task
[2m[36m(pid=62411)[0m   File "python/ray/_raylet.pyx", line 484, in ray._raylet.execute_task.function_executor
[2m[36m(pid=62411)[0m   File "/Users/jobs/Github/cpr-appropriation/venv/lib/python3.9/site-packages/ray/_private/function_manager.py", line 563, in actor_method_executor
[2m[36m(pid=62411)[0m     return method(__ray_acto

2021-08-17 11:47:02,232	INFO tune.py:550 -- Total run time: 76.82 seconds (76.59 seconds for the tuning loop).
Exception ignored in: <function WandbLoggerCallback.__del__ at 0x1e12408b0>
Traceback (most recent call last):
  File "/Users/jobs/Github/cpr-appropriation/venv/lib/python3.9/site-packages/ray/tune/integration/wandb.py", line 378, in __del__
    for trial in self._trial_processes:
RuntimeError: dictionary changed size during iteration


In [77]:
# Show the collected results as a DataFrame indexed by trial id.
experiment_analysis.results_df

Unnamed: 0_level_0,episode_reward_max,episode_reward_min,episode_reward_mean,episode_len_mean,episodes_this_iter,num_healthy_workers,timesteps_total,agent_timesteps_total,done,episodes_total,...,info.learner.default_policy.td_error,config.tf_session_args.gpu_options.allow_growth,config.tf_session_args.device_count.CPU,info.learner.default_policy.learner_stats.allreduce_latency,info.learner.default_policy.learner_stats.grad_gnorm,info.learner.default_policy.learner_stats.cur_lr,info.learner.default_policy.learner_stats.mean_q,info.learner.default_policy.learner_stats.min_q,info.learner.default_policy.learner_stats.max_q,info.learner.default_policy.learner_stats.mean_td_error
trial_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0bdeb_00000,1321.0,408.0,864.5,1000.0,0,1,7056,7056,False,2,...,"[-0.03420925, 0.017646313, 0.23368692, 0.65992...",True,1,0.0,0.17243336,0.0005,4.851447,3.356795,6.587478,0.132203
