In [1]:
# Parameters
# Run configuration for the manager training below. Every value here is a plain
# literal that later cells read by name.

# Checkpoint paths of the artificial-human models. Both are joined onto
# `basedir` before loading; the first is handed to the environment as
# `artifical_humans`, the second as `artifical_humans_valid`.
artificial_humans = (
    "artifacts/artificial_humans/04_3_2_model/model/architecture_node+edge+rnn.pt"
)
artificial_humans_valid = "artifacts/artificial_humans/02_4_valid/model/rnn_True.pt"
# Key into AH_MODELS selecting the class used to load both checkpoints.
artificial_humans_model = "graph"
# Keyword arguments forwarded unchanged to ArtificalManager.
manager_args = {
    "opt_args": {"lr": 0.0003},
    "gamma": 1.0,
    "eps": 0.1,
    "target_update_freq": 100,
    "model_args": {
        "hidden_size": 20,
        "add_rnn": True,
        "add_edge_model": False,
        "add_global_model": False,
        # The `name` entries of x_encoding and b_encoding are also used to decide
        # which state entries get stored in replay memory.
        "x_encoding": [
            {"name": "contribution", "n_levels": 21, "encoding": "numeric"},
            {"name": "prev_punishment", "n_levels": 31, "encoding": "numeric"},
        ],
        "b_encoding": [{"name": "round_number", "n_levels": 16, "encoding": "onehot"}],
    },
}
# Keyword arguments forwarded unchanged to Memory (the replay buffer).
replay_memory_args = {"n_episodes": 10}
# Number of iterations of the training loop.
n_update_steps = 20
# Number of episodes requested from replay memory for each manager update.
training_batch_size = 3
# Metrics are recorded on every update step that is a multiple of this value.
eval_period = 5
# Keyword arguments forwarded unchanged to ArtificialHumanEnv.
env_args = {
    "n_agents": 4,
    "n_contributions": 21,
    "n_punishments": 31,
    "n_rounds": 16,
    "batch_size": 1000,
}
# Device string, converted with th.device() before models are loaded.
device = "cpu"
# Seed applied to torch, numpy and the stdlib `random` module.
seed = 42
# Directory that receives the `metrics` and `model` sub-directories; replaced by
# `data_dir` when that name is defined.
output_dir = "../../notebooks/manager/rl_manager/01_rnn_node"
# Prefix joined onto the artificial-human checkpoint paths above.
basedir = "../.."

In [1]:
%load_ext autoreload
%autoreload 2

import torch as th
import pandas as pd
import numpy as np
import random
import os
from itertools import count

from aimanager.manager.memory import Memory
from aimanager.manager.environment import ArtificialHumanEnv
from aimanager.artificial_humans import AH_MODELS
from aimanager.manager.manager import ArtificalManager
from aimanager.utils.utils import make_dir
from aimanager.utils.array_to_df import add_labels


# An externally defined `data_dir` takes precedence over the configured output location.
if "data_dir" in locals():
    output_dir = data_dir

# Metrics and model checkpoints live in separate sub-directories of the output location.
metrics_dir, model_dir = (
    os.path.join(output_dir, sub_dir) for sub_dir in ("metrics", "model")
)
make_dir(metrics_dir)
make_dir(model_dir)

# Seed every random number generator used in this notebook.
th.random.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Keep an already defined job id / label set, otherwise fall back to defaults.
job_id = locals().get("job_id", "none")
labels = locals().get("labels", {})

NameError: name 'output_dir' is not defined

In [None]:
# State entries that are averaged and recorded as metrics after each punishment step.
rec_keys = (
    "punishment contribution common_good contributor_payoff manager_payoff".split()
)

# State entries copied into replay memory: the name of every encoded model input
# plus the punishment, with duplicates removed.
replay_keys = list(
    set(
        [
            *(enc["name"] for enc in manager_args["model_args"]["x_encoding"]),
            *(enc["name"] for enc in manager_args["model_args"]["b_encoding"]),
            "punishment",
        ]
    )
)


def run_batch(manager, env, replay_mem=None, on_policy=True, update_step=None):
    """Play the environment from reset until it reports `done` and collect metrics.

    Args:
        manager: object providing `get_q(state, first=..., edge_index=...)` and
            `eps_greedy(q_values=...)`.
        env: object providing `reset()`, `punish(action)`, `step()` and the
            attribute `batch_edge_index`.
        replay_mem: optional buffer; when given, `add(...)` is called once per
            round with the pre-action copy of the state entries listed in
            `replay_keys`, the action and the reward.
        on_policy: if True the action is the argmax over the last q-value axis,
            otherwise it is drawn with `manager.eps_greedy`.
        update_step: value stored verbatim in every metrics record.

    Returns:
        A list with one metrics dict per round.
    """
    sampling = "greedy" if on_policy else "eps-greedy"
    records = []

    state = env.reset()
    round_number = 0
    while True:
        # Copy the replayed entries before the environment moves on.
        snapshot = {
            key: tensor.clone() for key, tensor in state.items() if key in replay_keys
        }

        q_values = manager.get_q(
            state, first=round_number == 0, edge_index=env.batch_edge_index
        )
        action = (
            q_values.argmax(-1) if on_policy else manager.eps_greedy(q_values=q_values)
        )

        # Metrics are taken from the state returned right after punishing.
        punished_state = env.punish(action)
        record = {
            key: punished_state[key].to(th.float).mean().item() for key in rec_keys
        }

        # Advance the environment by one step.
        state, reward, done = env.step()
        if replay_mem is not None:
            replay_mem.add(
                episode_step=round_number, action=action, reward=reward, **snapshot
            )

        record.update(
            next_reward=reward.mean().item(),
            q_min=q_values.min().item(),
            q_max=q_values.max().item(),
            q_mean=q_values.mean().item(),
            round_number=round_number,
            sampling=sampling,
            update_step=update_step,
        )
        records.append(record)

        if done:
            return records
        round_number += 1

In [4]:
# Resolve the configured device; the replay memory is always created on the CPU.
device = th.device(device)
cpu = th.device("cpu")

# Checkpoint paths are configured relative to the base directory.
artificial_humans_ = os.path.join(basedir, artificial_humans)
artificial_humans_valid_ = os.path.join(basedir, artificial_humans_valid)

# Load both artificial-human checkpoints with the same model class and move
# them to the target device.
ah, ahv = (
    AH_MODELS[artificial_humans_model].load(checkpoint, device=device).to(device)
    for checkpoint in (artificial_humans_, artificial_humans_valid_)
)

# Environment driven by the two artificial-human models.
env = ArtificialHumanEnv(
    artifical_humans=ah,
    artifical_humans_valid=ahv,
    device=device,
    **env_args,
)

# The manager takes its action/observation sizes from the environment.
manager = ArtificalManager(
    n_contributions=env.n_contributions,
    n_punishments=env.n_punishments,
    default_values=ah.default_values,
    device=device,
    **manager_args,
)

# One replay slot per environment round.
replay_mem = Memory(
    n_episode_steps=env.n_rounds,
    device=cpu,
    **replay_memory_args,
)

# Accumulates the metric records of all evaluated update steps.
metrics_list = []

for update_step in range(n_update_steps):
    # Sample one batch of epsilon-greedy episodes and add it to the replay buffer.
    off_policy_metrics = run_batch(
        manager, env, replay_mem, on_policy=False, update_step=update_step
    )

    replay_mem.next_episode(update_step)

    # Let the manager update itself on a random sample of stored episodes.
    sample = replay_mem.get_random(device=device, n_episodes=training_batch_size)

    # Reset on every step: when no sample is available the update is skipped, and
    # the metrics below must neither hit an undefined name nor reuse the loss of
    # an earlier step.
    loss = None
    if sample is not None:
        loss = manager.update(
            update_step, **sample, batch=env.batch, edge_index=env.batch_edge_index
        )

    if (update_step % eval_period) == 0:
        if loss is None:
            # No update happened in this step; keep the records and mark the loss
            # as missing so the "loss" column always exists.
            metrics_list.extend(
                {**metrics, "loss": float("nan")} for metrics in off_policy_metrics
            )
        else:
            # Pairs each recorded round with one loss entry
            # (assumes `loss` holds one value per round -- TODO confirm).
            metrics_list.extend(
                {**metrics, "loss": step_loss.item()}
                for metrics, step_loss in zip(off_policy_metrics, loss)
            )
        # Additionally record a greedy rollout that is not written to replay memory.
        metrics_list.extend(
            run_batch(
                manager, env, replay_mem=None, on_policy=True, update_step=update_step
            )
        )

# Persist the trained manager under the job id.
model_file = os.path.join(model_dir, f"{job_id}_manager.pt")

manager.save(model_file)

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


TypeError: load() missing 1 required positional argument: 'device'

In [5]:
# Smoke-test the save/load round trip by reading back the checkpoint that was
# just written to `model_file`. The return value is not assigned, so the
# notebook only displays it.
manager.load(model_file, device=device)

<aimanager.manager.manager.ArtificalManager at 0x7fe0993cec70>

In [None]:
# Columns identifying a record; all remaining metric columns are melted to long format.
id_vars = ["round_number", "sampling", "update_step"]
value_vars = [
    "punishment",
    "contribution",
    "common_good",
    "contributor_payoff",
    "manager_payoff",
    "next_reward",
    "q_min",
    "q_max",
    "q_mean",
    "loss",
]

# Build the long-format frame (one row per record and metric) and attach the
# run labels together with the job id.
df = add_labels(
    pd.DataFrame.from_records(metrics_list).melt(
        id_vars=id_vars, value_vars=value_vars, var_name="metric"
    ),
    {**labels, "job_id": job_id},
)

# Store the metrics next to the model, keyed by job id.
metrics_file = os.path.join(metrics_dir, f"{job_id}.parquet")
df.to_parquet(metrics_file)