In [16]:
# Parameters
# Checkpoint of the pre-trained artificial-human model, and the key used to look
# up its class in AH_MODELS.
artificial_humans = "../../data/training/ah_10/data/model.pt"
artificial_humans_model = "graph"

# Root directory under which the `metrics` and `model` sub-folders are created.
data_dir = "../../train_data/manager/v2/dev/"

# Keyword arguments forwarded to ArtificalManager.
manager_args = dict(
    opt_args=dict(lr=0.003),
    gamma=1.0,
    eps=0.2,
    target_update_freq=20,
    model_args=dict(
        hidden_size=5,
        add_rnn=True,
        add_edge_model=True,
        add_global_model=False,
        x_encoding=[
            dict(name="contributions", n_levels=21, encoding="numeric"),
            dict(name="prev_punishments", n_levels=31, encoding="numeric"),
        ],
        u_encoding=[
            dict(name="round_number", n_levels=16, encoding="numeric"),
            dict(name="prev_common_good", norm=128, etype="float"),
        ],
    ),
)

# Keyword arguments forwarded to the replay Memory.
replay_memory_args = dict(n_batches=10)

# Number of iterations of the outer update loop.
n_update_steps = 30
eval_period = 5

# Keyword arguments forwarded to ArtificialHumanEnv.
env_args = dict(
    n_agents=4,
    n_contributions=21,
    n_punishments=31,
    n_rounds=16,
    batch_size=5,
)

# Device name; converted to a th.device before use.
device = "cpu"
job_id = "dev"
labels = dict()


In [17]:
%load_ext autoreload
%autoreload 2

import torch as th
import pandas as pd
import os
from itertools import count

from aimanager.manager.memory import Memory
from aimanager.manager.environment_v3 import ArtificialHumanEnv
from aimanager.artificial_humans import AH_MODELS
from aimanager.manager.manager import ArtificalManager
from aimanager.utils.utils import make_dir
from aimanager.utils.array_to_df import add_labels

# Derive both output folders from the run's data directory, then make sure
# each one exists before anything is written to it.
metrics_dir, model_dir = (
    os.path.join(data_dir, folder) for folder in ('metrics', 'model')
)
make_dir(metrics_dir)
make_dir(model_dir)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
# Resolve the configured device; the replay memory below is created on the CPU.
device = th.device(device)
cpu = th.device('cpu')

# Load the pre-trained artificial-human model selected by `artificial_humans_model`.
artifical_humans = (
    AH_MODELS[artificial_humans_model]
    .load(artificial_humans)
    .to(device)
)

# Environment, manager and replay memory all take their sizes from the environment.
env = ArtificialHumanEnv(
    artifical_humans=artifical_humans,
    device=device,
    **env_args)

manager = ArtificalManager(
    n_contributions=env.n_contributions,
    n_punishments=env.n_punishments,
    default_values=artifical_humans.default_values,
    device=device,
    **manager_args)

replay_mem = Memory(
    n_rounds=env.n_rounds,
    device=cpu,
    batch_size=env.batch_size,
    group_size=env.n_agents,
    **replay_memory_args)

# Produce a single transition: reset, encode, act greedily, punish, step.
state = env.reset()
encoded = manager.encode_pure(state)
batch_structure = env.get_batch_structure()
obs = {**encoded, **batch_structure}

# Query the manager for Q-values and pick the index of the largest one
# along the last axis.
q_values = manager.get_q(obs, first=True)
action = q_values.argmax(dim=-1)

# Apply the chosen punishments, then advance the environment by one step.
state = env.punish(action)
state, reward, done = env.step()

# Repeatedly write that same transition into the replay memory, tagging every
# entry with the current update step so the printed samples reveal which
# stored batches a sample was drawn from.
for update_step in range(n_update_steps):
    print(update_step, replay_mem.start_row, replay_mem.end_row)
    replay_mem.start_batch(env.groups)

    # Rounds 0..14 inclusive.
    for round_number in range(15):
        us_ten = th.full_like(action, fill_value=update_step)
        replay_mem.add(
            round_number=round_number,
            action=action,
            reward=reward,
            update_step=us_ten,
            **encoded)

    sample, groups = replay_mem.sample(device=device)
    print(groups)
    if sample is not None:
        print(sample['update_step'][:,0])
    replay_mem.finish_batch()

0 None None
None
1 0 20
[[4, 5, 6, 7], [0, 1, 2, 3], [12, 13, 14, 15], [8, 9, 10, 11], [16, 17, 18, 19]]
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
2 20 40
[[28, 29, 30, 31], [24, 25, 26, 27], [32, 33, 34, 35], [8, 9, 10, 11], [20, 21, 22, 23]]
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1])
3 40 60
[[12, 13, 14, 15], [0, 1, 2, 3], [32, 33, 34, 35], [56, 57, 58, 59], [24, 25, 26, 27]]
tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1])
4 60 80
[[0, 1, 2, 3], [56, 57, 58, 59], [16, 17, 18, 19], [28, 29, 30, 31], [68, 69, 70, 71]]
tensor([0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 3])
5 80 100
[[92, 93, 94, 95], [88, 89, 90, 91], [56, 57, 58, 59], [96, 97, 98, 99], [24, 25, 26, 27]]
tensor([4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 1, 1, 1, 1])
6 100 120
[[12, 13, 14, 15], [116, 117, 118, 119], [100, 101, 102, 103], [76, 77, 78, 79], [36, 37, 38, 39]]
tensor([0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3

In [None]:
# List the fields contained in the last batch returned by replay_mem.sample().
sample.keys()

dict_keys(['action', 'reward', 'update_step', 'mask', 'x', 'y_enc', 'y', 'u'])

In [None]:
# Show the update-step tag of the first 20 rows (column 0) of the last sample.
# NOTE(review): presumably column 0 is the first round — confirm against Memory.
sample['update_step'][:20,0]

tensor([24, 24, 24, 24, 28, 28, 28, 28, 23, 23, 23, 23, 28, 28, 28, 28, 27, 27,
        27, 27])

In [None]:
# Shape of the Q-values returned by manager.get_q(obs, first=True).
q_values.shape

torch.Size([4000, 1, 31])