In [30]:
# Parameters
# Checkpoint files of the artificial-human models to simulate, keyed by a short
# label. Each path is loaded further down via
# AH_MODELS[artificial_humans_model].load(path).
artificial_humans = {
    "simple": "../../data/artificial_humans/ah_1_1_simple/data/model.pt",
    "complex": "../../data/artificial_humans/ah_1_1/data/model.pt",
}
# Key into AH_MODELS selecting the model class used to load the checkpoints.
artificial_humans_model = "graph"
# Base output directory; the setup cell appends 'data' and creates the directory.
output_path = "../../data/manager/simulate_rule/v2/dev"
# Steps per episode: sizes the replay memories and is passed to the old
# environment as `episode_steps`.
n_episode_steps = 16
# Keyword arguments forwarded to RuleManager (its `s`, `b` and `c` coefficients).
manager_args = {"s": 0, "b": 0, "c": 0}
# Number of episodes: used as `batch_size` of the v3 environment and as the
# episode loop count for the old environment.
n_episodes = 1
# Optional index restricting which agents RuleManager punishes (None = all).
agents = None
# Optional collection of round numbers in which RuleManager applies its rule
# (None = every round).
round_numbers = None


In [31]:
%load_ext autoreload
%autoreload 2

import os
import torch as th
import numpy as np
import pandas as pd
from itertools import count
from aimanager.utils.utils import make_dir

from aimanager.artificial_humans import AH_MODELS

# Store results in a 'data' sub-directory of the configured output path.
# The append is guarded so that re-running this cell (without re-running the
# parameters cell first) does not stack a second 'data' component and create
# '<output_path>/data/data'.
# NOTE: as a consequence, a configured path that itself ends in 'data' is used
# as-is.
if os.path.basename(os.path.normpath(output_path)) != 'data':
    output_path = os.path.join(output_path, 'data')

make_dir(output_path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
class RuleManager:
    """Manager that punishes according to a fixed linear rule.

    For every contribution the raw punishment is

        s * (max_contribution - contribution)
        + c * [contribution != max_contribution]
        - b

    which is rounded, cast to int64 and clamped to ``[0, max_punishment]``.

    Args:
        s: Weight of the shortfall ``max_contribution - contribution``.
        b: Constant subtracted from every raw punishment.
        c: Constant added whenever the contribution is not ``max_contribution``.
        agents: Optional index (anything usable to index the first dimension of
            the punishment tensor). When given, only these entries keep their
            punishment; all others are set to zero.
        round_numbers: Optional container of round numbers. When given, the rule
            is only applied if ``round_number[0]`` is contained in it; otherwise
            all punishments are zero.
        max_contribution: Contribution level that counts as full contribution
            (default 20, the value previously hard-coded).
        max_punishment: Upper bound of the punishment (default 30, the value
            previously hard-coded).
    """

    def __init__(self, s, b, c, agents=None, round_numbers=None,
                 max_contribution=20, max_punishment=30):
        self.s = s
        self.b = b
        self.c = c
        self.agents = agents
        self.round_numbers = round_numbers
        self.max_contribution = max_contribution
        self.max_punishment = max_punishment

    def get_punishment(self, contributions, round_number,  **_):
        """Return the punishments for the given contributions.

        Args:
            contributions: Tensor of contributions.
            round_number: Indexable holding the current round; only its first
                element is inspected.
            **_: Further state entries are accepted and ignored, so the whole
                environment state can be passed as keyword arguments.

        Returns:
            Tensor with the same shape as ``contributions``. It is int64 when the
            rule was applied without an agent restriction; in the other cases it
            carries the dtype of ``contributions``.
        """
        # Default: no punishment (used when the current round is excluded).
        punishments = th.zeros_like(contributions)
        if (self.round_numbers is None) or (round_number[0] in self.round_numbers):
            shortfall = self.max_contribution - contributions
            not_full = (contributions != self.max_contribution).to(th.float)
            raw = shortfall * self.s + not_full * self.c - self.b
            punishments = raw.round().to(th.int64).clamp(min=0, max=self.max_punishment)
        if self.agents is not None:
            # Keep the punishment only for the selected agents.
            selected = th.zeros_like(contributions)
            selected[self.agents] = punishments[self.agents]
            punishments = selected

        return punishments

In [33]:
# Recorded memories of every simulation run, keyed '<new|old>_<artificial human name>'.
all_memory = dict()

In [40]:
from aimanager.manager.environment_v3 import ArtificialHumanEnv as ArtificialHumanEnv_v3
from aimanager.manager.memory_v3 import Memory as Memory_v3
from aimanager.manager.memory import Memory



device = th.device('cpu')

# State entries whose mean is recorded as a metric in every round.
rec_keys = ['punishments', 'contributions', 'common_good', 'contributor_payoff', 'manager_payoff']
metric_list = []

# Simulate the rule manager on the v3 environment, once per artificial-human model.
for ah_name, ah_path in artificial_humans.items():
    ah = AH_MODELS[artificial_humans_model].load(ah_path).to(device)
    # n_rounds follows n_episode_steps (instead of a hard-coded 16) so that the
    # environment and the replay memory below always agree on the episode length.
    env = ArtificialHumanEnv_v3(
        artifical_humans=ah, n_agents=4, n_contributions=21, n_punishments=31,
        batch_size=n_episodes, n_rounds=n_episode_steps, device=device)
    # The environment is created with batch_size=n_episodes, so the memory only
    # needs to hold a single (batched) episode.
    replay_mem = Memory(n_episodes=1, n_episode_steps=n_episode_steps, output_file=None, device=device)

    manager = RuleManager(agents=agents, round_numbers=round_numbers, **manager_args)
    state = env.reset()
    for round_number in count():
        action = manager.get_punishment(**state)
        state = env.punish(action)

        replay_mem.add(episode_step=round_number, action=action, **state)

        # Average the recorded state entries over all of their elements.
        metrics = {k: state[k].to(th.float).mean().item() for k in rec_keys}
        metrics['artificial_humans'] = ah_name

        # pass actions to environment and advance by one step
        state, reward, done = env.step()

        metrics['next_reward'] = reward.mean().item()
        metrics['round_number'] = round_number
        metric_list.append(metrics)
        if done:
            break
    all_memory[f'new_{ah_name}'] = replay_mem.memory

In [41]:
from aimanager.manager.environment import ArtificialHumanEnv
from aimanager.manager.memory import Memory

device = th.device('cpu')
rec_device = th.device('cpu')

# Simulate the rule manager on the old environment, one episode at a time, and
# store the recorded memory under 'old_<artificial human name>'.
for ah_name, ah_path in artificial_humans.items():
    ah = AH_MODELS[artificial_humans_model].load(ah_path).to(device)
    env = ArtificialHumanEnv(
        artifical_humans=ah,
        n_agents=4,
        n_contributions=21,
        n_punishments=31,
        episode_steps=n_episode_steps,
        device=device,
    )
    recorder = Memory(
        n_episodes=n_episodes,
        n_episode_steps=n_episode_steps,
        output_file=None,
        device=device,
    )

    for episode in range(n_episodes):
        manager = RuleManager(**manager_args, agents=agents, round_numbers=round_numbers)
        state = env.init_episode()
        for step in count():
            # Punish based on the current state, record the resulting state,
            # then advance the environment by one step.
            action = manager.get_punishment(**state)
            state = env.punish(action)
            recorder.add(episode_step=step, **state)
            state, reward, done = env.step()
            if done:
                break
        recorder.next_episode(episode)

    all_memory[f'old_{ah_name}'] = recorder.memory

In [42]:
# Shape of the contributions recorded with the old environment for the 'complex' artificial humans.
all_memory['old_complex']['contributions'].shape

torch.Size([100, 16, 4])

In [43]:
# Shape of the contributions recorded with the v3 environment for the 'complex' artificial humans.
all_memory['new_complex']['contributions'].shape

torch.Size([1, 16, 400, 1])

In [None]:
# NOTE: `state` is the module-level variable left behind by whichever simulation
# cell was executed last, so this result depends on the execution order.
state['prev_punishments'].shape

torch.Size([4000, 1])

In [None]:
# id_vars = ['round_number', 's', 'c', 'b', 'artificial_humans']

# df = pd.DataFrame.from_records(metric_list)

# value_vars = list(set(df.columns) - set(id_vars))
# df = df.melt(id_vars=id_vars, value_vars=value_vars, var_name='metric')

# df.to_parquet(os.path.join(output_path, f'metrics.parquet'))