In [4]:
# Parameters
artificial_humans = "../../data/artificial_humans/01_rnn_edge_features/model/rnn_True__edge_True__global_features_False.pt"
artificial_humans_model = "graph"
managers = {
    "humanlike": {
        "type": "human",
        "path": "aimanager/model/human_like_manager/03_features_features_cont+ppun+valid.pt",
    },
    "optimal": {
        "type": "rl",
        "path": "aimanager/model/manager/17_model__rnn_True__edge_False__global_features_False__run_0.pt",
    },
}
output_path = "../../data/manager/simulate_rule/v1_comp/"
n_episode_steps = 16
n_episodes = 5

In [5]:
import requests
import datetime
import os
import random
import numpy as np


BACKEND_URL = os.environ.get('MANAGER_BACKEND_URL', 'http://localhost:8000')


def get_punishments(contributions, missing_inputs, groups, *, session, group_idx, round):
    """
    Requests the punishments form the manger backend.

    Args:
        contributions [int]: contributions in fixed order (order has to remain throughout the game)
        missing_inputs [boolean]: missing_inputs in fixed order (same order as contributions)
        groups [str]: group identifier in fixed order (same order as contributions)
        session str: session indentifier
        subsession str: subsession identifier
        round int: round number

    Returns:
        [int]: punishments for each player in the same order then contributions
    """
    data = {
        'contributions': contributions,
        'missing_inputs': missing_inputs,
        'groups': groups,
        'round': round,
        'group_idx': group_idx,
        'session': session,
        'datetime': str(datetime.datetime.now())
    }
    url = BACKEND_URL + '/get_punishments'
    try:
        response = requests.post(url, json={'data': data}).json()
        return response['data']['punishments']
    except Exception as e:
        print(f'Failed to reach manager backend.')
        return [None]*len(contributions)


In [6]:
%load_ext autoreload
%autoreload 2

import os
import torch as th
import numpy as np
import seaborn as sns
import pandas as pd
from itertools import count
from aimanager.utils.utils import make_dir

from aimanager.manager.memory import Memory
from aimanager.manager.environment_v3 import ArtificialHumanEnv
from aimanager.artificial_humans import GraphNetwork
from aimanager.utils.array_to_df import using_multiindex, add_labels

output_path = os.path.join(output_path, 'data')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


NameError: name '_C' is not defined

In [None]:
device = th.device('cpu')
rec_device = th.device('cpu')
artifical_humans = GraphNetwork.load(artificial_humans, device=device)

env = ArtificialHumanEnv(
    artifical_humans=artifical_humans, n_agents=4, n_contributions=21, n_punishments=31, 
    n_rounds=n_episode_steps, batch_size=1, device=device)
mg = GraphNetwork.load(managers['humanlike']['path'], device=device)

groups = ['human'] * 4
session = 'asdasd'
missing_inputs = [False] * 4

recorder = Memory(n_episodes=n_episodes, n_episode_steps=n_episode_steps, output_file=None, device=device)

for e in range(n_episodes):
    state = env.reset()
    for round_number in count():

        contributions = state['contributions'].squeeze().tolist()
        b_action = get_punishments(
            contributions, missing_inputs, groups, session=session, group_idx=e, round=round_number)

        _state = {**state, **env.get_batch_structure()}
        encoded = mg.encode_pure(_state)

        action, proba = mg.predict_pure(encoded, reset_rnn=round_number == 0, sample=True)

        state = env.punish(b_action)

        recorder.add(**{k: v.squeeze(-1) for k, v in state.items()}, control_punishment=action, episode_step=round_number)
        state, reward, done = env.step()
        if done:
            break
    recorder.next_episode(e)

Failed to reach manager backend.


AttributeError: 'list' object has no attribute 'max'

In [None]:
columns = ['episode', 'round_number','participant_code']

punishments = using_multiindex(recorder.memory['punishments'][0].numpy(), columns=columns, value_name='punishment')
control_punishment = using_multiindex(recorder.memory['control_punishment'][0].numpy(), columns=columns, value_name='control_punishment')
common_good = using_multiindex(recorder.memory['common_good'][0].numpy(), columns=columns, value_name='common_good')
contributions = using_multiindex(recorder.memory['contributions'][0].numpy(), columns=columns, value_name='contribution')

df_sim = punishments.merge(common_good).merge(contributions).merge(control_punishment)
df_sim['punishment_diff'] = df_sim['punishments'] - df_sim['control_punishment']

columns = ['round_number', 'common_good', 'contribution', 'participant_code', 'punishment', 'data_source']

df = pd.concat([df_sim]).reset_index(drop=True)

dfm = df.melt(
    id_vars=['round_number', 'participant_code', 'data_source'], 
    value_vars=['punishment', 'contribution', 'punishment_diff', 'control_punishment'])

In [None]:
g = sns.relplot(
    data=dfm,
    x="round_number", 
    y="value",
    col='variable',
    kind="line",
    height=3, aspect=1,
)