In [21]:
# Parameters
# NOTE(review): this looks like an injected/overridable parameters cell (e.g. papermill) — confirm.

# --- Encoding of the model target and inputs ---------------------------------
y_encoding = "numeric"
x_encoding = [
    dict(name="prev_contributions", n_levels=21, encoding="numeric"),
    dict(name="prev_punishments", n_levels=31, encoding="numeric"),
    dict(name="round_number", n_levels=16, encoding="numeric"),
    dict(name="prev_common_good", norm=128, etype="float"),
    dict(name="prev_valid", etype="bool"),
]
n_contributions = 21
n_punishments = 31

# --- Data splitting ----------------------------------------------------------
n_cross_val = 2
fraction_training = 1.0

# --- Input / output locations ------------------------------------------------
data = "../../data/experiments/pilot_random1_player_round_slim.csv"
output_path = "../../data/training/dev"
labels = dict()

# --- Model, optimiser and training-loop settings -----------------------------
model_args = dict(n_layers=2, hidden_size=40)
optimizer_args = dict(lr=0.0001, weight_decay=1e-5)
train_args = dict(
    epochs=1000,
    batch_size=40,
    clamp_grad=1,
    eval_period=10,
)
device = "cpu"


In [22]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import torch as th
from aimanager.generic.data import create_syn_data, create_torch_data, get_cross_validations
from aimanager.artificial_humans.artificial_humans import ArtificialHuman
from aimanager.artificial_humans.evaluation import Evaluator

# Redirect all artefacts of this notebook into a 'data' sub-directory of the configured output path.
# NOTE(review): `output_path` is rebound in place, so re-running this cell without first
# re-running the parameters cell appends another 'data' component to the path.
output_path = os.path.join(output_path, 'data')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
# Load the experiment CSV referenced by the `data` parameter into a DataFrame.
df = pd.read_csv(data)
# Uncomment to preview the first rows:
# df.head()

In [24]:
# `df` comes from the preceding load cell. It is deliberately not re-read here:
# `data` is rebound below from the CSV path to the converted data, so a
# `pd.read_csv(data)` inside this cell would fail as soon as the cell is executed twice.
# NOTE(review): `data` changes meaning here (path -> converted data); the training cell
# reads it under this name, so the name is kept.
data = create_torch_data(df)

# Size the (presumably synthetic) evaluation inputs from the configured level counts
# instead of literals, so they cannot drift from what the model is built with.
syn_data = create_syn_data(n_contribution=n_contributions, n_punishment=n_punishments)

In [25]:
th_device = th.device(device)

# Optional reproducibility hook: when `train_args` carries a 'seed' entry, seed torch's
# global RNG (affects weight initialisation and the per-epoch shuffling below).
# Without that entry nothing is seeded and runs stay non-deterministic.
seed = train_args.get('seed')
if seed is not None:
    th.manual_seed(seed)

# NOTE(review): these three lists are never used in this cell; kept in case later cells read them.
metrics = []
confusion_matrix = []
syn_pred = []
ev = Evaluator()


def _encode_to_device(model, raw, target_device, with_targets=True):
    """Encode one data split with `model` and move every entry to `target_device`.

    Args:
        model: object providing `encode_x(**raw)` and `encode_y(**raw)`, each returning a mapping.
        raw: mapping of name -> tensor for one split; passed to the encoders as keyword arguments.
        target_device: device every resulting value is moved to via `.to(...)`.
        with_targets: when True, the result also contains the `encode_y` output and the raw
            entries themselves (raw entries win on key collisions); when False, only the
            `encode_x` output is returned.

    Returns:
        dict of name -> value on `target_device`.
    """
    encoded = dict(model.encode_x(**raw))
    if with_targets:
        encoded.update(model.encode_y(**raw))
        encoded.update(raw)
    return {key: value.to(target_device) for key, value in encoded.items()}


# Stays None when the cross-validation generator yields no split, so the save step can tell.
model = None

for i, (train_data, test_data) in enumerate(get_cross_validations(data, n_cross_val)):
    # A fresh model (and, below, a fresh optimizer) is created for every split.
    model = ArtificialHuman(
        y_encoding=y_encoding, n_contributions=n_contributions, n_punishments=n_punishments,
        x_encoding=x_encoding, **model_args).to(th_device)

    train_data = _encode_to_device(model, train_data, th_device)
    test_data = _encode_to_device(model, test_data, th_device)
    # The syn set only receives encoded inputs (no targets, no raw entries).
    syn_data_ = _encode_to_device(model, syn_data, th_device, with_targets=False)
    ev.set_data(test=test_data, train=train_data, syn=syn_data_)

    loss_fn = model.get_lossfn()
    optimizer = th.optim.Adam(model.parameters(), **optimizer_args)

    # Running loss statistics, reset after every evaluation.
    sum_loss = 0
    n_steps = 0
    batch_size = train_args['batch_size']
    clamp_grad = train_args['clamp_grad']
    n_samples = train_data['ah_y_enc'].size(0)

    for e in range(train_args['epochs']):
        ev.set_labels(cv_split=i, epoch=e)
        model.train()

        # New random sample order every epoch; the last batch may be shorter than batch_size.
        perm = th.randperm(n_samples)
        for start_idx in range(0, n_samples, batch_size):
            idx = perm[start_idx:start_idx + batch_size]
            batch_data = {k: v[idx] for k, v in train_data.items()}

            mask = batch_data['valid'].flatten()
            n_valid = mask.sum()
            if n_valid.item() == 0:
                # No valid entry in this batch: the masked mean would be 0/0 = NaN and the
                # following optimizer step would write NaNs into the weights.
                continue

            optimizer.zero_grad()

            py = model(**batch_data).flatten(end_dim=-2)
            y_true = batch_data['ah_y_enc'].flatten(end_dim=-2)

            # Mean of the loss over valid entries only.
            # NOTE(review): relies on loss_fn returning an unreduced loss that lines up with
            # the flattened `valid` mask — confirm against ArtificialHuman.get_lossfn.
            loss = loss_fn(py, y_true)
            loss = (loss * mask).sum() / n_valid

            loss.backward()

            if clamp_grad:
                # Element-wise clamp of every gradient to [-clamp_grad, clamp_grad];
                # parameters that received no gradient are skipped.
                th.nn.utils.clip_grad_value_(model.parameters(), clamp_grad)
            optimizer.step()
            sum_loss += loss.item()
            n_steps += 1

        if e % train_args['eval_period'] == 0:
            # Average over all optimisation steps since the previous evaluation.
            avg_loss = sum_loss / n_steps if n_steps else float('nan')
            print(f'CV {i} | Epoch {e} | Loss {avg_loss}')
            ev.add_loss(avg_loss)
            ev.eval_set(model, 'train')
            ev.eval_set(model, 'test')
            sum_loss = 0
            n_steps = 0

    # NOTE(review): the data set registered above is called 'syn'; confirm that
    # `eval_sync` (rather than e.g. `eval_syn`) is the intended Evaluator method.
    ev.eval_sync(model)

ev.save(output_path, labels)

if model is None:
    raise RuntimeError('No cross-validation split was produced, so there is no model to save.')

# Only the model of the last cross-validation split is persisted.
os.makedirs(output_path, exist_ok=True)
model_path = os.path.join(output_path, 'model.pt')
model.save(model_path)

CV 0 | Epoch 0 | Loss 0.15756937488913536
CV 0 | Epoch 10 | Loss 0.15808738060295582
CV 0 | Epoch 20 | Loss 0.15521111115813255
CV 0 | Epoch 30 | Loss 0.1524423997849226
CV 0 | Epoch 40 | Loss 0.15019174963235854
CV 0 | Epoch 50 | Loss 0.14756412245333195
CV 0 | Epoch 60 | Loss 0.14448010064661504
CV 0 | Epoch 70 | Loss 0.1410291951149702
CV 0 | Epoch 80 | Loss 0.13885884173214436
CV 0 | Epoch 90 | Loss 0.13616674523800612
CV 0 | Epoch 100 | Loss 0.133415481261909
CV 0 | Epoch 110 | Loss 0.13094267696142198
CV 0 | Epoch 120 | Loss 0.128913083486259
CV 0 | Epoch 130 | Loss 0.1256242511793971
CV 0 | Epoch 140 | Loss 0.122998290322721
CV 0 | Epoch 150 | Loss 0.11926205400377513
CV 0 | Epoch 160 | Loss 0.1181063862517476
CV 0 | Epoch 170 | Loss 0.11467561945319175
CV 0 | Epoch 180 | Loss 0.11302029937505723
CV 0 | Epoch 190 | Loss 0.11054925769567489
CV 0 | Epoch 200 | Loss 0.10910341236740351
CV 0 | Epoch 210 | Loss 0.10718018356710672
CV 0 | Epoch 220 | Loss 0.10397224649786949
CV 0 | Ep