In [1]:
# Parameters
# Run configuration read by the cells below. Values are grouped by topic; every name
# and value is the same as before.

# --- Input / output locations ---
data = "../../data/experiments/pilot_random1_player_round_slim.csv"
output_path = "../../data/training/dev"
labels = {}

# --- Problem size (passed to the model constructor) ---
n_contributions = 21
n_punishments = 31

# --- Encodings (passed to the model constructor) ---
y_encoding = "ordinal"
x_encoding = [
    {
        "name": "prev_contributions",
        "n_levels": 21,
        "encoding": "numeric",
    },
    {
        "name": "prev_punishments",
        "n_levels": 31,
        "encoding": "numeric",
    },
]

# --- Cross validation ---
n_cross_val = 2
fraction_training = 1.0

# --- Model, optimizer and training loop settings ---
model_args = {
    "n_layers": 2,
    "hidden_size": 40,
}
optimizer_args = {
    "lr": 0.0001,
    "weight_decay": 1e-05,
}
train_args = {
    "epochs": 1000,
    "batch_size": 40,
    "clamp_grad": 1,
    "eval_period": 10,
}
device = "cpu"


In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import torch as th
from aimanager.generic.data import create_syn_data, create_torch_data, get_cross_validations
from aimanager.artificial_humans.artificial_humans import ArtificialHuman
from aimanager.artificial_humans.evaluation import Evaluator

# Redirect everything this notebook writes into a 'data' subdirectory of the configured
# output location.
# NOTE(review): this rebinds `output_path` in place, so re-running this cell without
# re-running the parameters cell appends another 'data' component.
output_path = os.path.join(output_path, 'data')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the experiment table from the CSV path held in the `data` parameter.
df = pd.read_csv(data)
# Uncomment to preview the first rows when running interactively:
# df.head()

In [4]:
# Convert the frame loaded in the previous cell into the input used for training.
# The CSV is deliberately not re-read here: `data` is rebound below from the file path
# to the converted data, so a second `pd.read_csv(data)` would fail when this cell is
# re-run on a live kernel.
data = create_torch_data(df)

# Size the synthetic input from the notebook parameters instead of repeating the
# literals 21 / 31, so it stays in step with the model when the parameters are overridden.
syn_data = create_syn_data(n_contribution=n_contributions, n_punishment=n_punishments)

In [5]:
th_device = th.device(device)

# NOTE(review): these three lists are never filled in this cell; they are kept in case
# cells outside this view still reference them.
metrics = []
confusion_matrix = []
syn_pred = []
ev = Evaluator()


def _encode_and_move(model, raw, target_device, with_targets=True):
    """Encode ``raw`` with ``model`` and move every resulting entry to ``target_device``.

    Args:
        model: object providing ``encode_x`` and ``encode_y``; both are called with
            ``raw`` expanded as keyword arguments and must return mappings.
        raw: mapping of name -> object with a ``.to(device)`` method.
        target_device: device handed to ``.to`` for every entry.
        with_targets: if True, the result holds the ``encode_x`` output, then the
            ``encode_y`` output, then ``raw`` itself (later sources win on duplicate
            keys). If False, only the ``encode_x`` output is returned.

    Returns:
        A new dict with every value moved to ``target_device``.
    """
    merged = dict(model.encode_x(**raw))
    if with_targets:
        merged.update(model.encode_y(**raw))
        merged.update(raw)
    return {key: value.to(target_device) for key, value in merged.items()}


for i, (train_data, test_data) in enumerate(get_cross_validations(data, n_cross_val)):
    # A fresh model is trained for every cross-validation split.
    model = ArtificialHuman(
        y_encoding=y_encoding, n_contributions=n_contributions, n_punishments=n_punishments,
        x_encoding=x_encoding, **model_args).to(th_device)

    # Use the same device object as the model for all data.
    train_data = _encode_and_move(model, train_data, th_device)
    test_data = _encode_and_move(model, test_data, th_device)
    # The synthetic data only gets the input encoding.
    syn_data_ = _encode_and_move(model, syn_data, th_device, with_targets=False)
    ev.set_data(test=test_data, train=train_data, syn=syn_data_)

    loss_fn = model.get_lossfn()

    optimizer = th.optim.Adam(model.parameters(), **optimizer_args)
    sum_loss = 0
    n_steps = 0
    batch_size = train_args['batch_size']
    n_train = train_data['ah_y_enc'].size(0)

    for e in range(train_args['epochs']):
        ev.set_labels(cv_split=i, epoch=e)
        model.train()
        # New random order of the training rows for every epoch.
        perm = th.randperm(n_train)
        for start_idx in range(0, n_train, batch_size):
            idx = perm[start_idx:start_idx + batch_size]
            batch_data = {k: v[idx] for k, v in train_data.items()}

            mask = batch_data['valid'].flatten()
            n_valid = mask.sum()
            if n_valid == 0:
                # No valid entry in this batch: the masked mean below would be 0/0 and
                # the resulting NaN gradients would corrupt the weights on step().
                continue

            optimizer.zero_grad()

            py = model(**batch_data).flatten(end_dim=-2)
            y_true = batch_data['ah_y_enc'].flatten(end_dim=-2)

            loss = loss_fn(py, y_true)

            # Average the loss over the valid entries only.
            loss = (loss * mask).sum() / n_valid

            loss.backward()

            if train_args['clamp_grad']:
                # Element-wise clamp of all gradients to [-clamp_grad, clamp_grad];
                # unlike a manual loop this skips parameters that have no gradient.
                th.nn.utils.clip_grad_value_(model.parameters(), train_args['clamp_grad'])
            optimizer.step()
            sum_loss += loss.item()
            n_steps += 1

        if e % train_args['eval_period'] == 0:
            # Mean loss over the optimisation steps since the previous evaluation.
            avg_loss = sum_loss / n_steps if n_steps else float('nan')
            print(f'CV {i} | Epoch {e} | Loss {avg_loss}')
            ev.add_loss(avg_loss)
            ev.eval_set(model, 'train')
            ev.eval_set(model, 'test')
            sum_loss = 0
            n_steps = 0

    ev.eval_sync(model)

ev.save(output_path, labels)
# `model` is rebound in every split, so the file written here holds the model of the
# last cross-validation split only.
model_path = os.path.join(output_path, 'model.pt')
model.save(model_path)


NotImplementedError: Currently not supported.