In [1]:
# Parameters
x_encoding = [
    {"name": "prev_contributions", "n_levels": 21, "encoding": "numeric"},
    {"name": "prev_punishments", "n_levels": 31, "encoding": "numeric"},
    {"name": "round_number", "n_levels": 16, "encoding": "numeric"},
    {"name": "prev_common_good", "norm": 128, "etype": "float"},
    {"name": "prev_valid", "etype": "bool"},
]
n_contributions = 21
n_punishments = 31
n_cross_val = 2
fraction_training = 1.0
data = "../../data/experiments/pilot_random1_player_round_slim.csv"
output_path = "../../data/training/dev"
labels = {}
model_name = 'dense'
model_args = {"n_layers": 2, "hidden_size": 40}
optimizer_args = {"lr": 0.001, "weight_decay": 1e-05}
train_args = {"epochs": 1000, "batch_size": 10, "clamp_grad": 1, "eval_period": 10}
device = "cpu"


In [6]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import torch as th
from aimanager.generic.data import create_syn_data, create_torch_data, get_cross_validations
from aimanager.artificial_humans import AH_MODELS
from aimanager.artificial_humans.evaluation import Evaluator
from aimanager.utils.array_to_df import using_multiindex
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

output_path = os.path.join(output_path, 'data')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
df = pd.read_csv(data)


data = create_torch_data(df)
syn_data = create_syn_data(n_contribution=21, n_punishment=31)

In [27]:
data_names = ['prev_punishments', 'prev_contributions']

index_names = ['episode_id', 'round_number', 'player_id']

columns = [
    using_multiindex(syn_data[name], index_names).rename(columns={'value': name}).set_index(index_names)[name]
    for name in data_names
]
syn_df = pd.concat(columns, axis=1).reset_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,prev_punishments,prev_contributions
episode_id,round_number,player_id,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,0,0,0
0,0,1,0,0
0,0,2,0,0
0,0,3,0,0
0,1,0,0,0
...,...,...,...,...
650,14,3,30,20
650,15,0,30,20
650,15,1,30,20
650,15,2,30,20


In [4]:
th_device = th.device(device)

metrics = []
confusion_matrix = []
syn_pred = []
ev = Evaluator()

th_device = th.device(device)



def create_fully_connected(n_nodes):
    return th.tensor([[i,j]
        for i in range(n_nodes)
        for j in range(n_nodes)
    ])

def encode(model, data, *, mask=True, x_encode=True, y_encode=True, u_encode=False, device, n_player=4):
    data = {
        'mask': data['valid'] if mask else None,
        'x': model.x_encoder(**data) if x_encode else None,
        'y': model.y_encoder(**data) if y_encode else None,
        'u': model.u_encoder(**data) if u_encode else None,
    }
    data = {
        k: v.to(device)
        for k, v in data.items()
        if v is not None
    }

    # for k,v in data.items():
    #     print(k,v.shape)

    edge_attr = th.zeros(n_player*n_player,0)
    edge_index = create_fully_connected(n_player)

    n_episodes = list(data.values())[0].shape[0]
    dataset = [
        Data(**{k: v[i] for k, v in data.items()}, edge_attr=edge_attr, edge_index=edge_index, idx=i, group_idx=i, num_nodes=n_player)
        for i in range(n_episodes)
    ]
    return dataset


for i, (train_data, test_data) in enumerate(get_cross_validations(data, n_cross_val)):
    model = AH_MODELS[model_name](
        n_contributions=n_contributions, n_punishments=n_punishments, x_encoding=x_encoding,
        **model_args).to(th_device)

    train_data_ = encode(model, train_data, mask=True, device=th_device)
    test_data_ = encode(model, test_data, mask=True, device=th_device)
    syn_data_ = encode(model, syn_data, mask=False, y_encode=False, device=th_device)

    ev.set_data(test=test_data_, train=train_data_, syn=syn_data_, syn_df=syn_df)

    optimizer = th.optim.Adam(model.parameters(), **optimizer_args)
    loss_fn = th.nn.CrossEntropyLoss(reduction='none')
    sum_loss = 0
    n_steps = 0

    for e in range(train_args['epochs']):
        ev.set_labels(cv_split=i, epoch=e)
        model.train()
        for i, batch_data in enumerate(iter(DataLoader(train_data_, shuffle=True, batch_size=train_args['batch_size']))):

            optimizer.zero_grad()
            py = model(batch_data).flatten(end_dim=-2)
            y_true = batch_data['y'].flatten(end_dim=-2)
            mask = batch_data['mask'].flatten()
            loss = loss_fn(py, y_true)
            loss = (loss * mask).sum() / mask.sum()

            loss.backward()

            if train_args['clamp_grad']:
                for param in model.parameters():
                    param.grad.data.clamp_(-train_args['clamp_grad'], train_args['clamp_grad'])
            optimizer.step()
            sum_loss += loss.item()
            n_steps +=1
        
        if e % train_args['eval_period'] == 0:
            avg_loss = sum_loss/n_steps
            print(f'CV {i} | Epoch {e} | Loss {avg_loss}')
            ev.add_loss(avg_loss)
            ev.eval_set(model, 'train')
            ev.eval_set(model, 'test')
            sum_loss = 0
            n_steps = 0

    # ev.eval_sync(model)

# ev.save(output_path, labels)
# model_path = os.path.join(output_path, 'model.pt')
# model.save(model_path)

TypeError: set_data() missing 1 required positional argument: 'syn_df'

In [None]:

from torch_geometric.data import Batch

In [None]:
test = Batch.from_data_list(train_data_)

In [None]:
test

DataBatch(x=[2160, 4, 5], edge_index=[16, 270], edge_attr=[2160, 0], y=[2160, 4, 21], mask=[2160, 4], idx=[135], group_idx=[135], num_nodes=540, batch=[540], ptr=[136])

In [None]:
train_data_[0]

Data(x=[16, 4, 5], edge_index=[16, 2], edge_attr=[16, 0], y=[16, 4, 21], mask=[16, 4], idx=0, group_idx=0, num_nodes=4)