In [1]:
# Parameters
# Run configuration for the training notebook. Every name below is read by a
# later cell; the comments describe how the visible code uses each value.

# Forwarded as-is to get_cross_validations(data, n_cross_val, fraction_training).
n_cross_val = 3
fraction_training = 1.0
# CSV file loaded with pd.read_csv.
data_file = "../../data/experiments/pilot_random1_player_round_slim.csv"
# Output directory: models are written to <data_dir>/model and the recorder
# is saved with rec.save(data_dir, ...).
data_dir = "../../data/artificial_humans/dev"
# Passed to rec.save together with job_id.
labels = {}
# Key used to look up the model class in AH_MODELS.
model_name = "graph"
# Used for the saved model file name (<job_id>.pt) and for rec.save.
job_id = 'dev'
# Keyword arguments for the model constructor. "y_name" is additionally used
# by the training loop as the name of the target column that gets masked.
model_args = {
    "y_levels": 31,
    "y_name": "punishment",
    "add_rnn": False,
    "add_edge_model": False,
    "add_global_model": False,
    "hidden_size": 10,
    # "punishment_masked" and "autoreg_mask" are not columns of the raw data:
    # they are created by apply_mask_pattern before model.encode is called.
    "x_encoding": [
        {"name": "contribution", "n_levels": 21, "encoding": "numeric"},
        {"name": "prev_punishment", "n_levels": 31, "encoding": "numeric"},
        {"name": "contribution_valid", "etype": "bool"},
        {"name": "prev_punishment_valid", "etype": "bool"},
        {"name": "punishment_masked", "n_levels": 31, "encoding": "numeric"},
        {"name": "autoreg_mask", "etype": "bool"},
    ],
}
# Key of the boolean data entry that is ANDed with the mask pattern and handed
# to model.encode(..., mask=mask_name).
mask_name = "recorded"
# Only rows whose 'experiment_name' is in this list are kept.
# NOTE(review): "trail" may be a typo for "trial"; the value has to match the
# experiment_name values in the CSV - confirm before changing.
experiment_names = ["trail_rounds_2"]
# Keyword arguments for th.optim.Adam.
optimizer_args = {"lr": 0.001, "weight_decay": 1e-05}
# epochs: number of passes over the training data
# batch_size: groups per optimisation step (incomplete batches are skipped)
# clamp_grad: gradients are clamped to [-clamp_grad, clamp_grad]; a falsy value disables it
# eval_period: evaluate every eval_period epochs and always on the last epoch
# l1_entropy: weight of the sum(p * log p) term that is added to the loss
train_args = {"epochs": 10, "batch_size": 20, "clamp_grad": 1, "eval_period": 10, 'l1_entropy': 0.1}
# Number of nodes per group; sets the width of the mask patterns and the size
# of each fully connected graph.
n_player = 4
# Each of these data entries is shuffled in turn when evaluating on test data.
# NOTE(review): presumably all three exist as keys of the dict returned by
# create_torch_data - verify, shuffle_feature raises KeyError otherwise.
shuffle_features = ["prev_punishment", "prev_contribution", "prev_common_good"]
# Passed to th.device.
device = "cpu"
# Seed for the torch, numpy and random generators.
seed = 123
# True: build one mask pattern per number of masked players (1..n_player);
# False: a single pattern with every player masked.
autoregression = True


In [2]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import numpy as np
import random
import torch as th
from aimanager.generic.data import create_torch_data, get_cross_validations
from aimanager.artificial_humans import AH_MODELS
from aimanager.artificial_humans.evaluation import eval_model, Recorder
from aimanager.utils.utils import make_dir
from aimanager.generic.graph_encode import create_fully_connected
from aimanager.generic.graph import Encoder
from torch_geometric.loader import DataLoader
from itertools import permutations


# Seed every random number generator used by this notebook (PyTorch, NumPy
# and the standard library) with the same configured seed.
for seed_rng in (th.random.manual_seed, np.random.seed, random.seed):
    seed_rng(seed)

# Trained models are stored in a "model" sub-directory of the output directory.
model_dir = os.path.join(data_dir, 'model')
make_dir(model_dir)


  return torch._C._cuda_getDeviceCount() > 0


In [3]:
# Read the raw table and keep only the rows of the selected experiments.
df = pd.read_csv(data_file).loc[
    lambda frame: frame['experiment_name'].isin(experiment_names)
]

# Convert the filtered table into the dict of tensors used for training,
# together with the per-column default values.
data, default_values = create_torch_data(df)

In [4]:
# Result containers and the recorder that collects all metrics.
metrics, confusion_matrix, syn_pred = [], [], []
rec = Recorder()

th_device = th.device(device)

syn_index = ["prev_punishments", "prev_contributions"]

encodings = [
    {"name": "y_masked", "n_levels": 31, "encoding": "numeric"},
    {"name": "y_valid", "etype": "bool"},
]

x_add_encoder = Encoder(encodings=encodings)

# Base mask patterns, one row per pattern and one column per player.
# With autoregression, row k has its first k + 1 entries set to True;
# without it there is a single all-True row.
pattern = (
    [[pos < n_true for pos in range(n_player)] for n_true in range(1, n_player + 1)]
    if autoregression
    else [[True] * n_player]
)

# Training samples from every distinct ordering of each base pattern ...
all_orderings = [ordering for row in pattern for ordering in permutations(row)]
training_mask_pattern = th.tensor(list(set(all_orderings)), dtype=th.bool)

# ... while evaluation only uses the base patterns themselves.
test_mask_pattern = th.tensor(pattern, dtype=th.bool)


def shuffle_feature(data, feature_name):
    """Return a shallow copy of ``data`` with one entry shuffled along dim 0.

    Args:
        data: dict mapping names to indexable tensors.
        feature_name: key of the entry whose first dimension is permuted.

    Returns:
        A new dict; every entry except ``feature_name`` is the same object as
        in ``data``. The input dict itself is left unchanged.
    """
    values = data[feature_name]
    order = th.randperm(len(values))
    shuffled = dict(data)
    shuffled[feature_name] = values[order]
    return shuffled


def batch_loader(data, batch_size):
    """Yield randomly composed, full-size batches of ``data``.

    The number of samples is taken from ``data["contribution"]``. Sample
    indices are permuted with NumPy's global random state and cut into
    consecutive chunks of ``batch_size``; a trailing chunk that is smaller
    than ``batch_size`` is dropped.

    Args:
        data: dict of arrays/tensors that share their first dimension.
        batch_size: number of samples per yielded batch.

    Yields:
        dict with the same keys as ``data``, each value indexed by the
        sample indices of the current batch.
    """
    n_samples = len(data["contribution"])
    order = np.random.permutation(np.arange(n_samples))
    # Only complete batches are produced.
    n_full = n_samples // batch_size
    for start in range(0, n_full * batch_size, batch_size):
        chunk = order[start:start + batch_size]
        yield {key: values[chunk] for key, values in data.items()}

def mask_data(data, mask, targets, default_values):
    """Return a copy of ``data`` with a ``<target>_masked`` entry per target.

    For every name in ``targets`` the tensor ``data[target]`` is cloned and
    the positions selected by ``mask`` are overwritten with
    ``default_values[target]``. The result is stored under
    ``target + '_masked'``.

    The input dict is not modified: the new entries are written to a shallow
    copy. (Writing into the caller's dict made every masking call leak into
    the shared training / test data.)

    Args:
        data: dict of tensors.
        mask: 2-D boolean tensor indexing the leading two dimensions of the
            targets. If its first dimension differs from the target's, it is
            tiled along dim 0 once per target row (intended for a single-row
            mask that applies to every row).
        targets: iterable of keys of ``data`` to mask.
        default_values: mapping from target name to the fill value.

    Returns:
        A new dict with all entries of ``data`` plus the masked targets.
    """
    data = {**data}
    for target in targets:
        masked = data[target].clone()
        if mask.shape[0] != masked.shape[0]:
            mask = mask.repeat(masked.shape[0], 1)
        masked[mask] = default_values[target]
        data[target + '_masked'] = masked
    return data


def apply_mask_pattern(data, mask_pattern, y_name, mask_name, default_values):
    """Apply a mask pattern to the target and to the mask entry of ``data``.

    The returned dict contains, in addition to the entries of ``data``:

    * ``y_name + '_masked'``: the target with the positions selected by
      ``mask_pattern`` replaced by ``default_values[y_name]``,
    * ``mask_name``: the original entry ANDed with the pattern,
    * ``"autoreg_mask"``: the pattern broadcast to the shape of the
      ``mask_name`` entry.

    The caller's dict and its tensors are left untouched, so the same data
    can be masked repeatedly with different patterns.

    Args:
        data: dict of tensors containing ``y_name`` and ``mask_name``.
        mask_pattern: 2-D boolean tensor; a trailing axis is added so it
            broadcasts against the ``mask_name`` entry.
        y_name: key of the target tensor.
        mask_name: key of the mask tensor.
        default_values: mapping providing the fill value for ``y_name``.

    Returns:
        A new dict with the masked entries described above.
    """
    # mask_data hands back a fresh dict, so the assignments below never touch
    # the dict owned by the caller.
    data = mask_data(data, mask_pattern, [y_name], default_values)
    pattern = mask_pattern[:, :, np.newaxis]
    data[mask_name] = data[mask_name] & pattern
    data["autoreg_mask"] = th.ones_like(data[mask_name]) & pattern
    return data


def create_fully_connected(n_nodes, n_groups=1, device=th.device('cpu')):
    """Build the edge index of ``n_groups`` disjoint fully connected graphs.

    Each group has ``n_nodes`` nodes; node ``i`` of group ``k`` gets the
    global index ``i + k * n_nodes``. Every ordered pair of distinct nodes
    within a group is connected (no self loops).

    NOTE(review): this definition shadows the ``create_fully_connected``
    imported from ``aimanager.generic.graph_encode`` at the top of the
    notebook - confirm which of the two is meant to be used.

    Args:
        n_nodes: number of nodes per group.
        n_groups: number of groups.
        device: device on which the tensor is created.

    Returns:
        Integer tensor of shape ``(2, n_groups * n_nodes * (n_nodes - 1))``
        with source nodes in row 0 and target nodes in row 1.
    """
    edges = [
        [src + group * n_nodes, dst + group * n_nodes]
        for group in range(n_groups)
        for src in range(n_nodes)
        for dst in range(n_nodes)
        if src != dst
    ]
    # Explicit dtype and shape: an empty edge list would otherwise become a
    # float tensor of shape (0,) instead of a (2, 0) integer edge index.
    return th.tensor(edges, dtype=th.long, device=device).reshape(len(edges), 2).T


# Train and evaluate one model per cross-validation split.
#
# For each split: build a fresh model, train it on randomly masked batches,
# and every `eval_period` epochs (and on the last epoch) record the running
# training loss plus the metrics returned by eval_model for each test mask
# pattern on the training data, on the test data, and on the test data with
# one feature shuffled at a time.
for i, train_data, test_data in get_cross_validations(
    data, n_cross_val, fraction_training
):
    model = AH_MODELS[model_name](default_values=default_values, **model_args).to(
        th_device
    )

    batch_size = train_args["batch_size"]
    # One fully connected graph per group: for a training batch, for the
    # whole training set and (if present) for the whole test set.
    batch_edge_index = create_fully_connected(n_player, n_groups=batch_size)
    train_edge_index = create_fully_connected(n_player, n_groups=train_data['contribution'].shape[0])
    if test_data is not None:
        test_edge_index = create_fully_connected(n_player, n_groups=test_data['contribution'].shape[0])

    y_name = model_args["y_name"]

    optimizer = th.optim.Adam(model.parameters(), **optimizer_args)
    loss_fn = th.nn.CrossEntropyLoss(reduction="none")
    # Running loss since the last evaluation.
    sum_loss = 0
    n_steps = 0

    for e in range(train_args["epochs"]):
        rec.set_labels(cv_split=i, epoch=e)
        model.train()
        for j, b_data in enumerate(batch_loader(train_data, batch_size)):
            optimizer.zero_grad()

            # Draw an independent mask pattern for every group in the batch.
            p_idx = th.randint(0, len(training_mask_pattern), (batch_size,))
            b_data = apply_mask_pattern(b_data, training_mask_pattern[p_idx], y_name, mask_name, default_values)
            batch_data = model.encode(b_data, mask=mask_name, edge_index=batch_edge_index, device=th_device)

            y_logit = model(batch_data).flatten(end_dim=-2)
            # log_softmax keeps the log-probabilities finite; taking
            # softmax(...).log() yields -inf (and 0 * -inf = NaN in the
            # entropy term) as soon as a probability underflows to zero.
            y_log_pred = y_logit.log_softmax(-1)
            y_pred = y_log_pred.exp()
            y_true = batch_data["y_enc"].flatten(end_dim=-2)
            mask = batch_data["mask"].flatten()

            # Cross entropy plus the weighted sum(p * log p) term (the
            # negative entropy of the predicted distribution).
            loss = (
                loss_fn(y_logit, y_true)
                + (y_pred * y_log_pred).sum(-1) * train_args["l1_entropy"]
            )

            # Average over the unmasked entries only; the clamp avoids a
            # 0 / 0 = NaN loss when a batch has no unmasked entry at all.
            loss = (loss * mask).sum() / mask.sum().clamp(min=1)

            # NOTE(review): retain_graph=True looks unnecessary because the
            # graph is rebuilt for every batch - kept as is, confirm before
            # removing.
            loss.backward(retain_graph=True)

            if train_args["clamp_grad"]:
                # Clamps every gradient element to [-clamp_grad, clamp_grad]
                # and skips parameters that received no gradient.
                th.nn.utils.clip_grad_value_(
                    model.parameters(), train_args["clamp_grad"]
                )
            optimizer.step()
            sum_loss += loss.item()
            n_steps += 1

        last_epoch = e == (train_args["epochs"] - 1)

        if (e % train_args["eval_period"] == 0) or last_epoch:
            # n_steps is 0 when the training set is smaller than one batch.
            avg_loss = sum_loss / n_steps if n_steps else float("nan")
            print(f"CV {i} | Epoch {e} | Loss {avg_loss}")

            rec.rec(value=avg_loss, set="train")

            # Evaluate on the training data for each test mask pattern.
            # A shallow copy is passed so that masking can never write into
            # train_data, which is still needed unmasked for later epochs.
            for j, mask in enumerate(test_mask_pattern):
                n_pred = mask.sum().item()
                _d = apply_mask_pattern({**train_data}, mask[np.newaxis], y_name, mask_name, default_values)
                _d = model.encode(_d, mask=mask_name, edge_index=train_edge_index, device=th_device)
                metrics = eval_model(model, _d)
                rec.rec_many(metrics, set="train", n_pred=n_pred, mask=j)

            if test_data is not None:
                # Evaluate on the test data for each test mask pattern.
                for j, mask in enumerate(test_mask_pattern):
                    n_pred = mask.sum().item()
                    _d = apply_mask_pattern({**test_data}, mask[np.newaxis], y_name, mask_name, default_values)
                    _d = model.encode(_d, mask=mask_name, edge_index=test_edge_index, device=th_device)
                    metrics = eval_model(model, _d)
                    rec.rec_many(metrics, set="test", n_pred=n_pred, mask=j)
                    # Evaluate on the test data with one feature shuffled at
                    # a time (shuffle_feature already returns a copy).
                    for sf in shuffle_features:
                        _d = shuffle_feature(test_data, sf)
                        _d = apply_mask_pattern(_d, mask[np.newaxis], y_name, mask_name, default_values)
                        _d = model.encode(
                            _d, mask=mask_name, edge_index=test_edge_index, device=th_device
                        )
                        metrics = eval_model(model, _d)
                        rec.rec_many(metrics, set="test", shuffle_feature=sf, n_pred=n_pred, mask=j)
            # Restart the running loss for the next evaluation period.
            sum_loss = 0
            n_steps = 0

    # The model is only saved when the split index is None.
    if i is None:
        model_path = os.path.join(model_dir, f"{job_id}.pt")
        model.save(model_path)

    rec.save(data_dir, labels, job_id=job_id)


CV 0 | Epoch 0 | Loss 3.114483952522278


RecursionError: maximum recursion depth exceeded in comparison