In [None]:
import time
import json
from collections import defaultdict

import numpy as np
import torch
from torch import nn, tensor, optim
from torch.utils.tensorboard import SummaryWriter
from torch.nn import functional as F

from regretnet.plot import plot

class Trainer(object):
    """Dual-ascent trainer for an auction network.

    ``net`` is called as ``allocation, payment = net(valuations)``. Each training
    step first runs a gradient search over misreports (``adv_var``) and then
    updates the network on ``-utility + w_rgt * regret + w_rev * revenue``; the
    multipliers ``w_rgt`` and ``w_rev`` are re-estimated on every ``train_op`` call.

    Attributes meant to be set by the caller after construction:
        revenue_max: reference revenue used by ``train_op``. Must be set before
            training; ``train_op`` raises ``ValueError`` while it is ``None``.
        alpha: multiplier applied to ``revenue_max`` in ``train_op`` (default 1.0).
        writer: optional TensorBoard ``SummaryWriter``. While it is ``None``
            scalar logging is skipped and checkpoints cannot be written or restored.
    """

    def __init__(self, configuration, net, clip_op_lambda, device):
        """Store the collaborators and build shapes, misreport variables, optimizers and masks."""
        self.net = net
        self.config = configuration
        self.device = device
        self.mode = 'train'
        self.clip_op_lambda = clip_op_lambda

        # Logging is opt-in: attach e.g. SummaryWriter(self.config.save_data) to
        # `self.writer` to enable TensorBoard scalars and checkpointing.
        self.writer = None

        # Revenue-constraint settings read by train_op; callers overwrite them.
        self.alpha = 1.0
        self.revenue_max = None

        self.init_componenents()

    def init_componenents(self):
        """Run all one-off construction steps in dependency order."""
        self.create_constants()
        self.create_params_to_train()
        self.create_optimizers()
        self.create_masks()
        # The config is not dumped automatically: no writer (and therefore no
        # log directory) exists yet. Call save_config() after attaching one.

    @staticmethod
    def _modes(train, val):
        """Return the list of mode names ('train' / 'val') whose flag is truthy."""
        return [name for name, enabled in (('train', train), ('val', val)) if enabled]

    def _log_scalar(self, tag, value, step):
        """Forward a scalar to the TensorBoard writer when one is attached."""
        if self.writer is not None:
            self.writer.add_scalar(tag, value, step)

    def _require_log_dir(self):
        """Return the writer's log directory or raise if no writer is attached."""
        if self.writer is None:
            raise RuntimeError('No SummaryWriter attached: set `trainer.writer` before '
                               'saving or restoring files.')
        return self.writer.log_dir

    def create_constants(self):
        """Precompute tensor shapes per mode and initialise the dual variables."""
        cfg = self.config
        self.x_shape = dict()
        self.adv_shape = dict()
        self.adv_var_shape = dict()
        self.u_shape = dict()
        for mode in ('train', 'val'):
            batch_size = cfg[mode].batch_size
            num_misreports = cfg[mode].num_misreports
            self.x_shape[mode] = [batch_size, cfg.num_agents, cfg.num_items]
            self.adv_shape[mode] = [cfg.num_agents, num_misreports, batch_size,
                                    cfg.num_agents, cfg.num_items]
            self.adv_var_shape[mode] = [num_misreports, batch_size, cfg.num_agents, cfg.num_items]
            self.u_shape[mode] = [cfg.num_agents, num_misreports, batch_size, cfg.num_agents]

        self.w_rgt = cfg.train.w_rgt_init_val
        self.w_rev = 5e3  # initial revenue multiplier (hard-coded in the original code)
        self.rgt_target = cfg.train.rgt_target_start
        # Per-iteration multiplier that moves rgt_target from its start towards its
        # end value; train_epoch clamps the result at rgt_target_end.
        self.rgt_target_mult = (cfg.train.rgt_target_end / cfg.train.rgt_target_start) ** \
                               (1.5 / cfg.train.max_iter)

    def create_params_to_train(self, train=True, val=True):
        """Create the trainable misreport tensors (one per enabled mode), initialised to zero."""
        self.adv_var = dict()
        for mode in self._modes(train, val):
            self.adv_var[mode] = torch.zeros(self.adv_var_shape[mode], requires_grad=True,
                                             device=self.device).float()

    def create_optimizers(self, train=True, val=True):
        """Create the network optimizer and, per enabled mode, the misreport optimizer + scheduler."""
        self.opt1 = optim.Adam(self.net.parameters(), self.config.train.learning_rate)

        # Optimizers (and per-step LR decay) for the misreport search.
        self.opt2 = dict()
        self.sc_opt2 = dict()
        for mode in self._modes(train, val):
            self.opt2[mode] = optim.Adam([self.adv_var[mode]], self.config[mode].gd_lr)
            self.sc_opt2[mode] = optim.lr_scheduler.StepLR(self.opt2[mode], 1,
                                                           self.config[mode].gd_lr_step)

    def create_masks(self, train=True, val=True):
        """Build 0/1 masks that select, for leading index i, the entries of agent i.

        ``adv_mask`` has ``adv_shape`` and ``u_mask`` has ``u_shape``; both are
        created once on ``self.device``.
        """
        agents = np.arange(self.config.num_agents)
        self.adv_mask = dict()
        self.u_mask = dict()
        for mode in self._modes(train, val):
            adv_mask = np.zeros(self.adv_shape[mode])
            adv_mask[agents, :, :, agents, :] = 1.0
            self.adv_mask[mode] = tensor(adv_mask).float().to(self.device)

            u_mask = np.zeros(self.u_shape[mode])
            u_mask[agents, :, :, agents] = 1.0
            self.u_mask[mode] = tensor(u_mask).float().to(self.device)

    def save_config(self):
        """Dump the configuration as JSON into the writer's log directory."""
        with open(self._require_log_dir() + '/config.json', 'w') as f:
            json.dump(self.config, f)

    def mis_step(self, x):
        """Take one optimizer step on ``adv_var`` that increases the misreporting agents' utility."""
        mode = self.mode

        self.opt2[mode].zero_grad()

        x_mis, misreports = self.get_misreports_grad(x)
        a_mis, p_mis = self.net(misreports)
        utility_mis = self.compute_utility(x_mis, a_mis, p_mis)

        # Negated because the optimizer minimises; the mask keeps only the
        # utility of the agent that is misreporting in each slice.
        u_mis = - (utility_mis.view(self.u_shape[mode]) * self.u_mask[mode]).sum()

        u_mis.backward()
        self.opt2[mode].step()
        self.sc_opt2[mode].step()

    def train_op(self, x):
        """Update the dual variables and take one optimizer step on the network.

        Returns:
            ``(final_loss, revenue, rgt, utility, revenue_gap)`` where the first
            four are tensors and ``revenue_gap`` is the Python float
            ``revenue - alpha * revenue_max``.

        Raises:
            ValueError: if ``revenue_max`` has not been set.
        """
        if self.revenue_max is None:
            raise ValueError('Trainer.revenue_max must be set before calling train_op().')

        self.opt1.zero_grad()

        x_mis, misreports = self.get_misreports(x)
        alloc_true, pay_true = self.net(x)
        a_mis, p_mis = self.net(misreports)

        rgt, utility = self.compute_regret(x, alloc_true, pay_true, x_mis, a_mis, p_mis)
        rgt = rgt.sum()
        utility = utility.mean()
        revenue = self.compute_rev(pay_true)

        revenue_value = revenue.detach().item()
        revenue_gap = revenue_value - self.alpha * self.revenue_max

        # Dual updates use detached Python floats, so they never enter the graph.
        self.w_rgt = max(0, self.w_rgt + self.config.train.rgt_lr *
                         ((rgt / (revenue + 1e-8)).detach().log().item() - np.log(self.rgt_target)))
        # NOTE(review): only `revenue` is scaled by revenue_lr here, the
        # `alpha * revenue_max` term is not. This mirrors the original arithmetic;
        # if the intent was `revenue_lr * revenue_gap`, confirm and change it.
        self.w_rev = max(0, self.w_rev + self.config.train.revenue_lr * revenue_value
                         - self.alpha * self.revenue_max)

        final_loss = -utility + self.w_rgt * rgt + self.w_rev * revenue

        final_loss.backward()
        nn.utils.clip_grad_norm_(self.net.parameters(), 1)
        self.opt1.step()

        return final_loss, revenue, rgt, utility, revenue_gap

    def compute_metrics(self, x):
        """Return ``(revenue, mean regret)`` for a batch without building a graph."""
        with torch.no_grad():
            x_mis, misreports = self.get_misreports_grad(x)

            alloc_true, pay_true = self.net(x)
            a_mis, p_mis = self.net(misreports)

            # compute_regret_grad returns (regret, utility); only regret is reported.
            rgt, _ = self.compute_regret_grad(x, alloc_true, pay_true, x_mis, a_mis, p_mis)
            revenue = self.compute_rev(pay_true)

        return revenue, rgt.mean()

    def compute_rev(self, pay):
        """ Given payment (pay), computes revenue
            Input params:
                pay: [num_batches, num_agents]
            Output params:
                revenue: scalar
        """
        return pay.sum(dim=-1).mean()

    def compute_utility(self, x, alloc, pay):
        """ Given input valuation (x), payment (pay) and allocation (alloc), computes utility
            Input params:
                x: [num_batches, num_agents, num_items]
                a: [num_batches, num_agents, num_items]
                p: [num_batches, num_agents]
            Output params:
                utility: [num_batches, num_agents]
        """
        return (alloc * x).sum(dim=-1) - pay

    def compute_regret(self, x, a_true, p_true, x_mis, a_mis, p_mis):
        """Alias of ``compute_regret_grad``."""
        return self.compute_regret_grad(x, a_true, p_true, x_mis, a_mis, p_mis)

    def compute_regret_grad(self, x, a_true, p_true, x_mis, a_mis, p_mis):
        """Return ``(rgt, utility)``.

        ``utility`` is the truthful utility from ``compute_utility``. ``rgt`` is
        obtained from the masked, ReLU-ed utility gain of misreporting (viewed as
        ``u_shape``) by taking the max over the last axis and the misreport axis
        and then the mean over the batch axis, leaving one value per agent.
        """
        mode = self.mode

        utility = self.compute_utility(x, a_true, p_true)
        utility_mis = self.compute_utility(x_mis, a_mis, p_mis)

        utility_true = utility.repeat(self.config.num_agents * self.config[mode].num_misreports, 1)
        excess_from_utility = F.relu((utility_mis - utility_true).view(self.u_shape[mode]) *
                                     self.u_mask[mode])

        rgt = excess_from_utility.max(3)[0].max(1)[0].mean(dim=1)
        return rgt, utility

    def get_misreports(self, x):
        """Alias of ``get_misreports_grad``."""
        return self.get_misreports_grad(x)

    def get_misreports_grad(self, x):
        """Return ``(x_mis, misreports)`` for the current mode.

        ``x_mis`` is ``x`` tiled ``num_agents * num_misreports`` times along the
        first axis. ``misreports`` equals ``x_mis`` except that, in the slice for
        agent i, agent i's valuations are replaced by the values in ``adv_var``.
        """
        mode = self.mode
        adv_mask = self.adv_mask[mode]

        adv = self.adv_var[mode].unsqueeze(0).repeat(self.config.num_agents, 1, 1, 1, 1)
        x_mis = x.repeat(self.config.num_agents * self.config[mode].num_misreports, 1, 1)
        x_r = x_mis.view(self.adv_shape[mode])
        y = x_r * (1 - adv_mask) + adv * adv_mask
        misreports = y.view([-1, self.config.num_agents, self.config.num_items])
        return x_mis, misreports

    def train(self, generator):
        """Run the full training loop.

        Args:
            generator: pair ``(train_generator, val_generator)``; each must expose
                ``gen_func`` yielding ``(X, ADV, perm)`` batches.
        """
        self.train_gen, self.val_gen = generator

        iteration = self.config.train.restore_iter

        # Resume from a saved model when a restore iteration is configured.
        if iteration > 0:
            model_path = self._require_log_dir() + '/model_{}'.format(iteration)
            state_dict = torch.load(model_path, map_location=self.device)
            self.net.load_state_dict(state_dict)

        time_elapsed = 0.0

        while iteration < self.config.train.max_iter:
            tic = time.time()
            self.train_epoch(iteration)
            time_elapsed += time.time() - tic

            iteration += 1
            self._log_scalar('Train/epoch time', time_elapsed, iteration / 1000)

            # Checkpoints live in the writer's log directory, so they are skipped
            # when no writer is attached instead of aborting the run.
            # NOTE(review): the `iteration + 1` offset is kept from the original.
            if (iteration + 1) % self.config.train.save_iter == 0 and self.writer is not None:
                self.save(iteration + 1)

            if (iteration % self.config.val.print_iter) == 0:
                self.eval(iteration)

    def train_epoch(self, iteration):
        """Process one training batch: misreport search, network step, logging."""
        self.mode = 'train'
        self.net.train()

        # X - true valuations, ADV - starting point for the misreport search,
        # perm - positions of ADV inside the generator's dataset.
        X, ADV, perm = next(self.train_gen.gen_func)

        x = torch.from_numpy(X).float().to(self.device)

        self.adv_var['train'].data = tensor(ADV).float().to(self.device)

        self.misreport_cycle(x)

        # Hand the optimised misreports back to the generator for reuse.
        if self.config.train.data == 'fixed' and self.config.train.adv_reuse:
            self.train_gen.update_adv(perm, self.adv_var['train'].data.cpu())

        # train_op returns five values; utility and the revenue gap are not logged here.
        net_loss, train_revenue, train_regret, _, _ = self.train_op(x)

        self.rgt_target = max(self.rgt_target * self.rgt_target_mult, self.config.train.rgt_target_end)

        if (iteration % self.config.train.print_iter) == 0:
            print('Iteration {}'.format(iteration))
            print('Train revenue: {},   regret: {},   net loss: {} , w: {}'.format(
                round(float(train_revenue), 5),
                round(float(train_regret), 5),
                round(float(net_loss), 5),
                round(self.w_rgt, 4)
            ))
            self._log_scalar('Train/revenue', train_revenue, iteration / 1000)
            self._log_scalar('Train/regret', train_regret, iteration / 1000)
            self._log_scalar('Train/loss', net_loss, iteration / 1000)
            self._log_scalar('Train/w_rgt', self.w_rgt, iteration / 1000)

    def eval(self, iteration):
        """Switch to validation mode, report validation metrics and optionally plot."""
        print('Validation on {} iteration'.format(iteration))
        self.mode = 'val'
        self.net.eval()

        self.eval_grad(iteration)

        if self.config.plot.bool:
            self.plot()

    def eval_grad(self, iteration):
        """Average revenue and regret over ``config.val.num_batches`` validation batches."""
        val_revenue = 0
        val_regret = 0

        for _ in range(self.config.val.num_batches):
            X, ADV, _ = next(self.val_gen.gen_func)
            self.adv_var['val'].data = tensor(ADV).float().to(self.device)

            x = torch.from_numpy(X).float().to(self.device)

            self.misreport_cycle(x)

            val_revenue_batch, val_regret_batch = self.compute_metrics(x)
            val_revenue += val_revenue_batch
            val_regret += val_regret_batch

        val_revenue /= float(self.config.val.num_batches)
        val_regret /= float(self.config.val.num_batches)

        print('Val revenue: {},   regret_grad: {}'.format(
            round(float(val_revenue), 5),
            round(float(val_regret), 5)))
        self._log_scalar('Validation/revenue', val_revenue, iteration / 1000)
        self._log_scalar('Validation/regret_grad', val_regret, iteration / 1000)

    def plot(self):
        """Evaluate the network on a square grid of valuations and plot the allocation."""
        x = np.linspace(self.config.min, self.config.max, self.config.plot.n_points)
        x = np.stack([v.flatten() for v in np.meshgrid(x, x)], axis=-1)
        x = np.expand_dims(x, 1)
        # The grid must live on the same device as the network.
        x = torch.from_numpy(x).float().to(self.device)

        with torch.no_grad():
            allocation, _ = self.net(x)
        # .cpu() is required before .numpy() when the network runs on a GPU.
        allocation = allocation.detach().cpu().numpy()[:, 0, :].reshape(
            self.config.plot.n_points, self.config.plot.n_points, self.config.num_items)

        plot(allocation, self.config.dir_name, self.config.setting)

    def misreport_cycle(self, x):
        """Run ``gd_iter`` misreport steps, then reset the misreport optimizer."""
        mode = self.mode

        for _ in range(self.config[mode].gd_iter):
            self.mis_step(x)

            # Keep misreports inside the configured valuation range.
            self.adv_var[mode].data.clamp_(self.config.min, self.config.max)

        # Restore the initial learning rate and drop Adam's running moments so
        # the next batch starts from a fresh optimizer.
        for param_group in self.opt2[mode].param_groups:
            param_group['lr'] = self.config[mode].gd_lr

        self.opt2[mode].state = defaultdict(dict)

    def save(self, iteration):
        """Write the network's state dict to ``<log_dir>/model_<iteration>``."""
        torch.save(self.net.state_dict(), self._require_log_dir() + '/model_{}'.format(iteration))


Source: [Optimal-er Auctions through Attention - arXiv](https://arxiv.org/pdf/2202.13110.pdf)

In [None]:
import os
# The working directory is switched before the `regretnet` imports below;
# presumably that is what makes the package importable — TODO confirm.
os.chdir('../input/amd-master/amd-master')
from regretnet.utils import get_objects
# Trainer is defined in this notebook instead of being imported from the package.
#from regretnet.trainer.trainer import Trainer, PartTrainer
from regretnet.nets.additive_net import AdditiveNet
from regretnet.nets.additive_net_attention import AdditiveNetAttention
from regretnet.nets.additive_net_exchangeable import AdditiveNetExchangeable
from regretnet.nets.part_net_attention import PartNetAttention
from regretnet.nets.part_net import PartNet

# Optional GPU pinning, currently disabled.
# os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
# os.environ['CUDA_VISIBLE_DEVICES']='1'

# Name of the experiment setting passed to get_objects() and stored on the config.
setting = 'additive_3x10_uniform'

def count_parameters(model):
    """Return the number of scalar entries across all parameters of `model` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total



# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

cfg, _, Generator, clip_op_lambda = get_objects(setting)
cfg.setting = setting

# Build the auction network selected by the config; fail loudly on an unknown
# value instead of hitting a NameError on `net2` further down.
if cfg.architecture == 'standard':
    net2 = AdditiveNet(cfg, device).to(device)
elif cfg.architecture == 'attention':
    net2 = AdditiveNetAttention(cfg, device).to(device)
elif cfg.architecture == 'exchangeable':
    net2 = AdditiveNetExchangeable(cfg, device).to(device)
else:
    raise ValueError('Unsupported cfg.architecture: {!r}'.format(cfg.architecture))
print('number of parameters, net2 =', count_parameters(net2))
generator = [Generator(cfg, 'train'), Generator(cfg, 'val')]

if cfg.regret_type == 'standard':
    m = Trainer(cfg, net2, clip_op_lambda, device)
elif cfg.regret_type == 'part':
    # Imported here because the package-level import is commented out at the top
    # of this cell; only PartTrainer is taken so the notebook's own Trainer is
    # not shadowed.
    from regretnet.trainer.trainer import PartTrainer

    if cfg.architecture == 'standard':
        part_net = PartNet(cfg, device).to(device)
    elif cfg.architecture == 'attention':
        part_net = PartNetAttention(cfg, device).to(device)
    else:
        raise ValueError('No part network is available for cfg.architecture: {!r}'.format(
            cfg.architecture))
    print('number of parameters, part_net =', count_parameters(part_net))
    m = PartTrainer(cfg, net2, part_net, clip_op_lambda, device)
else:
    raise ValueError('Unsupported cfg.regret_type: {!r}'.format(cfg.regret_type))

# Step back out of the two package directories entered before the imports.
os.chdir('../..')


In [None]:
# Path of the pretrained weights; same file the tuning cells load.
# NOTE(review): PATH was never defined in the notebook — confirm this is the intended file.
PATH = '../input/pretrained-rgtdinet/additive_net_3_10.pickle'
# torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location lets a checkpoint saved on a GPU be restored on a CPU-only machine.
checkpoint = torch.load(PATH, map_location=device)
net2.load_state_dict(checkpoint)

In [None]:
from itertools import chain


class WrapperNet(nn.Module):
  """Wraps an auction network with two trainable MLP heads.

  The wrapped ``model`` must expose ``num_agents`` and ``num_items`` and return
  ``(allocation, payment)``. ``reallocation`` post-processes the flattened
  allocation and ``redistribution`` post-processes the payments.

  Args:
    model: the network to wrap; its parameters are (un)frozen in place.
    num_unfreezed_parameters: parameters whose index (second component of the
      parameter name) is greater than ``5 - num_unfreezed_parameters`` stay trainable.
    num_units: width of every hidden layer in both heads.
    num_hidden: number of extra ``Linear + ReLU`` pairs after the first layer.
  """

  def __init__(self, model, num_unfreezed_parameters, num_units, num_hidden):
    super(WrapperNet, self).__init__()
    self.num_unfreezed_parameters, self.num_units, self.num_hidden = num_unfreezed_parameters, num_units, num_hidden
    self.model = model
    # Heads are built in the original order so seeded initialisation is unchanged.
    self.redistribution = self._make_head(self.model.num_agents)
    self.reallocation = self._make_head(self.model.num_agents * self.model.num_items)

    # Freeze the wrapped model's own parameters (previously this looped over the
    # global `net2`, which only worked when `model` happened to be that object).
    # Assumes parameter names look like '<container>.<index>.<...>' — TODO confirm
    # for every architecture that is wrapped.
    last_frozen_index = 5 - self.num_unfreezed_parameters
    for name, param in self.model.named_parameters():
      param.requires_grad = int(name.split('.')[1]) > last_frozen_index

  def _make_head(self, size):
    """Build a ``size -> num_units -> ... -> size`` MLP with ReLU activations."""
    hidden = chain.from_iterable(
        [nn.Linear(self.num_units, self.num_units), nn.ReLU()] for _ in range(self.num_hidden))
    return nn.Sequential(
        nn.Linear(size, self.num_units),
        nn.ReLU(),
        *hidden,
        nn.Linear(self.num_units, size),
    )

  def forward(self, x):
    """Return ``(reallocated allocation, redistributed payment)`` for valuations ``x``."""
    alloc_true, pay_true = self.model(x)
    alloc_true = alloc_true.view(-1, self.model.num_agents * self.model.num_items)
    alloc_reallocated = self.reallocation(alloc_true).view(-1, self.model.num_agents, self.model.num_items)
    payment_redistributed = self.redistribution(pay_true)
    return alloc_reallocated, payment_redistributed

In [None]:
#!pip install optuna
import optuna

In [None]:
# Manual hyperparameters for the trainer `m` built earlier.
# revenue_lr and rgt_lr are read from cfg.train on every Trainer.train_op call.
cfg.train.revenue_lr = 1e-1
cfg.train.rgt_lr = 1e-2
# NOTE: batch_size, learning_rate and w_rgt_init_val are read by Trainer only while
# it is being constructed, so changing them here does not affect the existing `m`.
cfg.train.batch_size = 1 * 1024
cfg.train.learning_rate = 1e-7
cfg.train.w_rgt_init_val = 42.9537
# Reference revenue used by Trainer.train_op together with m.alpha.
m.revenue_max = 5.61554
m.train_gen, m.val_gen = generator
# NOTE: the objective functions defined below use their own local G; this global
# value is not read by them.
G = 500

In [None]:
def assemble_model(trial):
  """Build a WrapperNet around freshly reloaded `net2` and a Trainer for it.

  Hyperparameters are sampled from the Optuna `trial`. The new Trainer is bound
  to the module-level name `m`, because `objective` drives training through that
  global; previously the trainer was a local that was thrown away, so the
  sampled architecture and learning rate never took effect.

  Returns:
    The assembled WrapperNet (already moved to the selected device).
  """
  global m  # objective() reads the trainer from the notebook's global scope

  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  # torch.load unpickles the file, so only load checkpoints from a trusted source.
  checkpoint = torch.load('../input/pretrained-rgtdinet/additive_net_3_10.pickle', map_location=device)
  net2.load_state_dict(checkpoint)
  num_unfreezed_parameters = trial.suggest_int('num_unfreezed_parameters', 0, 3)
  num_units = trial.suggest_int('num_units', 8, 128)
  num_hidden = trial.suggest_int('num_hid', 2, 6)
  model = WrapperNet(net2, num_unfreezed_parameters, num_units, num_hidden).to(device)
  cfg.train.revenue_lr = trial.suggest_float('revenue_lr', 1e-7, 1e-2)
  cfg.train.rgt_lr = trial.suggest_float('rgt_lr', 1e-7, 1e-2)
  # NOTE(review): the generators were created before this assignment; confirm
  # they yield batches of this size.
  cfg.train.batch_size = 1024
  cfg.train.learning_rate = trial.suggest_float('learning_rate', 1e-7, 1e-2)
  # Trainer reads `w_rgt_init_val`; the former key `w_rgt_val` was never used.
  cfg.train.w_rgt_init_val = 42.9537
  m = Trainer(cfg, model, clip_op_lambda, device)
  m.revenue_max = 5.61554
  m.train_gen, m.val_gen = generator
  return model

In [None]:
from scipy.optimize import curve_fit
from sklearn.metrics import mean_absolute_percentage_error as mape
from scipy.interpolate import interp1d


def objective(trial):
  """Optuna objective: train over a sweep of alpha values and score the logs.

  Uses the module-level trainer `m`. One batch is drawn and reused for every
  step; for each of 7 alpha values from 1 down to 0 the trainer runs G rounds of
  misreport search plus one network step. Returns whatever `compute_metrics`
  produces for the collected logs.
  """
  model = assemble_model(trial)
  revenue_log, utility_log, gap_log, regret_log = [], [], [], []
  steps_per_alpha = trial.suggest_int('G', 300, 500)

  # A single batch is sampled once and reused throughout the sweep.
  batch, adv_init, _perm = next(m.train_gen.gen_func)
  x = torch.from_numpy(batch).float().to(m.device)
  m.adv_var['train'].data = torch.tensor(adv_init).float().to(m.device)

  for alpha in np.linspace(1., 0., 7):
    print('alpha = ', alpha, end=', ')
    m.alpha = alpha
    for _ in range(steps_per_alpha):
      m.misreport_cycle(x)
      _loss, revenue, rgt, u, revenue_excess = m.train_op(x)
      revenue_log.append(revenue.detach().cpu().item())
      regret_log.append(rgt.detach().cpu().item())
      utility_log.append(u.detach().cpu().item())
      gap_log.append(revenue_excess)

    line_end = ', ' if alpha == 0. else '\n'
    print('u={}, rev={}, g={}, rgt={}'.format(utility_log[-1], revenue_log[-1], gap_log[-1], regret_log[-1]), end = line_end)

  return compute_metrics((revenue_log, regret_log, utility_log, gap_log))

def polynom(x, a, b, c):
  """Quadratic a*x**2 + b*x + c, the curve fitted to revenue as a function of utility."""
  return a * x ** 2 + b * x + c

def compute_metrics(logs):
  """Turn training logs into the four objectives minimised by the study.

  Args:
    logs: tuple ``(revenues, rgts, utilitys, gap)`` of equally long sequences.

  Returns:
    ``[-mean utility, mean |gap|, mean |regret|, smoothness]`` where smoothness
    is the mean absolute percentage error between the linearly interpolated
    revenue-vs-utility curve and its quadratic least-squares fit, both evaluated
    on 1000 utility values between the smallest and largest logged utility.
  """
  revenues, rgts, utilitys, gap = logs
  gap_mean_abs = np.mean(np.abs(gap))
  rgts_mean_abs = np.mean(np.abs(rgts))

  coefs, _ = curve_fit(polynom, utilitys, revenues)
  interpolate = interp1d(utilitys, revenues)
  utility_grid = np.linspace(min(utilitys), max(utilitys), 1000)

  observed = interpolate(utility_grid)
  # The fit maps utility -> revenue, so it must be evaluated on the utility grid
  # (it used to be evaluated on the interpolated revenues).
  fitted = polynom(utility_grid, *coefs)
  # Mean absolute percentage error, same formula as sklearn's
  # mean_absolute_percentage_error (denominator floored at float64 eps).
  eps = np.finfo(np.float64).eps
  smoothness = np.mean(np.abs(fitted - observed) / np.maximum(np.abs(observed), eps))

  u = np.mean(utilitys)
  res = [-u, gap_mean_abs, rgts_mean_abs, smoothness]
  return res

In [None]:
# Multi-objective study: all four values returned by `objective` are minimised.
study = optuna.create_study(directions=['minimize'] * 4)
study.optimize(objective, n_trials=300)

print("Number of finished trials: ", len(study.trials))

In [None]:
def assemble_model(params):
  """Rebuild the WrapperNet and its Trainer from a dict of tuned hyperparameters.

  NOTE: this redefines (shadows) the earlier trial-based `assemble_model`.
  The new Trainer is bound to the module-level name `m`, because `objective`
  drives training through that global; previously the trainer was a local that
  was thrown away, so the chosen architecture and learning rate had no effect.

  Args:
    params: mapping with keys 'num_unfreezed_parameters', 'num_units', 'num_hid',
      'revenue_lr', 'rgt_lr' and 'learning_rate'.

  Returns:
    The assembled WrapperNet (already moved to the selected device).
  """
  global m  # objective() reads the trainer from the notebook's global scope

  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  # torch.load unpickles the file, so only load checkpoints from a trusted source.
  checkpoint = torch.load('/kaggle/input/pretrained-rgtdinet/additive_net_3_10.pickle', map_location=device)
  net2.load_state_dict(checkpoint)
  num_unfreezed_parameters = params['num_unfreezed_parameters']
  num_units = params['num_units']
  num_hidden = params['num_hid']
  model = WrapperNet(net2, num_unfreezed_parameters, num_units, num_hidden).to(device)
  cfg.train.revenue_lr = params['revenue_lr']
  cfg.train.rgt_lr = params['rgt_lr']
  # NOTE(review): the generators were created before this assignment; confirm
  # they yield batches of this size.
  cfg.train.batch_size = 1024
  cfg.train.learning_rate = params['learning_rate']
  # Trainer reads `w_rgt_init_val`; the former key `w_rgt_val` was never used.
  cfg.train.w_rgt_init_val = 42.9537
  m = Trainer(cfg, model, clip_op_lambda, device)
  m.revenue_max = 5.61554
  m.train_gen, m.val_gen = generator
  return model

from tqdm.notebook import trange, tqdm
def objective(params):
  """Re-run the alpha sweep with fixed hyperparameters and return the raw logs.

  NOTE: this redefines (shadows) the earlier trial-based `objective`.
  Uses the module-level trainer `m`. One batch is drawn and reused; for each of
  7 alpha values from 1 down to 0 the trainer runs ``params['G']`` rounds of
  misreport search plus one network step.

  Returns:
    dict with keys 'revenue', 'rgt', 'utilities' and 'gap' (inserted in that
    order), each a list with one float per training step.
  """
  model = assemble_model(params)
  revenues, utilities, gap, rgts = [], [], [], []
  G = params['G']
  X, ADV, perm = next(m.train_gen.gen_func)
  x = torch.from_numpy(X).float().to(m.device)
  m.adv_var['train'].data = torch.tensor(ADV).float().to(m.device)
  for alpha in tqdm(np.linspace(1., 0., 7)):
    m.alpha = alpha
    for i in tqdm(range(G)):
      m.misreport_cycle(x)
      final_loss, revenue, rgt, u, revenue_excess = m.train_op(x)
      revenues.append(revenue.detach().cpu().item())
      rgts.append(rgt.detach().cpu().item())
      utilities.append(u.detach().cpu().item())
      gap.append(revenue_excess)
  # The dict literal was previously closed with `]`, which is a SyntaxError.
  res = {'revenue': revenues, 'rgt': rgts, 'utilities': utilities, 'gap': gap}
  return res

In [None]:
# Take the first trial on the study's Pareto front (study.best_trials) and re-run
# training with its hyperparameters to collect the full per-step logs.
num = study.best_trials[0].number
result = objective(study.trials[num].params)


In [None]:
# plt / sns were used without ever being imported in this notebook.
import matplotlib.pyplot as plt
import seaborn as sns

# `result` holds four series in the order revenue, regret, utility, gap. Accept
# both a dict (values in insertion order) and a plain sequence so positional
# access below works either way.
result_series = list(result.values()) if isinstance(result, dict) else list(result)

sns.set(font_scale=1)
sns.set_theme(color_codes=True)
fig = plt.figure( figsize=(15, 10))
plt.plot(result_series[2], result_series[0])
plt.title('Pareto Frontier for 3x10 setting with v_i~U[0, 1]')
plt.xlabel('Avg. Utility')
plt.ylabel('Avg. Revenue');

In [None]:
# plt / sns were used without ever being imported in this notebook.
import matplotlib.pyplot as plt
import seaborn as sns

# `result` holds four series in the order revenue, regret, utility, gap. Accept
# both a dict (values in insertion order) and a plain sequence so positional
# access below works either way.
result_series = list(result.values()) if isinstance(result, dict) else list(result)

fig, ax = plt.subplots(1, 2, figsize=(30, 10))
sns.set(font_scale=1.5)
ax[0].plot(result_series[1])
ax[1].plot(result_series[3])
ax[0].title.set_text('The dynamics of Regret')
ax[1].title.set_text('The dynamics of a gap between a Current Revenue and a β-fraction of Max. Revenue')
ax[0].set_xlabel('Number of iterations')
ax[1].set_xlabel('Number of iterations');