In the cells below we define the model class and then load the pre-trained weights.

In [None]:
#!pip install optuna

In [None]:
import torch
from torch import nn, optim
import numpy as np
import optuna
from scipy import stats as ss
from matplotlib import pyplot as plt
from IPython.display import clear_output

In [None]:
from random import random
class Generator:
  """Draws batches of bidder valuations as float tensors.

  Args:
    n_bidders: number of columns in every generated batch.
    batch_size: number of rows in every generated batch.
    dist: either the string 'irregular' (per-entry mixture, see below) or an
      object exposing a SciPy-style ``rvs(size=..., random_state=..., **kw)``.
    params: keyword arguments forwarded to ``dist.rvs``. For 'irregular' it
      must hold the keys 'probs' (mixture probabilities), 'params' (one
      kwargs dict per mixture component) and 'dist_name' (object with ``rvs``).
  """

  def __init__(self, n_bidders, batch_size, dist, params=None):
    self.n_bidders = n_bidders
    self.batch_size = batch_size
    self.dist = dist
    # A fresh dict per instance: a shared ``{}`` default would leak between instances.
    self.params = {} if params is None else params

  def generate(self, seed=None):
    """Return a ``(batch_size, n_bidders)`` float tensor of samples.

    Args:
      seed: optional seed making the draw reproducible. With the default
        ``None`` every call produces a new batch from NumPy's global random
        state. (A default computed in the signature would be evaluated only
        once and silently reused by every call.)
    """
    shape = (self.batch_size, self.n_bidders)

    if self.dist == 'irregular':
      probs = self.params['probs']
      components = self.params['params']
      dist_name = self.params['dist_name']
      # Seeded calls get a private stream; unseeded calls keep using the global one.
      chooser = np.random if seed is None else np.random.RandomState(seed)
      rvs_state = None if seed is None else chooser
      # Pick one component kwargs dict for every (row, bidder) entry.
      chosen = chooser.choice(components, size=shape, p=probs)
      sample_val = np.array(
          [[dist_name.rvs(random_state=rvs_state, **kwargs) for kwargs in row] for row in chosen]
      )
      return torch.tensor(sample_val, dtype=torch.float)

    return torch.tensor(
        self.dist.rvs(size=shape, random_state=seed, **self.params), dtype=torch.float
    )

In [None]:
class MyersonNetV4(nn.Module):
    """Network producing virtual valuations, allocations and payments from bids.

    Two MLPs map each bid vector to ``J * K`` log-slopes and ``J * K``
    intercepts. The virtual valuation is a min over one axis of a max over the
    other axis of the affine pieces ``exp(w) * bid + b``; payments apply the
    matching inverse to the best competing virtual value.

    Args:
        n_bidders: width of the input bid vectors.
        J_functions: size of the first (max) axis of the affine pieces.
        K_groups: size of the second (min) axis of the affine pieces.
        B: stored as ``self.B``; not used by any method of this class.
        softmax_temperature: multiplier applied to virtual values before the
            softmax in ``'train'`` mode.
    """

    def __init__(self, n_bidders, J_functions=10, K_groups=10, B=1.0, softmax_temperature=1.0):
        super().__init__()
        self.J, self.K = J_functions, K_groups
        self.B = B
        self.kappa = softmax_temperature
        self.n_bidders = n_bidders
        hidden = self.J * self.K
        # Creation order (w first, then b) is kept so parameter initialisation
        # and state-dict keys match existing checkpoints.
        self.w_transformation = nn.Sequential(
            nn.Linear(self.n_bidders, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
        )
        self.b_transformation = nn.Sequential(
            nn.Linear(self.n_bidders, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
        )
        # Kept for callers that read it; the methods below follow the tensors instead.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # mask[i, 0, j] is 0 when i == j and 1 otherwise, hiding bidder i's own value.
        # A non-persistent buffer follows .to()/.double() with the parameters but is
        # left out of the state dict, so previously saved weights still load.
        self.register_buffer('mask', 1.0 - torch.eye(self.n_bidders).unsqueeze(1), persistent=False)

    def transform_weights(self, data):
        """Return ``(w, bias)``, each viewed as ``(J, K, -1, 1)``."""
        # NOTE(review): the MLP output is (batch, J*K); viewing it directly as
        # (J, K, batch, 1) reinterprets memory rather than transposing, so entries
        # from different batch rows are interleaved. Left unchanged because the
        # pre-trained weights were produced with this layout -- confirm intent.
        w = self.w_transformation(data).view(self.J, self.K, -1, 1)
        bias = self.b_transformation(data).view(self.J, self.K, -1, 1)
        return (w, bias)

    def get_virtual_valuation(self, data, weights):
        """Min over the second axis of the max over the first axis of ``exp(w) * data + b``."""
        w, bias = weights
        pieces = torch.exp(w) * data + bias
        return torch.min(torch.max(pieces, dim=0).values, dim=0).values

    def get_inverse_virtual_valuation(self, phi, weights):
        """Max over the second axis of the min over the first axis of ``exp(-w) * (phi - b)``."""
        w, bias = weights
        pieces = torch.exp(-w) * (phi - bias)
        return torch.max(torch.min(pieces, dim=0).values, dim=0).values

    def get_allocation(self, array, task):
        """Softmax allocation over the bidders plus one extra zero-valued slot.

        Args:
            array: 2-D tensor of virtual values, one column per bidder.
            task: ``'train'`` scales by ``self.kappa``; ``'eval'`` scales by
                ``1e25`` so the softmax becomes (numerically) a hard argmax.

        Returns:
            Tensor shaped like ``array``; the extra slot's share is dropped.

        Raises:
            ValueError: if ``task`` is neither ``'train'`` nor ``'eval'``.
        """
        if task == 'train':
            scale = self.kappa
        elif task == 'eval':
            scale = 1e25
        else:
            raise ValueError('Wrong task, choose among "train", "eval".')
        n = array.shape[1]
        # n x (n + 1) identity with a trailing zero column: appends a slot fixed at 0.
        # Built on the input's device/dtype so it always matches ``array``.
        pad = torch.eye(n, n + 1, device=array.device, dtype=array.dtype)
        return torch.nn.functional.softmax(array @ (scale * pad), dim=-1)[:, :-1]

    def get_payment(self, phi, weights):
        """Inverse virtual valuation of each bidder's best competing virtual value."""
        # The bidder's own entry is replaced by 0, so the max is never below 0.
        second_price = torch.max(self.mask * phi, dim=-1).values.swapaxes(0, 1)
        return self.get_inverse_virtual_valuation(second_price, weights)

    def forward(self, data, task='train'):
        """Return ``(loss, (payment, allocation, data, phi))``.

        ``loss`` is the negated batch mean of ``sum(allocation * payment)``.
        """
        weights = self.transform_weights(data)
        phi = self.get_virtual_valuation(data, weights)
        allocation = self.get_allocation(phi, task)
        payment = self.get_payment(phi, weights)
        loss = -torch.mean(torch.sum(allocation * payment, dim=1))
        return loss, (payment, allocation, data, phi)

In [None]:
def define_model(trial, checkpoint_path='/content/model_at_iter_6800_15_33.pickle'):
  """Build the pre-trained model, its optimisers and the tuned hyper-parameters.

  Args:
    trial: Optuna trial used to sample the hyper-parameters.
    checkpoint_path: state-dict file loaded into the freshly built model.

  Returns:
    Tuple ``(model, optimizer, lambdas, optim_lambdas, c, j_iter, step_size)``.
  """
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  n_bidders, J_functions, K_groups, B = 3, 15, 33, 8
  # The softmax temperature is fixed here rather than tuned by the trial.
  softmax_temperature = 1e2
  model = MyersonNetV4(n_bidders, J_functions=J_functions, K_groups=K_groups, B=B, softmax_temperature=softmax_temperature).to(device)
  # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
  # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
  model.load_state_dict(torch.load(checkpoint_path, map_location=device))
  learning_rate = trial.suggest_float('learning_rate', 1e-8, 1e-2)
  learning_rate_lambdas = trial.suggest_float('learning_rate_lambdas', 1e-8, 1e-2)
  lambdas = nn.ParameterList([nn.Parameter(torch.ones(size=(1, )) * trial.suggest_float('lambdas', -5., 5.))]).to(device)
  # NOTE(review): 'learning_rate' drives the multiplier optimiser and
  # 'learning_rate_lambdas' drives the model optimiser. The names look swapped;
  # kept as is because define_model_eval pairs them the same way.
  optim_lambdas = optim.Adam(lambdas.parameters(), lr=learning_rate)
  optimizer = optim.Adam(model.parameters(), lr=learning_rate_lambdas)
  c = trial.suggest_float('c', 0.5, 10.)
  j_iter = trial.suggest_int('j_iter', 100, 500)
  step_size = trial.suggest_float('step_size', 1e-5, 1e-2)
  return model, optimizer, lambdas, optim_lambdas, c, j_iter, step_size

In [None]:
def train_model(model, optimizer, lambdas, optim_lambdas, c, j_iter, step_size, generators, n_steps=150):
  """Trade revenue for utility along a shrinking revenue lower bound.

  One fixed batch is drawn from all generators. For outer step ``i`` the
  revenue target is ``(1 - step_size) ** i`` times the initial revenue, and
  ``j_iter`` inner updates minimise
  ``-utility + lambda * (revenue - target) + c / 2 * (revenue - target) ** 2``,
  with the multiplier updated by gradient ascent.

  Args:
    model: callable returning ``(neg_revenue, (payment, allocation, data, phi))``.
    optimizer: optimiser stepping the model parameters.
    lambdas: indexable container whose first element is the multiplier parameter.
    optim_lambdas: optimiser stepping the multiplier.
    c: weight of the quadratic penalty term.
    j_iter: inner updates per outer step (coerced to ``int``).
    step_size: per-step relative shrink of the revenue target.
    generators: objects whose ``generate()`` returns a 2-D tensor of samples.
    n_steps: number of outer steps.

  Returns:
    Four arrays of length ``n_steps``: loss, negated revenue, negated utility,
    and ``revenue - target``, each taken from the last inner update of the
    step. A step is logged as NaN when the model output turned NaN (or no
    inner update ran), instead of re-logging stale values.
  """
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  losses, revenues, utilities, gap = [], [], [], []
  samples = torch.cat([generator.generate() for generator in generators])
  # Shuffle over however many rows were generated (not a hard-coded count).
  batch = samples[torch.randperm(samples.shape[0])].to(device)
  with torch.no_grad():
    neg_revenue, _ = model(batch)
  max_revenue = -neg_revenue.detach()
  nan = float('nan')
  for i in range(n_steps):
    lower_bound = ((1 - step_size) ** i * max_revenue).detach()
    last = None
    diverged = False
    for _ in range(int(j_iter)):
      optim_lambdas.zero_grad()
      optimizer.zero_grad()
      neg_revenue, (payment, allocation, data, phi) = model(batch)
      if payment.sum().isnan() or allocation.sum().isnan():
        diverged = True
        break
      revenue = -neg_revenue
      utility = torch.mean(torch.sum((data - payment) * allocation, dim=-1))
      shortfall = revenue - lower_bound
      loss = -utility + lambdas[0] * shortfall + c / 2 * shortfall ** 2

      loss.backward()
      # Flip the sign so the optimiser performs ascent on the multiplier.
      lambdas[0].grad = -lambdas[0].grad
      optim_lambdas.step()
      optimizer.step()
      last = (loss, revenue, utility)

    if diverged or last is None:
      for history in (losses, revenues, utilities, gap):
        history.append(nan)
      continue

    loss, revenue, utility = last
    losses.append(loss.detach().item())
    revenues.append(-revenue.detach().item())
    utilities.append(-utility.detach().item())
    gap.append((revenue - lower_bound).mean().detach().item())

  return [np.array(x) for x in (losses, revenues, utilities, gap)]

from scipy.optimize import curve_fit
from sklearn.metrics import mean_absolute_percentage_error as mape

def polynom(x, a, b, c):
  """Evaluate the quadratic ``a * x**2 + b * x + c`` at ``x``."""
  quadratic_term = a * x ** 2
  linear_term = b * x
  return quadratic_term + linear_term + c

def compute_metrics(logs, step_size):
  """Summarise a training log as five numbers.

  Args:
    logs: ``(losses, revenues, utilities, gap)`` as NumPy arrays.
    step_size: accepted for interface compatibility; not used in the result.

  Returns:
    ``[mean of the non-NaN utilities, smoothness, mean |gap|, number of NaN
    utilities, minus the number of distinct utility values]``. ``smoothness``
    is the mean absolute percentage error between the linearly interpolated
    revenue-vs-utility curve and a quadratic fitted to it, or the sentinel
    ``99999`` when the fit or the interpolation fails.
  """
  # Imported here because the file's import cells do not provide interp1d;
  # without it the smoothness computation always fell through to the sentinel.
  from scipy.interpolate import interp1d

  _, revenues, utilities, gap = logs
  gap_sum_abs = np.mean(np.abs(gap))
  nan_mask = np.isnan(utilities)
  mean_utility = utilities[~nan_mask].mean()
  nan_count = nan_mask.sum()
  distinct_penalty = -len(set(utilities))

  try:
    coefs, _ = curve_fit(polynom, utilities, revenues)
    f = interp1d(utilities, revenues)
    x = np.linspace(utilities.min(), utilities.max(), 1000)
    # Compare both curves at the same utilities x: the polynomial was fitted
    # as revenue = polynom(utility), so it is evaluated at x, not at f(x).
    smoothness = mape(f(x), polynom(x, *coefs))
  except Exception:
    # Best effort: a failed fit (e.g. NaNs in the log) must not abort the study.
    smoothness = 99999

  return [mean_utility, smoothness, gap_sum_abs, nan_count, distinct_penalty]



In [None]:
def objective(trial):
  """Optuna objective: train on ten two-component uniform mixtures and score the log.

  Each mixture draws from uniform(loc=0, scale=3) with probability 0.75 and
  from uniform(loc=3, scale=right_border) with probability 0.25, where
  right_border sweeps ``np.linspace(0, 30, 10)``.
  """
  mixture_specs = [
      {
          'probs': [0.75, 0.25],
          'params': [{'loc': 0, 'scale': 3}, {'loc': 3, 'scale': right_border}],
          'dist_name': ss.uniform,
      }
      for right_border in np.linspace(0, 30, 10)
  ]
  generators = [Generator(3, 128, 'irregular', spec) for spec in mixture_specs]

  setup = define_model(trial)
  step_size = setup[-1]
  logs = train_model(*setup, generators)
  return compute_metrics(logs, step_size)

In [None]:
# One 'minimize' direction per value returned by the objective (five in total).
study = optuna.create_study(directions=['minimize'] * 5)
fig = plt.figure(figsize=(10, 3))
study.optimize(objective, n_trials=300)

print("Number of finished trials: ", len(study.trials))

In [None]:
# Numbers of the trials that Optuna reports as best for this study.
best_numbers = [trial.number for trial in study.best_trials]
data = study.trials_dataframe()
# Flag those trials in the dataframe (the original referenced an undefined name `datap`).
data['number_isin_best_numbers'] = data['number'].isin(best_numbers)

This is not the best option, but we chose an appropriate configuration for the model manually.

In [None]:
# Keep only the trials flagged as best.
best_rows = data[data['number_isin_best_numbers']]
# NOTE(review): the positional column slice 7:-4 and the row label 208 are
# hard-coded for one particular study run -- verify them against the dataframe.
params = best_rows.iloc[:, 7:-4].loc[208]

In [None]:
# Turn the selected row into a dict keyed by column name (e.g. 'params_c'),
# the form that define_model_eval reads.
best_params = params.to_dict()

In [None]:
def define_model_eval(params, checkpoint_path='/content/model_at_iter_6150.pickle'):
  """Build the model and optimisers from a stored hyper-parameter mapping.

  Args:
    params: mapping with the keys 'params_learning_rate',
      'params_learning_rate_lambdas', 'params_lambdas', 'params_c',
      'params_j_iter' and 'params_step_size'.
    checkpoint_path: state-dict file loaded into the freshly built model.

  Returns:
    Tuple ``(model, optimizer, lambdas, optim_lambdas, c, j_iter, step_size)``.
  """
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  n_bidders, J_functions, K_groups, B = 3, 10, 5, 8
  softmax_temperature = 1e3
  model = MyersonNetV4(n_bidders, J_functions=J_functions, K_groups=K_groups, B=B, softmax_temperature=softmax_temperature).to(device)
  # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
  # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
  model.load_state_dict(torch.load(checkpoint_path, map_location=device))
  learning_rate = params['params_learning_rate']
  learning_rate_lambdas = params['params_learning_rate_lambdas']
  lambdas = nn.ParameterList([nn.Parameter(torch.ones(size=(1, )) * params['params_lambdas'])]).to(device)
  # NOTE(review): 'learning_rate' drives the multiplier optimiser and
  # 'learning_rate_lambdas' the model optimiser, mirroring define_model.
  optim_lambdas = optim.Adam(lambdas.parameters(), lr=learning_rate)
  optimizer = optim.Adam(model.parameters(), lr=learning_rate_lambdas)
  c = params['params_c']
  # A value read back from a dataframe row may be a float or NumPy scalar;
  # the training loop passes it to range(), which needs an int.
  j_iter = int(params['params_j_iter'])
  step_size = params['params_step_size']
  return model, optimizer, lambdas, optim_lambdas, c, j_iter, step_size
# Ten two-component uniform mixtures; only the second component's scale varies.
params_all = []
for right_border in np.linspace(0, 30, 10):
  params = {
      'probs': [0.75, 0.25],
      'params': [{'loc': 0, 'scale': 3}, {'loc': 3, 'scale': right_border}],
      'dist_name': ss.uniform,
  }
  params_all.append(params)
generators = [Generator(3, 128, 'irregular', spec) for spec in params_all]

# Rebuild the model with the manually selected hyper-parameters and retrain it.
model, optimizer, lambdas, optim_lambdas, c, j_iter, step_size = define_model_eval(best_params)
logs = train_model(model, optimizer, lambdas, optim_lambdas, c, j_iter, step_size, generators)

In [None]:
# Unpack the four per-step histories returned by train_model.
losses, revenues, utilities, gap = logs

In [None]:
# train_model logs utility and revenue with a minus sign; negate to plot the actual values.
plt.scatter(-utilities, -revenues)

In [None]:
# Utility per outer step (the log stores it negated). The original line
# referenced the misspelled, undefined name `utiliiess`.
plt.plot(-utilities)

In [None]:
# Revenue per outer step (the log stores it negated).
plt.plot(-revenues)

In [None]:
# Revenue minus its lower bound at the end of each outer step.
plt.plot(gap)