# n Player FPSB Auction with uniform symmetric valuation distributions

## Imports

In [None]:
import os, sys, time, warnings
root_path = os.path.abspath(os.path.join('..'))
if root_path not in sys.path:
    sys.path.append(root_path)
from timeit import default_timer as timer

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.utils as ut
from torch.optim.optimizer import Optimizer, required
from torch.utils.tensorboard import SummaryWriter

from bnelearn.strategy import NeuralNetStrategy, ClosureStrategy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import FirstPriceSealedBidAuction, VickreyAuction
from bnelearn.learner import ESPGLearner
from bnelearn.environment import AuctionEnvironment
from bnelearn.experiment import Experiment

# Set up matplotlib. `display` is only imported when an inline (notebook)
# backend is active, so code using it must check `is_ipython` first.
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    from IPython import display
plt.rcParams['figure.figsize'] = [8, 5]

# Run on the GPU whenever one is available, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

# Set `manual_seed` to an integer to make runs reproducible; `False` keeps
# PyTorch's default seeding.
# NOTE(review): because of the truthiness check a seed of 0 is treated as
# "no manual seed".
manual_seed = False
if manual_seed:
    torch.random.manual_seed(manual_seed)
    torch.cuda.manual_seed_all(manual_seed)

# Use a specific cuda gpu if desired (e.g. for running multiple experiments in parallel).
# NOTE(review): the truthiness check skips `set_device` for GPU index 0, and the
# hard-coded index 3 presumably requires a machine with at least four GPUs --
# adjust per host.
specific_gpu = 3
if cuda and specific_gpu:
    torch.cuda.set_device(specific_gpu)

# Report the device and seeds in use so that a run can be reproduced later.
print('device', device)
print('\tcpu-seed', torch.random.initial_seed())
if cuda: print('specific gpu:', torch.cuda.current_device())
if cuda: print('\tgpu-seed', torch.cuda.initial_seed())
    

## Settings

In [None]:
# Log into the notebook folder.
# Alternative for shared access to experiments:
# log_root = os.path.abspath('/srv/bnelearn-experiments/')
log_root = os.path.abspath('.')
run_comment = '' # used in log title in addition to datetime
save_figure_data_to_disc = True  # default for `save_vectors_to_disc` in plot_bid_function

### Experiment setup
epoch = 5000  # number of epochs passed to `exp.run`
n_players = 2
n_items = 1

# Valuation distribution: bidders are created via Bidder.uniform(u_lo, u_hi, ...).
u_lo =0
u_hi =10

risk = 1 # risk parameter for agent <-- not implemented in bidder yet but used in calculation of optimal utility

### Environment settings
batch_size = 2**17       # batch size of the learning bidders and the learning environment
eval_batch_size = 2**25  # larger batch size used by the BNE evaluation environment

### Strategy model architecture
input_length = 1
hidden_nodes = [5, 5]
hidden_activations = [nn.SELU(), nn.SELU()]

### Learner settings (passed to ESPGLearner as `hyperparams`)
learner_hyperparams = {
    'population_size': 128,
    'sigma': 1.,
    'scale_sigma_by_model_size': True
}

### Optimizer settings
# For reference, the author's notes on typical values:
# SGD standards:
#     'lr': 1e-3,
#     'momentum': 0.7
# Adam standards:
#     'lr': 1e-3
#     'betas': (0.9, 0.999),  # coefficients for running avgs of grad and square grad
#     'eps': 1e-8,            # added to denominator for numeric stability
#     'weight_decay': 0,      # L2-decay
#     'amsgrad': False        # whether to use the amsgrad variant
optimizer_type = torch.optim.Adam
optimizer_hyperparams ={    
    'lr': 3e-3
}

### Plot and log training options
plot_epoch = 100                    # plot and print progress every `plot_epoch` epochs
plot_points = min(100, batch_size)  # maximum number of (valuation, bid) points per plot

# Axis limits of the bid-function plot.
# NOTE(review): plot_ymax is hard-coded to 10, which equals u_hi only for the
# current settings.
plot_xmin = u_lo
plot_xmax = u_hi
plot_ymin = 0
plot_ymax = 10


def strat_to_bidder(strategy, batch_size=batch_size, player_position=None, cache_actions=False):
    """Wrap `strategy` in a bidder whose valuations are uniform on [u_lo, u_hi].

    Args:
        strategy: the strategy the bidder plays.
        batch_size: batch size handed to the bidder; defaults to the value of
            the module-level `batch_size` at definition time.
        player_position: position of the bidder, forwarded unchanged.
        cache_actions: forwarded unchanged to `Bidder.uniform`.

    Returns:
        The object returned by `Bidder.uniform`.
    """
    bidder_options = {
        'batch_size': batch_size,
        'player_position': player_position,
        'cache_actions': cache_actions,
    }
    return Bidder.uniform(u_lo, u_hi, strategy, **bidder_options)

## Setting up the Environment

In [None]:
def setup_fpsb(self):
    """Create the first-price sealed-bid auction and store it as `self.mechanism`."""
    auction = FirstPriceSealedBidAuction(cuda=True)
    self.mechanism = auction

def setup_fpsb_bidders(self, model_sharing = True, pretrain_iters = 0):
    """Create the shared strategy model and one bidder per player.

    Sets `self.model` (a NeuralNetStrategy moved to `device`) and
    `self.bidders` (one bidder per player position, all playing `self.model`).

    Args:
        model_sharing: must be True; all bidders then share a single model.
        pretrain_iters: if positive, the model is pretrained for this many
            iterations on the valuations of the first bidder.

    Raises:
        NotImplementedError: if `model_sharing` is False.
    """
    if not model_sharing:
        raise NotImplementedError("only model sharing has been implemented.")

    self.model = NeuralNetStrategy(
        input_length, hidden_nodes=hidden_nodes, hidden_activations=hidden_activations,
        ensure_positive_output=torch.tensor([float(u_hi)])
    ).to(device)

    # The bidders must exist before pretraining because pretraining uses their
    # valuations. The previous code read an undefined name `bidders` here.
    self.bidders = [strat_to_bidder(self.model, batch_size, player_position)
                    for player_position in range(n_players)]

    if pretrain_iters > 0:
        # Assumes a freshly created bidder already holds drawn valuations
        # (other code in this notebook reads `.valuations` without drawing
        # first) -- TODO confirm.
        self.model.pretrain(self.bidders[0].valuations, pretrain_iters)
        
def setup_auction_environment(self):
    """Build the learning environment from the mechanism and the learning bidders.

    Reads `self.mechanism` and `self.bidders` and stores the resulting
    AuctionEnvironment as `self.env`.
    """
    self.env = AuctionEnvironment(
        self.mechanism,
        agents=self.bidders,
        batch_size=batch_size,
        n_players=n_players,
        strategy_to_player_closure=strat_to_bidder,
    )
def setup_espg_learner(self):
    """Create the ESPG learner that trains `self.model` in `self.env`.

    The learner and optimizer settings come from the module-level
    `learner_hyperparams`, `optimizer_type` and `optimizer_hyperparams`.
    The learner is stored as `self.learner`.
    """
    self.learner = ESPGLearner(
        model=self.model,
        environment=self.env,
        hyperparams=learner_hyperparams,
        optimizer_type=optimizer_type,
        optimizer_hyperparams=optimizer_hyperparams,
    )

## Setting up Evaluation and logging

In the symmetric setting with $n$ players, valuations $v_i \sim U(\underline v, \overline v)$ and risk parameter $r$, the BNE-optimal bid is given by (Cox et al., 1982):

$$b^*(v) = \underline v + \frac{n - 1}{n-1+r} (v - \underline v) $$

The expected utility in the bne can then be calculated using

$$E[u_{BNE}] = \int_{\underline v}^{\overline v} P(win | b) \cdot u(b,v | win) \cdot pdf(v) \, dv$$

In this setting, we have:

$P(win | b) = P(b_i > b_j, \forall j\neq i) = P(b_i > b_j)^{n-1} = \left(\frac{v - \underline v}{\overline v - \underline v}\right)^{n-1}$,

$u(b,v | win) = v - b^*(v) = \frac{r}{n-1+r} (v - \underline v)$, where we use monotonicity and symmetry (i.e. $v_i \geq v_j \iff b_i \geq b_j$),

$pdf(v) = \frac{1}{\overline v - \underline v} $

The integral above then works out to
$$E(u_{BNE}) = \frac{r(\overline v - \underline v)}{(n-1+r)(n+1)}  $$

In [None]:
# for evaluation
def optimal_bid(valuation):
    """Return the BNE bid for `valuation` in the symmetric uniform setting.

    Implements b*(v) = u_lo + (n - 1) / (n - 1 + r) * (v - u_lo) with
    n = n_players and r = risk, so that it stays consistent with the
    analytical BNE utility computed from the same module-level settings.
    For u_lo = 0 and risk = 1 this reduces to v * (n - 1) / n.

    Args:
        valuation: a scalar or array-like of valuations supporting
            elementwise arithmetic (this notebook passes a NumPy array;
            presumably tensors are passed when used as a strategy).

    Returns:
        The bid(s), of the same kind as `valuation`.
    """
    shading = (n_players - 1) / (n_players - 1 + risk)
    return u_lo + shading * (valuation - u_lo)

# Wrap the analytical equilibrium bid function in a strategy object; it is
# played by the bidders of the BNE evaluation environment.
bneStrategy = ClosureStrategy(optimal_bid)

def setup_bne_environment(self):
    """Build the evaluation environment in which every player bids the BNE strategy.

    Sets:
        self.bne_env: an AuctionEnvironment populated with `n_players`
            bidders playing `bneStrategy`. It uses `eval_batch_size`, the
            larger batch size reserved for evaluation.
        self.bne_utility: the analytical expected utility in the BNE,
            r * (u_hi - u_lo) / ((n - 1 + r) * (n + 1)).
    """
    mechanism = self.mechanism

    # One equilibrium-playing bidder per player position.
    bne_bidders = [
        strat_to_bidder(bneStrategy,
                        player_position=position,
                        batch_size=eval_batch_size,
                        cache_actions=True)
        for position in range(n_players)
    ]

    self.bne_env = AuctionEnvironment(
        mechanism,
        agents=bne_bidders,
        batch_size=eval_batch_size,
        n_players=n_players,
        strategy_to_player_closure=strat_to_bidder,
    )

    # Same left-to-right evaluation order as the closed-form one-liner.
    payoff_share = risk / (n_players - 1 + risk)
    self.bne_utility = payoff_share * (u_hi - u_lo) / (n_players + 1)

    
def plot_bid_function(self, fig, plot_data, writer=None, e=None, plot_points=plot_points,
                      save_vectors_to_disc=save_figure_data_to_disc):
    """Plot the current bids against the analytical BNE bid function.

    Relies on the module-level arrays `v_opt` and `b_opt` (the reference
    curve), which must be defined before the first call.

    Args:
        fig: accepted for interface compatibility; the current matplotlib
            figure is used instead.
        plot_data: pair `(v, b)` of tensors holding valuations and bids.
        writer: optional writer; if truthy, the figure is logged under
            'eval/bid_function'.
        e: step (epoch) passed to `writer.add_figure`.
        plot_points: only the first `plot_points` entries are plotted.
        save_vectors_to_disc: if true, the plotted vectors are written to
            'figure_data.npz' inside `self.logdir`.
    """
    v, b = plot_data
    v = v.detach().cpu().numpy()[:plot_points]
    b = b.detach().cpu().numpy()[:plot_points]

    if save_vectors_to_disc:
        np.savez(os.path.join(self.logdir, 'figure_data.npz'),
                 v_opt=v_opt, b_opt=b_opt, v=v, b=b)

    fig = plt.gcf()
    plt.cla()
    plt.xlim(plot_xmin, plot_xmax)
    plt.ylim(plot_ymin, plot_ymax)
    plt.plot(v, b, 'o', v_opt, b_opt, 'r--')
    # `display` is only imported when an inline backend is active, so calling
    # it unconditionally raised a NameError everywhere else.
    if is_ipython:
        display.display(fig)
    if writer:
        writer.add_figure('eval/bid_function', fig, e)

## Set up training loop

In [None]:
def log_once(self, writer, e):
    """Everything that should be logged only once on initialization.

    Logs the total number of model parameters, a textual description of the
    model, the evaluation batch size and the model graph.

    Args:
        writer: object providing `add_scalar`, `add_text` and `add_graph`.
        e: step (epoch) under which the scalars are logged.
    """
    # Count the parameters here instead of relying on a module-level
    # `n_parameters`, which only exists once a later notebook cell has run.
    n_model_parameters = sum(p.numel() for p in self.model.parameters())
    writer.add_scalar('debug/total_model_parameters', n_model_parameters, e)
    writer.add_text('hyperparams/neural_net_spec', str(self.model), 0)
    writer.add_scalar('debug/eval_batch_size', eval_batch_size, e)
    # The first agent's valuations serve as example input for tracing the graph.
    writer.add_graph(self.model, self.env.agents[0].valuations)
    
def log_metrics(self, writer, e):
    """Write the per-epoch metrics stored on `self` to `writer` at step `e`.

    The metrics are read from `self.utility`, `self.update_norm`,
    `self.utility_vs_bne`, `self.epsilon_relative` and
    `self.epsilon_absolute`.
    """
    # (tag, attribute) pairs, in the order in which they are logged.
    # The absolute epsilon is filed under debug/ because it is only interesting
    # for spotting numeric-precision problems; otherwise it is the relative
    # epsilon up to scaling.
    tagged_metrics = (
        ('eval/utility', 'utility'),
        ('debug/norm_parameter_update', 'update_norm'),
        ('eval/utility_vs_bne', 'utility_vs_bne'),
        ('eval/epsilon_relative', 'epsilon_relative'),
        ('debug/epsilon_absolute', 'epsilon_absolute'),
    )
    for tag, attribute in tagged_metrics:
        writer.add_scalar(tag, getattr(self, attribute), e)

# TODO: deferred until writing logger
def log_hyperparams(self, writer, e):
    """Log everything that should be recorded whenever the learning rate is updated.

    Currently a no-op: the logging calls below are commented out until a
    dedicated logger exists. They refer to names (`learning_rate`,
    `momentum`, `sigma`, `n_perturbations`) that are not defined as
    module-level variables in this notebook.
    """
#     writer.add_scalar('hyperparams/batch_size', batch_size, e)
#     writer.add_scalar('hyperparams/learning_rate', learning_rate, e)
#     writer.add_scalar('hyperparams/momentum', momentum, e)
#     writer.add_scalar('hyperparams/sigma', sigma, e)
#     writer.add_scalar('hyperparams/n_perturbations', n_perturbations, e)

def training_loop(self, writer, e):
    """Run one training epoch and log its metrics.

    Updates the strategy via `self.learner`, then measures the size of the
    parameter update and the utility of the current model against BNE
    opponents. Every `plot_epoch` epochs the progress is printed and the bid
    function is plotted. The time spent on everything after the update is
    accumulated in `self.overhead_mins`.

    Args:
        writer: writer used for all scalar and figure logging.
        e: index of the current epoch.

    Expects `self.fig` and `self.overhead_mins` to be initialised elsewhere
    (presumably by the Experiment base class -- confirm).
    """
    ### do in every iteration ###
    # Keep the pre-update parameters to measure the update step afterwards.
    params_before = torch.nn.utils.parameters_to_vector(self.model.parameters())
    # Update the model.
    self.utility = self.learner.update_strategy_and_evaluate_utility()

    ## Everything after this is logging --> measure its overhead.
    logging_started = timer()

    # Infinity-norm of the update step.
    params_after = torch.nn.utils.parameters_to_vector(self.model.parameters())
    self.update_norm = (params_after - params_before).norm(float('inf'))

    # Utility of the current model when playing against BNE opponents.
    eval_bidder = strat_to_bidder(self.model, batch_size=eval_batch_size)
    self.utility_vs_bne = self.bne_env.get_reward(eval_bidder, draw_valuations=False)
    self.epsilon_relative = 1 - self.utility_vs_bne / self.bne_utility
    self.epsilon_absolute = self.bne_utility - self.utility_vs_bne

    self.log_metrics(writer, e)

    if e % plot_epoch == 0:
        # Plot the current bid function on the first bidder's valuations.
        first_bidder = self.bidders[0]
        valuations = first_bidder.valuations
        bids = first_bidder.get_action()

        print("Epoch {}: \tcurrent utility: {:.3f},\t utility vs BNE: {:.3f}, \tepsilon (abs/rel): ({:.5f}, {:.5f})".format(
            e, self.utility, self.utility_vs_bne, self.epsilon_absolute, self.epsilon_relative))
        self.plot(self.fig, (valuations, bids), writer, e)

    logging_seconds = timer() - logging_started
    self.overhead_mins = self.overhead_mins + logging_seconds/60
    writer.add_scalar('debug/overhead_mins', self.overhead_mins, e)

In [None]:
class SymmetricFPSBExperiment(Experiment):
    """Experiment for the symmetric single-item FPSB auction with uniform valuations.

    Binds the module-level setup, evaluation, plotting, logging and training
    functions defined in this notebook to attributes of the class;
    presumably these are the hooks that the `Experiment` base class invokes
    (confirm against `bnelearn.experiment`).
    """

    # Game, players, learning environment and learner.
    setup_game = setup_fpsb
    setup_players = setup_fpsb_bidders
    setup_learning_environment = setup_auction_environment
    setup_learners = setup_espg_learner

    # `optimal_bid` takes only the valuation. As a plain function attribute it
    # would be bound on instance access and receive the experiment itself as
    # `valuation`; `staticmethod` keeps it callable from class and instance.
    equilibrium_strategy = staticmethod(optimal_bid)
    setup_eval_environment = setup_bne_environment

    # Plotting, logging and the per-epoch training step.
    plot = plot_bid_function
    log_once = log_once
    log_metrics = log_metrics
    log_hyperparams = log_hyperparams
    training_loop = training_loop

In [None]:
# Instantiate the experiment. The entries of `name` describe the setting
# (they are presumably used to build the log directory -- confirm against
# `bnelearn.experiment`).
exp = SymmetricFPSBExperiment(
    name = ['single_item', 'fpsb', 'uniform', 'symmetric', str(n_players)+'p'],
    options = None, device = device, specific_gpu = specific_gpu, seed = None, log_root = log_root)

In [None]:
# Show the model architecture and count its parameters. `n_parameters` is a
# module-level name that other cells may read, so this cell has to run before
# training starts.
print(exp.model)
n_parameters = sum([p.numel() for p in exp.model.parameters()])
print('Total parameters: ' + str(n_parameters))

## Check Setup

# Sanity checks before training: compare the analytical BNE utility with a
# sampled estimate and report the utility of the untrained model.
# When calculating utilities, make sure valuations are drawn at least once.
print("Utility in BNE (analytical): \t{:.5f}".format(exp.bne_utility))
bne_utility_sampled = exp.bne_env.get_reward(exp.bne_env.agents[0], draw_valuations=True)
print('Utility in BNE (sampled): \t{:.5f}'.format(bne_utility_sampled))
# Utility of the current model when it takes position 0 against BNE opponents.
utility_vs_bne = exp.bne_env.get_strategy_reward(exp.model, player_position=0)
print('Model utility vs BNE: \t\t{:.5f}'.format(utility_vs_bne))
# Utility of the current model in the learning environment (self-play).
utility_learning_env = exp.env.get_strategy_reward(exp.model, player_position=0, draw_valuations = True)
print('Model utility in learning env:\t{:.5f}'.format(utility_learning_env))

# Reference curve of the analytical BNE bid function; `plot_bid_function`
# reads `v_opt` and `b_opt` as module-level names.
v_opt = np.linspace(plot_xmin, plot_xmax, 100)
b_opt = optimal_bid(v_opt)

## Training

In [None]:
# Run the experiment with the configured number of epochs.
exp.run(epoch)
            