# n-Player First-Price Sealed-Bid (FPSB) Auction with Symmetric Valuation Distributions

## Imports

In [None]:
import os
import sys
root_path = os.path.abspath(os.path.join('..'))
if root_path not in sys.path:
    sys.path.append(root_path)
from timeit import default_timer as timer

In [None]:
import torch
import torch.nn as nn
import torch.nn.utils as ut
from torch.optim.optimizer import Optimizer, required

In [None]:
from bnelearn.strategy import NeuralNetStrategy, TruthfulStrategy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import FirstPriceSealedBidAuction, VickreyAuction
from bnelearn.optimizer import ES
from bnelearn.environment import AuctionEnvironment

In [None]:
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt

# set up matplotlib
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    from IPython import display
#
#plt.ion()

In [None]:
# Run on the GPU whenever CUDA is available, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

# Use a specific cuda gpu if desired
# (i.e. for running multiple experiments in parallel).
# Set to None to keep PyTorch's current default device.
specific_gpu = 3
# Compare against None instead of relying on truthiness, so that GPU index 0
# counts as an explicit choice rather than as "nothing selected".
if cuda and specific_gpu is not None:
    torch.cuda.set_device(specific_gpu)

print(device)

## Settings

The following cell fully defines an experiment

In [None]:
# Location of this run's logs: <root_path>/notebooks/<run_dir>/<run_name>.
# `logdir` is what the training cell hands to the SummaryWriter.
run_dir = 'fpsb/10p/normal/symmetric'
run_name = '3'
logdir = os.path.join(
    root_path,
    'notebooks',
    run_dir,
    run_name,
)

## Experiment setup: number of players and items
n_players, n_items = 10, 1

# valuation distribution: mean and standard deviation
valuation_mean, valuation_std = 10.0, 5.0

def strat_to_bidder(strategy, batch_size):
    """Wrap `strategy` in a bidder created by `Bidder.normal`.

    The bidder is built from the notebook-level `valuation_mean` and
    `valuation_std` together with the given `strategy` and `batch_size`.
    This function is also handed to the environment as its
    `strategy_to_bidder_closure`.
    """
    bidder = Bidder.normal(
        valuation_mean,
        valuation_std,
        strategy,
        batch_size=batch_size,
    )
    return bidder

## Environment settings
# training batch size
batch_size = 2 ** 17
input_length = 1

# strategy model architecture
size_hidden_layer = 10

# optimization params
epoch = 10000
learning_rate = 0.3
lr_decay = True
lr_decay_every = 1000
lr_decay_factor = 0.7
baseline = True
momentum = 0.5

sigma = 0.05  # ES noise parameter
n_perturbations = 256

# plot and log training options
plot_epoch = 250  # plot and log optima this often
calculate_optima = False  # whether to log stats regarding optimum (expensive - time)
write_graph = True  # whether to log graph to disk
plot_points = min(100, batch_size)

# x-axis: three standard deviations around the valuation mean,
# with the lower end clipped at 0
plot_xmin = int(max(0, valuation_mean - 3 * valuation_std))
plot_xmax = int(valuation_mean + 3 * valuation_std)

# y-axis
plot_ymin, plot_ymax = 0, 20

## Optimal Bid Function

According to Menezes et al. (2005), the optimal bid for $n$ players with symmetric valuations $v$, distributed with cdf $F(v)$, is given in this setting by

$$b^*(v) = v - \frac{\int_0^v F(x)^{n-1} dx}{F(v)^{n-1}} $$


We implement it here for calculating comparison metrics.

In [None]:
import scipy.integrate as integrate

# Valuation distribution shared by all players; its cdf plays the role of F
# in the optimal bid formula.
common_dist = torch.distributions.normal.Normal(loc=valuation_mean, scale=valuation_std)

# TODO: investigate where everything is allocated. The integration itself is
# CPU-bound (one scipy `quad` call per valuation), so the upper limits are
# copied to the host once below; only the final arithmetic runs on the device
# of `valuation`.
def optimal_bid(valuation: "torch.Tensor | np.ndarray | float") -> torch.Tensor:
    """Evaluate b*(v) = v - (integral_0^v F(x)^(n-1) dx) / F(v)^(n-1).

    F is the cdf of the module-level `common_dist` and n is the module-level
    `n_players`. The integral is computed numerically with
    `scipy.integrate.quad`, once per element of `valuation`.

    Args:
        valuation: valuation(s) to evaluate. Floats and numpy arrays are
            converted to a float32 tensor; tensors are used as they are and
            are never modified in place.

    Returns:
        A tensor of optimal bids on the device of `valuation`, with the shape
        of `valuation` (a scalar / 0-d input yields shape ``(1,)``).

    Note:
        There is no guard against F(v)^(n-1) evaluating to 0; in that case
        the division produces nan or inf.
    """
    # For float and numpy --> convert to tensor
    if not isinstance(valuation, torch.Tensor):
        valuation = torch.tensor(valuation, dtype=torch.float)
    # For float / 0d tensors --> add a dimension so there is something to
    # iterate over. Out-of-place on purpose: `unsqueeze_` would change the
    # shape of a 0-d tensor owned by the caller.
    if valuation.dim() == 0:
        valuation = valuation.unsqueeze(0)

    def f_powered(v):
        """Shorthand for F(v)^(n-1) on a tensor `v`."""
        return torch.pow(common_dist.cdf(v), n_players - 1)

    def integrand(x):
        """Scalar integrand for `quad`: plain float in, plain float out."""
        # `quad` passes floats, while `Normal.cdf` expects a tensor when
        # argument validation is enabled.
        return f_powered(torch.tensor(x, dtype=torch.float)).item()

    # Plain Python floats as integration limits; this works for tensors on
    # any device and of any shape.
    upper_limits = valuation.detach().flatten().tolist()

    # do the calculations
    numerator = torch.tensor(
        [integrate.quad(integrand, 0, upper)[0] for upper in upper_limits],
        device=valuation.device,
    ).reshape(valuation.shape)
    return valuation - numerator / f_powered(valuation)

# Setting up the Environment

In [None]:
def log_hyperparams(writer, e):
    """Log the current hyperparameters as scalars under `hyperparams/`.

    The values are read from the notebook-level variables at call time, so a
    learning rate that was changed in the meantime is logged with its new
    value.

    Args:
        writer: object providing `add_scalar(tag, value, step)`; the training
            cell passes its `SummaryWriter`.
        e: step (epoch) under which all values are recorded.
    """
    hyperparams = {
        'batch_size': batch_size,
        'size_hidden_layer': size_hidden_layer,
        'learning_rate': learning_rate,
        'momentum': momentum,
        'sigma': sigma,
        'n_perturbations': n_perturbations,
    }
    # Every entry is written at step `e`. `size_hidden_layer` used to be
    # pinned to step 0, which piled up duplicate points at step 0 whenever
    # the hyperparameters were logged again later in training.
    for name, value in hyperparams.items():
        writer.add_scalar('hyperparams/' + name, value, e)
   
    
# The optimal curve is evaluated once on a fixed grid and reused by every
# plot, which saves repeating the cpu-bound integrations.
# (100 points are more than enough.)
v_opt = np.linspace(start=plot_xmin, stop=plot_xmax, num=100)
b_opt = optimal_bid(v_opt).numpy()

def plot_bid_function(fig, v, b, writer=None, e=None, plot_points=plot_points):
    """Plot bids against valuations together with the optimal bid curve.

    The points `(v, b)` are drawn as markers; the precomputed module-level
    curve `(v_opt, b_opt)` is drawn as a red dashed line. Axis limits come
    from the module-level `plot_xmin/xmax/ymin/ymax`.

    Args:
        fig: currently unused and kept for interface compatibility; drawing
            always happens on pyplot's current figure.
            NOTE(review): `SummaryWriter.add_figure` closes the figure it is
            given by default, so a handle kept by the caller may be stale on
            later calls - confirm before switching to `fig`.
        v: tensor of valuations (x-values).
        b: tensor of bids (y-values).
        writer: optional object providing `add_figure(tag, figure, step)`;
            if given, the figure is logged as 'eval/bid_function'.
        e: step passed on to `writer.add_figure`.
        plot_points: only the first `plot_points` entries of `v` and `b`
            are plotted.
    """
    # subsample points and plot
    v = v.detach().cpu().numpy()[:plot_points]
    b = b.detach().cpu().numpy()[:plot_points]

    fig = plt.gcf()
    plt.cla()
    plt.xlim(plot_xmin, plot_xmax)
    plt.ylim(plot_ymin, plot_ymax)
    plt.plot(v, b, 'o', v_opt, b_opt, 'r--')

    # `display` is only imported when the inline backend is active, so
    # calling it unconditionally raises a NameError everywhere else.
    if is_ipython:
        display.display(fig)
    if writer is not None:
        writer.add_figure('eval/bid_function', fig, e)

Plot optimal bid to ensure appropriate boundaries have been chosen

In [None]:
# A single dummy point at the origin: only the precomputed optimal curve and
# the axis limits are of interest in this check.
plot_bid_function(fig=None, v=torch.tensor([0]), b=torch.tensor([0]))

Initialize the model.
We re-initialize the network until its output at the mean valuation is positive, as otherwise we can't learn.

In [None]:
# Keep drawing fresh initializations until the network's output at the mean
# valuation is strictly positive.
output_is_positive = False
while not output_is_positive:
    model = NeuralNetStrategy(
        input_length,
        size_hidden_layer=size_hidden_layer,
        requires_grad=False,
    ).to(device)

    # Probe the freshly initialized model at a single point: the mean valuation.
    output_is_positive = bool(
        model(torch.tensor([float(valuation_mean)], device=device)) > 0
    )

In [None]:
# Follow the notebook's own CUDA detection instead of hard-coding `True`, so
# the mechanism is not asked for CUDA on a CPU-only machine.
mechanism = FirstPriceSealedBidAuction(cuda=cuda)

# Environment the strategy is evaluated in. No agents are passed up front;
# bidders are built on demand from strategies via `strat_to_bidder`.
env = AuctionEnvironment(
    mechanism,
    agents=[],  # dynamically built
    max_env_size=n_players - 1,  # presumably the number of opponents - TODO confirm
    batch_size=batch_size,
    n_players=n_players,
    strategy_to_bidder_closure=strat_to_bidder,
)

# ES optimizer that updates `model` against `env`.
optimizer = ES(
    model=model,
    environment=env,
    lr=learning_rate,
    momentum=momentum,
    sigma=sigma,
    n_perturbations=n_perturbations,
    baseline=baseline,
    env_type='dynamic',
)

## Training

In [None]:
# Main training loop. Runs `epoch + 1` optimizer steps and, every `plot_epoch`
# epochs, logs statistics and plots the current bid function. Variable names
# are kept as they are because notebook cells share one global namespace.
with SummaryWriter(logdir, flush_secs=120) as writer:
    overhead_mins = 0  # accumulated time spent on checkpoint logging
    torch.cuda.empty_cache()
    log_hyperparams(writer, 0)
    fig = plt.figure()

    for e in range(epoch + 1):
        # Step-wise learning rate decay: every `lr_decay_every` epochs (but
        # not at epoch 0), shrink the rate, log it, and push it into the
        # optimizer's parameter groups.
        if lr_decay and e % lr_decay_every == 0 and e > 0:
            learning_rate *= lr_decay_factor
            log_hyperparams(writer, e)
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        # always: do optimizer step; the sign of its return value is flipped
        # to obtain the logged utility
        utility = -optimizer.step()
        writer.add_scalar('eval/utility', utility, e)

        # Everything below is checkpoint work.
        if e % plot_epoch != 0:
            continue

        start_time = timer()

        # Sample fresh valuations and query the current model for its bids.
        bidder = strat_to_bidder(model, batch_size)
        bidder.draw_valuations_()
        v = bidder.valuations
        b = bidder.get_action()

        if calculate_optima:
            # Compare against the optimal bids
            # (expensive for general valuation priors!).
            op_e = optimal_bid(v)
            share = b.mean() / op_e.mean()
            diff = (b - op_e).mean()
            writer.add_scalar('eval/share', share, e)
            writer.add_scalar('eval/diff', diff, e)
            print("Epoch {}: \ttotal share: {:.3f}, diff: {:.3f}, \tutility: {:.3f}".format(e, share, diff, utility))
        else:
            print("Epoch {}: \ttotal share: {}, diff: {}, \tutility: {:.3f}".format(e, '?', '?', utility))

        if write_graph:
            writer.add_graph(model, bidder.valuations)

        plot_bid_function(fig, v, b, writer, e)

        # Track how much wall-clock time the logging itself costs.
        elapsed = timer() - start_time
        overhead_mins += elapsed / 60
        writer.add_scalar('eval/overhead_mins', overhead_mins, e)
        print("Logging checkpoint took {:.2f}s.".format(elapsed))