# n Player FPSB Auction with uniform symmetric valuation distributions

## Imports

In [None]:
import os, sys, time, warnings
root_path = os.path.abspath(os.path.join('..'))
if root_path not in sys.path:
    sys.path.append(root_path)
from timeit import default_timer as timer

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.utils as ut
from torch.optim.optimizer import Optimizer, required
from torch.utils.tensorboard import SummaryWriter

from bnelearn.strategy import NeuralNetStrategy, ClosureStrategy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import FirstPriceSealedBidAuction, VickreyAuction
from bnelearn.optimizer import ES
from bnelearn.environment import AuctionEnvironment

# set up matplotlib
# `display` is only imported (and therefore only defined) when the active
# matplotlib backend name contains 'inline'.
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    from IPython import display
plt.rcParams['figure.figsize'] = [8, 5]

# Use the GPU whenever torch reports one, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

# Use specific cuda gpu if desired (i.e. for running multiple experiments in parallel).
# Set to None to keep torch's default device.
specific_gpu = 3
# Compare against None instead of relying on truthiness, so that GPU index 0
# is treated as an explicit selection rather than as "no GPU chosen".
if cuda and specific_gpu is not None:
    torch.cuda.set_device(specific_gpu)

print(device)
if cuda:
    print(torch.cuda.current_device())

## Settings

The following cell fully defines an experiment.
The following set of parameters works well:
```
2p:  batch_size = 2**18, size_hidden_layer = 10, learning_rate = 3e-1, lr_decay = True, lr_decay_every = 500, lr_decay_factor = 0.8, momentum = 0.7, sigma = .02, n_perturbations = 128


10p: (run 2019-07-08 Mon 01:00 with following settings was
      NOT aggressive enough. Needs higher lr:)
         batch_size = 2**17, size_hidden_layer = 10, learning_rate = 3e-1, lr_decay_every = 500, 
         lr_decay_factor = 0.75, momentum = 0.76, sigma = .02, n_perturbations = 256,
         eval_batch_size = 2**24
         (1 hidden layer, tanh)
```

In [None]:
# Root directory for the logs. The shared path below gives common access to
# the experiments; switch to the commented alternative to log in the
# notebook folder instead.
log_root = os.path.abspath('/srv/bnelearn/wits-experiments')
#log_root = os.path.abspath('.')
run_comment = ''  # used in log title in addition to datetime
save_figure_data_to_disc = True

## Experiment setup
n_players, n_items = 2, 1

# valuation distribution: lower and upper bound
u_lo, u_hi = 0, 10

# risk parameter for agent <-- not implemented in bidder yet but used in
# calculation of optimal utility
risk = 1

## Environment settings
batch_size = 2**17       # training batch size
eval_batch_size = 2**25  # batch size used for evaluation

# strategy model architecture
input_length = 1
hidden_nodes = [5, 5]
# one SELU activation per hidden layer
hidden_activations = [nn.SELU() for _ in hidden_nodes]

# optimization params
epoch = 2000
learning_rate = 3e-1
lr_decay = True
lr_decay_every = 500
lr_decay_factor = 0.7
momentum = 0.7

# ES params
sigma = .02  # ES noise parameter
n_perturbations = 128

# plot and log training options
plot_epoch = 500
plot_points = min(100, batch_size)

# axis limits of the bid function plot
plot_xmin, plot_xmax = u_lo, u_hi
plot_ymin, plot_ymax = 0, 10


def strat_to_bidder(strategy, batch_size=batch_size, player_position=None, cache_actions=False):
    """Create a bidder for ``strategy`` via ``Bidder.uniform(u_lo, u_hi, ...)``.

    Args:
        strategy: strategy object handed to the bidder.
        batch_size: forwarded to ``Bidder.uniform``; defaults to the value the
            module-level ``batch_size`` had when this function was defined.
        player_position: forwarded to ``Bidder.uniform`` (default ``None``).
        cache_actions: forwarded to ``Bidder.uniform`` (default ``False``).

    Returns:
        Whatever ``Bidder.uniform`` returns for these arguments.
    """
    bidder_options = {
        'batch_size': batch_size,
        'player_position': player_position,
        'cache_actions': cache_actions,
    }
    return Bidder.uniform(u_lo, u_hi, strategy, **bidder_options)

## Setting up the Environment

In [None]:
# Follow the detected `cuda` flag instead of hard-coding True, so that the
# mechanism is configured consistently with `device` (used for the model
# below) on machines without a GPU.
mechanism = FirstPriceSealedBidAuction(cuda=cuda)

# Strategy network shared by all players (symmetric setting).
# NOTE(review): requires_grad=False is presumably chosen because the ES
# optimizer does not use gradients -- confirm against NeuralNetStrategy / ES.
model = NeuralNetStrategy(
    input_length,
    hidden_nodes=hidden_nodes,
    hidden_activations=hidden_activations,
    requires_grad=False,
    ensure_positive_output=torch.tensor([float(u_hi)]),
).to(device)

# One bidder per player position, all playing the same model.
bidders = [strat_to_bidder(model, batch_size, player_position)
           for player_position in range(n_players)]

env = AuctionEnvironment(
    mechanism,
    agents=bidders,
    batch_size=batch_size,
    n_players=n_players,
    strategy_to_player_closure=strat_to_bidder,
)
optimizer = ES(model=model, environment=env,
               lr=learning_rate, momentum=momentum,
               sigma=sigma, n_perturbations=n_perturbations)

print(model)
# A generator is enough here; no intermediate list is needed for the sum.
n_parameters = sum(p.numel() for p in model.parameters())
print('Total parameters: ' + str(n_parameters))

## Setting up Evaluation and logging

In the symmetric setting with valuations $v_i \sim U(\underline v, \overline v)$, risk parameter $r$ and $n$ players, the BNE-optimal bid is given by (Cox et al., 1982)

$$b^*(v) = \underline v + \frac{n - 1}{n-1+r} (v - \underline v) $$

The expected utility in the bne can then be calculated using

$$E[u_{BNE}] = \int_{\underline v}^{\overline v}{P(win | b) * u(b,v | win) *pdf(v) dv}$$

In this setting, we have:

$P(win | b) = P(b_i > b_j, \forall j\neq i) = P(b_i > b_j)^{n-1} = \left(\frac{v - \underline v}{\overline v - \underline v}\right)^{n-1}$,

$u(b,v | win) = v - b^*(v) = \frac{r}{n-1+r} (v - \underline v)$, where we use monotonicity and symmetry (i.e. $v_i \geq v_j \iff b_i \geq b_j$),

$pdf(v) = \frac{1}{\overline v - \underline v} $

The integral above then works out to
$$E(u_{BNE}) = \frac{r(\overline v - \underline v)}{(n-1+r)(n+1)}  $$

In [None]:
# for evaluation
def optimal_bid(valuation):
    """Return the analytical BNE bid for ``valuation``.

    Implements b*(v) = u_lo + (n - 1) / (n - 1 + r) * (v - u_lo), with
    n = ``n_players`` and r = ``risk`` read from the module-level settings.
    For ``u_lo = 0`` and ``risk = 1`` this reduces to v * (n - 1) / n.
    Including ``u_lo`` and ``risk`` keeps the bid consistent with the
    analytical ``bne_utility``, which already depends on both.

    Args:
        valuation: a scalar or any array-like supporting elementwise
            arithmetic with Python numbers (it is called with a numpy
            array for plotting in this file).

    Returns:
        The bid(s), of the same kind as ``valuation``.
    """
    shading = (n_players - 1) / (n_players - 1 + risk)
    return u_lo + shading * (valuation - u_lo)

# Analytical expected utility in the BNE:
# r * (u_hi - u_lo) / ((n - 1 + r) * (n + 1))
bne_utility = risk / (n_players - 1 + risk) * (u_hi - u_lo) / (n_players + 1)

bneStrategy = ClosureStrategy(optimal_bid)

# Environment filled with optimal players for logging.
# A higher batch size (eval_batch_size) is used for calculating the optimum.
bne_agents = [
    strat_to_bidder(bneStrategy,
                    batch_size=eval_batch_size,
                    player_position=position,
                    cache_actions=True)
    for position in range(n_players)
]
bne_env = AuctionEnvironment(
    mechanism,
    agents=bne_agents,
    batch_size=eval_batch_size,
    n_players=n_players,
    strategy_to_player_closure=strat_to_bidder,
)

# when calculating utilities, make sure valuations are drawn at least once.
print("Utility in BNE (analytical): \t{:.5f}".format(bne_utility))
bne_utility_sampled = bne_env.get_reward(bne_env.agents[0], draw_valuations=True)
print('Utility in BNE (sampled): \t{:.5f}'.format(bne_utility_sampled))
utility_vs_bne = bne_env.get_strategy_reward(model, player_position=0)
print('Model utility vs BNE: \t\t{:.5f}'.format(utility_vs_bne))
utility_learning_env = env.get_strategy_reward(model, player_position=0, draw_valuations=True)
print('Model utility in learning env:\t{:.5f}'.format(utility_learning_env))

# Reference curve of the optimal bid function, drawn in every plot.
v_opt = np.linspace(plot_xmin, plot_xmax, 100)
b_opt = optimal_bid(v_opt)
    
def plot_bid_function(fig, v,b, writer=None, e=None, plot_points=plot_points,
                      save_vectors_to_disc=save_figure_data_to_disc):
    """Plot sampled bids against the analytical optimum and optionally log them.

    Args:
        fig: kept for interface compatibility. NOTE(review): the argument is
            not used; the current pyplot figure is drawn on instead.
        v: tensor of valuations (moved to CPU and converted to numpy here).
        b: tensor of the corresponding bids.
        writer: optional writer; when given, the figure is logged with
            ``add_figure`` under 'eval/bid_function'.
        e: step passed to ``writer.add_figure``.
        plot_points: only the first ``plot_points`` entries of v and b are used.
        save_vectors_to_disc: when true, the plotted vectors and the reference
            curve are saved to 'figure_data.npz' inside the global ``logdir``.
    """
    # subsample points and plot
    v = v.detach().cpu().numpy()[:plot_points]
    b = b.detach().cpu().numpy()[:plot_points]

    if save_vectors_to_disc:
        # `logdir` is a module-level name that must be set before the first call.
        np.savez(
            os.path.join(logdir, 'figure_data.npz'),
            v_opt = v_opt,
            b_opt = b_opt,
            v = v, b = b
        )

    fig = plt.gcf()
    plt.cla()
    plt.xlim(plot_xmin, plot_xmax)
    plt.ylim(plot_ymin, plot_ymax)
    plt.plot(v,b, 'o', v_opt, b_opt, 'r--')
    # `display` is only imported when an inline backend is active; calling it
    # unconditionally raised a NameError everywhere else.
    if is_ipython:
        #display.clear_output(wait=True)
        display.display(fig)
    if writer is not None:
        writer.add_figure('eval/bid_function', fig, e)

## Set up training loop

In [None]:
def log_once(writer, e):
    """Write the entries that are logged a single time, on initialization.

    Args:
        writer: writer providing ``add_scalar``, ``add_text`` and ``add_graph``.
        e: step used for the scalar entries.
    """
    writer.add_scalar('debug/total_model_parameters', n_parameters, global_step=e)
    # The network description is always written at step 0, independent of `e`.
    writer.add_text('hyperparams/neural_net_spec', str(model), global_step=0)
    writer.add_scalar('debug/eval_batch_size', eval_batch_size, global_step=e)
    # The first agent's valuations are passed as the model input for the graph.
    writer.add_graph(model, input_to_model=env.agents[0].valuations)
    
def log_metrics(writer, e):
    """Write the current values of the global training metrics at step ``e``.

    Reads the module-level ``utility``, ``update_norm``, ``utility_vs_bne``,
    ``epsilon_relative`` and ``epsilon_absolute``.
    """
    metrics = (
        ('eval/utility', utility),
        ('debug/norm_parameter_update', update_norm),
        ('eval/utility_vs_bne', utility_vs_bne),
        ('eval/epsilon_relative', epsilon_relative),
        # debug because only interesting to see if numeric precision is a
        # problem, otherwise same as relative but scaled.
        ('debug/epsilon_absolute', epsilon_absolute),
    )
    for tag, value in metrics:
        writer.add_scalar(tag, value, e)



def log_hyperparams(writer, e):
    """Write the hyperparameters that are logged on every learning-rate update.

    Reads the module-level ``batch_size``, ``learning_rate``, ``momentum``,
    ``sigma`` and ``n_perturbations`` and writes each as a scalar at step ``e``.
    """
    hyperparams = {
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'momentum': momentum,
        'sigma': sigma,
        'n_perturbations': n_perturbations,
    }
    for name, value in hyperparams.items():
        writer.add_scalar('hyperparams/' + name, value, e)

def training_loop(e, writer):
    """Run one optimizer step for epoch ``e``, then evaluate and log.

    Updates the module-level ``learning_rate`` (on decay epochs), the metric
    globals ``utility``, ``utility_vs_bne``, ``epsilon_relative``,
    ``epsilon_absolute`` and ``update_norm``, and the accumulated logging
    overhead ``overhead_mins``.

    Args:
        e: current epoch index.
        writer: writer used for all scalar and figure logging.
    """
    global overhead_mins, learning_rate
    global utility, utility_vs_bne, epsilon_relative, epsilon_absolute, update_norm

    # Decay the learning rate every `lr_decay_every` epochs (never at epoch 0)
    # and push the new value into the optimizer.
    decay_due = lr_decay and e % lr_decay_every == 0 and e > 0
    if decay_due:
        learning_rate *= lr_decay_factor
        log_hyperparams(writer, e)
        for group in optimizer.param_groups:
            group['lr'] = learning_rate

    ### do in every iteration ###
    flatten = torch.nn.utils.parameters_to_vector
    # snapshot of the parameters, needed for the update norm below
    params_before = flatten(model.parameters())
    # update model; the value returned by the step is negated to obtain the
    # utility. NOTE(review): presumably step() returns a loss -- confirm.
    utility = -optimizer.step()

    ## everything after this is logging --> measure overhead
    logging_started = timer()

    # infinity-norm of the update step
    params_after = flatten(model.parameters())
    update_norm = (params_after - params_before).norm(float('inf'))

    # utility of the current model when playing in the BNE environment
    eval_bidder = strat_to_bidder(model, batch_size=eval_batch_size)
    utility_vs_bne = bne_env.get_reward(eval_bidder, draw_valuations=False)
    epsilon_relative = 1 - utility_vs_bne / bne_utility
    epsilon_absolute = bne_utility - utility_vs_bne

    log_metrics(writer, e)

    if e % plot_epoch == 0:
        # plot current function output of the first bidder
        valuations = bidders[0].valuations
        bids = bidders[0].get_action()

        print("Epoch {}: \tcurrent utility: {:.3f},\t utility vs BNE: {:.3f}, \tepsilon (abs/rel): ({:.5f}, {:.5f})".format(e, utility, utility_vs_bne, epsilon_absolute, epsilon_relative))
        plot_bid_function(fig, valuations, bids, writer, e)

    # accumulate the time spent on evaluation/logging, in minutes
    overhead_mins = overhead_mins + (timer() - logging_started) / 60
    writer.add_scalar('debug/overhead_mins', overhead_mins, e)

## Training

In [None]:
# setup logger
# The run name becomes a directory name. ':' is not allowed in Windows file
# names, so '.' is used as the time separator there instead of aborting the
# run; on all other systems the format is unchanged.
time_format = '%Y-%m-%d %a %H.%M' if os.name == 'nt' else '%Y-%m-%d %a %H:%M'
run_name = time.strftime(time_format)
if run_comment:
    run_name = run_name + ' - ' + str(run_comment)
logdir = os.path.join(log_root, 'fpsb', 'symmetric', 'uniform', str(n_players) + 'p', run_name)

# starting epoch and accumulated logging overhead; both are updated during training
e = 0
overhead_mins = 0

print(logdir)
fig = plt.figure()
torch.cuda.empty_cache()

with SummaryWriter(logdir, flush_secs=60) as writer:

    torch.cuda.empty_cache()
    log_once(writer, 0)
    log_hyperparams(writer, 0)

    # runs epochs e, ..., e + epoch (inclusive), i.e. epoch + 1 iterations
    for e in range(e, e+epoch+1):
        training_loop(e, writer)
            