# 2 Player FPSB Auction with asymmetric uniform valuation distributions

## Imports

In [None]:
import os
import sys
root_path = os.path.join(os.path.expanduser('~'), 'bnelearn')
if root_path not in sys.path:
    sys.path.append(root_path)
import time
from timeit import default_timer as timer
from functools import partial

import torch
import torch.nn as nn
import torch.nn.utils as ut
from torch.optim.optimizer import Optimizer, required

from bnelearn.strategy import NeuralNetStrategy, ClosureStrategy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import FirstPriceSealedBidAuction, VickreyAuction
from bnelearn.learner import ESPGLearner
from bnelearn.environment import AuctionEnvironment

from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt

# set up matplotlib
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    from IPython import display
plt.rcParams['figure.figsize'] = [10, 7]

cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

# Use specific cuda gpu if desired (i.e. for running multiple experiments in parallel).
# Set to None to keep the default device. The check is against None on purpose:
# a plain truthiness test would treat GPU index 0 as "no GPU requested".
specific_gpu = 3
if cuda and specific_gpu is not None:
    torch.cuda.set_device(specific_gpu)

print(device)
if cuda:
    print(torch.cuda.current_device())

# Seed the CPU and all CUDA generators; set seed to None to skip seeding.
seed = 8
if seed is not None:
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

## Settings


In [None]:
# Root directory for experiment logs, located below root_path.
# Alternatives that have been used instead:
#   os.path.abspath('/srv/bnelearn/experiments')  -- shared access to experiments
#   os.path.abspath('.')                          -- the notebook folder
log_root = os.path.join(root_path, 'experiments')
# Suffix appended to the run name so runs can be told apart by seed.
run_comment = f'{seed}_pretrain'
# Switches read by plot_bid_function when it renders a figure.
save_figure_to_disc_png = True
save_figure_to_disc_svg = True
show_plot_inline = False

## Experiment setup
n_players, n_items = 2, 1
# Valuation distribution: uniform per player.
# Both players should have the same lower bound; only the upper bounds differ.
u_lo = 5.
u0_hi = 15.
u1_hi = 25.
# Upper bounds indexed by player position.
u_his = [u0_hi, u1_hi]

# Number of pretraining iterations; pretraining is skipped when this is not positive.
pretrain_iters = 500

def strat_to_bidder(strategy, batch_size, player_position):
    """Wrap `strategy` in a Bidder with this player's uniform valuation prior.

    The prior runs from the shared lower bound `u_lo` to the upper bound
    stored for `player_position` in `u_his`.
    """
    upper_bound = u_his[player_position]
    return Bidder.uniform(
        u_lo,
        upper_bound,
        strategy,
        player_position=player_position,
        batch_size=batch_size,
    )

## Environment settings
batch_size = 2 ** 17       # training batch size
eval_batch_size = 2 ** 25  # batch size of the equilibrium (evaluation) environment
epoch = 5000               # the training loop runs epoch + 1 iterations

# strategy model architecture
input_length = 1
hidden_nodes = [10, 10]
# one SELU activation instance per hidden layer
hidden_activations = [nn.SELU() for _ in hidden_nodes]


learner_hyperparams = dict(
    population_size=64,
    sigma=1.,
    scale_sigma_by_model_size=True,
)

### Optimizer hyperparams
# Reference values for the two optimizers that have been tried:
#   SGD standards:  lr=1e-3, momentum=0.7
#   Adam standards: lr=1e-3,
#                   betas=(0.9, 0.999)   coefficients for running avgs of grad and square grad
#                   eps=1e-8             added to denominator for numeric stability
#                   weight_decay=0       L2-decay
#                   amsgrad=False        whether to use amsgrad-variant
optimizer_type = torch.optim.SGD
optimizer_hyperparams = dict(
    lr=1e-3,
    momentum=0.5,
)

# plot and log training options
plot_epoch = 50                      # plot every plot_epoch-th iteration
plot_points = min(150, batch_size)   # number of sampled points drawn per player

# axis limits of the bid-function plot
plot_xmin, plot_xmax = u_lo, u1_hi
plot_ymin, plot_ymax = 0, 15

## Setting up the Environment

For $v_1 \sim U(\alpha, \beta_1)$, $v_2 \sim U(\alpha, \beta_2)$, and with

$$c = \frac{1}{(\beta_1 - \alpha)^2} -\frac{1}{(\beta_2 - \alpha)^2}, $$

the equilibrium bids are given by

$$b_1^*(v_1) = \alpha + \frac{v_1 - \alpha}{1 + \sqrt{1-c(v_1-\alpha)^2}} $$

$$b_2^*(v_2) = \alpha + \frac{v_2 - \alpha}{1 + \sqrt{1+c(v_2-\alpha)^2}} $$

(See https://link.springer.com/article/10.1007/BF01271133)

The expected utility in the bne can then be calculated using

$$E[u_{BNE}] = \int_{0}^{\infty}{P(win | b) * u(b,v | win) *f(v) dv} = ???$$


In [None]:
# for evaluation
# helper constant shared by both players' equilibrium bid functions
c = 1 / (u0_hi - u_lo)**2 - 1 / (u1_hi - u_lo)**2


def optimal_bid(valuation: 'torch.Tensor | np.ndarray | float',
                player_position: int) -> torch.Tensor:
    """Return the closed-form equilibrium bid for the given valuation(s).

    Args:
        valuation: a tensor, numpy array or plain number. Non-tensor input is
            converted to a float tensor; a scalar is promoted to shape (1,).
        player_position: 0 for the weak player (upper bound u0_hi),
            1 for the strong player (upper bound u1_hi).

    Returns:
        Tensor of bids, one per valuation.

    Raises:
        ValueError: if `player_position` is neither 0 nor 1.
    """
    if not isinstance(valuation, torch.Tensor):
        valuation = torch.tensor(valuation, dtype=torch.float)
    # Promote a plain scalar to a 1-element tensor. Done out-of-place so that
    # a 0-dim tensor owned by the caller is not reshaped behind its back.
    if valuation.dim() == 0:
        valuation = valuation.unsqueeze(0)

    excess = valuation - u_lo
    if player_position == 0:
        # weak player
        return u_lo + excess / (1 + torch.sqrt(1 - c * excess**2))
    if player_position == 1:
        # strong player: same form with the sign of c flipped
        return u_lo + excess / (1 + torch.sqrt(1 + c * excess**2))
    # Previously this case fell through and silently returned None.
    raise ValueError(
        'player_position must be 0 or 1, got {!r}'.format(player_position))

def setup_custom_scalar_plots(writer):
    """Register the custom multiline charts of the 'eval' category.

    The complete layout is assembled first and handed to
    `writer.add_custom_scalars` in a single call. Each chart groups one
    per-player tag (p0, p1, ...) across all `n_players` players.
    """
    def multiline(tag_suffix):
        # One tag per player, e.g. eval_players/p0_<suffix>, eval_players/p1_<suffix>
        tags = [f'eval_players/p{i}_{tag_suffix}' for i in range(n_players)]
        return ['Multiline', tags]

    charts = {
        'Utilities (SP and BNE)': multiline('utility'),
        'Utilities Self Play': multiline('utility_sp'),
        'Utilities vs BNE': multiline('utility_vs_bne'),
        'Loss vs BNE absolute': multiline('epsilon_absolute'),
        'Loss vs BNE relative': multiline('epsilon_relative'),
        # A margin chart would look like: ['Margin', ['tag_mean', 'tag_min', 'tag_max']]
    }
    writer.add_custom_scalars({'eval': charts})

def log_once(writer, e):
    """Write the values that only need to be logged once, at initialization.

    Logs the parameter count per player and in total, the evaluation batch
    size, and the textual spec and graph of `model_1` (the graph is traced
    with the valuations of the first agent in `env`).
    """
    for player in range(n_players):
        tag = f'debug_players/p{player}_model_parameters'
        writer.add_scalar(tag, n_parameters[player], e)

    total_parameters = sum(n_parameters)
    writer.add_scalar('debug/model_parameters', total_parameters, e)
    writer.add_scalar('debug/eval_batch_size', eval_batch_size, e)

    # The text entry is always written at step 0, independent of e.
    writer.add_text('hyperparams/neural_net_spec', str(model_1), 0)
    sample_input = env.agents[0].valuations
    writer.add_graph(model_1, sample_input)

def log_hyperparams(writer, e):
    """Placeholder for hyperparameter logging; currently writes nothing.

    Scalars that were sketched for this hook but are not logged:
    batch size, learning rate, momentum, sigma and number of perturbations
    (under the 'hyperparams/' tag prefix).
    """
    return None
        
def log_metrics(writer, u, u_vs_bne, e):
    """Log per-player utilities and losses, plus the mean losses, at step e.

    `u` (self-play utilities) and `u_vs_bne` (utilities against the
    equilibrium strategies) are indexed by player position; tensors should
    be of shape n_players.
    """
    epsilons_rel = eps_rel(u_vs_bne)
    epsilons_abs = eps_abs(u_vs_bne)

    # Note on tag names: a multiline chart captures every tag that starts with
    # the configured name, i.e. 'p0_utility' also matches 'p0_utility_sp' and
    # 'p0_utility_vs_bne'. Self-play utility therefore gets its own '_sp'
    # suffix so it can still be selected by itself.
    per_player_series = (
        ('utility_sp', u),
        ('utility_vs_bne', u_vs_bne),
        ('epsilon_absolute', epsilons_abs),
        ('epsilon_relative', epsilons_rel),
    )
    for player in range(n_players):
        for suffix, values in per_player_series:
            writer.add_scalar(f'eval_players/p{player}_{suffix}', values[player], e)

    # Averages over players
    writer.add_scalar('eval/epsilon_relative', epsilons_rel.mean(), e)
    writer.add_scalar('debug/epsilon_absolute', epsilons_abs.mean(), e)

# Reference equilibrium curves (valuation grid and matching optimal bids) for
# each player; plot_bid_function draws them as dashed lines.
v0_opt = np.linspace(u_lo, u0_hi, num=25)
b0_opt = optimal_bid(v0_opt, player_position=0).numpy()
v1_opt = np.linspace(u_lo, u1_hi, num=50)
b1_opt = optimal_bid(v1_opt, player_position=1).numpy()
    
def plot_bid_function(fig, v0, b0, v1, b1, writer=None, e=None):
    """Plot sampled bids of both players against their equilibrium curves.

    Args:
        fig: accepted for the call signature only; the function always draws
            on the figure returned by `plt.gcf()`.
        v0, b0: valuations and bids of player 0 (tensors).
        v1, b1: valuations and bids of player 1 (tensors).
        writer: optional summary writer; when given, the figure is added under
            the tag 'eval/bid_function'.
        e: iteration number, shown in the plot and used in the file names.

    Depending on the module-level switches the figure is also saved as png
    and/or svg below `logdir` and shown inline.
    """
    def _first_points(tensor):
        # Move to host memory and keep only the first plot_points samples.
        return tensor.detach().cpu().numpy()[:plot_points]

    v0, b0, v1, b1 = (_first_points(t) for t in (v0, b0, v1, b1))

    figure = plt.gcf()
    plt.cla()
    plt.xlim(plot_xmin, plot_xmax)
    plt.ylim(plot_ymin, plot_ymax)
    plt.xlabel('valuation')
    plt.ylabel('bid')

    # Iteration label, 5% in from the top-left corner of the axes range.
    label_x = plot_xmin + 0.05*(plot_xmax - plot_xmin)
    label_y = plot_ymax - 0.05*(plot_ymax - plot_ymin)
    plt.text(label_x, label_y, 'iteration {}'.format(e))

    # Dots: sampled bids; dashed lines: equilibrium (blue = p0, red = p1).
    plt.plot(v0, b0, 'bo', v0_opt, b0_opt, 'b--', v1, b1, 'ro', v1_opt, b1_opt, 'r--')

    if save_figure_to_disc_png:
        png_path = os.path.join(logdir, 'png', f'epoch_{e:05}.png')
        plt.savefig(png_path)

    if save_figure_to_disc_svg:
        svg_path = os.path.join(logdir, 'svg', f'epoch_{e:05}.svg')
        plt.savefig(svg_path, format='svg', dpi=1200)

    if show_plot_inline:
        plt.show()

    if writer:
        writer.add_figure('eval/bid_function', figure, e)

Initialize the model.
We'll ensure the initialization provides positive outputs on the domain we are interested in, as otherwise we can't learn.

In [None]:
# initialize models
# Each model is asked to produce a positive output at its player's upper
# valuation bound (see ensure_positive_output), so learning can start.
model_0 = NeuralNetStrategy(input_length,
                            hidden_nodes = hidden_nodes,
                            hidden_activations = hidden_activations,
                            ensure_positive_output = torch.tensor([float(u0_hi)])
                            ).to(device)


model_1 = NeuralNetStrategy(input_length,
                            hidden_nodes = hidden_nodes,
                            hidden_activations = hidden_activations,
                            ensure_positive_output = torch.tensor([float(u1_hi)])
                            ).to(device)

# Parameter count per player. The second entry previously counted model_0
# again instead of model_1.
n_parameters = [sum(p.numel() for p in model.parameters())
                for model in (model_0, model_1)]

bidder_0 = strat_to_bidder(model_0, batch_size, player_position=0)
bidder_1 = strat_to_bidder(model_1, batch_size, player_position=1)


# Pass the detected CUDA flag rather than a hard-coded True so the setup is
# consistent with `device` on machines without a GPU.
mechanism = FirstPriceSealedBidAuction(cuda = cuda)
env = AuctionEnvironment(mechanism,
                  agents = [bidder_0, bidder_1],
                  batch_size = batch_size,
                  n_players =n_players,
                  strategy_to_player_closure = strat_to_bidder
                 )

# One learner per player; both share the environment and hyperparameters and
# differ only in the model they train and the position they play.
learner_0 = ESPGLearner(
    model = model_0,
    environment = env,
    hyperparams = learner_hyperparams,
    optimizer_type = optimizer_type,
    optimizer_hyperparams = optimizer_hyperparams,
    strat_to_player_kwargs={"player_position":0})

learner_1 = ESPGLearner(
    model = model_1,
    environment = env,
    hyperparams = learner_hyperparams,
    optimizer_type = optimizer_type,
    optimizer_hyperparams = optimizer_hyperparams,
    strat_to_player_kwargs={"player_position":1})



print(model_0)
print('Total parameters: ' + str(n_parameters))

## Set up equilibrium-environment

In [None]:
# One closed-form equilibrium strategy per player position.
bne_strategies = [
    ClosureStrategy(partial(optimal_bid, player_position=position))
    for position in range(n_players)
]

# Bidders that play the equilibrium strategies on the evaluation batch size.
bne_agents = [
    strat_to_bidder(strategy, player_position=position, batch_size=eval_batch_size)
    for position, strategy in enumerate(bne_strategies)
]

# Environment in which every agent plays its equilibrium strategy.
bne_env = AuctionEnvironment(
    mechanism,
    agents=bne_agents,
    n_players=n_players,
    batch_size=eval_batch_size,
    strategy_to_player_closure=strat_to_bidder,
)


### Pretraining

In [None]:
# Pretrain each model on its own bidder's current valuations (skipped when
# pretrain_iters is zero or negative).
if pretrain_iters > 0:
    for model, bidder in ((model_0, bidder_0), (model_1, bidder_1)):
        model.pretrain(bidder.valuations, pretrain_iters)

In [None]:

#print("Utility in BNE (analytical): \t{:.5f}".format(bne_utility))
# Sampled utility of each agent in the equilibrium environment.
bne_utilities_sampled = torch.tensor(
    [bne_env.get_reward(agent, draw_valuations = True) for agent in bne_env.agents]
)
bne_report = 'Utilities in BNE (sampled):'+ '\t{:.5f}'*n_players + '.'
print(bne_report.format(*bne_utilities_sampled))

if u0_hi==15 and u1_hi ==25 and u_lo==5:
    # replace by known optimum with higher precision
    # (calculated using 100x batch size above)
    bne_utilities_sampled = torch.tensor([0.9694, 5.0688])


def eps_abs(us):
    """Absolute utility loss relative to the equilibrium utilities."""
    return bne_utilities_sampled - us


def eps_rel(us):
    """Relative utility loss: 1 - us / equilibrium utility."""
    return 1- us/bne_utilities_sampled


# Utility of each learned strategy when played against the equilibrium.
utilities_vs_bne = torch.tensor(
    [bne_env.get_strategy_reward(agent.strategy, player_position=position)
     for position, agent in enumerate(env.agents)]
)
vs_bne_report = 'Model utility vs BNE: \t'+'\t{:.5f}'*n_players
print(vs_bne_report.format(*utilities_vs_bne))

# Utility of each learned strategy in the learning (self-play) environment.
utilities_learning_env = torch.tensor(
    [env.get_strategy_reward(agent.strategy, player_position=position, draw_valuations = True)
     for position, agent in enumerate(env.agents)]
)
learning_env_report = 'Model utility in learning env:'+'\t{:.5f}'*n_players
print(learning_env_report.format(*utilities_learning_env))

In [None]:
# Draw fresh valuations for each bidder in turn, record the bids the current
# strategies produce, and plot them without a summary writer.
bid_samples = []
for bidder in (bidder_0, bidder_1):
    bidder.draw_valuations_()
    bid_samples.append((bidder.valuations, bidder.get_action()))
(v_0, b_0), (v_1, b_1) = bid_samples

fig = plt.figure()
plot_bid_function(fig, v_0, b_0, v_1, b_1, writer=None, e=0)

## Training

In [None]:
# Show the root directory below which the run's log directory is created.
print(log_root)

In [None]:
# Build a unique, timestamped run name. Colons are not valid in Windows file
# names, so a colon-free time format is used there instead of aborting.
time_format = '%Y-%m-%d %a %H.%M.%S' if os.name == 'nt' else '%Y-%m-%d %a %H:%M:%S'
run_name = time.strftime(time_format)
if run_comment:
    run_name = run_name + ' - ' + str(run_comment)
logdir = os.path.join(log_root, 'single_item', 'first_price',  'uniform', 'asymmetric', str(n_players) + 'p', run_name)
print(logdir)
# exist_ok=False: fail rather than write into the directory of an earlier run
os.makedirs(logdir, exist_ok=False)
if save_figure_to_disc_png:
    os.mkdir(os.path.join(logdir, 'png'))
if save_figure_to_disc_svg:
    os.mkdir(os.path.join(logdir, 'svg'))

plt.rcParams['figure.figsize'] = [10, 7]
with SummaryWriter(logdir, flush_secs=60) as writer:

    setup_custom_scalar_plots(writer)

    # accumulated time spent on logging and plotting (not on learning), in minutes
    overhead_mins = 0
    torch.cuda.empty_cache()
    log_once(writer, 0)
    log_hyperparams(writer, 0)
    fig = plt.figure()

    # plot current function output (state before the first update)
    bidder_0.draw_valuations_()
    v_0 = bidder_0.valuations
    b_0 = bidder_0.get_action()
    bidder_1.draw_valuations_()
    v_1 = bidder_1.valuations
    b_1 = bidder_1.get_action()
    plot_bid_function(fig, v_0, b_0, v_1, b_1, writer,e=0)

    for e in range(epoch+1):

        # always: do optimizer step (player 0 first, then player 1)
        utility_0 = learner_0.update_strategy_and_evaluate_utility()
        utility_1 = learner_1.update_strategy_and_evaluate_utility()

        # logging; everything from here to the end of the iteration counts as overhead
        start_time = timer()
        utilities = torch.tensor([utility_0, utility_1])
        utilities_vs_bne = torch.tensor([bne_env.get_strategy_reward(a.strategy, player_position=i) for i,a in enumerate(env.agents)])
        log_metrics(writer, utilities, utilities_vs_bne, e)

        if e % plot_epoch == 0:

            # plot current function output
            bidder_0.draw_valuations_()
            v_0 = bidder_0.valuations
            b_0 = bidder_0.get_action()
            bidder_1.draw_valuations_()
            v_1 = bidder_1.valuations
            b_1 = bidder_1.get_action()

            print("Epoch {}: \tutilities: \t p0: {:.3f} \t p1: {:.3f}".format(e, utility_0, utility_1))
            plot_bid_function(fig, v_0, b_0, v_1, b_1, writer,e)

        elapsed = timer() - start_time
        overhead_mins += elapsed/60
        writer.add_scalar('debug/overhead_mins', overhead_mins, e)
            
                     
            