# 2 Player FPSB Auction with asymmetric uniform valuation distributions

## Imports

In [None]:
import os
import sys
root_path = os.path.abspath(os.path.join('..'))
if root_path not in sys.path:
    sys.path.append(root_path)
import time
from timeit import default_timer as timer

In [None]:
import torch
import torch.nn as nn
import torch.nn.utils as ut
from torch.optim.optimizer import Optimizer, required

In [None]:
from bnelearn.strategy import NeuralNetStrategy, TruthfulStrategy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import FirstPriceSealedBidAuction, VickreyAuction
from bnelearn.optimizer import ES
from bnelearn.environment import AuctionEnvironment

In [None]:
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt

# Set up matplotlib: detect whether an inline (notebook) backend is active.
# NOTE: `display` is only bound when this check succeeds, so any code that
# uses it relies on an inline backend being selected.
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    from IPython import display
# Interactive mode is intentionally left disabled:
#plt.ion()

In [None]:
# Pick the compute device: CUDA when available, otherwise CPU.
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

# Use specific cuda gpu if desired (i.e. for running multiple experiments in parallel).
# Set to None to keep the default CUDA device. The check below uses
# `is not None` so that index 0 is honoured as an explicit choice.
specific_gpu = 7
if cuda and specific_gpu is not None:
    n_gpus = torch.cuda.device_count()
    # Fail early with a readable message instead of an opaque CUDA error.
    if not 0 <= specific_gpu < n_gpus:
        raise ValueError(
            'specific_gpu={} is not available; this machine exposes {} CUDA device(s).'
            .format(specific_gpu, n_gpus))
    torch.cuda.set_device(specific_gpu)

print(device)

## Settings


In [None]:
# Optional free-text suffix; when non-empty it is appended to the run name
# used for the log directory.
run_comment = ''

## Experiment setup
n_players = 2
n_items = 1
# Valuation distributions: each player gets its own upper bound while the
# lower bound is shared.
# both players should have same lower bound
u_lo =   5.   # shared lower bound
u1_hi = 15.   # upper bound used for the first bidder / model_1
u2_hi = 20.   # upper bound used for the second bidder / model_2

def strat_to_bidder(strategy, batch_size, u_hi, player_position=None):
    """Build a Bidder for ``strategy`` via ``Bidder.uniform``.

    The module-level lower bound ``u_lo`` is shared by all players; only the
    upper bound ``u_hi`` differs per bidder.

    Args:
        strategy: strategy object handed to the bidder.
        batch_size: forwarded to ``Bidder.uniform`` as ``batch_size``.
        u_hi: upper bound passed to ``Bidder.uniform``.
        player_position: forwarded unchanged (``None`` by default).

    Returns:
        Whatever ``Bidder.uniform`` returns for these arguments.
    """
    bidder = Bidder.uniform(
        u_lo,
        u_hi,
        strategy,
        player_position=player_position,
        batch_size=batch_size,
    )
    return bidder

## Environment settings
# Training batch size; passed to the bidders and to the auction environment.
batch_size = 2**16
# First positional argument of NeuralNetStrategy.
input_length = 1

# strategy model architecture
size_hidden_layer = 10

# optimization params
epoch = 5000            # the training loop runs for epoch + 1 iterations
learning_rate = 2e-1
lr_decay = False        # enable multiplicative learning-rate decay
lr_decay_every = 2000   # decay interval, in epochs
lr_decay_factor = 0.8   # learning_rate is multiplied by this on each decay
baseline = True         # forwarded to the ES optimizer
momentum = 0.6          # forwarded to the ES optimizer

sigma = .02 # ES noise parameter
n_perturbations = 128   # forwarded to the ES optimizer

# plot and log training options
plot_epoch = 250        # plot/log a checkpoint every this many epochs
plot_points = min(100, batch_size)  # at most this many samples are drawn per player

# Axis limits of the bid-function plot.
plot_xmin = u_lo
plot_xmax = u2_hi
plot_ymin = 0
plot_ymax = 12

## Setting up the Environment

In [None]:
# for evaluation
# Helper constant for the analytical bid functions: the difference of the
# inverse squared widths of the two players' valuation intervals.
c = 1 / (u1_hi - u_lo)**2 - 1 / (u2_hi - u_lo)**2
def optimal_bid(valuation: "torch.Tensor | np.ndarray | float",
                player_position: int) -> torch.Tensor:
    """Evaluate the analytical reference bid function of one player.

    Relies on the module-level constants ``u_lo`` (shared lower valuation
    bound) and ``c`` (helper constant derived from both upper bounds).

    Args:
        valuation: valuation(s) to evaluate; a tensor, a numpy array or a
            plain number. Non-tensor input is converted to a float tensor and
            a 0-dim value is promoted to a 1-element tensor. The caller's
            tensor is never modified.
        player_position: ``1`` or ``2``. Note this is 1-based here, unlike
            the 0-based ``player_position`` passed to the bidders.

    Returns:
        Tensor of bids with at least one dimension.

    Raises:
        ValueError: if ``player_position`` is neither 1 nor 2 (previously the
            function silently returned ``None``).
    """
    if player_position not in (1, 2):
        raise ValueError(
            'player_position must be 1 or 2, got {!r}'.format(player_position))

    if not isinstance(valuation, torch.Tensor):
        valuation = torch.tensor(valuation, dtype=torch.float)
    # Promote a scalar to shape (1,). Done out-of-place so that a 0-dim
    # tensor owned by the caller is not reshaped behind its back.
    if valuation.dim() == 0:
        valuation = valuation.unsqueeze(0)

    surplus = valuation - u_lo
    if player_position == 1:
        return u_lo + surplus / (1 + torch.sqrt(1 - c*surplus**2))
    return u_lo + surplus / (1 + torch.sqrt(1 + c*surplus**2))
            

def log_hyperparams(writer, e):
    """Write the current hyperparameters as scalars to ``writer``.

    Args:
        writer: object exposing ``add_scalar(tag, value, step)``.
        e: step (epoch) under which the values are recorded.

    The values are read from the module-level settings at call time.
    """
    # (tag, value, step) in the order they are written.
    # NOTE(review): size_hidden_layer is always recorded at step 0 while all
    # other values use ``e`` -- confirm this is intended.
    records = (
        ('hyperparams/batch_size', batch_size, e),
        ('hyperparams/size_hidden_layer', size_hidden_layer, 0),
        ('hyperparams/learning_rate', learning_rate, e),
        ('hyperparams/momentum', momentum, e),
        ('hyperparams/sigma', sigma, e),
        ('hyperparams/n_perturbations', n_perturbations, e),
    )
    for tag, value, step in records:
        writer.add_scalar(tag, value, step)

# Reference curves: evaluate the analytical bid function once on an evenly
# spaced grid over each player's valuation interval. They are drawn as dashed
# lines next to the learned bids.
v1_opt = np.linspace(u_lo, u1_hi, 25)
b1_opt = optimal_bid(v1_opt, 1).numpy()
v2_opt = np.linspace(u_lo, u2_hi, 50)
b2_opt = optimal_bid(v2_opt, 2).numpy()
    
def plot_bid_function(fig, v1,b1, v2, b2, writer=None, e=None, plot_points=plot_points):
    """Plot both players' sampled bids against the analytical reference curves.

    Args:
        fig: kept for interface compatibility. The drawing always targets the
            current pyplot figure (``plt.gcf()``), not this argument.
        v1, b1: tensors of valuations and bids for player 1.
        v2, b2: tensors of valuations and bids for player 2.
        writer: optional object exposing ``add_figure``; when given, the
            figure is logged under ``'eval/bid_function'``.
        e: step passed to ``writer.add_figure``.
        plot_points: only the first ``plot_points`` entries of each tensor
            are drawn.
    """
    def _head(t):
        # Detach from autograd, move to host memory, keep the leading entries.
        return t.detach().cpu().numpy()[:plot_points]

    v1, b1, v2, b2 = _head(v1), _head(b1), _head(v2), _head(b2)

    fig = plt.gcf()
    plt.cla()
    plt.xlim(plot_xmin, plot_xmax)
    plt.ylim(plot_ymin, plot_ymax)
    # Dots: sampled bids; dashed lines: analytical reference (blue = player 1,
    # red = player 2).
    plt.plot(v1,b1, 'bo', v1_opt, b1_opt, 'b--', v2,b2, 'ro', v2_opt,b2_opt, 'r--')
    # `display` is only imported under an inline backend; guard the call so
    # the function does not raise NameError elsewhere.
    if is_ipython:
        #display.clear_output(wait=True)
        display.display(fig)
    if writer is not None:
        writer.add_figure('eval/bid_function', fig, e)

Initialize the model.
We'll ensure the initialization provides positive outputs on the domain we are interested in, as otherwise we can't learn.

In [None]:
# initialize models
# One strategy network per player. `requires_grad=False` is passed because the
# models are trained with the ES optimizer rather than by backpropagation
# (presumably -- confirm against bnelearn.optimizer.ES).
# `ensure_positive_output` receives each player's upper valuation bound;
# presumably the initialization is checked to yield a positive bid at that
# input -- confirm against NeuralNetStrategy.
model_1 = NeuralNetStrategy(input_length,
                            size_hidden_layer = size_hidden_layer,
                            requires_grad=False,
                            ensure_positive_output = torch.tensor([float(u1_hi)])
                            ).to(device)
   

model_2 = NeuralNetStrategy(input_length,
                            size_hidden_layer = size_hidden_layer,
                            requires_grad=False,
                            ensure_positive_output = torch.tensor([float(u2_hi)])
                            ).to(device)

# Wrap the strategies in bidders; player positions are 0-based here.
bidder_1 = strat_to_bidder(model_1, batch_size, u1_hi, player_position=0)
bidder_2 = strat_to_bidder(model_2, batch_size, u2_hi, player_position=1)

In [None]:
# Follow the device detected at startup instead of hard-coding cuda=True, so
# the mechanism stays consistent with the models on CPU-only machines.
mechanism = FirstPriceSealedBidAuction(cuda = cuda)
env = AuctionEnvironment(mechanism,
                  agents = [bidder_1, bidder_2],
                  batch_size = batch_size,
                  n_players =n_players,
                  strategy_to_bidder_closure = strat_to_bidder
                 )

# One ES optimizer per player. Both share the same hyperparameters and
# environment; only the model and the bidder kwargs differ.
# NOTE(review): strat_to_player_kwargs is presumably forwarded to
# strat_to_bidder when evaluating perturbed strategies -- confirm in ES.
optimizer_1 = ES(model=model_1, environment = env,
                 lr = learning_rate, momentum=momentum,
                 sigma=sigma, n_perturbations=n_perturbations, baseline=baseline,
                 strat_to_player_kwargs={"player_position":0, "u_hi": u1_hi}
                )
optimizer_2 = ES(model=model_2, environment = env,
                 lr = learning_rate, momentum=momentum,
                 sigma=sigma, n_perturbations=n_perturbations, baseline=baseline,
                 strat_to_player_kwargs={"player_position":1,"u_hi": u2_hi}
                )

## Training

In [None]:
# The timestamp below contains ':', so refuse to continue on Windows.
if os.name == 'nt':
    raise ValueError('The run_name may not contain : on Windows! (change datetime format to fix this)')

# Run name: timestamp, optionally followed by the user-supplied comment.
run_name = time.strftime('%Y-%m-%d %a %H:%M')
if run_comment:
    run_name = ' - '.join((run_name, str(run_comment)))

# <root>/notebooks/fpsb/<n>p/uniform/asymmetric/<run_name>
logdir = os.path.join(
    root_path,
    'notebooks',
    'fpsb',
    str(n_players) + 'p',
    'uniform',
    'asymmetric',
    run_name,
)


# Main training loop: each epoch performs one ES step per player and logs the
# resulting utilities; every `plot_epoch` epochs the current bid functions are
# sampled, plotted and logged.
with SummaryWriter(logdir, flush_secs=60) as writer:

    # create custom_scalar multilinechart with both player's utilities
    ## NOTE: this does not work in pytorch 1.1.0 due to a bug, fixed pytorch-nightly (and will be in 1.2.0 stable)
    # uncomment once 1.2 is available -- see issue #18 https://gitlab.lrz.de/heidekrueger/bnelearn/issues/18
    #writer.add_custom_scalars(torch.utils.tensorb['eval/p1_utility', 'eval/p2_utility'], title = 'Player Utilities')

    # Accumulated wall-clock time spent on plotting/logging checkpoints.
    overhead_mins = 0
    torch.cuda.empty_cache()
    log_hyperparams(writer, 0)
    fig = plt.figure()
    for e in range(epoch+1):
        # lr decay?
        if lr_decay and e % lr_decay_every == 0 and e > 0:
            learning_rate = learning_rate * lr_decay_factor
            log_hyperparams(writer, e)
            # Apply the decayed rate to BOTH optimizers; updating only the
            # first one would leave player 2 training at the old rate.
            for optimizer in (optimizer_1, optimizer_2):
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

        # always: do optimizer step
        # The value returned by step() is negated to obtain the utility.
        utility_1 = -optimizer_1.step()
        writer.add_scalar('eval/p1_utility', utility_1, e)
        utility_2 = -optimizer_2.step()
        writer.add_scalar('eval/p2_utility', utility_2, e)

        if e % plot_epoch == 0:
            start_time = timer()
            # plot current function output
            bidder_1.draw_valuations_()
            v_1 = bidder_1.valuations
            b_1 = bidder_1.get_action()
            bidder_2.draw_valuations_()
            v_2 = bidder_2.valuations
            b_2 = bidder_2.get_action()

            #share = b.mean()/optimal_bid(v).mean()
            #diff = (b-optimal_bid(v)).mean()
            #writer.add_scalar('eval/share', share, e)
            #writer.add_scalar('eval/diff', diff, e)
            writer.add_graph(model_1, bidder_1.valuations)

            print("Epoch {}: \tutilities: \t p1: {:.3f} \t p2: {:.3f}".format(e, utility_1, utility_2))
            plot_bid_function(fig, v_1, b_1, v_2, b_2, writer,e)

            elapsed = timer() - start_time
            overhead_mins = overhead_mins + elapsed/60
            writer.add_scalar('debug/overhead_mins', overhead_mins, e)

            print("Logging checkpoint took {:.2f}s.".format(elapsed))
            