# Two-Player First-Price Sealed-Bid (FPSB) Auction with Symmetric Valuation Distributions

## Imports

In [None]:
import os
import sys
# Put the parent of the current working directory on the import path so that
# project-local packages can be imported from this notebook.
root_path = os.path.abspath(os.pardir)
already_importable = root_path in sys.path
if not already_importable:
    sys.path.append(root_path)

In [None]:
import torch
import torch.nn as nn
import torch.nn.utils as ut
from torch.optim.optimizer import Optimizer, required

In [None]:
from bnelearn.strategy import NeuralNetStrategy, TruthfulStrategy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import FirstPriceSealedBidAuction, VickreyAuction
from bnelearn.optimizer import ES
from bnelearn.environment import AuctionEnvironment

In [None]:
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt

# Set up matplotlib: treat the session as IPython/Jupyter when the name of the
# active backend contains 'inline'.
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    # `display` is bound only in this case; code that uses it must check
    # `is_ipython` first.
    from IPython import display
# Interactive mode is currently disabled:
#plt.ion()

In [None]:
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
if cuda:
    device = 'cuda'
else:
    device = 'cpu'
print(device)

## Settings

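The following cell fully defines an experiment: the run name and log directory, the auction setup, the strategy network size, and the optimization, plotting, and logging parameters.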

In [None]:
# Log directory handed to the SummaryWriter in the training cell:
# <root_path>/notebooks/<run_dir>/<run_name>
run_dir = 'fpsb'
run_name = 'test tb-nightly2'
logdir = os.path.join(root_path, 'notebooks', run_dir , run_name)

## Experiment setup
n_players = 2  # passed to the AuctionEnvironment and used by optimal_bid
n_items = 1  # also the row width of `sample_points`
# valuation distribution: lower/upper bound passed to Bidder.uniform
u_lo =0
u_hi =10

def strat_to_bidder(strategy, batch_size):
    """Build a single bidder that plays `strategy`.

    The bidder is created with `Bidder.uniform` using the module-level
    bounds `u_lo` and `u_hi` (presumably a uniform valuation distribution on
    that interval -- confirm against `bnelearn.bidder`).

    Args:
        strategy: strategy object the bidder should use.
        batch_size: batch size forwarded to the bidder.

    Returns:
        Whatever `Bidder.uniform` returns for these arguments.
    """
    bidder_options = {'batch_size': batch_size, 'n_players': 1}
    return Bidder.uniform(u_lo, u_hi, strategy, **bidder_options)

#def strat_to_bidder(strategy, batch_size):
#    return Bidder.normal(10.0, 5.0, strategy, batch_size = batch_size, n_players=1)

## Environment settings
# training batch size (passed to the environment and used when sampling a
# bidder for evaluation in the training loop)
batch_size = 2**14
input_length = 1  # first positional argument of NeuralNetStrategy

# strategy model architecture
size_hidden_layer = 10

# optimization params
epoch = 5000  # the training loop runs epochs 0..epoch inclusive
learning_rate = 1e-1  # initial `lr` of the ES optimizer; decayed if lr_decay
lr_decay = True  # enable step-wise learning-rate decay
lr_decay_every = 1000  # decay interval, in epochs
lr_decay_factor = 0.6  # multiplier applied to learning_rate at each decay
baseline = True  # forwarded to ES(baseline=...)
momentum = 0.5  # forwarded to ES(momentum=...)

sigma = .02 # ES noise parameter, forwarded to ES(sigma=...)
n_perturbations = 32  # forwarded to ES(n_perturbations=...)

# plot and log training options
plot_epoch = 50  # evaluate, print, and plot every `plot_epoch` epochs
plot_points = min(250, batch_size)  # cap on the number of points per plot
# Evenly spaced valuations from u_lo to u_hi, one row per point with `n_items`
# columns. The tensor is placed on the detected `device` instead of calling
# `.cuda()` unconditionally, which raises on machines without a CUDA GPU.
sample_points = (
    torch.from_numpy(np.linspace(u_lo, u_hi, u_hi + 1))
    .float()
    .view(-1, n_items)
    .to(device)
)

# tensorboard writer settings

## Setting up the Environment

In [None]:
# for evaluation
def optimal_bid(valuation):
    """Return the reference bid `valuation * (n_players - 1) / n_players`.

    `n_players` is read from module scope at call time. `valuation` may be
    anything that supports `*` and `/` with numbers (scalar, array, tensor).
    """
    n_opponents = n_players - 1
    scaled = valuation * n_opponents
    return scaled / n_players

def log_hyperparams(writer, e):
    """Log the current hyperparameters as scalars under 'hyperparams/'.

    The values are read from module scope at call time, so a changed
    `learning_rate` is picked up on later calls. Every scalar, including
    `size_hidden_layer`, is recorded at step `e` (previously that one was
    always written to step 0).

    Args:
        writer: object exposing `add_scalar(tag, value, step)`, e.g. a
            TensorBoard `SummaryWriter`.
        e: step (epoch) at which the values are recorded.
    """
    hyperparams = {
        'batch_size': batch_size,
        'size_hidden_layer': size_hidden_layer,
        'learning_rate': learning_rate,
        'momentum': momentum,
        'sigma': sigma,
        'n_perturbations': n_perturbations,
    }
    for name, value in hyperparams.items():
        writer.add_scalar('hyperparams/' + name, value, e)

def plot_bid_function(fig, v,b, writer=None, e=None, plot_points=100):
    """Plot sampled bids against `optimal_bid` and optionally log the figure.

    Args:
        fig: kept for interface compatibility; the current pyplot figure is
            used instead (see note below).
        v: tensor of valuations.
        b: tensor of bids belonging to `v`.
        writer: optional object exposing `add_figure(tag, figure, step)`,
            e.g. a TensorBoard `SummaryWriter`. Nothing is logged when None.
        e: step (epoch) under which the figure is logged.
        plot_points: only the first `plot_points` entries are plotted.
    """
    # subsample points and move them to numpy for plotting
    v = v.detach().cpu().numpy()[:plot_points]
    b = b.detach().cpu().numpy()[:plot_points]
    optimal = optimal_bid(v)

    # NOTE(review): the passed-in `fig` is deliberately not used. Presumably
    # this is because SummaryWriter.add_figure closes the figure by default,
    # so a handle from an earlier call would be stale -- confirm before
    # changing.
    fig = plt.gcf()
    plt.cla()
    plt.plot(v, b, 'o', v, optimal, 'r-')

    # `display` is only imported when an inline backend was detected, so it
    # must not be touched otherwise (it would raise NameError).
    if is_ipython:
        display.display(fig)

    if writer is not None:
        writer.add_figure('eval/bid_function', fig, e)

In [None]:
# Strategy network that maps valuations to bids. It is built with
# requires_grad=False and then moved to the selected device.
model = NeuralNetStrategy(
    input_length,
    size_hidden_layer=size_hidden_layer,
    requires_grad=False,
)
model = model.to(device)

In [None]:
# Use the detected `cuda` flag instead of a hard-coded True so the mechanism
# agrees with the device chosen for the model and also works on CPU-only
# machines.
mechanism = FirstPriceSealedBidAuction(cuda = cuda)
env = AuctionEnvironment(mechanism,
                  agents = [], #dynamically built
                  max_env_size = 1, #
                  batch_size = batch_size,
                  n_players =n_players,
                  strategy_to_bidder_closure = strat_to_bidder
                 )
# ES optimizer over `model`, evaluated in `env`; all hyperparameters come
# from the settings cell.
optimizer = ES(model=model, environment = env,
               lr = learning_rate, momentum=momentum,
               sigma=sigma, n_perturbations=n_perturbations,
               baseline=baseline)

## Training

In [None]:
# Training loop: one ES step per epoch, with periodic evaluation and logging.
# NOTE: `learning_rate` is a module-level variable that is decayed in place,
# so re-running this cell without re-running the settings cell starts from
# the already-decayed value.
with SummaryWriter(logdir, flush_secs=30) as writer:
    torch.cuda.empty_cache()
    log_hyperparams(writer, 0)
    fig = plt.figure()
    for e in range(epoch+1):
        # lr decay?
        if lr_decay and e % lr_decay_every == 0 and e > 0:
            learning_rate = learning_rate * lr_decay_factor
            log_hyperparams(writer, e)
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        # always: do optimizer step (the step's return value is negated to
        # obtain the logged utility)
        utility = -optimizer.step()
        writer.add_scalar('eval/utility', utility, e)

        if e % plot_epoch == 0:
            # evaluate the current strategy on freshly drawn valuations
            bidder = strat_to_bidder(model, batch_size)
            bidder.draw_valuations_()
            v = bidder.valuations
            b = bidder.get_action()
            optimal = optimal_bid(v)
            share = b.mean()/optimal.mean()
            diff = (b-optimal).mean()
            writer.add_scalar('eval/share', share, e)
            writer.add_scalar('eval/diff', diff, e)

            print("Epoch {}: \ttotal share: {:2f}, diff: {:2f}, \tutility: {:2f}".format(e, share, diff, utility))
            # honor the `plot_points` setting instead of the function default
            plot_bid_function(fig, v, b, writer, e, plot_points=plot_points)

            # The model object is not replaced during training, so the graph
            # is logged once rather than re-traced at every evaluation.
            if e == 0:
                writer.add_graph(model, bidder.valuations)