# n-Player Vickrey Auction with Uniform Symmetric Valuation Distributions

## Imports

In [None]:
import os, sys, time, warnings
root_path = os.path.abspath(os.path.join('..'))
if root_path not in sys.path:
    sys.path.append(root_path)
from timeit import default_timer as timer

import torch
import torch.nn as nn
import torch.nn.utils as ut
from torch.optim.optimizer import Optimizer, required

from bnelearn.strategy import NeuralNetStrategy, ClosureStragegy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import FirstPriceSealedBidAuction, VickreyAuction
from bnelearn.optimizer import ES
from bnelearn.environment import AuctionEnvironment

from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt

# set up matplotlib
# `display` is only available when figures are rendered inline (notebook backend).
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    from IPython import display
#
#plt.ion()

# Run on the GPU whenever one is available, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

# Use specific cuda gpu if desired (i.e. for running multiple experiments in parallel)
# A falsy value (0 / None) keeps torch's default device.
specific_gpu = 1
if cuda and specific_gpu:
    if specific_gpu < torch.cuda.device_count():
        torch.cuda.set_device(specific_gpu)
    else:
        # Selecting a non-existent device would abort the whole notebook
        # (e.g. on a single-GPU machine); keep the default device instead.
        warnings.warn(
            'Requested cuda device {} but only {} device(s) are available; '
            'keeping the default device.'.format(specific_gpu, torch.cuda.device_count())
        )

print(device)
if cuda: print(torch.cuda.current_device())

## Settings

The following cell fully defines an experiment.
The parameter sets listed below are known to work well:
```
2p:  ?


3p: (run 2019-07-08 Mon 10:37)
    batch_size = 2**17, size_hidden_layer = 10, learning_rate = 3.5e-1, lr_decay_every = 1000, lr_decay_factor = 0.75, momentum = 0.7, sigma = .02, n_perturbations = 64,
    eval_batch_size 2**25
    (1 hidden layer, tanh)
    
10p: (run 2019-07-08 Mon 13:57 shows settings for 3p above are not aggressive enough --> need higher step sizes.)
    
```

In [None]:
# --- run bookkeeping ---
run_comment = ''  # appended to the run name used as the log title
resume = False

# --- experiment setup ---
n_players = 10
n_items = 1

# bounds of the uniform valuation distribution
u_lo = 0
u_hi = 10

# --- environment settings ---
batch_size = 2 ** 17  # training batch size
input_length = 1

eval_batch_size = 2 ** 25  # batch size used for evaluation

# --- strategy model architecture ---
size_hidden_layer = 10

# --- optimization params ---
epoch = 15000
learning_rate = 3.5e-1
lr_decay = True
lr_decay_every = 1000  # decay interval (in epochs)
lr_decay_factor = 0.75  # multiplier applied to the learning rate at each decay
momentum = 0.7

sigma = 0.02  # ES noise parameter
n_perturbations = 64

# --- plot and log training options ---
plot_epoch = 100  # plot/log a checkpoint every `plot_epoch` epochs
plot_points = min(100, batch_size)  # number of points shown in the bid plot

# axis limits of the bid-function plot
plot_xmin = u_lo
plot_xmax = u_hi
plot_ymin = 0
plot_ymax = 10


def strat_to_bidder(strategy, batch_size=batch_size, player_position=None, cache_actions=False):
    """Wrap `strategy` in a bidder with uniform valuations on [u_lo, u_hi].

    Args:
        strategy: the strategy object handed to `Bidder.uniform`.
        batch_size: batch size of the bidder; defaults to the training
            `batch_size` as it was when this function was defined.
        player_position: forwarded unchanged to `Bidder.uniform`.
        cache_actions: forwarded unchanged to `Bidder.uniform`.

    Returns:
        Whatever `Bidder.uniform` returns for these arguments.
    """
    bidder_options = dict(
        batch_size=batch_size,
        player_position=player_position,
        cache_actions=cache_actions,
    )
    return Bidder.uniform(u_lo, u_hi, strategy, **bidder_options)

## Setting up the Environment

When initializing the model, we make sure that the freshly initialized network produces a positive output (checked at the upper bound `u_hi` of the valuation range); otherwise the strategy cannot learn. The model is re-initialized until this holds.

In [None]:
# Build the mechanism on the same device class that was detected above,
# instead of unconditionally requesting cuda.
mechanism = VickreyAuction(cuda=cuda)

# Re-draw the initial network weights until the model's output at the highest
# valuation `u_hi` is strictly positive.
output_is_positive = False
while not output_is_positive:
    model = NeuralNetStrategy(input_length,
                              size_hidden_layer = size_hidden_layer,
                              requires_grad=False
                             ).to(device)

    if model(torch.tensor([float(u_hi)], device=device)) > 0:
        output_is_positive = True


# Learning environment: starts without agents (built dynamically) and turns
# strategies into bidders through `strat_to_bidder`.
env = AuctionEnvironment(mechanism,
                  agents = [], #dynamically built
                  max_env_size = n_players - 1, #
                  batch_size = batch_size,
                  n_players =n_players,
                  strategy_to_bidder_closure = strat_to_bidder
                 )
# ES optimizer acting on `model` in `env`, configured from the settings cell.
optimizer = ES(model=model, environment = env,
               lr = learning_rate, momentum=momentum,
               sigma=sigma, n_perturbations=n_perturbations)

## Setting up Evaluation and logging

In [None]:
# for evaluation
def optimal_bid(valuation):
    """Return the reference ("optimal") bid for `valuation`.

    The bid equals the valuation itself, so the argument is handed back
    unchanged (same object, whatever its type).
    """
    bid = valuation
    return bid

# Wrap the reference bid function so it can be used like any other strategy.
bneStrategy = ClosureStragegy(optimal_bid)

# Environment filled with optimal players, used for logging.
# It runs on the larger evaluation batch size when calculating the optimum.
bne_agents = [
    strat_to_bidder(bneStrategy,
                    player_position=position,
                    batch_size=eval_batch_size,
                    cache_actions=True)
    for position in range(n_players)
]
bne_env = AuctionEnvironment(
    mechanism,
    agents=bne_agents,
    batch_size=eval_batch_size,
    n_players=n_players,
    strategy_to_bidder_closure=strat_to_bidder,
)

# Baseline numbers before training: reward of an optimal player, of the
# untrained model against optimal players, and of the model in the learning env.
bne_utility = bne_env.get_reward(bne_env.agents[0], draw_valuations=True)
print('Utility in BNE: \t\t{:.5f}'.format(bne_utility))

utility_vs_bne = bne_env.get_reward(strat_to_bidder(model, batch_size=eval_batch_size))
print('Model utility vs BNE: \t\t{:.5f}'.format(utility_vs_bne))

utility_learning_env = env.get_reward(strat_to_bidder(model), draw_valuations=True)
print('Model utility in learning env:\t{:.5f}'.format(utility_learning_env))

# Reference curve (valuation -> optimal bid) drawn into every bid-function plot.
v_opt = np.linspace(plot_xmin, plot_xmax, 100)
b_opt = optimal_bid(v_opt)
    
def plot_bid_function(fig, v,b, writer=None, e=None, plot_points=plot_points):
    """Plot sampled (valuation, bid) pairs against the reference bid curve.

    Args:
        fig: kept for interface compatibility; the function draws on the
            current pyplot figure (`plt.gcf()`) rather than on this argument.
        v: tensor of valuations; only the first `plot_points` entries are shown.
        b: tensor of the corresponding bids; subsampled the same way.
        writer: optional writer; if given, the figure is logged under
            'eval/bid_function'.
        e: step (epoch) passed to the writer together with the figure.
        plot_points: number of leading samples to plot.
    """

    # subsample points and plot
    v = v.detach().cpu().numpy()[:plot_points]
    b = b.detach().cpu().numpy()[:plot_points]

    # NOTE(review): SummaryWriter.add_figure closes the figure by default,
    # which is presumably why the current figure is re-fetched on every call
    # instead of reusing `fig` — confirm before changing.
    fig = plt.gcf()
    plt.cla()
    plt.xlim(plot_xmin, plot_xmax)
    plt.ylim(plot_ymin, plot_ymax)
    plt.plot(v,b, 'o', v_opt, b_opt, 'r--')
    # `display` is only imported when the inline backend is active; calling it
    # unconditionally raises a NameError with any other backend.
    if is_ipython:
        #display.clear_output(wait=True)
        display.display(fig)
    if writer is not None:
        writer.add_figure('eval/bid_function', fig, e)

## Set up training loop

In [None]:
def log_hyperparams(writer, e):
    """Write the current hyperparameter values as scalars at step `e`.

    Args:
        writer: object exposing `add_scalar(tag, value, step)`.
        e: step (epoch) under which the values are recorded.
    """
    tagged_values = (
        ('hyperparams/batch_size', batch_size),
        ('hyperparams/size_hidden_layer', size_hidden_layer),
        ('hyperparams/learning_rate', learning_rate),
        ('hyperparams/momentum', momentum),
        ('hyperparams/sigma', sigma),
        ('hyperparams/n_perturbations', n_perturbations),
        ('debug/eval_batch_size', eval_batch_size),
    )
    for tag, value in tagged_values:
        writer.add_scalar(tag, value, e)

def training_loop(e, writer):
    """Run one training epoch and log its metrics.

    Performs the (optional) learning-rate decay, one optimizer step and the
    evaluation against the BNE environment. Every `plot_epoch` epochs it also
    logs the model graph, prints a summary line and plots the bid function.

    Args:
        e: index of the current epoch; used as step for all log entries.
        writer: writer receiving scalars, the graph and the figure.

    Mutates the module-level `learning_rate` and `overhead_mins`.
    """
    global overhead_mins, learning_rate

    # Step-wise learning-rate decay, skipped at epoch 0.
    decay_due = lr_decay and e % lr_decay_every == 0 and e > 0
    if decay_due:
        learning_rate *= lr_decay_factor
        log_hyperparams(writer, e)
        for group in optimizer.param_groups:
            group['lr'] = learning_rate

    # always: do optimizer step (the utility is the negated return value of step())
    utility = -optimizer.step()
    writer.add_scalar('eval/utility', utility, e)

    # Evaluate the current model inside the BNE environment.
    model_bidder = strat_to_bidder(model, batch_size = eval_batch_size)
    reward_vs_bne = bne_env.get_reward(model_bidder, draw_valuations=False)
    eps_rel = 1 - reward_vs_bne / bne_utility
    eps_abs = bne_utility - reward_vs_bne
    writer.add_scalar('eval/utility_vs_bne', reward_vs_bne, e)
    writer.add_scalar('eval/epsilon_relative', eps_rel, e)
    # debug because only interesting to see if numeric precision is a problem,
    # otherwise same as relative but scaled.
    writer.add_scalar('debug/epsilon_absolute', eps_abs, e)

    # Everything below is checkpoint logging, done only every `plot_epoch` epochs.
    if e % plot_epoch != 0:
        return

    checkpoint_start = timer()

    # plot current function output
    bidder = strat_to_bidder(model, batch_size)
    bidder.draw_valuations_()
    v = bidder.valuations
    b = bidder.get_action()
    #share = b.mean()/optimal_bid(v).mean()
    #diff = (b-optimal_bid(v)).mean()
    #writer.add_scalar('eval/share', share, e)
    #writer.add_scalar('eval/diff', diff, e)
    writer.add_graph(model, bidder.valuations)

    print("Epoch {}: \tcurrent utility: {:.3f},\t utility vs BNE: {:.3f}, \tepsilon (abs/rel): ({:.5f}, {:.5f})".format(e, utility, reward_vs_bne, eps_abs, eps_rel))
    plot_bid_function(fig, v, b, writer, e)

    # Keep track of the accumulated time spent on logging (in minutes).
    elapsed = timer() - checkpoint_start
    overhead_mins = overhead_mins + elapsed/60
    writer.add_scalar('debug/overhead_mins', overhead_mins, e)

    print("Logging checkpoint took {:.2f}s.".format(elapsed))

## Training

In [None]:
#if not 'resume' in locals() or not resume:
#    e = 0
#    overhead_mins = 0

# setup logger
run_name = time.strftime('%Y-%m-%d %a %H:%M')
if run_comment:
    run_name = run_name + ' - ' + str(run_comment)
if os.name == 'nt':
    # ':' is not allowed in Windows file names; use '.' there instead of
    # refusing to run. Paths on other platforms are unchanged.
    run_name = run_name.replace(':', '.')
logdir = os.path.join(root_path, 'notebooks', 'vickrey', str(n_players) + 'p', 'uniform', 'symmetric', run_name)

# Start from epoch 0 with no accumulated logging overhead.
e = 0
overhead_mins = 0

print(logdir)
# Module-level figure that `training_loop` hands to `plot_bid_function`.
fig = plt.figure()
torch.cuda.empty_cache()

with SummaryWriter(logdir, flush_secs=30) as writer:

    torch.cuda.empty_cache()
    log_hyperparams(writer, 0)

    # Runs epochs e .. e+epoch inclusive; the range is evaluated once, so
    # reusing `e` as the loop variable is safe.
    for e in range(e, e + epoch + 1):
        training_loop(e, writer)
            