# This notebook is deprecated

## Its functionality has been implemented in run_single_item_auction.py
## Some parts of the implementation are missing from this notebook



In [None]:
import os, sys, time, warnings
root_path = os.path.abspath(os.path.join('..'))
if root_path not in sys.path:
    sys.path.append(root_path)
from timeit import default_timer as timer

import numpy as np
import scipy.integrate as integrate
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.utils as ut
from torch.optim.optimizer import Optimizer, required
from torch.utils.tensorboard import SummaryWriter

from bnelearn.strategy import NeuralNetStrategy, ClosureStrategy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import FirstPriceSealedBidAuction, VickreyAuction
from bnelearn.learner import ESPGLearner
from bnelearn.environment import AuctionEnvironment
from bnelearn.experiment import Experiment

In [None]:
## Settings
# All values in this cell are plain module-level configuration that the
# following cells read by name.

# device and seed
cuda = True  # request GPU execution; reset to False further down if CUDA is unavailable
specific_gpu = 3  # device index handed to torch.cuda.set_device when cuda is enabled
seed = None  # None skips the seeding step further down

# run settings
epochs = 1000  # passed to exp.run
run_comment = '' # used in log title in addition to datetime

# Logging and plotting
logging_options = dict(
    log_root = os.path.abspath('.'),    
    save_figure_to_disk_png = False,
    save_figure_to_disk_svg = False, # for publishing: better quality but a pain to work with
    plot_epoch = 100,  # the training loop plots whenever the epoch number is divisible by this
    show_plot_inline = True
)

# Experiment setting parameters
n_players = 2
auction_mechanism = 'first_price' # one of 'first_price', 'second_price'
valuation_prior = 'uniform' # for now, one of 'uniform' / 'normal', specific params defined in script
risk_alpha = 1.0 # optimal strategy for alpha != 1.0 only known for uniform first price

# Learning
model_sharing = True
pretrain_iters = 10  # iterations passed to model.pretrain; 0 skips pretraining
batch_size = 2**18
## ES
learner_hyperparams = {
    'population_size': 64,
    'sigma': 1.,
    'scale_sigma_by_model_size': True
}
## Optimizer
# Reference defaults for the optimizer hyperparameters.
# SGD standards:
#   'lr': 1e-3,
#   'momentum': 0.7
# Adam standards:
#   'lr': 1e-3
#   'betas': (0.9, 0.999),  # coefficients for running avgs of grad and square grad
#   'eps': 1e-8,            # added to denominator for numeric stability
#   'weight_decay': 0,      # L2-decay
#   'amsgrad': False        # whether to use amsgrad-variant
optimizer_type = torch.optim.Adam
optimizer_hyperparams ={    
    'lr': 3e-3
}

# Evaluation
eval_batch_size = 2**24  # batch size of the bidder evaluated against the BNE environment in the training loop
cache_eval_actions = True
# Passed as `parallel` to ClosureStrategy; non-zero only when the prior is not
# uniform and the mechanism is not second price.
n_processes_optimal_strategy = 44 if valuation_prior != 'uniform' and auction_mechanism != 'second_price' else 0

# in single item auctions there's only a single input
### strategy model architecture
input_length = 1
hidden_nodes = [5, 5]
hidden_activations = [nn.SELU(), nn.SELU()]

In [None]:
# set up matplotlib
# `display` is only imported when the active backend renders inline.
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    from IPython import display
plt.rcParams['figure.figsize'] = [8, 5]

### set device settings
if cuda and not torch.cuda.is_available():
    warnings.warn('Cuda not available. Falling back to CPU!')
    cuda = False
device = 'cuda' if cuda else 'cpu'

# Compare against None (not truthiness) so that an explicit index 0 is honoured.
if cuda and specific_gpu is not None:
    torch.cuda.set_device(specific_gpu)

### Set up random seeds
# Seed both the CPU generator and every CUDA generator from the configured `seed`.
if seed is not None:
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

### Game setup
# Map the configured mechanism name onto its bnelearn implementation.
if auction_mechanism == 'first_price':
    mechanism = FirstPriceSealedBidAuction(cuda = cuda)
elif auction_mechanism == 'second_price':
    mechanism = VickreyAuction(cuda = cuda)
else:
    # Fail here with a clear message instead of a NameError on `mechanism` later.
    raise ValueError("unknown auction mechanism.")
    
### Set up experiment domain and bidders
# Each supported prior defines: the prior distribution, the point at which the
# network output must be positive, a `strat_to_bidder` factory and plot limits.
if valuation_prior == 'normal':
    if risk_alpha != 1.0:
        warnings.warn('No analytical setting with risk_alpha != 1 for normal priors. Did you mean to set risk_alpha=1?')
    valuation_mean, valuation_std = 10.0, 5.0
    common_prior = torch.distributions.normal.Normal(loc = valuation_mean, scale = valuation_std)
    positive_output_point = valuation_mean

    def strat_to_bidder(strategy, batch_size=batch_size, player_position=None, cache_actions=False):
        """Wrap `strategy` in a bidder with normally distributed valuations."""
        return Bidder.normal(
            valuation_mean, valuation_std, strategy,
            batch_size = batch_size,
            player_position = player_position,
            cache_actions = cache_actions)

    # x-axis: three standard deviations around the mean, clipped at zero
    plot_xmin = int(max(0, valuation_mean - 3*valuation_std))
    plot_xmax = int(valuation_mean + 3*valuation_std)
    plot_ymin = 0
    if auction_mechanism == 'first_price':
        plot_ymax = 20
    else:
        plot_ymax = plot_xmax

elif valuation_prior == 'uniform':
    u_lo, u_hi = 0, 10
    common_prior = torch.distributions.uniform.Uniform(low = u_lo, high = u_hi)
    positive_output_point = u_hi

    def strat_to_bidder(strategy, batch_size=batch_size, player_position=None, cache_actions=False):
        """Wrap `strategy` in a bidder with uniformly distributed valuations."""
        return Bidder.uniform(
            u_lo, u_hi, strategy,
            batch_size = batch_size,
            player_position = player_position,
            cache_actions = cache_actions,
            risk_alpha = risk_alpha)

    # x-axis spans the support of the prior
    plot_xmin, plot_xmax = u_lo, u_hi
    plot_ymin, plot_ymax = 0, 10

else:
    raise ValueError('Only normal and uniform priors supported by this script.')

def setup_bidders(self, model_sharing = True):
    """Create the shared strategy model and one bidder per player.

    Sets ``self.model`` (a ``NeuralNetStrategy`` moved to ``device``) and
    ``self.bidders`` (``n_players`` bidders that all use that model). When
    ``pretrain_iters`` is positive, the model is then pretrained on the
    valuations of the first bidder.

    Args:
        model_sharing: must be truthy; one model per player is not implemented.

    Raises:
        NotImplementedError: if ``model_sharing`` is falsy.
    """
    if not model_sharing:
        raise NotImplementedError("only model sharing has been implemented.")

    print('Model Sharing...')
    # `positive_output_point` is defined for every supported prior, whereas
    # `u_hi` only exists when the prior is uniform (where both are equal).
    self.model = NeuralNetStrategy(
        input_length, hidden_nodes = hidden_nodes, hidden_activations = hidden_activations,
        ensure_positive_output = torch.tensor([float(positive_output_point)])
        ).to(device)

    self.bidders = [strat_to_bidder(self.model, batch_size, player_position)
                    for player_position in range(n_players)]

    if pretrain_iters > 0:
        print('pretraining')
        self.model.pretrain(self.bidders[0].valuations, pretrain_iters)
        
### Setup Learning Environment and Learner(s)
def setup_learning_environment(self):
    """Create the auction environment used for learning and store it as ``self.env``."""
    self.env = AuctionEnvironment(
        self.mechanism,
        agents = self.bidders,
        batch_size = batch_size,
        n_players = n_players,
        strategy_to_player_closure = strat_to_bidder)
def setup_learner(self):
    """Create the ES policy-gradient learner for the model and store it as ``self.learner``."""
    self.learner = ESPGLearner(
        model = self.model,
        environment = self.env,
        hyperparams = learner_hyperparams,
        optimizer_type = optimizer_type,
        optimizer_hyperparams = optimizer_hyperparams)
    
    
### Setup Evaluation
# `optimal_bid` maps valuations to the bids used as the BNE benchmark.
if auction_mechanism == 'second_price':
    def optimal_bid(valuation):
        """Return the valuation unchanged (the bid equals the valuation)."""
        return valuation
elif auction_mechanism == 'first_price':
    if valuation_prior == 'uniform':
        def optimal_bid(valuation):
            """Return ``valuation * (n - 1) / (n - 1 + risk_alpha)`` with ``n = n_players``."""
            return valuation * (n_players - 1) / (n_players - 1 + risk_alpha)
    elif valuation_prior == 'normal':
        def optimal_bid(valuation: 'torch.Tensor | np.ndarray | float') -> torch.Tensor:
            """Return ``v - (integral_0^v F^(n-1)(x) dx) / F^(n-1)(v)`` for each valuation ``v``.

            ``F`` is the cdf of ``common_prior`` and ``n`` is ``n_players``. The
            integral is evaluated numerically, once per element.

            Args:
                valuation: tensor, array or scalar of valuations. Non-tensor
                    input is converted to a float tensor; the argument itself
                    is never modified.

            Returns:
                Tensor of bids with the shape of ``valuation`` (shape ``(1,)``
                for scalar / 0-d input).
            """
            # For float and numpy --> convert to tensor
            if not isinstance(valuation, torch.Tensor):
                valuation = torch.tensor(valuation, dtype = torch.float)
            # For float / 0d tensors --> add a dimension. Done out-of-place so
            # that a 0-d tensor owned by the caller keeps its shape.
            if valuation.dim() == 0:
                valuation = valuation.unsqueeze(0)

            def Fpowered(v):
                """F^(n-1); accepts tensors as well as the plain floats passed by quad."""
                return torch.pow(common_prior.cdf(torch.as_tensor(v)), n_players - 1)

            # One numerical integral per element; plain Python floats are used
            # as upper limits so that tensors of any shape are supported.
            upper_limits = valuation.flatten().tolist()
            numerator = torch.tensor(
                    [integrate.quad(Fpowered, 0, v)[0] for v in upper_limits],
                    device = valuation.device
                ).reshape(valuation.shape)
            return valuation - numerator / Fpowered(valuation)
else:
    raise ValueError("unknown auction mechanism.")

bneStrategy = ClosureStrategy(optimal_bid, parallel=n_processes_optimal_strategy)

# Environment in which every player bids according to `bneStrategy`. It is the
# `global_bne_env` read below for the sampled utility and by
# `setup_eval_environment`; it uses the (larger) evaluation batch size.
global_bne_env = AuctionEnvironment(
    mechanism,
    agents = [strat_to_bidder(bneStrategy,
                              batch_size = eval_batch_size,
                              player_position = position,
                              cache_actions = cache_eval_actions)
              for position in range(n_players)],
    batch_size = eval_batch_size,
    n_players = n_players,
    strategy_to_player_closure = strat_to_bidder)

# Analytical utility in the BNE. scipy passes plain floats to the integrands,
# so they are wrapped in tensors before being handed to the distribution.
if auction_mechanism == 'first_price':
    if valuation_prior == 'uniform':
        global_bne_utility = risk_alpha/(n_players - 1 + risk_alpha)*(u_hi - u_lo)/(n_players+1)
    elif valuation_prior == 'normal':
        if risk_alpha != 1.0:
            warnings.warn('Risk aversion ignored in optimal bid for Gaussian priors!')
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            # don't print scipy accuracy warnings
            global_bne_utility, analytical_error = integrate.dblquad(
                lambda x, v: (common_prior.cdf(torch.tensor(x))**(n_players - 1)
                              * common_prior.log_prob(torch.tensor(v)).exp()),
                0, float('inf'), # outer boundaries
                lambda v: 0, lambda v: v) # inner boundaries
        # Checked outside the `ignore` filter; inside it this warning would be swallowed.
        if analytical_error > 1e-7:
            warnings.warn('Error in optimal utility might not be negligible')

    # Sampled counterpart of the analytical value. Computed for both priors
    # because it is printed below; warnings raised while sampling stay muted.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        global_bne_utility_sampled = global_bne_env.get_reward(global_bne_env.agents[0], draw_valuations=True)
    print("Utility in BNE (analytical): \t{:.5f}".format(global_bne_utility))
    print('Utility in BNE (sampled): \t{:.5f}'.format(global_bne_utility_sampled))
elif auction_mechanism == 'second_price':
    F = lambda x: common_prior.cdf(torch.tensor(x))
    f = lambda x: common_prior.log_prob(torch.tensor(x)).exp()
    # n * F(x)^(n-1) * f(x)
    f1n = lambda x,n: n * F(x)**(n - 1) * f(x)

    global_bne_utility, error_estimate = integrate.dblquad(
        lambda x,v: (v-x) * f1n(x, n_players-1) * f(v) ,
        0, float('inf'), # outer boundaries
        lambda v: 0, lambda v: v) # inner boundaries

    if error_estimate > 1e-6:
        warnings.warn('Error bound on analytical bne utility is not negligible!')
else:
    raise ValueError("Invalid auction mechanism.")

def setup_eval_environment(self):
    """Attach the BNE benchmark to the experiment.

    Copies the module-level ``global_bne_env`` and ``global_bne_utility`` onto
    the instance as ``bne_env`` and ``bne_utility``.
    """
    # environment of optimal players, used for logging
    self.bne_env = global_bne_env
    # utility obtained in the BNE
    self.bne_utility = global_bne_utility

    
### Setup Plotting
# number of sampled (valuation, bid) points drawn per plot
plot_points = min(batch_size, 100)
# reference curve: `optimal_bid` evaluated on an evenly spaced valuation grid
v_opt = np.linspace(start = plot_xmin, stop = plot_xmax, num = 100)
b_opt = optimal_bid(v_opt)
def plot_bid_function(self, fig, plot_data, writer=None, e=None):
    """Redraw the sampled bids together with the reference bid curve.

    Args:
        fig: not used; the current pyplot figure is drawn on instead.
        plot_data: pair ``(valuations, bids)`` of tensors; only the first
            ``plot_points`` entries of each are drawn.
        writer: forwarded to ``self._process_figure``.
        e: iteration number shown in the plot and forwarded to
            ``self._process_figure``.
    """
    valuations, bids = plot_data
    valuations = valuations.detach().cpu().numpy()[:plot_points]
    bids = bids.detach().cpu().numpy()[:plot_points]

    # draw on whatever figure pyplot considers current, starting from clean axes
    fig = plt.gcf()
    plt.cla()

    plt.xlim(plot_xmin, plot_xmax)
    plt.ylim(plot_ymin, plot_ymax)
    plt.xlabel('valuation')
    plt.ylabel('bid')

    # the iteration label sits 5% inside the top-left corner of the axes
    label_x = plot_xmin + 0.05*(plot_xmax - plot_xmin)
    label_y = plot_ymax - 0.05*(plot_ymax - plot_ymin)
    plt.text(label_x, label_y, 'iteration {}'.format(e))

    # sampled bids as dots, reference curve as dashed red line
    plt.plot(valuations, bids, 'o', v_opt, b_opt, 'r--')

    # show and/or log
    self._process_figure(fig, writer, e)
        
## Setup logging
def log_once(self, writer, e):
    """Log what only needs to be recorded a single time, on initialization.

    Currently this is just the model graph, traced with the valuations of the
    first agent in the learning environment as example input.
    """
    # Disabled for now:
    #   writer.add_scalar('debug/total_model_parameters', n_parameters, e)
    #   writer.add_text('hyperparams/neural_net_spec', str(self.model), 0)
    #   writer.add_scalar('debug/eval_batch_size', eval_batch_size, e)
    network = self.model
    example_input = self.env.agents[0].valuations
    writer.add_graph(network, example_input)
    
def log_metrics(self, writer, e):
    """Write the current evaluation scalars of the experiment to ``writer`` at step ``e``."""
    tagged_attributes = (
        ('eval/utility', 'utility'),
        ('debug/norm_parameter_update', 'update_norm'),
        ('eval/utility_vs_bne', 'utility_vs_bne'),
        ('eval/epsilon_relative', 'epsilon_relative'),
        # mostly useful to see whether numeric precision is a problem;
        # otherwise the same as the relative value, just scaled
        ('eval/epsilon_absolute', 'epsilon_absolute'),
    )
    for tag, attribute in tagged_attributes:
        writer.add_scalar(tag, getattr(self, attribute), e)

# TODO: deferred until writing logger
def log_hyperparams(self, writer, e):
    """Placeholder for everything that should be logged on each learning-rate update.

    Nothing is written yet; the candidate scalars are listed below.
    """
    # Not logged yet:
    #     writer.add_scalar('hyperparams/batch_size', batch_size, e)
    #     writer.add_scalar('hyperparams/learning_rate', learning_rate, e)
    #     writer.add_scalar('hyperparams/momentum', momentum, e)
    #     writer.add_scalar('hyperparams/sigma', sigma, e)
    #     writer.add_scalar('hyperparams/n_perturbations', n_perturbations, e)
    return None

## Define Training Loop
def training_loop(self, writer, e):
    """Run one learning step for epoch ``e`` and log the resulting metrics.

    Updates the strategy through ``self.learner``, then records the update
    norm, the utility against the BNE environment and both epsilon values.
    Every ``plot_epoch`` epochs the current bid function is printed and
    plotted. The time spent after the update is accumulated in
    ``self.overhead_mins``.
    """
    # snapshot of the parameters, needed for the size of the update step
    params_before = torch.nn.utils.parameters_to_vector(self.model.parameters())
    self.utility = self.learner.update_strategy_and_evaluate_utility()

    # everything below is logging --> time it to measure the overhead
    logging_started = timer()

    # infinity-norm of the update step
    params_after = torch.nn.utils.parameters_to_vector(self.model.parameters())
    step = params_after - params_before
    self.update_norm = step.norm(float('inf'))

    # utility of the current model when playing against BNE opponents
    self.utility_vs_bne = self.bne_env.get_reward(
        strat_to_bidder(self.model, batch_size = eval_batch_size),
        draw_valuations=False)
    self.epsilon_absolute = self.bne_utility - self.utility_vs_bne
    self.epsilon_relative = 1 - self.utility_vs_bne / self.bne_utility

    self.log_metrics(writer, e)

    is_plot_epoch = e % self._logging_options['plot_epoch'] == 0
    if is_plot_epoch:
        # current bid function of the first bidder
        first_bidder = self.bidders[0]
        sampled_valuations = first_bidder.valuations
        sampled_bids = first_bidder.get_action()

        print("Epoch {}: \tcurrent utility: {:.3f},\t utility vs BNE: {:.3f}, \tepsilon (abs/rel): ({:.5f}, {:.5f})".format(
            e, self.utility, self.utility_vs_bne, self.epsilon_absolute, self.epsilon_relative))
        self.plot(self.fig, (sampled_valuations, sampled_bids), writer, e)

    logging_seconds = timer() - logging_started
    self.overhead_mins = self.overhead_mins + logging_seconds/60
    writer.add_scalar('debug/overhead_mins', self.overhead_mins, e)
    
# Define Experiment Class
class SymmetricSingleItemAuctionExperiment(Experiment):
    """Experiment for the symmetric single-item auction configured in this notebook.

    The class only wires the module-level functions defined above into the
    attribute names listed here; each of them takes the experiment as ``self``.
    """
    setup_players = setup_bidders
    setup_learning_environment = setup_learning_environment
    setup_learners = setup_learner
    # `optimal_bid` takes only the valuation. Without `staticmethod`, access
    # through an instance would bind it and pass the experiment as `valuation`.
    equilibrium_strategy = staticmethod(optimal_bid)
    setup_eval_environment = setup_eval_environment
    plot = plot_bid_function
    log_once = log_once
    log_metrics = log_metrics
    log_hyperparams = log_hyperparams
    training_loop = training_loop

In [None]:
## Create the experiment
### This is where the first random operations happen, so any seed has to be set before this cell runs.

# The name is passed as a list of parts: setting, mechanism, prior, symmetry
# and the number of players followed by 'p'.
exp = SymmetricSingleItemAuctionExperiment(   
    name = ['single_item', auction_mechanism, valuation_prior, 'symmetric', str(n_players)+'p'],
    mechanism = mechanism,
    n_players = n_players,
    logging_options = logging_options)

In [None]:
## Check setup

# model architecture and its total number of parameters
print(exp.model)
n_parameters = sum(p.numel() for p in exp.model.parameters())
print('Total parameters: ' + str(n_parameters))

# when calculating utilities, make sure valuations are drawn at least once.
print("Utility in BNE (analytical): \t{:.5f}".format(exp.bne_utility))
print('Utility in BNE (sampled): \t{:.5f}'.format(
    exp.bne_env.get_reward(exp.bne_env.agents[0], draw_valuations=True)
))
# utility of the (untrained) model in the BNE environment and in the learning environment
print('Model utility vs BNE: \t\t{:.5f}'.format(
    exp.bne_env.get_strategy_reward(exp.model, player_position=0)
))
print('Model utility in learning env:\t{:.5f}'.format(
    exp.env.get_strategy_reward(exp.model, player_position=0, draw_valuations = True)
))

In [None]:
# GPU memory currently occupied by tensors (in bytes), shown before the run
torch.cuda.memory_allocated()

In [None]:
# Run the experiment with the configured number of epochs and run comment
exp.run(epochs, run_comment)

In [None]:
# GPU memory currently occupied by tensors (in bytes), shown after the run
torch.cuda.memory_allocated()

In [None]:
# Drop the experiment, ask torch to release cached GPU memory and show what
# is still allocated afterwards.
del exp
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
torch.cuda.memory_allocated()

In [None]:
# NOTE(review): removes the `np` binding from the notebook namespace; cells
# that use `np` need the import cell re-run afterwards — confirm this is intended.
del np