In [None]:
from __future__ import division
import numpy as np
import os
import sys
import time
import functools
import pickle
import multiprocess
from sklearn.utils import shuffle
from algorithms import Problem
import utils
%load_ext autoreload
%autoreload 2

In [None]:
# Output switches: show_fig / save_fig are handed to the utils plotting
# helpers, save_data gates the pickle dumps at the end of each simulation cell.
show_fig, save_fig, save_data = True, True, True

## Similarity Matrix Creation

In [None]:
# Load the similarity matrix and its companion mask from the ICLR 2018 archive.
iclr_data = np.load('iclr2018_all.npz')
iclr_similarity = iclr_data['similarity_matrix']
iclr_author_mask = iclr_data['mask_matrix']

# Zero the similarity at every (row, col) position where the mask equals 1.
# NOTE(review): presumably these are reviewer/author conflicts -- confirm.
row_idx, col_idx = np.where(iclr_author_mask == 1)[:2]
iclr_similarity[row_idx, col_idx] = 0

# Figures and pickled results go into sibling folders of the working directory.
cwd = os.getcwd()
save_dir = os.path.join(cwd, 'UAI_FIGS_ICLR_FINAL4')
data_dir = os.path.join(cwd, 'UAI_DATA_ICLR_FINAL4')

## Gain and Bidding Functions

In [None]:
# Gain functions for paper-side gain.
# Defined with `def` rather than as lambdas bound to names so that each one
# carries a meaningful __name__ in tracebacks and can hold a docstring.
def sqrt(d):
    """Square-root gain: element-wise ``np.sqrt(d)``."""
    return np.sqrt(d)


def minimum_3(d):
    """Gain capped at 3: element-wise ``np.minimum(d, 3)``."""
    return np.minimum(d, 3)


def minimum_5(d):
    """Gain capped at 5: element-wise ``np.minimum(d, 5)``."""
    return np.minimum(d, 5)


def minimum_6(d):
    """Gain capped at 6: element-wise ``np.minimum(d, 6)``."""
    return np.minimum(d, 6)


def minimum_10(d):
    """Gain capped at 10: element-wise ``np.minimum(d, 10)``."""
    return np.minimum(d, 10)

# Gain functions for reviewer-side gain.
def DCG(s, pi=None):
    """Exponential gain ``2**s - 1``, optionally discounted by ``log2(pi + 1)``.

    s  -- score(s) the gain is computed from (scalar or array).
    pi -- optional discount argument; when omitted the undiscounted gain is
          returned.  NOTE(review): presumably a 1-based rank position -- confirm.
    """
    gain = 2. ** s - 1
    if pi is None:
        return gain
    return gain / np.log2(pi + 1)
    
def DCG_sqrt(s, pi=None):
    """Exponential gain ``2**s - 1``, optionally discounted by ``sqrt(pi)``.

    s  -- score(s) the gain is computed from (scalar or array).
    pi -- optional discount argument; when omitted the undiscounted gain is
          returned.  NOTE(review): presumably a 1-based rank position -- confirm.
    """
    gain = 2. ** s - 1
    if pi is None:
        return gain
    return gain / np.sqrt(pi)

# Bidding functions.
def bid_log(s, pi=None):
    """Return ``s`` unchanged, or ``s / log2(pi + 1)`` when ``pi`` is given.

    NOTE(review): ``pi`` is presumably a 1-based rank position -- confirm.
    """
    return s if pi is None else s / np.log2(pi + 1)
    
def bid_sqrt(s, pi=None):
    """Return ``s`` unchanged, or ``s / sqrt(pi)`` when ``pi`` is given.

    NOTE(review): ``pi`` is presumably a 1-based rank position -- confirm.
    """
    return s if pi is None else s / np.sqrt(pi)

## Function to Simulate All Algorithms

In [None]:
def simulate_all(s_list, g_p, g_r, f, f_tilde, noise, hyper, special, stop, poisson, subset, seed, verbose=True):
    """Simulate every policy on identically configured ``Problem`` instances.

    For each policy a fresh ``Problem`` is built from the same keyword
    arguments (``s``, ``g_p``, ``g_r``, ``f``, ``f_tilde``, ``noise``,
    ``hyper``, ``special``, ``stop``, ``poisson``, ``subset``) and then
    ``Problem.simulate`` is called with that instance's policy and ``seed``.

    When ``verbose`` is true, a completion message and the elapsed wall-clock
    time (construction plus simulation) are printed after each policy.

    Returns the simulated ``Problem`` objects as a list ordered
    [super mean heuristic, super zero heuristic, sim, bid, random].
    """
    # (name of the policy attribute on Problem, progress message) in run order.
    policy_table = [
        ('super_mean_heuristic_policy', 'finish mean heuristic'),
        ('super_zero_heuristic_policy', 'finish zero heuristic'),
        ('sim_policy', 'finish sim'),
        ('bid_policy', 'finish bid'),
        ('random_policy', 'finish random'),
    ]

    algs = []
    for policy_name, message in policy_table:
        tic = time.time()
        problem = Problem(s=s_list, g_p=g_p, g_r=g_r, f=f, f_tilde=f_tilde, noise=noise,
                          hyper=hyper, special=special, stop=stop, poisson=poisson, subset=subset)
        problem.simulate(getattr(problem, policy_name), seed)
        elapsed = time.time() - tic

        if verbose:
            print(message, elapsed)

        algs.append(problem)

    return algs

## Simulation Set 1: Standard Parameters

In [None]:
# Simulation set 1: sweep the tradeoff parameter (`hyper`) using the min(d, 6)
# paper-side gain, DCG reviewer-side gain and log-discounted bidding on the
# ICLR 2018 similarity matrix, then plot and (optionally) pickle the results.
hyper_range = np.arange(0, 1.1, .2)

hyper_range_set = [hyper_range]
gain_function_set = [minimum_6]
name1 = 'min6_normal_all'
name2 = 'min6_normal_gain'

num = 0
for g_p, hyper_range in zip(gain_function_set, hyper_range_set):

    full_all_data = []
    full_gain_data = []

    for hyper in hyper_range:
        g_r = DCG
        f = bid_log
        f_tilde = bid_log
        noise = 0
        special = True
        stop = lambda x: x
        poisson = False
        subset = False
        seed = 0
        count = 20

        # Re-seed NumPy's global RNG for every sweep value so the `count`
        # shuffled copies of the similarity matrix are drawn from the same
        # RNG state each time.
        np.random.seed(seed)

        # `range` (not the Python-2-only `xrange`) keeps the cell runnable on
        # both Python 2 and Python 3.
        s_list = [shuffle(iclr_similarity) for _ in range(count)]

        algs = simulate_all(s_list, g_p, g_r, f, f_tilde, noise, hyper, special, stop, poisson, subset, seed, verbose=True)

        all_data = [(alg.gain_mean, alg.p_gain_mean, alg.r_gain_mean, alg.r_gain_unweighted_mean, alg.bid_history) for alg in algs]
        gain_data = [(alg.gain_mean, alg.gain_se) for alg in algs]

        full_all_data.append(all_data)
        full_gain_data.append(gain_data)

    # Axis values for plotting: the same sweep points as np.arange(0, 1.1, .2)
    # without the floating-point noise.
    hyper_range = np.array([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
    # Raw string: '\l' is not a valid escape sequence in a normal literal.
    x_label = r'Tradeoff parameter $\lambda$'
    utils.sweep_comparison_plot(full_gain_data, hyper_range, x_label, os.path.join(save_dir, 'min6.pdf'), show_fig, save_fig)
    utils.sweep_comparison_plot_sim_base(full_gain_data, hyper_range, x_label, os.path.join(save_dir, 'min6_base.pdf'), show_fig, save_fig)

    # Bid-count plots at sweep index 4 (lambda = 0.8): first for all five
    # algorithms, then for the first three entries returned by simulate_all
    # (the two heuristics and sim).
    for alg_set, fig_suffix in [([0, 1, 2, 3, 4], 'bids.pdf'), ([0, 1, 2], 'bids_base.pdf')]:
        hyper_index = 4
        index = None
        intervals = [(0, 2), (3, 5), (6, 8), (9, 10)]
        endpoint = False
        bid_count_data = full_all_data
        utils.plot_bid_count_data(bid_count_data, hyper_index, intervals, alg_set, endpoint, index, os.path.join(save_dir, 'min6_' + fig_suffix), show_fig, save_fig)

    if save_data:
        # Context managers guarantee the pickle files are flushed and closed.
        with open(os.path.join(data_dir, name1+str(num)+'.p'), 'wb') as all_file:
            pickle.dump(full_all_data, all_file)
        with open(os.path.join(data_dir, name2+str(num)+'.p'), 'wb') as gain_file:
            pickle.dump(full_gain_data, gain_file)
    num += 1

## Simulation Set 2: Varying Parameters

In [None]:
# Simulation set 2: repeat the tradeoff sweep for two alternative
# (paper gain, bidding function, reviewer gain) combinations, each with its
# own range of the tradeoff parameter.
hyper_range1 = np.arange(0.0, 0.6, 0.1)
hyper_range2 = np.arange(0, 1.5, .2)

hyper_range_set = [hyper_range1, hyper_range2]
# Each entry is (g_p, f and f_tilde, g_r).
parameter_sets = [(sqrt, bid_log, DCG), (minimum_6, bid_sqrt, DCG_sqrt)]
# Sweep index used for the bid-count plots of each parameter set.
bid_hyper_indices = [4, 6]
name1 = 'param_vary_all'
name2 = 'param_vary_gain'

num = 0
for param_set, hyper_range in zip(parameter_sets, hyper_range_set):

    full_all_data = []
    full_gain_data = []

    for hyper in hyper_range:
        g_p = param_set[0]
        g_r = param_set[2]
        f = param_set[1]
        f_tilde = param_set[1]
        noise = 0
        special = True
        stop = lambda x: x
        poisson = False
        subset = False
        seed = 0
        count = 20

        # Re-seed NumPy's global RNG for every sweep value so the `count`
        # shuffled copies of the similarity matrix are drawn from the same
        # RNG state each time.
        np.random.seed(seed)

        # `range` (not the Python-2-only `xrange`) keeps the cell runnable on
        # both Python 2 and Python 3.
        s_list = [shuffle(iclr_similarity) for _ in range(count)]

        algs = simulate_all(s_list, g_p, g_r, f, f_tilde, noise, hyper, special, stop, poisson, subset, seed, verbose=True)

        all_data = [(alg.gain_mean, alg.p_gain_mean, alg.r_gain_mean, alg.r_gain_unweighted_mean, alg.bid_history) for alg in algs]
        gain_data = [(alg.gain_mean, alg.gain_se) for alg in algs]

        full_all_data.append(all_data)
        full_gain_data.append(gain_data)

    # Plot against the values that were actually simulated, rounded to strip
    # np.arange's floating-point noise.  The previous hard-coded axis for the
    # first set ended at 0.6 although the sweep ends at 0.5, which drew the
    # last point at the wrong lambda.
    hyper_range = np.round(hyper_range, 2)

    # Raw string: '\l' is not a valid escape sequence in a normal literal.
    x_label = r'Tradeoff parameter $\lambda$'
    utils.sweep_comparison_plot(full_gain_data, hyper_range, x_label, os.path.join(save_dir, 'param_set' + str(num) + '.pdf'), show_fig, save_fig)
    utils.sweep_comparison_plot_sim_base(full_gain_data, hyper_range, x_label, os.path.join(save_dir,'param_set' + str(num) + 'base.pdf'), show_fig, save_fig)

    # Bid-count plots: first for all five algorithms, then for the first three
    # entries returned by simulate_all (the two heuristics and sim).
    for alg_set, fig_suffix in [([0, 1, 2, 3, 4], 'bids.pdf'), ([0, 1, 2], 'bids_base.pdf')]:
        hyper_index = bid_hyper_indices[num]
        index = None
        intervals = [(0, 2), (3, 5), (6, 8), (9, 10)]
        endpoint = False
        bid_count_data = full_all_data
        utils.plot_bid_count_data(bid_count_data, hyper_index, intervals, alg_set, endpoint, index, os.path.join(save_dir, 'param_set' + str(num) + fig_suffix), show_fig, save_fig)

    if save_data:
        # Context managers guarantee the pickle files are flushed and closed.
        with open(os.path.join(data_dir, name1+str(num)+'.p'), 'wb') as all_file:
            pickle.dump(full_all_data, all_file)
        with open(os.path.join(data_dir, name2+str(num)+'.p'), 'wb') as gain_file:
            pickle.dump(full_gain_data, gain_file)
    num += 1

## Simulation Set 3: Robustness

In [None]:
# Echo the figure output directory as this cell's displayed value.
save_dir

In [None]:
# Simulation set 3: robustness of the tradeoff sweep when the simulated
# bidding behaviour deviates from the baseline configuration.
hyper_range = np.array([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
# Each entry is (f_tilde, noise, stop, poisson, subset); every other argument
# of simulate_all is held fixed inside the loop below.
parameter_sets = [(bid_sqrt, 0, lambda x: x, False, False), (bid_log, 0.01, lambda x:x, False, False), (bid_log, 0, lambda x:int(3*x/4), False, False), (bid_log, 0, lambda x:x, True, False), (bid_log, 0, lambda x:x, False, True)]
name1 = 'robust_all'
name2 = 'robust_gain'

# Index of the first configuration to run.  It drives both the slice and the
# `num` used in output file names, so the two can no longer drift apart.
# Currently only the last configuration is run; set to 0 to run all of them.
first_set = 4
num = first_set
for param_set in parameter_sets[first_set:]:

    full_all_data = []
    full_gain_data = []

    for hyper in hyper_range:
        g_p = minimum_6
        g_r = DCG
        f = bid_log
        f_tilde = param_set[0]
        noise = param_set[1]
        special = True
        stop = param_set[2]
        poisson = param_set[3]
        subset = param_set[4]
        seed = 0
        count = 20

        # Re-seed NumPy's global RNG for every sweep value so the `count`
        # shuffled copies of the similarity matrix are drawn from the same
        # RNG state each time.
        np.random.seed(seed)

        # `range` (not the Python-2-only `xrange`) keeps the cell runnable on
        # both Python 2 and Python 3.
        s_list = [shuffle(iclr_similarity) for _ in range(count)]

        algs = simulate_all(s_list, g_p, g_r, f, f_tilde, noise, hyper, special, stop, poisson, subset, seed, verbose=True)

        all_data = [(alg.gain_mean, alg.p_gain_mean, alg.r_gain_mean, alg.r_gain_unweighted_mean, alg.bid_history) for alg in algs]
        gain_data = [(alg.gain_mean, alg.gain_se) for alg in algs]

        full_all_data.append(all_data)
        full_gain_data.append(gain_data)

    # Raw string: '\l' is not a valid escape sequence in a normal literal.
    x_label = r'Tradeoff parameter $\lambda$'
    utils.sweep_comparison_plot(full_gain_data, hyper_range, x_label, os.path.join(save_dir, 'robust' + str(num) + '.pdf'), show_fig, save_fig)
    utils.sweep_comparison_plot_sim_base(full_gain_data, hyper_range, x_label, os.path.join(save_dir,'robust' + str(num) + 'base.pdf'), show_fig, save_fig)

    # Bid-count plots at sweep index 4 (lambda = 0.8): first for all five
    # algorithms, then for the first three entries returned by simulate_all
    # (the two heuristics and sim).
    for alg_set, fig_suffix in [([0, 1, 2, 3, 4], 'bids.pdf'), ([0, 1, 2], 'bids_base.pdf')]:
        hyper_index = 4
        index = None
        intervals = [(0, 2), (3, 5), (6, 8), (9, 10)]
        endpoint = False
        bid_count_data = full_all_data
        utils.plot_bid_count_data(bid_count_data, hyper_index, intervals, alg_set, endpoint, index, os.path.join(save_dir, 'robust' + str(num) + fig_suffix), show_fig, save_fig)

    if save_data:
        # Context managers guarantee the pickle files are flushed and closed.
        with open(os.path.join(data_dir, name1+str(num)+'.p'), 'wb') as all_file:
            pickle.dump(full_all_data, all_file)
        with open(os.path.join(data_dir, name2+str(num)+'.p'), 'wb') as gain_file:
            pickle.dump(full_gain_data, gain_file)
    num += 1

## Simulation Set 4: Varying Similarity Matrix

In [None]:
from scipy.linalg import block_diag
# Synthetic similarity-matrix generators.  Each takes `pair`, a
# (rows, columns) tuple, and returns a 2-D array.  They are written as `def`s
# rather than one-line lambdas bound to names so each step can be read and
# documented; the order of the np.random draws matches the original lambdas.
def s_func_block(pair):
    """Uniform(0, .05) background plus a block diagonal of 25x25 blocks of .7.

    ``int(pair[0]/25)`` blocks are placed on the diagonal, so the sum only
    broadcasts when the matrix is square with a row count divisible by 25.
    """
    background = np.random.uniform(0, .05, (pair[0], pair[1]))
    # uniform(.7, .7) always yields .7 but still draws from the global RNG;
    # the call is kept so the random stream is unchanged.
    blocks = [np.random.uniform(.7, .7)*np.ones((25, 25)) for _ in range(int(pair[0]/25))]
    return background + block_diag(*blocks)


def s_func_inter(pair):
    """Deterministic two-group matrix with constant-valued column bands.

    The top half of the rows holds .17 in the first 40% of the columns and
    .005 in the next 40%; the bottom half swaps those two values.  Both halves
    hold .085 in a final band that is 20% of the column count wide.
    """
    half_rows = int(pair[0]/2)
    wide = int(.4*pair[1])
    narrow = int(.2*pair[1])

    high = .17*np.ones((half_rows, wide))
    low = .005*np.ones((half_rows, wide))
    shared = .085*np.ones((half_rows, narrow))

    top = np.hstack([high, low, shared])
    bottom = np.hstack([low, high, shared])
    return np.vstack([top, bottom])


def s_func_beta(pair):
    """Matrix of shape ``pair`` with i.i.d. Beta(1, 15) entries."""
    return np.random.beta(1, 15, pair)


def s_func_low_rank(pair):
    """Stack ten groups of identical rows, one Beta(i, 60) draw per group.

    For i = 1..10 a single row of ``pair[1]`` Beta(i, 60) samples is repeated
    ``int(pair[0]/10)`` times; the ten groups are stacked vertically.
    """
    ones_column = np.ones(int(pair[0]/10)).reshape(-1, 1)
    groups = [np.dot(ones_column, np.random.beta(i, 60, pair[1]).reshape(1, -1)) for i in range(1, 11)]
    return np.vstack(groups)

In [None]:
# Simulation set 4: fix the tradeoff parameter at 0.8 and vary the problem
# size for each synthetic similarity-matrix generator.
name1 = 'vary_sim_all'
name2 = 'vary_sim_gain'
param_list = [(250,250), (500,500), (750,750), (1000,1000)]
s_func_list = [s_func_beta, s_func_low_rank, s_func_block, s_func_inter]

num = 0
for s_func in s_func_list:

    full_all_data = []
    full_gain_data = []

    for pair in param_list:
        g_p = minimum_6
        g_r = DCG
        f = bid_log
        f_tilde = bid_log
        hyper = 0.8
        noise = 0
        special = True
        stop = lambda x: x
        poisson = False
        subset = False
        seed = 0
        count = 20

        # Re-seed NumPy's global RNG for every problem size so the `count`
        # generated matrices start from the same RNG state each time.
        np.random.seed(seed)

        # `range` (not the Python-2-only `xrange`) keeps the cell runnable on
        # both Python 2 and Python 3.
        s_list = [s_func(pair) for _ in range(count)]

        algs = simulate_all(s_list, g_p, g_r, f, f_tilde, noise, hyper, special, stop, poisson, subset, seed, verbose=True)

        all_data = [(alg.gain_mean, alg.p_gain_mean, alg.r_gain_mean, alg.r_gain_unweighted_mean, alg.bid_history) for alg in algs]
        gain_data = [(alg.gain_mean, alg.gain_se) for alg in algs]

        full_all_data.append(all_data)
        full_gain_data.append(gain_data)

    # The x-axis is the problem size, taken from the sizes actually simulated.
    hyper_range = np.array([size[0] for size in param_list])
    utils.sweep_comparison_plot(full_gain_data, hyper_range, 'Number of reviewers and papers', os.path.join(save_dir, 'matrix' + str(num) + '.pdf'), show_fig, save_fig)
    utils.sweep_comparison_plot_sim_base(full_gain_data, hyper_range, 'Number of reviewers and papers', os.path.join(save_dir,'matrix' + str(num) + 'base.pdf'), show_fig, save_fig)

    # Assign hyper_index BEFORE it is used to compute `index`.  Previously it
    # was read first and only worked through the value left behind by an
    # earlier loop iteration (or an earlier cell).
    hyper_index = 2
    if num == 3:
        # For the fourth generator (s_func_inter) pass 80% of the selected
        # problem size as `index`; every other generator passes None.
        index = int(param_list[hyper_index][0]*.8)
    else:
        index = None

    # Bid-count plots: first for all five algorithms, then for the first three
    # entries returned by simulate_all (the two heuristics and sim).
    for alg_set, fig_suffix in [([0, 1, 2, 3, 4], 'bids.pdf'), ([0, 1, 2], 'bids_base.pdf')]:
        intervals = [(0, 2), (3, 5), (6, 8), (9, 10)]
        endpoint = False
        bid_count_data = full_all_data
        utils.plot_bid_count_data(bid_count_data, hyper_index, intervals, alg_set, endpoint, index, os.path.join(save_dir, 'matrix' + str(num) + fig_suffix), show_fig, save_fig)

    if save_data:
        # Context managers guarantee the pickle files are flushed and closed.
        with open(os.path.join(data_dir, name1+str(num)+'.p'), 'wb') as all_file:
            pickle.dump(full_all_data, all_file)
        with open(os.path.join(data_dir, name2+str(num)+'.p'), 'wb') as gain_file:
            pickle.dump(full_gain_data, gain_file)
    num += 1