In [1]:
import os
import pickle
from scipy.stats import gaussian_kde
from sklearn.neighbors import KernelDensity

In [2]:
from srcfanova.confspace_utils import get_configspace, integer_encode_dataframe


import itertools as it
from collections import OrderedDict

import ConfigSpace
import numpy as np
import pandas as pd
import pyrfr.regression as reg
import pyrfr.util
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter, \
    NumericalHyperparameter, Constant, OrdinalHyperparameter

from surrogate import fANOVA_surrogate
from hyperband_infinite import HyperBandOptimiser

from typing import List

# Prepare data for fitting surrogate models 

In [3]:
# Load the raw hyperparameter-sweep results.
dataf = pd.read_csv('./results_hyper.csv', sep=",")

# Drop the two datasets whose performance can not be explained using ANOVA.
excluded_datasets = ['ilpd', 'blood-transfusion-service-center']
dataf = dataf[~dataf.dataset.isin(excluded_datasets)].reset_index()
task_ids = sorted(dataf['task_id'].unique())

measure = 'val_binary_accuracy'

# Important hyperparameter keys to consider:
# learning_rate, depth, use_reuploading, input_activation_function.
# Columns are taken in configspace order and then integer-encoded so the data is numerical.
config_space = get_configspace(True)
cs_params = config_space.get_hyperparameter_names()
hyperparam_columns = list(cs_params)

# Untouched copy of the hyperparameter columns (before integer encoding).
original_df = dataf.loc[:, hyperparam_columns]

data = integer_encode_dataframe(dataf.loc[:, hyperparam_columns], config_space)
for extra_column in ('task_id', 'dataset', measure):
    data[extra_column] = dataf[extra_column]

# Fit surrogate models

In [4]:
n_trees = 128
model_per_task = {}

# One surrogate model per task; the task's position in `task_ids` is used as its seed.
for t_idx, task_id in enumerate(task_ids):
    # Rows of this task without the bookkeeping columns.
    data_task = data[data['task_id'] == task_id].drop(columns=['task_id', 'dataset'])

    # Target is the performance measure; features are every remaining column.
    y_data = data_task[measure].values
    X_data = data_task.drop(columns=[measure])

    model_per_task[task_id] = fANOVA_surrogate(X=X_data, Y=y_data, n_trees=n_trees, seed=t_idx)

# Hyperband Optimisation Uniform vs KDE Prior Experiment

The experiment configuration for the uniform prior experiment is defined in the next cell.

In [5]:
# Experiment grid for the uniform-prior Hyperband runs.
search_type = 'uniform'
n_runs = 15

seed_exp = np.arange(n_runs)  # one seed per repetition
eta_exp = [2, 3, 4]  # halving factor
# Starting budget; can be thought of as the number of shots used to compute the expectation value.
starting_budget_exp = [10 ** 3, 10 ** 4]
# How many iterations the hyperband algorithm runs for; also called s_max in the paper.
max_k_exp = [4, 5, 6]

# Run hyperband algorithm with uniform priors

We search the hyperparameter configuration space using Hyperband with uniform sampling of the hyperparameters. The objective is to find the hyperparameter configuration that gives the maximum validation accuracy of the surrogate model.

In [6]:
# Uniform-prior experiments: one Hyperband run per (max_k, eta, starting budget, task, seed).
uniform_grid = it.product(max_k_exp, eta_exp, starting_budget_exp, task_ids, seed_exp)

for max_k, eta, starting_budget, task_id, seed in uniform_grid:
    optimiser_settings = dict(
        eta=eta,
        config_space=config_space,
        optimisation_goal='performance',
        max_k=max_k,
        min_or_max=max,
        task_id=task_id,
        starting_shots_nb=starting_budget,
        search_type=search_type,
        seed_nb=seed,
    )
    optimiser = HyperBandOptimiser(**optimiser_settings)
    optimiser.run_optimisation(
        model_for_task_id=model_per_task[task_id],
        all_data=data,
        store_optimiser=True,
        verbosity=False,
    )

# Retrieve & Save necessary results  

In [7]:

# Collect the evaluation history of every stored uniform-prior optimiser and write one
# results file per (max_k, eta, starting_budget) setting.
# NOTE(review): pickle.load can execute arbitrary code; only load optimiser files that
# were produced by this notebook.
for max_k in max_k_exp:
    for eta in eta_exp:
        for starting_budget in starting_budget_exp:

            # results[task_id][seed] -> eval_history of that run
            results = {task_id: {} for task_id in task_ids}

            for task_id in task_ids:
                for seed in seed_exp:
                    opt_f_name = f'./optimiser/{search_type}/task_id{task_id}_search_{search_type}_eta_{eta}_max_k_{max_k}_shots_{starting_budget}_seed_{seed}.pckl'
                    with open(opt_f_name, 'rb') as opt_file:
                        optimiser = pickle.load(opt_file)
                    results[task_id][seed] = optimiser.eval_history

            # Write the file once per setting, after all tasks and seeds are collected
            # (previously it was rewritten inside the innermost loop).
            f_name = f'./data/{search_type}_eta_{eta}_max_k_{max_k}_shots_{starting_budget}.pckl'
            os.makedirs(os.path.dirname(f_name), exist_ok=True)
            with open(f_name, 'wb') as results_file:
                pickle.dump(results, results_file, protocol=pickle.HIGHEST_PROTOCOL)
                    

# Run hyperband algorithm with KDE priors

The experiment configuration for the KDE prior experiment is defined in the next cell.

In [8]:
# Experiment grid for the KDE-prior Hyperband runs.
search_type = 'kde'
n_runs = 15

seed_exp = np.arange(n_runs)
eta_exp = [2, 3, 4]
starting_budget_exp = [10 ** 3, 10 ** 4]
max_k_exp = [4, 5, 6]
best_N_exp = [10, 20]

# Given as a list of lists; each inner list contains the indices of (important) hyperparameters.
# Index {6: learning_rate, 1: depth, 4: input_activation_function, 9: use_reuploading}
important_order = [6, 1, 4, 9]
imp_hyperparams_list_exp = [important_order[:k] for k in range(1, len(important_order) + 1)]
# Bandwidth estimator for the kernel density fitted to the data.
kde_bw_estimator_exp = ['sj', 'silverman']

We search the hyperparameter configuration space using Hyperband, with some (mostly the important) hyperparameters sampled from a kernel density estimator that is fitted on the best_N best-performing hyperparameter configurations for each task.

In [9]:
# KDE-prior experiments: one Hyperband run for every combination in the grid below.
kde_grid = it.product(
    imp_hyperparams_list_exp,
    max_k_exp,
    eta_exp,
    starting_budget_exp,
    best_N_exp,
    kde_bw_estimator_exp,
    task_ids,
    seed_exp,
)

for imp_hyperparams_list, max_k, eta, starting_budget, best_N, kde_bw_estimator, task_id, seed in kde_grid:
    optimiser_settings = dict(
        eta=eta,
        config_space=config_space,
        optimisation_goal='performance',
        max_k=max_k,
        min_or_max=max,
        task_id=task_id,
        starting_shots_nb=starting_budget,
        search_type=search_type,
        important_hyperparams_indices=imp_hyperparams_list,
        best_N=best_N,
        seed_nb=seed,
        kde_bw_estimator=kde_bw_estimator,
        kde_bw=None,
        pickle_path=None,
    )
    optimiser = HyperBandOptimiser(**optimiser_settings)
    optimiser.run_optimisation(
        model_for_task_id=model_per_task[task_id],
        all_data=data,
        store_optimiser=True,
        verbosity=False,
    )

# Retrieve & Save necessary results  

In [10]:

# Collect the evaluation history of every stored KDE-prior optimiser and write one results
# file per (imp_hyperparams_list, max_k, eta, starting_budget, best_N, kde_bw_estimator) setting.
# NOTE(review): pickle.load can execute arbitrary code; only load optimiser files that
# were produced by this notebook.
for imp_hyperparams_list in imp_hyperparams_list_exp:
    for max_k in max_k_exp:
        for eta in eta_exp:
            for starting_budget in starting_budget_exp:
                for best_N in best_N_exp:
                    for kde_bw_estimator in kde_bw_estimator_exp:

                        # results[task_id][seed] -> eval_history of that run
                        results = {task_id: {} for task_id in task_ids}

                        for task_id in task_ids:
                            for seed in seed_exp:
                                opt_f_name = f'./optimiser/{search_type}/task_id{task_id}_search_{search_type}_bw_None_bw_est_{kde_bw_estimator}_bestN_{best_N}_eta_{eta}_max_k_{max_k}_shots_{starting_budget}_imp_hyp_{imp_hyperparams_list}_seed_{seed}.pckl'
                                # Context manager so the handle is closed right after loading.
                                with open(opt_f_name, 'rb') as opt_file:
                                    optimiser = pickle.load(opt_file)
                                results[task_id][seed] = optimiser.eval_history

                        f_name = f'./data/{search_type}_bw_est_{kde_bw_estimator}_bestN_{best_N}_eta_{eta}_max_k_{max_k}_shots_{starting_budget}_imp_hyp_{imp_hyperparams_list}.pckl'
                        os.makedirs(os.path.dirname(f_name), exist_ok=True)
                        with open(f_name, 'wb') as results_file:
                            pickle.dump(results, results_file, protocol=pickle.HIGHEST_PROTOCOL)
                        

# Data Preparation for the Plots

In [None]:
# Directory holding the per-setting result pickles.
data_directory = './data/'

# Empty frame with one column per experiment setting plus the KDE/uniform scores and their difference.
result_columns = [
    'task_id',
    'seed',
    'max_k',
    'eta',
    'starting_budget',
    'imp_hyperparams',
    'best_N',
    'bw_estimator',
    'mean_result_kde',
    'mean_result_uniform',
    'difference',
]
df = pd.DataFrame(columns=result_columns)

In [None]:
import warnings
# NOTE(review): blanket suppression is kept so the output of later cells is unchanged;
# consider narrowing it to specific warning categories.
warnings.filterwarnings("ignore")

# KDE-minus-uniform difference, one entry per (setting, task, seed).
data_plot = []
# One dict per table row; the frame is built once after the loops because
# DataFrame.append was removed in pandas 2.0 and row-by-row appends are quadratic.
comparison_rows = []

# NOTE(review): pickle.load can execute arbitrary code; only load files written by this notebook.
for imp_hyperparams_list in imp_hyperparams_list_exp:
    for max_k in max_k_exp:
        for eta in eta_exp:
            for starting_budget in starting_budget_exp:

                f_uni_name = data_directory + f'uniform_eta_{eta}_max_k_{max_k}_shots_{starting_budget}.pckl'
                with open(f_uni_name, 'rb') as f_uni:
                    uni_results = pickle.load(f_uni)

                for best_N in best_N_exp:
                    for kde_bw_estimator in kde_bw_estimator_exp:
                        # data = []
                        f_kde_name = data_directory + f'kde_bw_est_{kde_bw_estimator}_bestN_{best_N}_eta_{eta}_max_k_{max_k}_shots_{int(starting_budget)}_imp_hyp_{imp_hyperparams_list}.pckl'
                        with open(f_kde_name, 'rb') as f_kde:
                            kde_results = pickle.load(f_kde)

                        for task_id in task_ids:
                            for seed in seed_exp:

                                # Each run is summarised by the mean of its eval_history
                                # (the max or the median would be alternatives).
                                scores_kde = np.mean(kde_results[task_id][seed])
                                scores_uniform = np.mean(uni_results[task_id][seed])
                                current_difference = scores_kde - scores_uniform
                                data_plot.append(current_difference)

                                comparison_rows.append({
                                    'task_id': task_id,
                                    'seed': seed,
                                    'max_k': max_k,
                                    'eta': eta,
                                    'starting_budget': starting_budget,
                                    'imp_hyperparams': imp_hyperparams_list,
                                    'best_N': best_N,
                                    'bw_estimator': kde_bw_estimator,
                                    'mean_result_kde': scores_kde,
                                    'mean_result_uniform': scores_uniform,
                                    'difference': current_difference,
                                })

# Build the table in one go, keeping the column order of the (empty) frame defined earlier.
df = pd.DataFrame(comparison_rows, columns=df.columns)
                        
#                         plt.figure(figsize=(3, 6))
#                         plt.tick_params(axis='x', which='both', bottom='off', top='off', labelbottom='off')
#                         plt.plot([0.5, 1.5], [0, 0], 'k-', linestyle='--', lw=1)
#                         plt.violinplot(data)

#                         plt.xlabel(f'{kde_bw_estimator}_{best_N}N_{eta}eta_{max_k}k_{int(starting_budget)}s_{imp_hyperparams_list}', fontweight='bold')
#                         plt.tight_layout()
#                         plot_name = f'./plots/uniform_v_kde_bw_est_{kde_bw_estimator}_bestN_{best_N}_eta_{eta}_max_k_{max_k}_shots_{int(starting_budget)}_imp_hyp_{imp_hyperparams_list}.pdf'
#                         os.makedirs(os.path.dirname(plot_name), exist_ok=True)
#                         plt.savefig(plot_name, bbox_inches='tight')
#                         plt.clf()
                        

# Best Max and Mean Statistics among all Hyperband runs

In [None]:
# Every column except the last one ('difference') becomes a string, so that list-valued
# settings such as imp_hyperparams can serve as group keys; 'difference' becomes float.
column_dtypes = {col: 'string' for col in df.columns[:-1]}
column_dtypes['difference'] = 'float'
df = df.astype(column_dtypes)

# Mean difference per setting; unstack() spreads the innermost key (bw_estimator) into columns.
group_keys = ['max_k', 'eta', 'starting_budget', 'imp_hyperparams', 'best_N', 'bw_estimator']
df_mean = df.groupby(by=group_keys)['difference'].mean().unstack().reset_index()

# df_max = df2 = df.groupby(by=['max_k', 'eta', 'starting_budget', 'imp_hyperparams',
#                               'best_N', 'bw_estimator'])['difference'].max().unstack()



In [None]:
# Best mean run of all hyperband runs (for the 'sj' bandwidth estimator).
# idxmax() returns an index label, so the row is selected with label-based .loc.
df_mean.loc[df_mean['sj'].idxmax()]

In [None]:
# Statistics for all runs: mean, median and max of the KDE-minus-uniform differences.
all_runs_stats = (np.mean(data_plot), np.median(data_plot), np.max(data_plot))
print(*all_runs_stats)

Best mean run stats

In [None]:
# Data for the best mean run.
# NOTE(review): the settings below are hard-coded, presumably copied by hand from the
# best row of df_mean — confirm they still match after re-running the experiments.
n_runs = 15
seed_exp = np.arange(n_runs)
max_k_best = 4
# Must be an int: a float would be formatted as 'shots_1000.0' and would not match
# the 'shots_1000' file names written by the results cells.
budget_best = int(1e3)
eta_best = 2
imp_hyperparam_best = [6, 1, 4]
bestN_best = 10
bw_best = 'sj'

# NOTE(review): pickle.load can execute arbitrary code; only load files written by this notebook.
f_name_uni_best = data_directory + f'uniform_eta_{eta_best}_max_k_{max_k_best}_shots_{budget_best}.pckl'
with open(f_name_uni_best, 'rb') as f_uni_best:
    uni_results_best = pickle.load(f_uni_best)

f_name_kde_best = data_directory + f'kde_bw_est_{bw_best}_bestN_{bestN_best}_eta_{eta_best}_max_k_{max_k_best}_shots_{int(budget_best)}_imp_hyp_{imp_hyperparam_best}.pckl'
# Load the best-run file itself (previously a stale loop variable, f_kde_name, was loaded).
with open(f_name_kde_best, 'rb') as f_kde_best:
    kde_results_best = pickle.load(f_kde_best)

# KDE-minus-uniform difference of the mean eval_history, one entry per (task, seed).
data_plot1 = [
    np.mean(kde_results_best[task_id][seed]) - np.mean(uni_results_best[task_id][seed])
    for task_id in task_ids
    for seed in seed_exp
]

In [None]:
# Statistics for the best mean run: mean, median and max of the differences.
best_run_stats = (np.mean(data_plot1), np.median(data_plot1), np.max(data_plot1))
print(*best_run_stats)

# Plot Helping functions

In [None]:
def get_violin_plot(data, ax, title, f1, f2):
    """Draw a violin plot of ``data`` on ``ax`` with a line at the mean.

    Parameters
    ----------
    data
        Values forwarded unchanged to ``sns.violinplot(data=...)``.
    ax
        Axes to draw on; a dashed horizontal reference line is added at y=0
        and the y axis is inverted.
    title
        Text used as the axes title.
    f1
        Font size of the title.
    f2
        Font size of the y-axis label ('Improvement').

    Notes
    -----
    seaborn's private ``_ViolinPlotter.draw_quartiles`` hook is temporarily
    replaced by one that draws a single line at the mean of the data, and is
    restored afterwards so other violin plots in the session are unaffected.
    NOTE(review): this relies on a private seaborn class and on the ``bw`` /
    ``scale`` keyword arguments; presumably only older seaborn releases provide
    them — confirm against the installed version.
    """

    def _draw_mean_line(self, ax, data, support, density, center, split=False):
        # Same call shape as the hook it replaces, but with one value: the mean.
        self.draw_to_density(
            ax,
            center,
            np.mean(data),
            support,
            density,
            split,
            linewidth=self.linewidth,
        )

    plotter_cls = sns.categorical._ViolinPlotter
    original_draw_quartiles = plotter_cls.draw_quartiles
    plotter_cls.draw_quartiles = _draw_mean_line
    try:
        ax.axhline(y=0, color="black", linestyle="--", linewidth=0.6)
        sns.violinplot(data=data, bw='silverman', ax=ax, saturation=0.6, scale='width', cut=0, inner='quartile', linewidth=0.8)
    finally:
        # Undo the monkeypatch even if plotting fails.
        plotter_cls.draw_quartiles = original_draw_quartiles

    ax.invert_yaxis()
    ax.tick_params(axis='both', which='major', labelsize=10)
    ax.tick_params(axis='both', which='minor', labelsize=8)
    ax.get_xaxis().set_ticks([])  # no x ticks
    ax.set_title(f'{title}', fontsize=f1)
    ax.set_ylabel('Improvement', fontsize=f2)

Figure 7 | Violin Plot

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.transforms as mtransforms

fig, axs = plt.subplot_mosaic([['(a)', '(b)']], constrained_layout=True, figsize=(7, 6), sharey=True)

# Physical offset applied to each panel label, relative to the axes' top-left corner.
label_offset = mtransforms.ScaledTranslation(1.1, -5, fig.dpi_scale_trans)
for label, ax in axs.items():
    ax.text(
        0.0,
        1.0,
        label,
        transform=ax.transAxes + label_offset,
        fontsize=10,
        va='bottom',
    )

# (panel, differences to plot, panel title)
violin_panels = [
    ('(a)', data_plot, 'All runs of Hyperband'),
    ('(b)', data_plot1, 'Best average run of Hyperband'),
]
for panel, differences, panel_title in violin_panels:
    get_violin_plot(differences, axs[panel], panel_title, f1=13, f2=12)

fig.tight_layout()
plt.show()
# plt.savefig("hyperband_results.pdf", dpi=600)

Figure 6 | KDE pdf plot

In [None]:
import ConfigSpace
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import (
    CategoricalHyperparameter,
    UniformIntegerHyperparameter,
    UniformFloatHyperparameter,
    NumericalHyperparameter,
)

from bw_select import *
from srcfanova.confspace_utils import get_unimp_hyperparam_configspace, get_configspace, integer_encode_dataframe
from prior_utils import *

In [None]:
# KDE essentials for a single task (the loop over all task_ids is left disabled).
task_id = 15
kde_settings = dict(
    task_id=task_id,
    config_space=config_space,
    cs_params=cs_params,
    important_hyperparams_indices=[6, 1, 4, 9],
    all_data=data,
    kde_bw=None,
    kde_bw_estimator='sj',
    best_N=10,
)
imp_hyperparams_priors, kde_bw = get_kde_essentials(**kde_settings)

In [None]:
from matplotlib.ticker import MaxNLocator

resolution = 200  # number of x positions at which each pdf is evaluated

fig, axes = plt.subplot_mosaic([['(a)', '(b)']], constrained_layout=True, figsize=(10, 5), sharey=True)
for label, ax in axes.items():
    # Physical offset of the panel label relative to the axes' top-left corner.
    trans = mtransforms.ScaledTranslation(1.9, -4.37, fig.dpi_scale_trans)
    ax.text(0.0, 1.0, label, transform=ax.transAxes + trans,
            fontsize=10, va='bottom')

# Only plotting it for learning rate and depth. Other two are categorical!
for panel, h_i in zip(['(a)', '(b)'], [6, 1]):
    axs = axes[panel]

    imp_hyperparam_obj = config_space[cs_params[h_i]]
    # With factor = 1.0 the plotted range is exactly [lower, upper] of the hyperparameter.
    factor = 1.0
    _min = np.power(imp_hyperparam_obj.lower, factor)
    _max = np.power(imp_hyperparam_obj.upper, factor)

    if _max < imp_hyperparam_obj.upper:
        _max = imp_hyperparam_obj.upper * factor
    if imp_hyperparam_obj.log:
        # Log-spaced grid between the two endpoints. np.geomspace takes the endpoints
        # directly; the earlier np.logspace(np.log(...), ...) mixed natural-log exponents
        # with logspace's default base 10 and therefore produced the wrong range.
        X_values_plot = np.geomspace(_min, _max, resolution)
        axs.set_xscale("log")
    else:
        X_values_plot = np.linspace(_min, _max, resolution)

    if isinstance(imp_hyperparam_obj, UniformIntegerHyperparameter):
        axs.xaxis.set_major_locator(MaxNLocator(integer=True))

    # plot pdfs
    # NOTE(review): the meaning of the 'resample' and 0.3 arguments is defined by
    # KDEWrapper, which is not visible here — confirm before changing them.
    wrapper = KDEWrapper(imp_hyperparam_obj,
                         imp_hyperparam_obj.name,
                         imp_hyperparams_priors[imp_hyperparam_obj.name],
                         'resample',
                         0.3)
    axs.plot(X_values_plot, wrapper.pdf(X_values_plot), lw=2, alpha=0.6, label=imp_hyperparam_obj.name.replace('_', ' '))
    axs.set_xlim(_min, max(X_values_plot))
    axs.set_ylabel('Probability', fontsize=12)
    # axs.set_xlabel('Values', fontsize=12)
    axs.tick_params(axis='both', which='major', labelsize=10)
    axs.tick_params(axis='both', which='minor', labelsize=8)
    axs.legend(loc='upper left', fontsize=10)


fig.tight_layout()
# plt.savefig("kde_numeric_hyperparameters.pdf", dpi=600)
plt.show()