# Find best hyperparameter settings
The 'best' hyperparameter settings for a dataset can be found by observing performance data from other datasets.

In [1]:
from hyperimp import settings
from scipy.io import arff
import pandas as pd

In [2]:
# read the meta-data of every algorithm from its ARFF file
# (loadarff returns a tuple; its first element holds the records)
svm = pd.DataFrame(arff.loadarff('data/meta_svm.arff')[0])
rf = pd.DataFrame(arff.loadarff('data/meta_random_forest.arff')[0])
ada = pd.DataFrame(arff.loadarff('data/meta_adaboost.arff')[0])

# split the svm runs into one dataframe per kernel
svm_by_kernel = svm.groupby('kernel')

# map algorithm name -> dataframe; the kernel keys are bytes, hence the decode.
# Insertion order is kept as: svm kernels first, then rf, then ada.
dfs = {
    'svm_' + kernel.decode('UTF-8'): svm_by_kernel.get_group(kernel)
    for kernel in svm_by_kernel.groups
}
dfs['rf'] = rf
dfs['ada'] = ada

# initialize Alg objects from the parameter file and the per-algorithm dataframes
algs = settings.init_algs('data/parameters.csv', dfs)

In [4]:
# example: look up the settings for one task / algorithm combination
task_id, n, m = 3, 10, 40  # task to consider, top n, minimum number of observations
alg = algs[2]  # algorithm to consider

# find_settings is called positionally, exactly as before; its result is printed
found_settings = settings.find_settings(task_id, alg, n, m)
print(found_settings)

Dataset 18 not included because no. obervations is 27.
Dataset 3913 not included because no. obervations is 30.
Dataset 3917 not included because no. obervations is 36.
Dataset 14965 not included because no. obervations is 9.
Dataset 14968 not included because no. obervations is 32.
{'gamma': 0.0016378466109978518, 'C': 9.187842332321658, 'tol': 3.5138073842994044e-05, 'coef0': 0.7262941230770367}
