[View in Colaboratory](https://colab.research.google.com/github/thundercat95/AutoML/blob/master/SMAC.ipynb)

### Installation and Uploads

In [0]:
!pip install openml
!apt-get install build-essential swig
!pip install smac
!pip install xgboost

In [0]:
from google.colab import files

uploaded = files.upload()

##files.upload returns a dictionary of the files which were uploaded. The 
##dictionary is keyed by the file name, the value is the data which was 
##uploaded.

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(name=fn, length=len(uploaded[fn])))

Saving creditopenml.csv to creditopenml.csv
User uploaded file "creditopenml.csv" with length 150828752 bytes


### Imports and datasets

In [0]:
import numpy as np
import pandas as pd
import time
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Imputer
from sklearn import decomposition
import openml as oml
apikey = '2f6c58f4f6d62ea93dd32764ec88089c'
oml.config.apikey = apikey

# Import ConfigSpace and different types of parameters
from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter, UniformIntegerHyperparameter
from ConfigSpace.conditions import InCondition

# Import SMAC-utilities
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC

In [0]:
# We load the iris-dataset
iris = datasets.load_iris()

# We load Credit Card dataset from openml
creditcard = oml.datasets.get_dataset(1597)

### SVM with SMAC on iris data

In [0]:
# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

In [0]:
# We define a few possible types of SVM-kernels and add them as "kernel" to our cs
kernel = CategoricalHyperparameter("kernel", ["linear", "rbf", "poly", "sigmoid"], default_value="poly")
cs.add_hyperparameter(kernel)

kernel, Type: Categorical, Choices: {linear, rbf, poly, sigmoid}, Default: poly

In [0]:
# There are some hyperparameters shared by all kernels
C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
shrinking = CategoricalHyperparameter("shrinking", ["true", "false"], default_value="true")
cs.add_hyperparameters([C, shrinking])

[C, Type: UniformFloat, Range: [0.001, 1000.0], Default: 1.0,
 shrinking, Type: Categorical, Choices: {true, false}, Default: true]

In [0]:
# Others are kernel-specific, so we can add conditions to limit the searchspace
degree = UniformIntegerHyperparameter("degree", 1, 5, default_value=3)     # Only used by kernel poly
coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0, default_value=0.0)  # poly, sigmoid
cs.add_hyperparameters([degree, coef0])
use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
use_coef0 = InCondition(child=coef0, parent=kernel, values=["poly", "sigmoid"])
cs.add_conditions([use_degree, use_coef0])

[degree | kernel in {'poly'}, coef0 | kernel in {'poly', 'sigmoid'}]

In [0]:
# This also works for parameters that are a mix of categorical and values from a range of numbers
# For example, gamma can be either "auto" or a fixed float
gamma = CategoricalHyperparameter("gamma", ["auto", "value"], default_value="auto")  # only rbf, poly, sigmoid
gamma_value = UniformFloatHyperparameter("gamma_value", 0.0001, 8, default_value=1)
cs.add_hyperparameters([gamma, gamma_value])

[gamma, Type: Categorical, Choices: {auto, value}, Default: auto,
 gamma_value, Type: UniformFloat, Range: [0.0001, 8.0], Default: 1.0]

In [0]:
# We only activate gamma_value if gamma is set to "value"
cs.add_condition(InCondition(child=gamma_value, parent=gamma, values=["value"]))
# And again we can restrict the use of gamma in general to the choice of the kernel
cs.add_condition(InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"]))

gamma | kernel in {'rbf', 'poly', 'sigmoid'}

In [0]:
def svm_from_cfg(cfg):
    """ Creates a SVM based on a configuration and evaluates it on the
    iris-dataset using cross-validation.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!

    Returns:
    --------
    A crossvalidated mean score for the svm on the loaded data-set.
    """
    # For deactivated parameters, the configuration stores None-values.
    # This is not accepted by the SVM, so we remove them.
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    # We translate boolean values:
    cfg["shrinking"] = True if cfg["shrinking"] == "true" else False
    # And for gamma, we set it to a fixed value or to "auto" (if used)
    if "gamma" in cfg:
        cfg["gamma"] = cfg["gamma_value"] if cfg["gamma"] == "value" else "auto"
        cfg.pop("gamma_value", None)  # Remove "gamma_value"

    clf = svm.SVC(**cfg, random_state=42)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    return 1-np.mean(scores)  # Minimize!

In [0]:
# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 200,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

In [0]:
# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=svm_from_cfg)
incumbent = smac.optimize()
inc_value = svm_from_cfg(incumbent)
print("Optimized Value: %.2f" % (inc_value))
print("Best parameter : ", incumbent.get_dictionary())


Optimizing! Depending on your machine, this might take a few minutes.
Optimized Value: 0.01
Best parameter :  {'C': 1.3375368604838664, 'kernel': 'linear', 'shrinking': 'false'}


### SVM with SMAC on Credit Card dataset (OpenML id : 1597)

In [0]:
# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 5,    # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

In [0]:
def svm_from_cfg(cfg):
    """ Creates a SVM based on a configuration and evaluates it on the
    iris-dataset using cross-validation.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!

    Returns:
    --------
    A crossvalidated mean score for the svm on the loaded data-set.
    """
    # For deactivated parameters, the configuration stores None-values.
    # This is not accepted by the SVM, so we remove them.
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    # We translate boolean values:
    cfg["shrinking"] = True if cfg["shrinking"] == "true" else False
    # And for gamma, we set it to a fixed value or to "auto" (if used)
    if "gamma" in cfg:
        cfg["gamma"] = cfg["gamma_value"] if cfg["gamma"] == "value" else "auto"
        cfg.pop("gamma_value", None)  # Remove "gamma_value"
    
    X, y = creditcard.get_data(target=creditcard.default_target_attribute)
    y[y == "'0'"]=0
    y[y == "'1'"]=1
    
    print(cfg)
    ICA = decomposition.FastICA(tol=0.2)
    X_new = ICA.fit_transform(X)
    clf = svm.SVC(**cfg, random_state=42)
    scores = cross_val_score(clf, X_new, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

In [0]:
# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=svm_from_cfg)
incumbent = smac.optimize()
inc_value = svm_from_cfg(incumbent)
print("Optimized Value: %.2f" % (inc_value))
print("Best parameter : ", incumbent.get_dictionary())

Optimizing! Depending on your machine, this might take a few minutes.




{'C': 1.0, 'kernel': 'poly', 'shrinking': True, 'degree': 3, 'gamma': 'auto'}
0.8751031406834452




{'C': 342.8603177140093, 'kernel': 'linear', 'shrinking': False}
0.9440142339532102




{'C': 805.9573023422622, 'kernel': 'poly', 'shrinking': False, 'coef0': 6.1318511844018, 'degree': 1, 'gamma': 5.491021967983701}
0.9473073025938182




{'C': 178.1953156683978, 'kernel': 'poly', 'shrinking': False, 'coef0': 9.105183204612164, 'degree': 1, 'gamma': 5.258017005850199}
0.9420137649390415




{'C': 333.0243590891621, 'kernel': 'poly', 'shrinking': True, 'coef0': 2.4586429644161365, 'degree': 2, 'gamma': 0.9314552526320006}
0.9436951822198567




{'C': 805.9573023422622, 'kernel': 'poly', 'shrinking': False, 'coef0': 6.1318511844018, 'degree': 1, 'gamma': 5.491021967983701}
0.9473073025938182
Optimized Value: 0.05
Best parameter :  {'C': 805.9573023422622, 'kernel': 'poly', 'shrinking': 'false', 'coef0': 6.1318511844018, 'degree': 1, 'gamma': 'value', 'gamma_value': 5.491021967983701}


### Logistic Regression with SMAC on Credit Card Dataset (OpenML id : 1597)

In [0]:
# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

In [0]:
# Penalty as l1 or l2
penalty = CategoricalHyperparameter("penalty", ["l1", "l2"], default_value="l2")
cs.add_hyperparameter(penalty)

penalty, Type: Categorical, Choices: {l1, l2}, Default: l2

In [0]:
# Common Hyperparameters
C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
cs.add_hyperparameter(C)

C, Type: UniformFloat, Range: [0.001, 1000.0], Default: 1.0

In [0]:
# penalty specific dual parameter
dual = CategoricalHyperparameter("dual", ["true", "false"], default_value="false")
cs.add_hyperparameter(dual)

# dual is used only when penalty = "l2"
cs.add_condition(InCondition(child=dual, parent=penalty, values=["l2"]))

dual | penalty in {'l2'}

In [0]:
def logreg_from_cfg(cfg):
    print(cfg)
    """ Creates a SVM based on a configuration and evaluates it on the
    iris-dataset using cross-validation.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!

    Returns:
    --------
    A crossvalidated mean score for the svm on the loaded data-set.
    """
    # For deactivated parameters, the configuration stores None-values.
    # This is not accepted by the SVM, so we remove them.
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    # We translate boolean values:
    if(cfg["penalty"]=="l2"):
      cfg["dual"] = True if cfg["dual"] == "true" else False
    
    X, y = creditcard.get_data(target=creditcard.default_target_attribute)
    y[y == "'0'"]=0
    y[y == "'1'"]=1
    
    ICA = decomposition.FastICA(tol=0.2)
    X_new = ICA.fit_transform(X)
    logreg = LogisticRegression(**cfg, random_state=42)
    scores = cross_val_score(logreg, X_new, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

In [0]:
# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 10,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

In [0]:
# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=logreg_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = logreg_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  C, Value: 1.0
  dual, Value: 'false'
  penalty, Value: 'l2'





0.9760163267568283
Configuration:
  C, Value: 258.52303800096433
  dual, Value: 'false'
  penalty, Value: 'l2'





0.9793975054880338
Configuration:
  C, Value: 376.15135912829777
  penalty, Value: 'l1'





0.9693237279649232
Configuration:
  C, Value: 681.1143643997688
  penalty, Value: 'l1'





0.9661814925466157
Configuration:
  C, Value: 137.4362714801022
  dual, Value: 'true'
  penalty, Value: 'l2'





0.9793791874203188
Configuration:
  C, Value: 325.421794841775
  penalty, Value: 'l1'





0.9671057528778215
Configuration:
  C, Value: 885.0548269692787
  dual, Value: 'false'
  penalty, Value: 'l2'





0.978405854867576
Configuration:
  C, Value: 30.11112564388111
  dual, Value: 'false'
  penalty, Value: 'l2'





0.9778142598397297
Configuration:
  C, Value: 111.52232056523157
  dual, Value: 'false'
  penalty, Value: 'l2'





0.9792770816574897
Configuration:
  C, Value: 271.03106801915885
  dual, Value: 'false'
  penalty, Value: 'l2'





0.979383089648364
Configuration:
  C, Value: 258.52303800096433
  dual, Value: 'false'
  penalty, Value: 'l2'





0.9793975054880338
Optimized Value: 0.97940
Best parameter :  {'C': 258.52303800096433, 'penalty': 'l2', 'dual': 'false'}
Time required in seconds : 259.95366501808167


### Xgboost with SMAC on Sick Dataset (OpenML id : 38)

In [0]:
# We load Sick dataset from openml
sick = oml.datasets.get_dataset(38)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

#max-depth 1 to 10
max_depth = UniformIntegerHyperparameter("max_depth", 1, 10, default_value=3)

#learning rate 0.001 to 1
learning_rate = UniformFloatHyperparameter("learning_rate", 0.001, 1.0, default_value=0.1)

#n_estimators 10 to 500
n_estimators = UniformIntegerHyperparameter("n_estimators", 10, 500, default_value=100)

#gamma 0 to 1
gamma = UniformFloatHyperparameter("gamma", 0.0, 1.0, default_value=0.0)

#min_child_weight 1 to 100
min_child_weight = UniformIntegerHyperparameter("min_child_weight", 1, 50, default_value=1)

#max_delta_step  0 to 50
max_delta_step  = UniformIntegerHyperparameter("max_delta_step ", 0, 50, default_value=0)

#subsample 0.1 to 1
subsample = UniformFloatHyperparameter("subsample", 0.0, 1.0, default_value=1.0)

#add all to configuration
cs.add_hyperparameters([max_depth, learning_rate, n_estimators, gamma, min_child_weight, max_delta_step, subsample])

def xgbclassifier_from_cfg(cfg):
    print(cfg)
    
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    
    X, y = sick.get_data(target=sick.default_target_attribute)
    
    xgbc = XGBClassifier(**cfg, random_state=42)
    scores = cross_val_score(xgbc, X, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 30,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=xgbclassifier_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = xgbclassifier_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  gamma, Value: 0.0
  learning_rate, Value: 0.1
  max_delta_step , Value: 0
  max_depth, Value: 3
  min_child_weight, Value: 1
  n_estimators, Value: 100
  subsample, Value: 1.0

0.9970142862931576
Configuration:
  gamma, Value: 0.6036795486258081
  learning_rate, Value: 0.10105139771364258
  max_delta_step , Value: 37
  max_depth, Value: 6
  min_child_weight, Value: 1
  n_estimators, Value: 319
  subsample, Value: 0.8400870811802357

0.9978198700329601
Configuration:
  gamma, Value: 0.9494840671337647
  learning_rate, Value: 0.12263054541216019
  max_delta_step , Value: 33
  max_depth, Value: 8
  min_child_weight, Value: 27
  n_estimators, Value: 317
  subsample, Value: 0.2645617534245237

0.9383643401635788
Configuration:
  gamma, Value: 0.7498602859753619
  learning_rate, Value: 0.7255105719227523
  max_delta_step , Value: 15
  max_depth, Value: 7
  min_child_weight, Value: 33
  n_estimators, Value:

### Xgboost with SMAC on Splice datset (OpenML id : 46)

In [0]:
# We load Splice dataset from openml
splice = oml.datasets.get_dataset(46)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

#max-depth 1 to 10
max_depth = UniformIntegerHyperparameter("max_depth", 1, 10, default_value=3)

#learning rate 0.001 to 1
learning_rate = UniformFloatHyperparameter("learning_rate", 0.001, 1.0, default_value=0.1)

#n_estimators 10 to 500
n_estimators = UniformIntegerHyperparameter("n_estimators", 10, 500, default_value=100)

#gamma 0 to 1
gamma = UniformFloatHyperparameter("gamma", 0.0, 1.0, default_value=0.0)

#min_child_weight 1 to 100
min_child_weight = UniformIntegerHyperparameter("min_child_weight", 1, 50, default_value=1)

#max_delta_step  0 to 50
max_delta_step  = UniformIntegerHyperparameter("max_delta_step ", 0, 50, default_value=0)

#subsample 0.1 to 1
subsample = UniformFloatHyperparameter("subsample", 0.0, 1.0, default_value=1.0)

#add all to configuration
cs.add_hyperparameters([max_depth, learning_rate, n_estimators, gamma, min_child_weight, max_delta_step, subsample])

def xgbclassifier_from_cfg(cfg):
    print(cfg)
    
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    
    X, y = splice.get_data(target=splice.default_target_attribute)
    
    xgbc = XGBClassifier(**cfg, random_state=42)
    scores = cross_val_score(xgbc, X, y, cv=5)
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 20,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=xgbclassifier_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = xgbclassifier_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  gamma, Value: 0.0
  learning_rate, Value: 0.1
  max_delta_step , Value: 0
  max_depth, Value: 3
  min_child_weight, Value: 1
  n_estimators, Value: 100
  subsample, Value: 1.0



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9589311410930783
Configuration:
  gamma, Value: 0.9502929219898029
  learning_rate, Value: 0.5607222058491671
  max_delta_step , Value: 39
  max_depth, Value: 4
  min_child_weight, Value: 33
  n_estimators, Value: 393
  subsample, Value: 0.20925721868698854



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9304054256527717
Configuration:
  gamma, Value: 0.9494840671337647
  learning_rate, Value: 0.12263054541216019
  max_delta_step , Value: 33
  max_depth, Value: 8
  min_child_weight, Value: 27
  n_estimators, Value: 317
  subsample, Value: 0.2645617534245237



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9448250224146983
Configuration:
  gamma, Value: 0.6049761627894861
  learning_rate, Value: 0.7651511513060467
  max_delta_step , Value: 47
  max_depth, Value: 8
  min_child_weight, Value: 36
  n_estimators, Value: 257
  subsample, Value: 0.7497175863384331



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9510965895271731
Configuration:
  gamma, Value: 0.15867921112231842
  learning_rate, Value: 0.3534931928833193
  max_delta_step , Value: 25
  max_depth, Value: 8
  min_child_weight, Value: 23
  n_estimators, Value: 285
  subsample, Value: 0.15887465732563533



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9420051775356015
Configuration:
  gamma, Value: 0.18952652599586173
  learning_rate, Value: 0.26372777627174043
  max_delta_step , Value: 29
  max_depth, Value: 8
  min_child_weight, Value: 3
  n_estimators, Value: 381
  subsample, Value: 0.5419535486527898



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9554872819772562
Configuration:
  gamma, Value: 0.9816211156765089
  learning_rate, Value: 0.41190658420121035
  max_delta_step , Value: 12
  max_depth, Value: 3
  min_child_weight, Value: 15
  n_estimators, Value: 271
  subsample, Value: 0.15485890977669914



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9410632623079719
Configuration:
  gamma, Value: 0.5526679941468379
  learning_rate, Value: 0.07843877303201902
  max_delta_step , Value: 6
  max_depth, Value: 6
  min_child_weight, Value: 19
  n_estimators, Value: 234
  subsample, Value: 0.6221714544323198



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9589345797629985
Configuration:
  gamma, Value: 0.0546348205656082
  learning_rate, Value: 0.527105331921316
  max_delta_step , Value: 43
  max_depth, Value: 5
  min_child_weight, Value: 50
  n_estimators, Value: 60
  subsample, Value: 0.7575719666202708



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9473308955512522
Configuration:
  gamma, Value: 0.8471045843015826
  learning_rate, Value: 0.17199567915644767
  max_delta_step , Value: 2
  max_depth, Value: 5
  min_child_weight, Value: 20
  n_estimators, Value: 130
  subsample, Value: 0.9091289280263428



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.95925100901858
Configuration:
  gamma, Value: 0.37300343576133377
  learning_rate, Value: 0.9029761665978346
  max_delta_step , Value: 49
  max_depth, Value: 7
  min_child_weight, Value: 11
  n_estimators, Value: 142
  subsample, Value: 0.4347155929436214



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9489036977346142
Configuration:
  gamma, Value: 0.8959891815487098
  learning_rate, Value: 0.7996862634398139
  max_delta_step , Value: 1
  max_depth, Value: 5
  min_child_weight, Value: 11
  n_estimators, Value: 29
  subsample, Value: 0.3161889918893641



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9520424324628861
Configuration:
  gamma, Value: 0.2719612839985093
  learning_rate, Value: 0.943746547304732
  max_delta_step , Value: 33
  max_depth, Value: 1
  min_child_weight, Value: 12
  n_estimators, Value: 258
  subsample, Value: 0.15655850960472006



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9238267625973169
Configuration:
  gamma, Value: 0.511779249784858
  learning_rate, Value: 0.05632229804937057
  max_delta_step , Value: 1
  max_depth, Value: 4
  min_child_weight, Value: 9
  n_estimators, Value: 177
  subsample, Value: 0.45356315021265603



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9592495342027014
Configuration:
  gamma, Value: 0.5236661018211719
  learning_rate, Value: 0.3023594605111704
  max_delta_step , Value: 32
  max_depth, Value: 9
  min_child_weight, Value: 43
  n_estimators, Value: 388
  subsample, Value: 0.233338245382816



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9304078770046992
Configuration:
  gamma, Value: 0.8883906387133728
  learning_rate, Value: 0.10527653524640494
  max_delta_step , Value: 5
  max_depth, Value: 6
  min_child_weight, Value: 21
  n_estimators, Value: 265
  subsample, Value: 0.3311433846088596



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9510960989487319
Configuration:
  gamma, Value: 0.19973623840118793
  learning_rate, Value: 0.9818452561484889
  max_delta_step , Value: 35
  max_depth, Value: 7
  min_child_weight, Value: 27
  n_estimators, Value: 473
  subsample, Value: 0.5749173928731734



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9492117809955871
Configuration:
  gamma, Value: 0.8756771388222394
  learning_rate, Value: 0.10093839333112807
  max_delta_step , Value: 1
  max_depth, Value: 4
  min_child_weight, Value: 4
  n_estimators, Value: 457
  subsample, Value: 0.7603726644787125



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9608179180995389
Configuration:
  gamma, Value: 0.9728252884513209
  learning_rate, Value: 0.034712489778642434
  max_delta_step , Value: 4
  max_depth, Value: 6
  min_child_weight, Value: 15
  n_estimators, Value: 153
  subsample, Value: 0.6742657086055155



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9570507508480173
Configuration:
  gamma, Value: 0.7213503976790632
  learning_rate, Value: 0.06432805366020328
  max_delta_step , Value: 5
  max_depth, Value: 3
  min_child_weight, Value: 2
  n_estimators, Value: 427
  subsample, Value: 0.47671679196010186



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


0.9611323804205231
Configuration:
  gamma, Value: 0.7213503976790632
  learning_rate, Value: 0.06432805366020328
  max_delta_step , Value: 5
  max_depth, Value: 3
  min_child_weight, Value: 2
  n_estimators, Value: 427
  subsample, Value: 0.47671679196010186



  if diff:
  if diff:
  if diff:
  if diff:


0.9611323804205231
Optimized Value: 0.96113
Best parameter :  {'gamma': 0.7213503976790632, 'learning_rate': 0.06432805366020328, 'max_delta_step ': 5, 'max_depth': 3, 'min_child_weight': 2, 'n_estimators': 427, 'subsample': 0.47671679196010186}
Time required in seconds : 300.6217291355133


  if diff:


### Xgboost with SMAC on Adult Dataset (OpenML id : 179)

In [0]:
# We load Adult dataset from openml
adult = oml.datasets.get_dataset(179)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

#max-depth 1 to 10
max_depth = UniformIntegerHyperparameter("max_depth", 1, 10, default_value=3)

#learning rate 0.001 to 1
learning_rate = UniformFloatHyperparameter("learning_rate", 0.001, 1.0, default_value=0.1)

#n_estimators 10 to 500
n_estimators = UniformIntegerHyperparameter("n_estimators", 10, 500, default_value=100)

#gamma 0 to 1
gamma = UniformFloatHyperparameter("gamma", 0.0, 1.0, default_value=0.0)

#min_child_weight 1 to 100
min_child_weight = UniformIntegerHyperparameter("min_child_weight", 1, 50, default_value=1)

#max_delta_step  0 to 50
max_delta_step  = UniformIntegerHyperparameter("max_delta_step ", 0, 50, default_value=0)

#subsample 0.1 to 1
subsample = UniformFloatHyperparameter("subsample", 0.0, 1.0, default_value=1.0)

#add all to configuration
cs.add_hyperparameters([max_depth, learning_rate, n_estimators, gamma, min_child_weight, max_delta_step, subsample])

def xgbclassifier_from_cfg(cfg):
    print(cfg)
    
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    
    X, y = adult.get_data(target=adult.default_target_attribute)
    
    xgbc = XGBClassifier(**cfg, random_state=42)
    scores = cross_val_score(xgbc, X, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 20,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=xgbclassifier_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = xgbclassifier_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  gamma, Value: 0.0
  learning_rate, Value: 0.1
  max_delta_step , Value: 0
  max_depth, Value: 3
  min_child_weight, Value: 1
  n_estimators, Value: 100
  subsample, Value: 1.0

0.911016965531515
Configuration:
  gamma, Value: 0.08149316430306286
  learning_rate, Value: 0.05169121736447841
  max_delta_step , Value: 10
  max_depth, Value: 5
  min_child_weight, Value: 42
  n_estimators, Value: 95
  subsample, Value: 0.20540160693644838

0.9067643995655047
Configuration:
  gamma, Value: 0.9494840671337647
  learning_rate, Value: 0.12263054541216019
  max_delta_step , Value: 33
  max_depth, Value: 8
  min_child_weight, Value: 27
  n_estimators, Value: 317
  subsample, Value: 0.2645617534245237

0.9091058865104399
Configuration:
  gamma, Value: 0.269662435985783
  learning_rate, Value: 0.5450664566415014
  max_delta_step , Value: 50
  max_depth, Value: 9
  min_child_weight, Value: 37
  n_estimators, Value:

### SVM with SMAC on Kropt Dataset (OpenML id : 184)

In [0]:
# We load Kropt dataset from openml
kropt = oml.datasets.get_dataset(184)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

# We define a few possible types of SVM-kernels and add them as "kernel" to our cs
kernel = CategoricalHyperparameter("kernel", ["linear", "rbf", "poly", "sigmoid"], default_value="poly")
cs.add_hyperparameter(kernel)

# There are some hyperparameters shared by all kernels
C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
shrinking = CategoricalHyperparameter("shrinking", ["true", "false"], default_value="true")
cs.add_hyperparameters([C, shrinking])

# Others are kernel-specific, so we can add conditions to limit the searchspace
degree = UniformIntegerHyperparameter("degree", 1, 5, default_value=3)     # Only used by kernel poly
coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0, default_value=0.0)  # poly, sigmoid
cs.add_hyperparameters([degree, coef0])
use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
use_coef0 = InCondition(child=coef0, parent=kernel, values=["poly", "sigmoid"])
cs.add_conditions([use_degree, use_coef0])

# This also works for parameters that are a mix of categorical and values from a range of numbers
# For example, gamma can be either "auto" or a fixed float
gamma = CategoricalHyperparameter("gamma", ["auto", "value"], default_value="auto")  # only rbf, poly, sigmoid
gamma_value = UniformFloatHyperparameter("gamma_value", 0.0001, 8, default_value=1)
cs.add_hyperparameters([gamma, gamma_value])

# We only activate gamma_value if gamma is set to "value"
cs.add_condition(InCondition(child=gamma_value, parent=gamma, values=["value"]))
# And again we can restrict the use of gamma in general to the choice of the kernel
cs.add_condition(InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"]))

def svm_from_cfg(cfg):
    print(cfg)
    """ Creates a SVM based on a configuration and evaluates it on the
    iris-dataset using cross-validation.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!

    Returns:
    --------
    A crossvalidated mean score for the svm on the loaded data-set.
    """
    # For deactivated parameters, the configuration stores None-values.
    # This is not accepted by the SVM, so we remove them.
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    # We translate boolean values:
    cfg["shrinking"] = True if cfg["shrinking"] == "true" else False
    # And for gamma, we set it to a fixed value or to "auto" (if used)
    if "gamma" in cfg:
        cfg["gamma"] = cfg["gamma_value"] if cfg["gamma"] == "value" else "auto"
        cfg.pop("gamma_value", None)  # Remove "gamma_value"
    
    X, y = kropt.get_data(target=kropt.default_target_attribute)
    clf = svm.SVC(**cfg, random_state=42)
    scores = cross_val_score(clf, X, y, cv=5)
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 5,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=svm_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = svm_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

### SVM with SMAC on mnist_784 Dataset (OpenML id : 554)

In [0]:
# We load mnist_784 dataset from openml
mn = oml.datasets.get_dataset(554)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

# We define a few possible types of SVM-kernels and add them as "kernel" to our cs
kernel = CategoricalHyperparameter("kernel", ["linear", "rbf", "poly", "sigmoid"], default_value="poly")
cs.add_hyperparameter(kernel)

# There are some hyperparameters shared by all kernels
C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
shrinking = CategoricalHyperparameter("shrinking", ["true", "false"], default_value="true")
cs.add_hyperparameters([C, shrinking])

# Others are kernel-specific, so we can add conditions to limit the searchspace
degree = UniformIntegerHyperparameter("degree", 1, 5, default_value=3)     # Only used by kernel poly
coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0, default_value=0.0)  # poly, sigmoid
cs.add_hyperparameters([degree, coef0])
use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
use_coef0 = InCondition(child=coef0, parent=kernel, values=["poly", "sigmoid"])
cs.add_conditions([use_degree, use_coef0])

# This also works for parameters that are a mix of categorical and values from a range of numbers
# For example, gamma can be either "auto" or a fixed float
gamma = CategoricalHyperparameter("gamma", ["auto", "value"], default_value="auto")  # only rbf, poly, sigmoid
gamma_value = UniformFloatHyperparameter("gamma_value", 0.0001, 8, default_value=1)
cs.add_hyperparameters([gamma, gamma_value])

# We only activate gamma_value if gamma is set to "value"
cs.add_condition(InCondition(child=gamma_value, parent=gamma, values=["value"]))
# And again we can restrict the use of gamma in general to the choice of the kernel
cs.add_condition(InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"]))

def svm_from_cfg(cfg):
    print(cfg)
    """ Creates a SVM based on a configuration and evaluates it on the
    iris-dataset using cross-validation.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!

    Returns:
    --------
    A crossvalidated mean score for the svm on the loaded data-set.
    """
    # For deactivated parameters, the configuration stores None-values.
    # This is not accepted by the SVM, so we remove them.
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    # We translate boolean values:
    cfg["shrinking"] = True if cfg["shrinking"] == "true" else False
    # And for gamma, we set it to a fixed value or to "auto" (if used)
    if "gamma" in cfg:
        cfg["gamma"] = cfg["gamma_value"] if cfg["gamma"] == "value" else "auto"
        cfg.pop("gamma_value", None)  # Remove "gamma_value"
    
    X, y = mn.get_data(target=mn.default_target_attribute)
    clf = svm.SVC(**cfg, random_state=42)
    scores = cross_val_score(clf, X, y, cv=5)
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 5,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=svm_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = svm_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  C, Value: 1.0
  coef0, Value: 0.0
  degree, Value: 3
  gamma, Value: 'auto'
  kernel, Value: 'poly'
  shrinking, Value: 'true'

0.9775571857194374
Configuration:
  C, Value: 943.4979148863757
  coef0, Value: 3.0983685031796506
  gamma, Value: 'value'
  gamma_value, Value: 6.77581842194688
  kernel, Value: 'sigmoid'
  shrinking, Value: 'true'



### ExtraTreeClassifier with SMAC on quake Dataset (OpenML id : 772)

In [0]:
# We load Quake dataset from openml
quake = oml.datasets.get_dataset(772)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

#n_estimators 10 to 500
n_estimators = UniformIntegerHyperparameter("n_estimators", 10, 500, default_value=100)

#criterion
criterion = CategoricalHyperparameter("criterion", ["gini", "entropy"], default_value="gini")

#max_features 0.01 to 1
max_features = UniformFloatHyperparameter("max_features", 0.01, 1.0, default_value=1.0)

#min_samples_split
min_samples_split = UniformIntegerHyperparameter("min_samples_split", 2, 50, default_value=2)

#min_samples_leaf
min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf", 1, 50, default_value=1)

#bootstrap
bootstrap = CategoricalHyperparameter("bootstrap", ["true", "false"], default_value="false")

#oob_score
oob_score = CategoricalHyperparameter("oob_score", ["true", "false"], default_value="false")
use_oob_score = InCondition(child=oob_score, parent=bootstrap, values=["true"])

#add all to configuration
cs.add_hyperparameters([n_estimators, criterion, max_features, min_samples_split, min_samples_leaf, bootstrap, oob_score])
cs.add_conditions([use_oob_score])

def etclassifier_from_cfg(cfg):
    print(cfg)
    
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    cfg["bootstrap"] = True if cfg["bootstrap"] == "true" else False
    if(cfg["bootstrap"]==True):
      cfg["oob_score"] = True if cfg["oob_score"] == "true" else False
    X, y = quake.get_data(target=quake.default_target_attribute)
    
    etc = ExtraTreesClassifier(**cfg, random_state=42)
    scores = cross_val_score(etc, X, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 20,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=etclassifier_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = etclassifier_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'gini'
  max_features, Value: 1.0
  min_samples_leaf, Value: 1
  min_samples_split, Value: 2
  n_estimators, Value: 100

0.5027316888164324
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'entropy'
  max_features, Value: 0.14551054697624458
  min_samples_leaf, Value: 50
  min_samples_split, Value: 14
  n_estimators, Value: 62
  oob_score, Value: 'false'

0.5416088049591001
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'entropy'
  max_features, Value: 0.5987914276996833
  min_samples_leaf, Value: 31
  min_samples_split, Value: 11
  n_estimators, Value: 500
  oob_score, Value: 'true'

0.5427316178998484
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'entropy'
  max_features, Value: 0.24001530720606706
  min_samples_leaf, Value: 27
  min_samples_split, Value: 10
  n_estimators, Value: 282

0.5448324012151448
Conf

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


0.5385063647363904
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'entropy'
  max_features, Value: 0.2926391765638486
  min_samples_leaf, Value: 34
  min_samples_split, Value: 4
  n_estimators, Value: 360
  oob_score, Value: 'false'

0.5399220395391195
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'gini'
  max_features, Value: 0.8009647626834414
  min_samples_leaf, Value: 42
  min_samples_split, Value: 5
  n_estimators, Value: 51
  oob_score, Value: 'false'

0.5452620328943374
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'entropy'
  max_features, Value: 0.7950537887171029
  min_samples_leaf, Value: 22
  min_samples_split, Value: 34
  n_estimators, Value: 320

0.5391935719420516
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'gini'
  max_features, Value: 0.9487335162256878
  min_samples_leaf, Value: 11
  min_samples_split, Value: 3
  n_estimators, Value: 276

0.5336017202119359
Configuration:
  bootstrap, Value: 'true'
  c

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


0.546374977915616
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'gini'
  max_features, Value: 0.8066860513869993
  min_samples_leaf, Value: 11
  min_samples_split, Value: 48
  n_estimators, Value: 151
  oob_score, Value: 'true'

0.5373964373563462
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'gini'
  max_features, Value: 0.31300306473070333
  min_samples_leaf, Value: 24
  min_samples_split, Value: 45
  n_estimators, Value: 164

0.5420085627868884
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'gini'
  max_features, Value: 0.471859466950918
  min_samples_leaf, Value: 12
  min_samples_split, Value: 19
  n_estimators, Value: 202

0.5418649582156062
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'entropy'
  max_features, Value: 0.6916179613679688
  min_samples_leaf, Value: 18
  min_samples_split, Value: 40
  n_estimators, Value: 44

0.5392671619337523
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'entropy'
 

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


### ExtraTreeClassifier with SMAC on fri_c1_1000_25 Dataset (OpenML id : 917)

In [0]:
# We load fri_c1_1000_25 dataset from openml
fri = oml.datasets.get_dataset(917)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

#n_estimators 10 to 500
n_estimators = UniformIntegerHyperparameter("n_estimators", 10, 500, default_value=100)

#criterion
criterion = CategoricalHyperparameter("criterion", ["gini", "entropy"], default_value="gini")

#max_features 0.01 to 1
max_features = UniformFloatHyperparameter("max_features", 0.01, 1.0, default_value=1.0)

#min_samples_split
min_samples_split = UniformIntegerHyperparameter("min_samples_split", 2, 50, default_value=2)

#min_samples_leaf
min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf", 1, 50, default_value=1)

#bootstrap
bootstrap = CategoricalHyperparameter("bootstrap", ["true", "false"], default_value="false")

#oob_score
oob_score = CategoricalHyperparameter("oob_score", ["true", "false"], default_value="false")
use_oob_score = InCondition(child=oob_score, parent=bootstrap, values=["true"])

#add all to configuration
cs.add_hyperparameters([n_estimators, criterion, max_features, min_samples_split, min_samples_leaf, bootstrap, oob_score])
cs.add_conditions([use_oob_score])

def etclassifier_from_cfg(cfg):
    print(cfg)
    
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    cfg["bootstrap"] = True if cfg["bootstrap"] == "true" else False
    if(cfg["bootstrap"]==True):
      cfg["oob_score"] = True if cfg["oob_score"] == "true" else False
    X, y = fri.get_data(target=fri.default_target_attribute)
    
    etc = ExtraTreesClassifier(**cfg, random_state=42)
    scores = cross_val_score(etc, X, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 20,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=etclassifier_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = etclassifier_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'gini'
  max_features, Value: 1.0
  min_samples_leaf, Value: 1
  min_samples_split, Value: 2
  n_estimators, Value: 100

0.9703603939383756
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'entropy'
  max_features, Value: 0.2872386436010404
  min_samples_leaf, Value: 6
  min_samples_split, Value: 26
  n_estimators, Value: 108
  oob_score, Value: 'true'

0.9363032685968466
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'entropy'
  max_features, Value: 0.5987914276996833
  min_samples_leaf, Value: 31
  min_samples_split, Value: 11
  n_estimators, Value: 500
  oob_score, Value: 'true'

0.9115141942664879
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'entropy'
  max_features, Value: 0.09882612937415385
  min_samples_leaf, Value: 15
  min_samples_split, Value: 12
  n_estimators, Value: 332
  oob_score, Value: 'false'

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


0.772140825535321
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'entropy'
  max_features, Value: 0.8710441238024745
  min_samples_leaf, Value: 43
  min_samples_split, Value: 19
  n_estimators, Value: 63
  oob_score, Value: 'true'

0.8998907310833916
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'gini'
  max_features, Value: 0.31300306473070333
  min_samples_leaf, Value: 24
  min_samples_split, Value: 45
  n_estimators, Value: 164

0.9097077438361841
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'gini'
  max_features, Value: 0.8831415233623687
  min_samples_leaf, Value: 42
  min_samples_split, Value: 10
  n_estimators, Value: 67

0.9396179966822169
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'entropy'
  max_features, Value: 0.6916179613679688
  min_samples_leaf, Value: 18
  min_samples_split, Value: 40
  n_estimators, Value: 44

0.9592095570077221
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'entropy'

### Logistic Regression with SMAC on pc4 Dataset (AutoML id : 1049)

In [0]:
# We load pc4 dataset from openml
pc4 = oml.datasets.get_dataset(1049)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

# Penalty as l1 or l2
penalty = CategoricalHyperparameter("penalty", ["l1", "l2"], default_value="l2")
cs.add_hyperparameter(penalty)

# Common Hyperparameters
C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
cs.add_hyperparameter(C)

# penalty specific dual parameter
dual = CategoricalHyperparameter("dual", ["true", "false"], default_value="false")
cs.add_hyperparameter(dual)

# dual is used only when penalty = "l2"
cs.add_condition(InCondition(child=dual, parent=penalty, values=["l2"]))

def logreg_from_cfg(cfg):
    print(cfg)
    """ Creates a SVM based on a configuration and evaluates it on the
    iris-dataset using cross-validation.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!

    Returns:
    --------
    A crossvalidated mean score for the svm on the loaded data-set.
    """
    # For deactivated parameters, the configuration stores None-values.
    # This is not accepted by the SVM, so we remove them.
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    # We translate boolean values:
    if(cfg["penalty"]=="l2"):
      cfg["dual"] = True if cfg["dual"] == "true" else False
    
    X, y = pc4.get_data(target=pc4.default_target_attribute)
    
    logreg = LogisticRegression(**cfg, random_state=42)
    scores = cross_val_score(logreg, X, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!
  
# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 20,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=logreg_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = logreg_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  C, Value: 1.0
  dual, Value: 'false'
  penalty, Value: 'l2'

0.8947135416666667
Configuration:
  C, Value: 290.551400716599
  dual, Value: 'false'
  penalty, Value: 'l2'

0.8994593253968255
Configuration:
  C, Value: 376.15135912829777
  penalty, Value: 'l1'

0.9167714533730159
Configuration:
  C, Value: 399.97836839058147
  dual, Value: 'true'
  penalty, Value: 'l2'

0.6524088541666666
Configuration:
  C, Value: 137.4362714801022
  dual, Value: 'true'
  penalty, Value: 'l2'

0.6432998511904762
Configuration:
  C, Value: 816.0141182123881
  penalty, Value: 'l1'

0.9164397321428572
Configuration:
  C, Value: 885.0548269692787
  dual, Value: 'false'
  penalty, Value: 'l2'

0.8879154265873016
Configuration:
  C, Value: 545.2255230051459
  penalty, Value: 'l1'

0.916483134920635
Configuration:
  C, Value: 111.52232056523157
  dual, Value: 'false'
  penalty, Value: 'l2'

0.8885515873015872
Configuration:


### RandomForestClassifier with SMAC on KDDCup09_appetency (OpenML id : 1111)


In [0]:
# We load KDDCup09_appetency dataset from openml
kdd = oml.datasets.get_dataset(1111)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

#n_estimators 10 to 500
n_estimators = UniformIntegerHyperparameter("n_estimators", 10, 500, default_value=10)

#criterion
criterion = CategoricalHyperparameter("criterion", ["gini", "entropy"], default_value="gini")

#max_features 0.01 to 1
max_features = UniformFloatHyperparameter("max_features", 0.01, 1.0, default_value=1.0)

#min_samples_split
min_samples_split = UniformIntegerHyperparameter("min_samples_split", 2, 50, default_value=2)

#min_samples_leaf
min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf", 1, 50, default_value=1)

#bootstrap
bootstrap = CategoricalHyperparameter("bootstrap", ["true", "false"], default_value="false")

#oob_score
oob_score = CategoricalHyperparameter("oob_score", ["true", "false"], default_value="false")
use_oob_score = InCondition(child=oob_score, parent=bootstrap, values=["true"])

#add all to configuration
cs.add_hyperparameters([n_estimators, criterion, max_features, min_samples_split, min_samples_leaf, bootstrap, oob_score])
cs.add_conditions([use_oob_score])

def rfclassifier_from_cfg(cfg):
    print(cfg)
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    cfg["bootstrap"] = True if cfg["bootstrap"] == "true" else False
    if(cfg["bootstrap"]==True):
      cfg["oob_score"] = True if cfg["oob_score"] == "true" else False
    imp = Imputer(missing_values="NaN", strategy="mean", axis=0, verbose=0, copy=True)
    X, y = kdd.get_data(target=kdd.default_target_attribute)
    X_new = imp.fit_transform(X,y)
    etc = RandomForestClassifier(**cfg, random_state=42)
    scores = cross_val_score(etc, X_new, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 5,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=rfclassifier_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = rfclassifier_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'gini'
  max_features, Value: 1.0
  min_samples_leaf, Value: 1
  min_samples_split, Value: 2
  n_estimators, Value: 10

0.538194411079004
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'gini'
  max_features, Value: 0.289933630577206
  min_samples_leaf, Value: 4
  min_samples_split, Value: 16
  n_estimators, Value: 293
  oob_score, Value: 'true'

0.767211190654321
Configuration:
  bootstrap, Value: 'true'
  criterion, Value: 'entropy'
  max_features, Value: 0.5987914276996833
  min_samples_leaf, Value: 31
  min_samples_split, Value: 11
  n_estimators, Value: 500
  oob_score, Value: 'true'

0.8246945060275145
Configuration:
  bootstrap, Value: 'false'
  criterion, Value: 'entropy'
  max_features, Value: 0.7638945454477457
  min_samples_leaf, Value: 33
  min_samples_split, Value: 39
  n_estimators, Value: 365

0.798093651262129
Configuration:


### Xgboost with SMAC on MagicTelescope Dataset (OpenML id : 1120)

In [0]:
# We load MagicTelescope dataset from openml
tele = oml.datasets.get_dataset(1120)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

#max-depth 1 to 10
max_depth = UniformIntegerHyperparameter("max_depth", 1, 10, default_value=3)

#learning rate 0.001 to 1
learning_rate = UniformFloatHyperparameter("learning_rate", 0.001, 1.0, default_value=0.1)

#n_estimators 10 to 500
n_estimators = UniformIntegerHyperparameter("n_estimators", 10, 500, default_value=100)

#gamma 0 to 1
gamma = UniformFloatHyperparameter("gamma", 0.0, 1.0, default_value=0.0)

#min_child_weight 1 to 100
min_child_weight = UniformIntegerHyperparameter("min_child_weight", 1, 50, default_value=1)

#max_delta_step  0 to 50
max_delta_step  = UniformIntegerHyperparameter("max_delta_step ", 0, 50, default_value=0)

#subsample 0.1 to 1
subsample = UniformFloatHyperparameter("subsample", 0.0, 1.0, default_value=1.0)

#add all to configuration
cs.add_hyperparameters([max_depth, learning_rate, n_estimators, gamma, min_child_weight, max_delta_step, subsample])

def xgbclassifier_from_cfg(cfg):
    print(cfg)
    
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    
    X, y = tele.get_data(target=tele.default_target_attribute)
    
    xgbc = XGBClassifier(**cfg, random_state=42)
    scores = cross_val_score(xgbc, X, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 10,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=xgbclassifier_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = xgbclassifier_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  gamma, Value: 0.0
  learning_rate, Value: 0.1
  max_delta_step , Value: 0
  max_depth, Value: 3
  min_child_weight, Value: 1
  n_estimators, Value: 100
  subsample, Value: 1.0

0.9232925084792702
Configuration:
  gamma, Value: 0.86402743809919
  learning_rate, Value: 0.07948902233771658
  max_delta_step , Value: 38
  max_depth, Value: 5
  min_child_weight, Value: 49
  n_estimators, Value: 112
  subsample, Value: 0.3496101714366677

0.9206544401273818
Configuration:
  gamma, Value: 0.9494840671337647
  learning_rate, Value: 0.12263054541216019
  max_delta_step , Value: 33
  max_depth, Value: 8
  min_child_weight, Value: 27
  n_estimators, Value: 317
  subsample, Value: 0.2645617534245237

0.9282962150717481
Configuration:
  gamma, Value: 0.7482549767310993
  learning_rate, Value: 0.553562021217303
  max_delta_step , Value: 35
  max_depth, Value: 7
  min_child_weight, Value: 45
  n_estimators, Value: 6

### Logistic Regression with SMAC on OVA_Breast Dataset (OpenML id : 1128)

In [7]:
# We load OVA_Breast dataset from openml
ova = oml.datasets.get_dataset(1128)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

# Penalty as l1 or l2
penalty = CategoricalHyperparameter("penalty", ["l1", "l2"], default_value="l2")
cs.add_hyperparameter(penalty)

# Common Hyperparameters
C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
cs.add_hyperparameter(C)

# penalty specific dual parameter
dual = CategoricalHyperparameter("dual", ["true", "false"], default_value="false")
cs.add_hyperparameter(dual)

# dual is used only when penalty = "l2"
cs.add_condition(InCondition(child=dual, parent=penalty, values=["l2"]))

def logreg_from_cfg(cfg):
    print(cfg)
    """ Creates a SVM based on a configuration and evaluates it on the
    iris-dataset using cross-validation.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!

    Returns:
    --------
    A crossvalidated mean score for the svm on the loaded data-set.
    """
    # For deactivated parameters, the configuration stores None-values.
    # This is not accepted by the SVM, so we remove them.
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    # We translate boolean values:
    if(cfg["penalty"]=="l2"):
      cfg["dual"] = True if cfg["dual"] == "true" else False
    
    X, y = ova.get_data(target=ova.default_target_attribute)
    
    logreg = LogisticRegression(**cfg, random_state=42)
    scores = cross_val_score(logreg, X, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!
  
# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 20,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=logreg_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = logreg_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

Optimizing! Depending on your machine, this might take a few minutes.
Configuration:
  C, Value: 1.0
  dual, Value: 'false'
  penalty, Value: 'l2'

0.9706116175497803
Configuration:
  C, Value: 533.5897920881849
  penalty, Value: 'l1'

0.9647187371400541
Configuration:
  C, Value: 376.15135912829777
  penalty, Value: 'l1'

0.9646946827771942
Configuration:
  C, Value: 417.82477346009733
  penalty, Value: 'l1'

0.9647065323513493
Configuration:
  C, Value: 137.4362714801022
  dual, Value: 'true'
  penalty, Value: 'l2'

0.6913092559773323
Configuration:
  C, Value: 778.7203896838414
  penalty, Value: 'l1'

0.9647186870267982
Configuration:
  C, Value: 885.0548269692787
  dual, Value: 'false'
  penalty, Value: 'l2'

0.970353198964955
Configuration:
  C, Value: 293.74955604326345
  dual, Value: 'true'
  penalty, Value: 'l2'

0.6913090783700575
Configuration:
  C, Value: 111.52232056523157
  dual, Value: 'false'
  penalty, Value: 'l2'

0.9704630103740335
Configuration:
  C, Value: 857.34016

### RandomForestClassifier with SMAC on covertype Dataset (OpenML id : 293)

In [0]:
# We load ct dataset from openml
ct = oml.datasets.get_dataset(293)

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

#n_estimators 10 to 500
n_estimators = UniformIntegerHyperparameter("n_estimators", 10, 500, default_value=10)

#criterion
criterion = CategoricalHyperparameter("criterion", ["gini", "entropy"], default_value="gini")

#max_features 0.01 to 1
max_features = UniformFloatHyperparameter("max_features", 0.01, 1.0, default_value=1.0)

#min_samples_split
min_samples_split = UniformIntegerHyperparameter("min_samples_split", 2, 50, default_value=2)

#min_samples_leaf
min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf", 1, 50, default_value=1)

#bootstrap
bootstrap = CategoricalHyperparameter("bootstrap", ["true", "false"], default_value="false")

#oob_score
oob_score = CategoricalHyperparameter("oob_score", ["true", "false"], default_value="false")
use_oob_score = InCondition(child=oob_score, parent=bootstrap, values=["true"])

#add all to configuration
cs.add_hyperparameters([n_estimators, criterion, max_features, min_samples_split, min_samples_leaf, bootstrap, oob_score])
cs.add_conditions([use_oob_score])

def rfclassifier_from_cfg(cfg):
    print(cfg)
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    cfg["bootstrap"] = True if cfg["bootstrap"] == "true" else False
    if(cfg["bootstrap"]==True):
      cfg["oob_score"] = True if cfg["oob_score"] == "true" else False
    X, y = ct.get_data(target=ct.default_target_attribute)
    etc = RandomForestClassifier(**cfg, random_state=42)
    scores = cross_val_score(etc, X, y, cv=5, scoring="roc_auc")
    print(np.mean(scores))
    return 1-np.mean(scores)  # Minimize!

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 5,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=rfclassifier_from_cfg)
start = time.time()
incumbent = smac.optimize()
inc_value = rfclassifier_from_cfg(incumbent)
end = time.time()
print("Optimized Value: %.5f" % (1-inc_value))
print("Best parameter : ", incumbent.get_dictionary())
print("Time required in seconds :", (end - start))

### END

