[View in Colaboratory](https://colab.research.google.com/github/thundercat95/AutoML/blob/master/SMAC.ipynb)

# SMAC on Python3

### Installation and Uploads

In [0]:
!pip install openml
!apt-get install build-essential swig
!pip install smac

In [31]:
from google.colab import files

uploaded = files.upload()

##files.upload returns a dictionary of the files which were uploaded. The 
##dictionary is keyed by the file name, the value is the data which was 
##uploaded.

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(name=fn, length=len(uploaded[fn])))

Saving creditopenml.csv to creditopenml.csv
User uploaded file "creditopenml.csv" with length 150828752 bytes


### SVM with SMAC on iris data

In [0]:
import numpy as np
import pandas as pd
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
import openml as oml
apikey = '2f6c58f4f6d62ea93dd32764ec88089c'
oml.config.apikey = apikey

# Import ConfigSpace and different types of parameters
from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter, UniformIntegerHyperparameter
from ConfigSpace.conditions import InCondition

# Import SMAC-utilities
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC

In [0]:
# We load the iris-dataset
iris = datasets.load_iris()

# We load Credit Card dataset from openml
creditcard = oml.datasets.get_dataset(1597)

#Credit card data of openml on local machine
creditopenml = pd.read_csv("creditopenml.csv")

In [0]:
# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

In [11]:
# We define a few possible types of SVM-kernels and add them as "kernel" to our cs
kernel = CategoricalHyperparameter("kernel", ["linear", "rbf", "poly", "sigmoid"], default_value="poly")
cs.add_hyperparameter(kernel)

kernel, Type: Categorical, Choices: {linear, rbf, poly, sigmoid}, Default: poly

In [12]:
# There are some hyperparameters shared by all kernels
C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
shrinking = CategoricalHyperparameter("shrinking", ["true", "false"], default_value="true")
cs.add_hyperparameters([C, shrinking])

[C, Type: UniformFloat, Range: [0.001, 1000.0], Default: 1.0,
 shrinking, Type: Categorical, Choices: {true, false}, Default: true]

In [13]:
# Others are kernel-specific, so we can add conditions to limit the searchspace
degree = UniformIntegerHyperparameter("degree", 1, 5, default_value=3)     # Only used by kernel poly
coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0, default_value=0.0)  # poly, sigmoid
cs.add_hyperparameters([degree, coef0])
use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
use_coef0 = InCondition(child=coef0, parent=kernel, values=["poly", "sigmoid"])
cs.add_conditions([use_degree, use_coef0])

[degree | kernel in {'poly'}, coef0 | kernel in {'poly', 'sigmoid'}]

In [14]:
# This also works for parameters that are a mix of categorical and values from a range of numbers
# For example, gamma can be either "auto" or a fixed float
gamma = CategoricalHyperparameter("gamma", ["auto", "value"], default_value="auto")  # only rbf, poly, sigmoid
gamma_value = UniformFloatHyperparameter("gamma_value", 0.0001, 8, default_value=1)
cs.add_hyperparameters([gamma, gamma_value])

[gamma, Type: Categorical, Choices: {auto, value}, Default: auto,
 gamma_value, Type: UniformFloat, Range: [0.0001, 8.0], Default: 1.0]

In [15]:
# We only activate gamma_value if gamma is set to "value"
cs.add_condition(InCondition(child=gamma_value, parent=gamma, values=["value"]))
# And again we can restrict the use of gamma in general to the choice of the kernel
cs.add_condition(InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"]))

gamma | kernel in {'rbf', 'poly', 'sigmoid'}

In [0]:
def svm_from_cfg(cfg):
    """ Creates a SVM based on a configuration and evaluates it on the
    iris-dataset using cross-validation.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!

    Returns:
    --------
    A crossvalidated mean score for the svm on the loaded data-set.
    """
    # For deactivated parameters, the configuration stores None-values.
    # This is not accepted by the SVM, so we remove them.
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    # We translate boolean values:
    cfg["shrinking"] = True if cfg["shrinking"] == "true" else False
    # And for gamma, we set it to a fixed value or to "auto" (if used)
    if "gamma" in cfg:
        cfg["gamma"] = cfg["gamma_value"] if cfg["gamma"] == "value" else "auto"
        cfg.pop("gamma_value", None)  # Remove "gamma_value"

    clf = svm.SVC(**cfg, random_state=42)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5, scoring="roc_auc")
    return 1-np.mean(scores)  # Minimize!

In [0]:
# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 200,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

In [0]:
# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
        tae_runner=svm_from_cfg)
incumbent = smac.optimize()
inc_value = svm_from_cfg(incumbent)
print("Optimized Value: %.2f" % (inc_value))
print("Best parameter : ", incumbent.get_dictionary())


### SVM with SMAC on Credit Card dataset

In [0]:
def svm_from_cfg(cfg):
    """ Creates a SVM based on a configuration and evaluates it on the
    iris-dataset using cross-validation.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!

    Returns:
    --------
    A crossvalidated mean score for the svm on the loaded data-set.
    """
    # For deactivated parameters, the configuration stores None-values.
    # This is not accepted by the SVM, so we remove them.
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    # We translate boolean values:
    cfg["shrinking"] = True if cfg["shrinking"] == "true" else False
    # And for gamma, we set it to a fixed value or to "auto" (if used)
    if "gamma" in cfg:
        cfg["gamma"] = cfg["gamma_value"] if cfg["gamma"] == "value" else "auto"
        cfg.pop("gamma_value", None)  # Remove "gamma_value"

    X, y = creditcard.get_data(target=creditcard.default_target_attribute)

    clf = svm.SVC(**cfg, random_state=42)
    scores = cross_val_score(clf, X, y, cv=5, scoring="roc_auc")
    return 1-auc_score  # Minimize!

In [29]:
creditopenml['normAmout'] = StandardScaler().fit_transform(creditopenml['Amount'].reshape(-1, 1))
creditopenml = creditopenml.drop(['Time','Amount'],axis=1)

X = creditopenml.ix[:, creditopenml.columns != 'Class']
y = creditopenml.ix[:, creditopenml.columns == 'Class']

clf = svm.SVC()
scores = cross_val_score(clf, X, y, cv=5, scoring="roc_auc")
print(np.mean(scores))

TypeError: ignored