# AutoML - toy example

In [1]:
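# Run this on the host (not inside the notebook) to start Jupyter in the auto-sklearn Docker image:
# docker run -it -p 8888:8888 mfeurer/auto-sklearn:master /bin/bash -c "mkdir -p /opt/nb && jupyter notebook --notebook-dir=/opt/nb --ip='0.0.0.0' --port=8888 --no-browser --allow-root"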

In [2]:
from pprint import pprint

import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection

import autosklearn.classification

In [3]:
# Load the breast-cancer dataset as (features, labels) arrays and hold out a
# test split. No test_size is passed, so train_test_split's default applies;
# random_state=1 pins the shuffle so the split is identical on every run.
# Note: sklearn.model_selection is a submodule and has to be imported
# explicitly in the imports cell -- do not rely on another package loading it.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

In [4]:
# Configure the AutoML classifier.
# Both time budgets are expressed in seconds (per the auto-sklearn API reference).
automl = autosklearn.classification.AutoSklearnClassifier(
    n_jobs=2,  # number of parallel jobs
    tmp_folder="/tmp/autosklearn_classification_example_tmp",  # working directory for this run
    time_left_for_this_task=120,  # budget for the whole search
    per_run_time_limit=30,  # budget for a single candidate run
)

In [5]:
# Run the AutoML search on the training split.
# The call is the cell's last expression, so the fitted estimator's repr is displayed.
automl.fit(
    X_train,
    y_train,
    dataset_name="breast_cancer",
)

AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                      n_jobs=2, per_run_time_limit=30,
                      time_left_for_this_task=120,
                      tmp_folder='/tmp/autosklearn_classification_example_tmp')

In [6]:
# Show the leaderboard of evaluated models as plain text.
leaderboard = automl.leaderboard()
print(leaderboard)

          rank  ensemble_weight                type      cost  duration
model_id                                                               
7            3             0.02         extra_trees  0.014184  1.192859
56           2             0.10         extra_trees  0.014184  1.254303
57           1             0.02   gradient_boosting  0.014184  1.179003
16           8             0.02   gradient_boosting  0.021277  0.822546
39           6             0.02         extra_trees  0.021277  1.181524
43           5             0.06                 mlp  0.021277  0.720480
44           4             0.08         extra_trees  0.021277  1.192886
63           7             0.02         extra_trees  0.021277  1.444731
2            9             0.04       random_forest  0.028369  1.340270
3           17             0.04                 mlp  0.028369  0.892900
6           16             0.04                 mlp  0.028369  0.994666
10          15             0.02       random_forest  0.028369  1

In [7]:
# Pretty-print the description of every model in the final ensemble.
ensemble_models = automl.show_models()
pprint(ensemble_models, indent=4)

{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7bead53a3c70>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7bead51a90a0>,
           'ensemble_weight': 0.04,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7bead53a3070>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    3: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7bead53a31c0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.component

In [8]:
# Evaluate the fitted ensemble on the held-out test split.
predictions = automl.predict(X_test)
accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score:", accuracy)

Accuracy score: 0.951048951048951
