In [1]:
# Ask IPython to render matplotlib figures inline in the cell output.
# NOTE(review): no cell visible in this notebook draws a figure, so this magic
# may be a leftover from the export tooling — confirm before removing.
%matplotlib inline

Matplotlib is building the font cache; this may take a moment.



# Classification

The following example shows how to fit a simple classification model with
*auto-sklearn*.


In [2]:
from pprint import pprint

import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection

import autosklearn.classification



## Data Loading



In [3]:
# Load the breast-cancer dataset as a (features, labels) pair.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)

# Hold out a test split. random_state=1 pins the shuffle so the split (and the
# scores reported further down) can be reproduced on a fresh kernel.
# train_test_split lives in sklearn.model_selection, which is imported
# explicitly in the imports cell instead of relying on another package having
# loaded that submodule as a side effect.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

# Sanity check: the split must partition the data without dropping samples.
assert len(X_train) + len(X_test) == len(X)
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)

## Build and fit a classifier



In [4]:
# Configure the AutoML search. The two limits bound the overall search and each
# individual run respectively (units presumably seconds — confirm against the
# auto-sklearn API docs); tmp_folder is where auto-sklearn keeps its working files.
automl = autosklearn.classification.AutoSklearnClassifier(
    tmp_folder="/tmp/autosklearn_classification_example_tmp",
    per_run_time_limit=30,
    time_left_for_this_task=120,
)

# Run the search on the training split. fit() is left as the cell's last
# expression so the notebook echoes the fitted estimator's repr.
automl.fit(X_train, y_train, dataset_name="breast_cancer")

AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                      per_run_time_limit=30, time_left_for_this_task=120,
                      tmp_folder='/tmp/autosklearn_classification_example_tmp')

## View the models found by auto-sklearn



In [5]:
# Tabulate the models found by the search: one row per model_id with its rank,
# ensemble weight, model type, cost and duration.
leaderboard = automl.leaderboard()
print(leaderboard)

          rank  ensemble_weight               type      cost  duration
model_id                                                              
7            1             0.28        extra_trees  0.014184  2.273840
27           2             0.14        extra_trees  0.014184  2.835571
16           3             0.04  gradient_boosting  0.021277  1.517895
21           4             0.02        extra_trees  0.021277  1.927852
2            5             0.04      random_forest  0.028369  2.240965
3            6             0.22                mlp  0.028369  1.679106
11           7             0.02      random_forest  0.028369  2.730853
14           8             0.02                mlp  0.028369  2.975147
26           9             0.02        extra_trees  0.028369  3.231604
5           10             0.04      random_forest  0.035461  2.658178
12          11             0.02  gradient_boosting  0.035461  1.817088
17          12             0.02  gradient_boosting  0.035461  2.202067
9     

## Print the final ensemble constructed by auto-sklearn



In [6]:
# show_models() yields a mapping from model id to that member's pipeline
# components and statistics; pretty-print it with a 4-space indent.
ensemble_members = automl.show_models()
pprint(ensemble_members, indent=4)

{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7c4dcde640>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7c4de7b4f0>,
           'ensemble_weight': 0.04,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7c4dcdef70>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    3: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7c4dea3ca0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_p

## Get the score of the final ensemble



In [7]:
# Predict labels for the held-out split with the fitted ensemble, then report
# the fraction of predictions that match the true labels.
predictions = automl.predict(X_test)
accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score:", accuracy)

Accuracy score: 0.951048951048951
