In [1]:
# Execute the helper script one directory up inside this kernel via IPython's %run magic.
# NOTE(review): talktools.py is not visible here — presumably presentation/display setup; confirm.
%run ../talktools.py

## AutoML & Hyperparameter optimization

<img src="https://docs.microsoft.com/en-us/azure/machine-learning/media/concept-automated-ml/automl-concept-diagram2.png" alt="Automated machine learning concept diagram (from the Azure Machine Learning documentation)">
    
```bash
brew install swig # mac
pip install -U auto-sklearn
```
or

```bash
conda install auto-sklearn
```

auto-sklearn is an automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator
    
https://automl.github.io/auto-sklearn/master/index.html#example

The next cell runs for roughly `time_left_for_this_task` seconds (set to 180 s below):

In [1]:
import sklearn.datasets
from sklearn.model_selection import train_test_split

import autosklearn.classification

# Load the scikit-learn digits dataset as a (features, labels) pair.
X, y = sklearn.datasets.load_digits(return_X_y=True)

# Hold out 10% of the samples for the final evaluation; the fixed
# random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Search configuration, gathered in one place so the budget is easy to tune.
# time_left_for_this_task is the overall budget in seconds for the fit below.
# NOTE(review): per_run_time_limit presumably caps a single candidate run and
# None for memory_limit / max_models_on_disc presumably disables those limits —
# confirm against the auto-sklearn API docs.
automl_settings = dict(
    time_left_for_this_task=180,
    per_run_time_limit=45,
    memory_limit=None,
    n_jobs=-1,
    max_models_on_disc=None,
)
automl = autosklearn.classification.AutoSklearnClassifier(**automl_settings)

# Kept as the cell's last expression so the fitted estimator's repr is shown.
automl.fit(X_train, y_train, dataset_name='digits')

AutoSklearnClassifier(max_models_on_disc=None, memory_limit=None, n_jobs=-1,
                      per_run_time_limit=45, time_left_for_this_task=180)

In [None]:
# Leave the leaderboard as the cell's last expression so Jupyter renders the
# returned table with its rich display instead of the plain-text print() form.
automl.leaderboard()

In [None]:
# Display whatever show_models() returns as this cell's result.
# NOTE(review): the return format (text vs. dict) appears to differ between
# auto-sklearn versions — confirm for the installed release.
automl.show_models()

In [None]:
# Import the metrics submodule explicitly: `import sklearn.datasets` does not
# guarantee that `sklearn.metrics` is loaded, so relying on another package to
# have imported it as a side effect can raise AttributeError on a fresh kernel.
import sklearn.metrics

# Score the fitted search result on the held-out test split.
predictions = automl.predict(X_test)
print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))

In [None]:
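# Walkthrough for exploring auto-sklearn models with PipelineProfiler:
# https://towardsdatascience.com/exploring-auto-sklearn-models-with-pipelineprofiler-5b2c54136044
# If the package is missing, install it into this kernel's environment first:
# %pip install pipelineprofiler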

In [None]:
import PipelineProfiler
# Hand the fitted `automl` object to PipelineProfiler's auto-sklearn importer.
# NOTE(review): presumably this extracts the evaluated pipelines into the
# profiler's own data structure — confirm against the PipelineProfiler docs.
profiler_data = PipelineProfiler.import_autosklearn(automl)
# Plot the imported data; as the cell's last expression, any return value is displayed.
PipelineProfiler.plot_pipeline_matrix(profiler_data)