# Auto-sklearn demos

In [None]:
!pip3 install auto-sklearn cloudpickle==1.5.0 imgaug==0.2.5 scipy==1.7.0

Collecting auto-sklearn
  Downloading auto-sklearn-0.14.2.tar.gz (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 4.4 MB/s 
[?25hCollecting cloudpickle==1.5.0
  Downloading cloudpickle-1.5.0-py3-none-any.whl (22 kB)
Collecting imgaug==0.2.5
  Downloading imgaug-0.2.5.tar.gz (562 kB)
[K     |████████████████████████████████| 562 kB 47.5 MB/s 
[?25hCollecting scipy==1.7.0
  Downloading scipy-1.7.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (28.5 MB)
[K     |████████████████████████████████| 28.5 MB 1.6 MB/s 
Collecting scikit-learn<0.25.0,>=0.24.0
  Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
[K     |████████████████████████████████| 22.3 MB 81.4 MB/s 
Collecting distributed<2021.07,>=2.2.0
  Downloading distributed-2021.6.2-py3-none-any.whl (722 kB)
[K     |████████████████████████████████| 722 kB 68.2 MB/s 
Collecting liac-arff
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
Collecting ConfigSpace<0.5,>=0.4.14
  Downloading Conf

Once the installation has finished, **restart** the Colab runtime so that the newly installed package versions are picked up.

## Classification

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the dataset once and read every field from the same object,
# instead of re-loading it for each attribute.
breast_cancer = load_breast_cancer()
x = breast_cancer.data
y = breast_cancer.target
labels = breast_cancer.target_names  # class names, used later for the classification report

# Fixed random_state keeps the train/test split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [None]:
from autosklearn.classification import AutoSklearnClassifier

# Overall search budget for the AutoML run (auto-sklearn interprets this in seconds).
classifier_settings = dict(time_left_for_this_task=30)

clf = AutoSklearnClassifier(**classifier_settings)
# Last expression of the cell: fit() returns the estimator, whose repr is displayed.
clf.fit(x_train, y_train, dataset_name='breast_cancer')

AutoSklearnClassifier(per_run_time_limit=3, time_left_for_this_task=30)

In [None]:
# Tabulate the models found by the search: rank, ensemble weight, model type,
# cost and duration per model id.
clf.leaderboard()

Unnamed: 0_level_0,rank,ensemble_weight,type,cost,duration
model_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,1,0.24,mlp,0.014184,1.291691
6,2,0.22,mlp,0.035461,1.414363
8,3,0.1,extra_trees,0.049645,2.233963
4,4,0.06,mlp,0.056738,1.988179
5,5,0.1,random_forest,0.056738,2.522853
9,6,0.08,extra_trees,0.06383,1.893754
7,7,0.08,random_forest,0.070922,2.33216
2,8,0.12,random_forest,0.085106,2.087917


In [None]:
# Score the fitted classifier on both splits first, then report the
# values rounded to three decimals.
train_accuracy = clf.score(x_train, y_train)
test_accuracy = clf.score(x_test, y_test)

print('Training accuracy:', train_accuracy.round(3))
print('Test accuracy:', test_accuracy.round(3))

Training accuracy: 0.991
Test accuracy: 0.979


In [None]:
# Class-label predictions for the held-out test split; reused by the evaluation cells that follow.
predicted = clf.predict(x_test)

In [None]:
from autosklearn.metrics import roc_auc, average_precision

# ROC AUC and average precision are ranking metrics: they need a continuous
# score per sample. Feeding them the hard 0/1 labels from predict() collapses
# each curve to a single operating point and misreports the area.
# Column 1 of predict_proba() is the probability of class label 1, which is
# the positive class for these binary metrics.
positive_scores = clf.predict_proba(x_test)[:, 1]

print('ROC AUC score:', roc_auc(y_test, positive_scores).round(3))
print('PR AUC score:', average_precision(y_test, positive_scores).round(3))

ROC AUC score: 0.976
PR AUC score: 0.974


In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the test split, with rows named
# after the dataset's class labels.
report_text = classification_report(
    y_test,
    predicted,
    target_names=labels,
)
print(report_text)

              precision    recall  f1-score   support

   malignant       0.98      0.96      0.97        54
      benign       0.98      0.99      0.98        89

    accuracy                           0.98       143
   macro avg       0.98      0.98      0.98       143
weighted avg       0.98      0.98      0.98       143



## Regression

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Fetch the dataset once and read both fields from the same object,
# instead of fetching it separately for the features and the target.
housing = fetch_california_housing()
x = housing.data
y = housing.target

# Fixed random_state keeps the train/test split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [None]:
from autosklearn.regression import AutoSklearnRegressor

# Search budget: an overall limit for the whole run and a cap for each
# individual model fit (auto-sklearn interprets both in seconds).
regressor_settings = dict(
    time_left_for_this_task=180,
    per_run_time_limit=30,
)

reg = AutoSklearnRegressor(**regressor_settings)
# Last expression of the cell: fit() returns the estimator, whose repr is displayed.
reg.fit(x_train, y_train, dataset_name='california_housing')

AutoSklearnRegressor(per_run_time_limit=30, time_left_for_this_task=180)

In [None]:
# Tabulate the models found by the search: rank, ensemble weight, model type,
# cost and duration per model id.
reg.leaderboard()

Unnamed: 0_level_0,rank,ensemble_weight,type,cost,duration
model_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7,1,0.48,gradient_boosting,0.158474,6.560408
5,2,0.24,gradient_boosting,0.161214,15.644512
6,3,0.26,gradient_boosting,0.165872,15.211561
11,4,0.02,k_nearest_neighbors,0.385453,1.496486


In [None]:
# score() on the regressor reports R2 (higher is better, 1.0 is a perfect fit),
# not a loss, so label the values accordingly.
print('Training R2:', reg.score(x_train, y_train).round(3))
print('Test R2:', reg.score(x_test, y_test).round(3))

Training loss: 0.94
Test loss: 0.844


In [None]:
# Predicted target values for the held-out test split; reused by the R2 cell that follows.
predicted = reg.predict(x_test)

In [None]:
from autosklearn.metrics import r2

# Coefficient of determination of the test-split predictions, rounded to
# three decimals for display.
test_r2 = r2(y_test, predicted)
print('R2:', test_r2.round(3))

R2: 0.844
