# Auto-sklearn demos

Once the installation below has finished, **restart** the Colab runtime so the newly installed package versions are picked up (any errors printed during installation can be ignored).

In [2]:
!pip3 install auto-sklearn

Collecting auto-sklearn
  Using cached auto-sklearn-0.14.2.tar.gz (6.3 MB)
Collecting scipy>=1.7.0
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 1.2 MB/s 
Collecting scikit-learn<0.25.0,>=0.24.0
  Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
[K     |████████████████████████████████| 22.3 MB 1.5 MB/s 
Collecting distributed<2021.07,>=2.2.0
  Downloading distributed-2021.6.2-py3-none-any.whl (722 kB)
[K     |████████████████████████████████| 722 kB 44.0 MB/s 
Collecting liac-arff
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
Collecting ConfigSpace<0.5,>=0.4.14
  Downloading ConfigSpace-0.4.20-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.2 MB)
[K     |████████████████████████████████| 4.2 MB 52.6 MB/s 
[?25hCollecting pynisher>=0.6.3
  Downloading pynisher-0.6.4.tar.gz (11 kB)
Collecting pyrfr<0.9,>=0.8.1
  Downloading pyrfr-0.8.2-cp37-cp37

## Classification

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the iris dataset once and reuse the result instead of re-loading it
# for every attribute.
iris = load_iris()
x = iris.data
y = iris.target
labels = iris.target_names  # class names, used later to label the report rows

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [2]:
from autosklearn.classification import AutoSklearnClassifier

# Time limits handed to auto-sklearn: one for the whole task, one per single run.
classifier_time_limits = {
    'time_left_for_this_task': 60,
    'per_run_time_limit': 10,
}

clf = AutoSklearnClassifier(**classifier_time_limits)
clf.fit(x_train, y_train, dataset_name='iris')

AutoSklearnClassifier(per_run_time_limit=10, time_left_for_this_task=60)

In [3]:
# Display the leaderboard table of the fitted classifier
# (rank, ensemble weight, model type, cost and duration per model).
clf.leaderboard()

Unnamed: 0_level_0,rank,ensemble_weight,type,cost,duration
model_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10,1,0.28,mlp,0.025,1.609006
12,2,0.32,decision_tree,0.025,1.035039
6,3,0.06,libsvm_svc,0.075,1.9297
4,4,0.08,passive_aggressive,0.1,4.286171
9,5,0.02,random_forest,0.1,2.171753
3,6,0.04,random_forest,0.125,8.43378
5,7,0.12,random_forest,0.125,4.831507
14,8,0.06,random_forest,0.125,1.790408
8,9,0.02,gradient_boosting,0.15,1.575993


In [4]:
# Score the fitted classifier on both splits, then report each value
# rounded to three decimals.
train_accuracy = clf.score(x_train, y_train)
test_accuracy = clf.score(x_test, y_test)

print('Training accuracy:', train_accuracy.round(3))
print('Test accuracy:', test_accuracy.round(3))

Training accuracy: 0.992
Test accuracy: 1.0


In [5]:
# Predict a class for every sample in the held-out test split; the result is
# consumed by the classification report cell that follows.
predicted = clf.predict(x_test)

In [6]:
from sklearn.metrics import classification_report

# Build the per-class report for the test split, with rows named after the
# iris classes, then print it.
test_report = classification_report(y_test, predicted, target_names=labels)
print(test_report)

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



## Regression

In [7]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

# Fetch features and targets with a single load instead of loading the
# dataset twice.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2; this cell relies on the scikit-learn 0.24.x that the install step pulls in.
x, y = load_boston(return_X_y=True)

# No test_size is passed, so train_test_split uses its default hold-out size;
# the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [8]:
from autosklearn.regression import AutoSklearnRegressor

# Time limits handed to auto-sklearn: one for the whole task, one per single run.
regressor_time_limits = {
    'time_left_for_this_task': 300,
    'per_run_time_limit': 30,
}

reg = AutoSklearnRegressor(**regressor_time_limits)
reg.fit(x_train, y_train, dataset_name='boston')



AutoSklearnRegressor(per_run_time_limit=30, time_left_for_this_task=300)

In [9]:
# Display the leaderboard table of the fitted regressor
# (rank, ensemble weight, model type, cost and duration per model).
reg.leaderboard()

Unnamed: 0_level_0,rank,ensemble_weight,type,cost,duration
model_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
12,1,0.02,gaussian_process,0.07656,18.587469
3,2,0.18,gaussian_process,0.07731,15.554443
38,3,0.26,adaboost,0.079273,1.997614
9,4,0.26,gaussian_process,0.083105,16.257156
11,5,0.12,gradient_boosting,0.093048,1.456221
13,6,0.12,random_forest,0.096343,22.957669
22,7,0.04,libsvm_svr,0.103631,0.816414


In [10]:
# reg.score reports the R2 coefficient (the test value equals the r2 metric
# computed at the end of the notebook), where higher is better, so the values
# are labelled as R2 rather than as a loss.
print('Training R2:', reg.score(x_train, y_train).round(3))
print('Test R2:', reg.score(x_test, y_test).round(3))

Training loss: 0.975
Test loss: 0.868


In [11]:
# Predict the target for every sample in the held-out test split; the result
# is scored with the r2 metric in the next cell.
predicted = reg.predict(x_test)

In [12]:
from autosklearn.metrics import r2

# Score the held-out predictions with auto-sklearn's r2 metric and report the
# value rounded to three decimals.
test_r2 = r2(y_test, predicted)
print('R2:', test_r2.round(3))

R2: 0.868
