## FLAML (A Fast Library for Automated Machine Learning & Tuning) demos

In [1]:
!pip3 install --upgrade flaml flaml[automl]

### Classification

In [2]:
import pandas as pd

# header=None makes pandas read the first line as data, so the column
# names are supplied explicitly instead.
iris_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_columns = ('sepal length', 'sepal width', 'petal length', 'petal width', 'species')

df = pd.read_csv(iris_url, header=None, names=iris_columns)
df

Unnamed: 0,sepal length,sepal width,petal length,petal width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
from sklearn.preprocessing import LabelEncoder

# Swap the string species names for integer codes and keep the encoder's
# class names so reports can show readable labels.
# NOTE: the 'species' column of df is overwritten, so re-running this cell on
# its own would fit the encoder on the integer codes instead of the names.
le = LabelEncoder()
species_codes = le.fit_transform(df['species'])
df['species'] = species_codes
labels = le.classes_

labels

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Separate the feature columns from the 'species' target in each half.
x_train, y_train = train.drop(columns='species'), train['species']
x_test, y_test = test.drop(columns='species'), test['species']

In [5]:
from flaml import AutoML

# Search settings for the classification run (time_budget is the search budget passed to fit).
clf_settings = {
    'task': 'classification',
    'time_budget': 30,
}

clf = AutoML()
clf.fit(X_train=x_train, y_train=y_train, **clf_settings)

[flaml.automl.logger: 04-12 07:57:43] {1680} INFO - task = classification
[flaml.automl.logger: 04-12 07:57:43] {1691} INFO - Evaluation method: cv
[flaml.automl.logger: 04-12 07:57:43] {1789} INFO - Minimizing error metric: log_loss
[flaml.automl.logger: 04-12 07:57:43] {1901} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl.logger: 04-12 07:57:43] {2219} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 04-12 07:57:43] {2345} INFO - Estimated sufficient time budget=1522s. Estimated necessary time budget=35s.
[flaml.automl.logger: 04-12 07:57:43] {2392} INFO -  at 0.2s,	estimator lgbm's best error=0.6695,	best estimator lgbm's best error=0.6695
[flaml.automl.logger: 04-12 07:57:43] {2219} INFO - iteration 1, current learner lgbm


  from .autonotebook import tqdm as notebook_tqdm


[flaml.automl.logger: 04-12 07:57:43] {2392} INFO -  at 0.2s,	estimator lgbm's best error=0.6695,	best estimator lgbm's best error=0.6695
[flaml.automl.logger: 04-12 07:57:43] {2219} INFO - iteration 2, current learner lgbm
[flaml.automl.logger: 04-12 07:57:43] {2392} INFO -  at 0.2s,	estimator lgbm's best error=0.3505,	best estimator lgbm's best error=0.3505
[flaml.automl.logger: 04-12 07:57:43] {2219} INFO - iteration 3, current learner lgbm
[flaml.automl.logger: 04-12 07:57:43] {2392} INFO -  at 0.3s,	estimator lgbm's best error=0.2013,	best estimator lgbm's best error=0.2013
[flaml.automl.logger: 04-12 07:57:43] {2219} INFO - iteration 4, current learner lgbm
[flaml.automl.logger: 04-12 07:57:43] {2392} INFO -  at 0.3s,	estimator lgbm's best error=0.2013,	best estimator lgbm's best error=0.2013
[flaml.automl.logger: 04-12 07:57:43] {2219} INFO - iteration 5, current learner lgbm
[flaml.automl.logger: 04-12 07:57:43] {2392} INFO -  at 0.4s,	estimator lgbm's best error=0.2013,	best e

In [6]:
# Report which learner the search selected and the configuration it was given.
for caption, value in (
    ('Best ML leaner:', clf.best_estimator),
    ('Best hyperparmeter config:', clf.best_config),
):
    print(caption, value)

Best ML leaner: rf
Best hyperparmeter config: {'n_estimators': 42, 'max_features': 0.4960393048486127, 'max_leaves': 21, 'criterion': 'gini'}


In [7]:
# Run the fitted AutoML object on the held-out feature rows; the result is
# compared against y_test in the following cell.
predicted = clf.predict(x_test)

In [8]:
from sklearn.metrics import classification_report

# Precision/recall/F1 text report on the held-out rows, with rows named
# after the original species strings stored in `labels`.
report = classification_report(y_test, predicted, target_names=labels)
print(report)

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



### Regression

In [9]:
import pandas as pd

# NOTE: this rebinds `df`, which held the iris frame in the classification section.
boston_url = 'https://github.com/PacktPublishing/Automated-Machine-Learning-with-AutoKeras/raw/main/boston.csv'
df = pd.read_csv(boston_url)
df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,6.48,22.0


In [10]:
from sklearn.model_selection import train_test_split

# 80/20 split of the whole frame; no column is dropped here, so the MEDV
# target stays inside both `train` and `test`.
# random_state=42 fixes the shuffle so the split is repeatable.
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [11]:
from flaml import AutoML

# Search settings for the regression run: only the two listed learners are
# tried, within the given time budget.
reg_settings = {
    'task': 'regression',
    'estimator_list': ['lgbm', 'xgboost'],
    'time_budget': 300,
}

# The training frame still contains the target, so it is passed whole and the
# target column is identified by name.
reg = AutoML()
reg.fit(dataframe=train, label='MEDV', **reg_settings)

[flaml.automl.logger: 04-12 07:58:14] {1680} INFO - task = regression
[flaml.automl.logger: 04-12 07:58:14] {1691} INFO - Evaluation method: cv
[flaml.automl.logger: 04-12 07:58:14] {1789} INFO - Minimizing error metric: 1-r2
[flaml.automl.logger: 04-12 07:58:14] {1901} INFO - List of ML learners in AutoML Run: ['lgbm', 'xgboost']
[flaml.automl.logger: 04-12 07:58:14] {2219} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 04-12 07:58:14] {2345} INFO - Estimated sufficient time budget=443s. Estimated necessary time budget=0s.
[flaml.automl.logger: 04-12 07:58:14] {2392} INFO -  at 0.1s,	estimator lgbm's best error=0.6264,	best estimator lgbm's best error=0.6264
[flaml.automl.logger: 04-12 07:58:14] {2219} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 04-12 07:58:14] {2392} INFO -  at 0.1s,	estimator lgbm's best error=0.6264,	best estimator lgbm's best error=0.6264
[flaml.automl.logger: 04-12 07:58:14] {2219} INFO - iteration 2, current learner lgbm
[flaml

In [12]:
# Report which learner the search selected and the configuration it was given.
for caption, value in (
    ('Best ML leaner:', reg.best_estimator),
    ('Best hyperparmeter config:', reg.best_config),
):
    print(caption, value)

Best ML leaner: xgboost
Best hyperparmeter config: {'n_estimators': 62, 'max_leaves': 11, 'min_child_weight': 0.20902382190463337, 'learning_rate': 0.13948116165042687, 'subsample': 0.4992939858323191, 'colsample_bylevel': 0.5809296615570975, 'colsample_bytree': 0.946650685780894, 'reg_alpha': 0.053950275327704814, 'reg_lambda': 0.0014745070199918}


In [13]:
# Split the held-out frame into the MEDV target and the remaining feature
# columns, then predict on the features.
y_test = test['MEDV']
x_test = test.drop(columns='MEDV')

predicted = reg.predict(x_test)

In [14]:
from sklearn.metrics import r2_score, mean_absolute_error

# Use the built-in round() rather than the .round() method: the method exists
# on NumPy scalars only, so it raises AttributeError when a metric returns a
# plain Python float. round() handles both and prints the same text.
r2 = round(r2_score(y_test, predicted), 3)
mae = round(mean_absolute_error(y_test, predicted), 3)

print('Prection R2:', r2)
print('Prection MAE:', mae)

Prection R2: 0.904
Prection MAE: 1.894
