In [1]:
import pandas as pd

In [2]:
# Read the train/test feature CSVs, then the train/test label CSVs.
X_train, X_test = pd.read_csv('data/X_train.csv'), pd.read_csv('data/X_test.csv')
y_train, y_test = pd.read_csv('data/y_train.csv'), pd.read_csv('data/y_test.csv')

## FLAML

[FLAML](https://github.com/microsoft/FLAML) uses [state-of-the-art methods](https://github.com/microsoft/FLAML/tree/main/flaml/tune) for hyperparameter optimization.

See the [tutorial blog post](https://www.anyscale.com/blog/fast-automl-with-flaml-ray-tune) for an introduction.

In [3]:
%%time

from flaml import AutoML

automl = AutoML()
automl.fit(X_train,
           y_train.values, 
           task='classification',
           verbose=0,
           time_budget=60,
          )

CPU times: user 5min 24s, sys: 40.7 s, total: 6min 5s
Wall time: 1min 2s


In [4]:
# Show the estimator names the AutoML object lists after fitting.
automl.estimator_list

['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'lrl1']

### Best estimator

In [5]:
# Inspect the AutoML object's `model` attribute (the best estimator, per the heading).
automl.model

<flaml.model.CatBoostEstimator at 0x164efa610>

In [6]:
# Parameters reported by the selected model's get_params().
automl.model.get_params()

{'early_stopping_rounds': 11,
 'n_estimators': 8192,
 'learning_rate': 0.06310701526622366,
 'thread_count': -1,
 'verbose': False,
 'random_seed': 10242048,
 'task': 'binary:logistic',
 '_estimator_type': 'classifier',
 'n_jobs': -1}

### Best model for a specific estimator type


In [7]:
# Ask for the best model found for one named estimator, here 'xgboost'.
automl.best_model_for_estimator('xgboost')

<flaml.model.XGBoostSklearnEstimator at 0x15e8e2bb0>

In [8]:
# Look up the 'xgboost' entry of best_config_per_estimator, which is indexed by estimator name.
automl.best_config_per_estimator['xgboost']

{'n_estimators': 46,
 'max_leaves': 24,
 'min_child_weight': 7.8191147585210325,
 'learning_rate': 0.35833790786349273,
 'subsample': 0.9009483350407286,
 'colsample_bylevel': 0.8546862042012804,
 'colsample_bytree': 0.787713092935094,
 'reg_alpha': 0.005771390107656191,
 'reg_lambda': 1.8015204006400616}

### Using the model

In [9]:
# Predicted class probabilities for the first 10 rows of X_test.
automl.predict_proba(X_test)[:10]

array([[0.93313544, 0.06686456],
       [0.84285751, 0.15714249],
       [0.17989189, 0.82010811],
       [0.85449378, 0.14550622],
       [0.3056853 , 0.6943147 ],
       [0.59037193, 0.40962807],
       [0.13393378, 0.86606622],
       [0.90734348, 0.09265652],
       [0.89802154, 0.10197846],
       [0.88699476, 0.11300524]])

In [10]:
# Predicted labels for the first 10 rows of X_test.
automl.predict(X_test)[:10]

array([0, 0, 1, 0, 1, 0, 1, 0, 0, 0])

### Save and load using `pickle`

In [12]:
import pickle

In [13]:
# Serialize the whole fitted AutoML object to 'automl.pic' (path relative to the working directory).
with open('automl.pic', 'wb') as f:
    pickle.dump(automl, f)

In [14]:
# Read the pickled object back under a new name, `ml`.
# NOTE: pickle.load can execute arbitrary code, so only unpickle files you trust.
with open('automl.pic', 'rb') as f:
    ml = pickle.load(f)

In [15]:
# Query the reloaded object; compare with the earlier automl.predict_proba(X_test)[:10] output.
ml.predict_proba(X_test)[:10]

array([[0.93313544, 0.06686456],
       [0.84285751, 0.15714249],
       [0.17989189, 0.82010811],
       [0.85449378, 0.14550622],
       [0.3056853 , 0.6943147 ],
       [0.59037193, 0.40962807],
       [0.13393378, 0.86606622],
       [0.90734348, 0.09265652],
       [0.89802154, 0.10197846],
       [0.88699476, 0.11300524]])