## Installing Libraries

In [1]:
# install library for automated machine learning
!pip install flaml

# install Penn Machine Learning Benchmarks library
! pip install pmlb

Collecting flaml
  Downloading FLAML-1.0.13-py3-none-any.whl (205 kB)
Collecting lightgbm>=2.3.1
  Downloading lightgbm-3.3.3-py3-none-win_amd64.whl (1.0 MB)
Installing collected packages: lightgbm, flaml
Successfully installed flaml-1.0.13 lightgbm-3.3.3
Collecting pmlb
  Downloading pmlb-1.0.1.post3-py3-none-any.whl (19 kB)
Installing collected packages: pmlb
Successfully installed pmlb-1.0.1.post3


## Import Data

In [2]:
from pmlb import fetch_data

# Fetch the 'adult' benchmark as a single pandas DataFrame
# (feature columns together with a 'target' column).
df = fetch_data(dataset_name='adult', return_X_y=False)

In [3]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,target
0,39.0,7,77516.0,9,13.0,4,1,1,4,1,2174.0,0.0,40.0,39,1
1,50.0,6,83311.0,9,13.0,2,4,0,4,1,0.0,0.0,13.0,39,1
2,38.0,4,215646.0,11,9.0,0,6,1,4,1,0.0,0.0,40.0,39,1
3,53.0,4,234721.0,1,7.0,2,6,0,2,1,0.0,0.0,40.0,39,1
4,28.0,4,338409.0,9,13.0,2,10,5,2,0,0.0,0.0,40.0,5,1


In [11]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(48842, 15)

In [12]:
# List the column labels; 'target' is the label column, the rest are features.
df.columns

Index(['age', 'workclass', 'fnlwgt', 'education', 'education-num',
       'marital-status', 'occupation', 'relationship', 'race', 'sex',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'target'],
      dtype='object')

In [13]:
# Feature matrix: every column except the label.
# Dropping 'target' avoids repeating the full column list by hand, so this cell
# cannot drift out of sync with the dataset's schema. Column order is preserved.
X = df.drop(columns=['target'])

## Train/Test Split

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# partition repeatable across runs.
features, labels = X, df['target']
xtrain, xtest, ytrain, ytest = train_test_split(
    features, labels, test_size=0.3, random_state=21
)

## Training the Model

In [15]:
from flaml import AutoML

# Bound and seed the search explicitly. Without a stated budget and seed the
# runtime is implicit and re-runs may explore different configurations and
# select a different model.
# NOTE(review): a wall-clock budget still lets the number of trials vary between
# machines, so results may not be bit-for-bit identical - confirm if that matters.
TIME_BUDGET_S = 60  # wall-clock seconds for the whole search; raise for a more thorough search
SEED = 21           # same value as the train/test split's random_state

automl = AutoML()
automl.fit(
    X_train=xtrain,
    y_train=ytrain,
    task="classification",  # use task="regression" when the target variable is continuous
    time_budget=TIME_BUDGET_S,
    seed=SEED,
)


[flaml.automl: 10-17 10:47:27] {2600} INFO - task = classification
[flaml.automl: 10-17 10:47:27] {2602} INFO - Data split method: stratified
[flaml.automl: 10-17 10:47:27] {2605} INFO - Evaluation method: holdout
[flaml.automl: 10-17 10:47:27] {2727} INFO - Minimizing error metric: 1-roc_auc
[flaml.automl: 10-17 10:47:27] {2869} INFO - List of ML learners in AutoML Run: ['extra_tree', 'lgbm', 'rf', 'xgboost', 'xgb_limitdepth', 'lrl1']
[flaml.automl: 10-17 10:47:27] {3164} INFO - iteration 0, current learner extra_tree
[flaml.automl: 10-17 10:47:30] {3297} INFO - Estimated sufficient time budget=28369s. Estimated necessary time budget=344s.
[flaml.automl: 10-17 10:47:30] {3344} INFO -  at 3.0s,	estimator extra_tree's best error=0.0820,	best estimator extra_tree's best error=0.0820
[flaml.automl: 10-17 10:47:30] {3164} INFO - iteration 1, current learner lgbm
[flaml.automl: 10-17 10:47:31] {3344} INFO -  at 3.7s,	estimator lgbm's best error=0.0687,	best estimator lgbm's best error=0.068

## Best model

In [16]:
# Name of the best-performing learner found by the search (a string such as 'lgbm'),
# not the fitted model object itself.
automl.best_estimator


'lgbm'

In [17]:
# Hyperparameter configuration selected for the best learner.
print(automl.best_config)

{'n_estimators': 141, 'num_leaves': 139, 'min_child_samples': 8, 'learning_rate': 0.04824748268727149, 'log_max_bin': 9, 'colsample_bytree': 0.5261441571042451, 'reg_alpha': 0.0028969208338993344, 'reg_lambda': 0.024463247502165594}


## Making Predictions

In [18]:
from sklearn.metrics import classification_report

# Predict labels for the held-out rows.
preds = automl.predict(xtest)

# Per-class precision / recall / F1 against the true held-out labels.
report = classification_report(ytest, preds)
print(report)

              precision    recall  f1-score   support

           0       0.78      0.66      0.71      3530
           1       0.90      0.94      0.92     11123

    accuracy                           0.87     14653
   macro avg       0.84      0.80      0.82     14653
weighted avg       0.87      0.87      0.87     14653

