# myopia_flaml.ipynb

- This notebook was run in Google Colaboratory
- The technology used in FLAML is Microsofts's AutoML

In [1]:
!python --version

Python 3.7.13


In [2]:
!pip list

Package                       Version
----------------------------- ----------------------------
absl-py                       1.2.0
aeppl                         0.0.33
aesara                        2.7.9
aiohttp                       3.8.1
aiosignal                     1.2.0
alabaster                     0.7.12
albumentations                1.2.1
altair                        4.2.0
appdirs                       1.4.4
arviz                         0.12.1
astor                         0.8.1
astropy                       4.3.1
astunparse                    1.6.3
async-timeout                 4.0.2
asynctest                     0.13.0
atari-py                      0.2.9
atomicwrites                  1.4.1
attrs                         22.1.0
audioread                     3.0.0
autograd                      1.4
Babel                         2.10.3
backcall                      0.2.0
beautifulsoup4                4.6.3
bleach                        5.0.1
blis                          0.7.8

In [4]:
!pip install flaml

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from flaml import AutoML

In [15]:

automl = AutoML()
automl_settings={
    'time_budget':800,
    'metric': 'roc_auc',
    'task':  'classification',
    'log_file_name': 'myopia.log'
}

In [16]:
df = pd.read_csv('../content/reduced_filtered_df.csv')

In [17]:
df.head(2)

Unnamed: 0,ACD,LT,VCD,SPORTHR,DADMY,delta_spheq,total_positive_screen,MYOPIC
0,3.702,3.392,15.29,4,1,1.358,8,0
1,3.462,3.514,15.52,14,0,1.929,10,0


In [18]:
X=df.drop('MYOPIC', axis=1)
y=df['MYOPIC']

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y)

## Test AUTOML

In [20]:
automl.fit(X_train, y_train, **automl_settings)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[flaml.automl: 09-07 16:05:44] {3174} INFO - iteration 1273, current learner lgbm
INFO:flaml.automl:iteration 1273, current learner lgbm
[flaml.automl: 09-07 16:05:44] {3360} INFO -  at 259.2s,	estimator lgbm's best error=0.1074,	best estimator xgboost's best error=0.1051
INFO:flaml.automl: at 259.2s,	estimator lgbm's best error=0.1074,	best estimator xgboost's best error=0.1051
[flaml.automl: 09-07 16:05:44] {3174} INFO - iteration 1274, current learner lgbm
INFO:flaml.automl:iteration 1274, current learner lgbm
[flaml.automl: 09-07 16:05:45] {3360} INFO -  at 259.3s,	estimator lgbm's best error=0.1074,	best estimator xgboost's best error=0.1051
INFO:flaml.automl: at 259.3s,	estimator lgbm's best error=0.1074,	best estimator xgboost's best error=0.1051
[flaml.automl: 09-07 16:05:45] {3174} INFO - iteration 1275, current learner xgboost
INFO:flaml.automl:iteration 1275, current learner xgboost
[flaml.automl: 09-07 16:05:4

In [47]:
print(automl.model)

<flaml.model.ExtraTreesEstimator object at 0x7efdd070c5d0>


In [48]:
automl.best_estimator

'extra_tree'

In [49]:
print(f"Best Loss:  {automl.best_loss_per_estimator['xgboost']}")
xgb_params = automl.best_config_per_estimator['xgboost']
xgb_params

Best Loss:  0.10512861394557824


{'n_estimators': 13,
 'max_leaves': 7,
 'min_child_weight': 4.335302249239383,
 'learning_rate': 0.2586916043984912,
 'subsample': 0.6107563495205902,
 'colsample_bylevel': 1.0,
 'colsample_bytree': 0.8667840140423164,
 'reg_alpha': 0.08918943610851687,
 'reg_lambda': 0.08977890903450676}

In [50]:
print(f"Best Loss:  {automl.best_loss_per_estimator['extra_tree']}")
et_params = automl.best_config_per_estimator['extra_tree']
et_params

Best Loss:  0.09757121598639451


{'n_estimators': 8,
 'max_leaves': 6,
 'max_features': 0.8256461243349812,
 'criterion': 'entropy'}

In [51]:
print(f"Best Loss:  {automl.best_loss_per_estimator['rf']}")
rf_params = automl.best_config_per_estimator['rf']
rf_params

Best Loss:  0.11372767857142863


{'n_estimators': 7,
 'max_features': 0.740603165778064,
 'max_leaves': 6,
 'criterion': 'gini'}

In [52]:
# Here is how the models compared
automl.best_loss_per_estimator

{'lgbm': 0.10673666788143825,
 'rf': 0.11372767857142863,
 'xgboost': 0.10512861394557824,
 'extra_tree': 0.09757121598639451,
 'xgb_limitdepth': 0.10159742468415936,
 'lrl1': 0.14579689018464528}

In [54]:
print('Best ML leaner:', automl.best_estimator)
print('Best hyperparmeter config:', automl.best_config)
print('Best accuracy on validation data: {0:.4g}'.format(1-automl.best_loss))
print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))

Best ML leaner: extra_tree
Best hyperparmeter config: {'n_estimators': 8, 'max_leaves': 6, 'max_features': 0.8256461243349812, 'criterion': 'entropy'}
Best accuracy on validation data: 0.9024
Training duration of best run: 0.3296 s


## Test the XGBoost with Params

In [55]:
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score


In [56]:
clf = XGBClassifier()
clf.set_params(**xgb_params)
scores = cross_val_score(clf, X_train, y_train, scoring='precision')

In [57]:
np.mean(scores)

0.6666666666666666

In [58]:
clf.get_params()

{'base_score': 0.5,
 'booster': 'gbtree',
 'colsample_bylevel': 1.0,
 'colsample_bynode': 1,
 'colsample_bytree': 0.8667840140423164,
 'gamma': 0,
 'learning_rate': 0.2586916043984912,
 'max_delta_step': 0,
 'max_depth': 3,
 'min_child_weight': 4.335302249239383,
 'missing': None,
 'n_estimators': 13,
 'n_jobs': 1,
 'nthread': None,
 'objective': 'binary:logistic',
 'random_state': 0,
 'reg_alpha': 0.08918943610851687,
 'reg_lambda': 0.08977890903450676,
 'scale_pos_weight': 1,
 'seed': None,
 'silent': None,
 'subsample': 0.6107563495205902,
 'verbosity': 1,
 'max_leaves': 7}

In [59]:
clf.fit(X,y)
y_pred=clf.predict(X_test)

In [60]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.90      0.95      0.92        81
           1       0.43      0.25      0.32        12

    accuracy                           0.86        93
   macro avg       0.66      0.60      0.62        93
weighted avg       0.84      0.86      0.84        93

