In [1]:
!python --version

Python 3.7.13


In [3]:
!pip list

Package                       Version
----------------------------- ----------------------------
absl-py                       1.2.0
aeppl                         0.0.33
aesara                        2.7.9
aiohttp                       3.8.1
aiosignal                     1.2.0
alabaster                     0.7.12
albumentations                1.2.1
altair                        4.2.0
appdirs                       1.4.4
arviz                         0.12.1
astor                         0.8.1
astropy                       4.3.1
astunparse                    1.6.3
async-timeout                 4.0.2
asynctest                     0.13.0
atari-py                      0.2.9
atomicwrites                  1.4.1
attrs                         22.1.0
audioread                     3.0.0
autograd                      1.4
Babel                         2.10.3
backcall                      0.2.0
beautifulsoup4                4.6.3
bleach                        5.0.1
blis                          0.7.8

In [4]:
!pip install flaml

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting flaml
  Downloading FLAML-1.0.11-py3-none-any.whl (205 kB)
[K     |████████████████████████████████| 205 kB 4.3 MB/s 
Collecting lightgbm>=2.3.1
  Downloading lightgbm-3.3.2-py3-none-manylinux1_x86_64.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 65.9 MB/s 
Installing collected packages: lightgbm, flaml
  Attempting uninstall: lightgbm
    Found existing installation: lightgbm 2.2.3
    Uninstalling lightgbm-2.2.3:
      Successfully uninstalled lightgbm-2.2.3
Successfully installed flaml-1.0.11 lightgbm-3.3.2


In [40]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from flaml import AutoML

In [18]:

automl = AutoML()
automl_settings={
    'time_budget':300,
    'metric': 'roc_auc',
    'task':  'classification',
    'log_file_name': 'myopia.log'

}


In [9]:
df = pd.read_csv('../content/reduced_filtered_df.csv')

In [10]:
df.head(2)

Unnamed: 0,ACD,LT,VCD,SPORTHR,DADMY,delta_spheq,total_positive_screen,MYOPIC
0,3.702,3.392,15.29,4,1,1.358,8,0
1,3.462,3.514,15.52,14,0,1.929,10,0


In [13]:
X=df.drop('MYOPIC', axis=1)
y=df['MYOPIC']

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y)

## Test AUTOML

In [19]:
automl.fit(X_train, y_train, **automl_settings)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
INFO:flaml.automl: at 10.2s,	estimator lgbm's best error=0.1752,	best estimator lgbm's best error=0.1752
[flaml.automl: 09-05 00:03:29] {3133} INFO - iteration 36, current learner extra_tree
INFO:flaml.automl:iteration 36, current learner extra_tree
[flaml.automl: 09-05 00:03:31] {3319} INFO -  at 11.8s,	estimator extra_tree's best error=0.2082,	best estimator lgbm's best error=0.1752
INFO:flaml.automl: at 11.8s,	estimator extra_tree's best error=0.2082,	best estimator lgbm's best error=0.1752
[flaml.automl: 09-05 00:03:31] {3133} INFO - iteration 37, current learner lgbm
INFO:flaml.automl:iteration 37, current learner lgbm
[flaml.automl: 09-05 00:03:31] {3319} INFO -  at 11.8s,	estimator lgbm's best error=0.1752,	best estimator lgbm's best error=0.1752
INFO:flaml.automl: at 11.8s,	estimator lgbm's best error=0.1752,	best estimator lgbm's best error=0.1752
[flaml.automl: 09-05 00:03:31] {3133} INFO - iteration 38, current

In [20]:
print(automl.model)

<flaml.model.XGBoostSklearnEstimator object at 0x7fcdef8c1ed0>


In [26]:
automl.best_estimator

'xgboost'

In [33]:
params = automl.best_config_per_estimator['xgboost']
params

{'n_estimators': 7,
 'max_leaves': 6,
 'min_child_weight': 5.8614537300277965,
 'learning_rate': 0.11677765280016518,
 'subsample': 0.8895588746662894,
 'colsample_bylevel': 0.7905358317292889,
 'colsample_bytree': 0.9544060556215052,
 'reg_alpha': 0.0015245843735931766,
 'reg_lambda': 0.5536296597037936}

## Test the XGBoost with Params

In [36]:
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score


In [38]:
clf = XGBClassifier()
clf.set_params(**params)
scores = cross_val_score(clf, X, y, scoring='precision')

In [41]:
np.mean(scores)

0.5533333333333333

In [43]:
clf.get_params()

{'base_score': 0.5,
 'booster': 'gbtree',
 'colsample_bylevel': 0.7905358317292889,
 'colsample_bynode': 1,
 'colsample_bytree': 0.9544060556215052,
 'gamma': 0,
 'learning_rate': 0.11677765280016518,
 'max_delta_step': 0,
 'max_depth': 3,
 'min_child_weight': 5.8614537300277965,
 'missing': None,
 'n_estimators': 7,
 'n_jobs': 1,
 'nthread': None,
 'objective': 'binary:logistic',
 'random_state': 0,
 'reg_alpha': 0.0015245843735931766,
 'reg_lambda': 0.5536296597037936,
 'scale_pos_weight': 1,
 'seed': None,
 'silent': None,
 'subsample': 0.8895588746662894,
 'verbosity': 1,
 'max_leaves': 6}

In [45]:
clf.fit(X,y)
y_pred=clf.predict(X_test)

In [46]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.99      0.95        81
           1       0.80      0.33      0.47        12

    accuracy                           0.90        93
   macro avg       0.85      0.66      0.71        93
weighted avg       0.90      0.90      0.89        93

