## Predict having Monkey Pox

In [14]:
import pandas as pd
import numpy as np
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score, train_test_split, StratifiedKFold
from sklearn.metrics import roc_auc_score, log_loss, f1_score
import optuna
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('DATA.csv').set_index('Patient_ID')
df

Unnamed: 0_level_0,Systemic Illness,Rectal Pain,Sore Throat,Penile Oedema,Oral Lesions,Solitary Lesion,Swollen Tonsils,HIV Infection,Sexually Transmitted Infection,MonkeyPox
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
P0,,False,True,True,True,False,True,False,False,Negative
P1,Fever,True,False,True,True,False,False,True,False,Positive
P2,Fever,False,True,True,False,False,False,True,False,Positive
P3,,True,False,False,False,True,True,True,False,Positive
P4,Swollen Lymph Nodes,True,True,True,False,False,True,True,False,Positive
...,...,...,...,...,...,...,...,...,...,...
P24995,,True,True,False,True,True,False,False,True,Positive
P24996,Fever,False,True,True,False,True,True,True,True,Positive
P24997,,True,True,False,False,True,True,False,False,Positive
P24998,Swollen Lymph Nodes,False,True,False,True,True,True,False,False,Negative


In [3]:
bincols = list(set(df.columns) - {'Systemic Illness','MonkeyPox'})
df.MonkeyPox = df.MonkeyPox.replace({'Negative':0,'Positive':1})
df = pd.get_dummies(df, columns = bincols)
df['Systemic Illness'] = df['Systemic Illness'].astype('category').cat.codes
df

Unnamed: 0_level_0,Systemic Illness,MonkeyPox,Penile Oedema_False,Penile Oedema_True,Rectal Pain_False,Rectal Pain_True,Swollen Tonsils_False,Swollen Tonsils_True,Oral Lesions_False,Oral Lesions_True,HIV Infection_False,HIV Infection_True,Sexually Transmitted Infection_False,Sexually Transmitted Infection_True,Sore Throat_False,Sore Throat_True,Solitary Lesion_False,Solitary Lesion_True
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
P0,2,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0
P1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0
P2,0,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0
P3,2,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1
P4,3,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
P24995,2,1,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0,1
P24996,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1
P24997,2,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1
P24998,3,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1


In [4]:
X, y = df.drop('MonkeyPox', axis=1), df.MonkeyPox
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123, shuffle=True)

In [18]:
model = LGBMClassifier(verbose=-1)
model.fit(X_train, y_train)
y_hat = model.predict(X_test)
roc1 = roc_auc_score(y_test, y_hat)

In [6]:
cv_roc1 = np.mean(cross_val_score(model, X_train, y_train, cv=5, scoring='roc_auc'))

In [7]:
from optuna.integration import LightGBMPruningCallback

def objective(trial, X, y):
    param_grid = {
        # "device_type": trial.suggest_categorical("device_type", ['gpu']),
        "n_estimators": trial.suggest_categorical("n_estimators", [10000]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 10000, step=100),
        "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        "bagging_fraction": trial.suggest_float(
            "bagging_fraction", 0.2, 0.95, step=0.1
        ),
        "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
        "feature_fraction": trial.suggest_float(
            "feature_fraction", 0.2, 0.95, step=0.1
        ),
    }

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1121218)

    cv_scores = np.empty(5)
    for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        model = LGBMClassifier(objective="binary", **param_grid)
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            eval_metric="binary_logloss",
            early_stopping_rounds=100,
            callbacks=[
                LightGBMPruningCallback(trial, "binary_logloss")
            ],  # Add a pruning callback
        )
        preds = model.predict_proba(X_test)
        cv_scores[idx] = log_loss(y_test, preds)

    return np.mean(cv_scores)

In [8]:
study = optuna.create_study(direction="minimize", study_name="LGBM Classifier")
func = lambda trial: objective(trial, X_train, y_train)
study.optimize(func, n_trials=20)

[32m[I 2022-10-16 16:17:53,392][0m A new study created in memory with name: LGBM Classifier[0m




[32m[I 2022-10-16 16:17:54,245][0m Trial 0 finished with value: 0.5921783786120465 and parameters: {'n_estimators': 10000, 'learning_rate': 0.1434445233517556, 'num_leaves': 880, 'max_depth': 8, 'min_data_in_leaf': 1800, 'lambda_l1': 25, 'lambda_l2': 85, 'min_gain_to_split': 1.1992742913655703, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 0.5921783786120465.[0m




[32m[I 2022-10-16 16:17:54,567][0m Trial 1 finished with value: 0.6554054224768588 and parameters: {'n_estimators': 10000, 'learning_rate': 0.29527662485133777, 'num_leaves': 2840, 'max_depth': 3, 'min_data_in_leaf': 6800, 'lambda_l1': 50, 'lambda_l2': 15, 'min_gain_to_split': 5.344977932342086, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 0.5921783786120465.[0m




[32m[I 2022-10-16 16:17:55,124][0m Trial 2 finished with value: 0.5985584649109992 and parameters: {'n_estimators': 10000, 'learning_rate': 0.2989034198724002, 'num_leaves': 980, 'max_depth': 9, 'min_data_in_leaf': 1000, 'lambda_l1': 90, 'lambda_l2': 45, 'min_gain_to_split': 3.7346124111370873, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 0.5921783786120465.[0m




[32m[I 2022-10-16 16:17:55,325][0m Trial 3 finished with value: 0.6554054224768588 and parameters: {'n_estimators': 10000, 'learning_rate': 0.23855782166676312, 'num_leaves': 40, 'max_depth': 3, 'min_data_in_leaf': 8400, 'lambda_l1': 20, 'lambda_l2': 80, 'min_gain_to_split': 10.72185613023009, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 0.5921783786120465.[0m




[32m[I 2022-10-16 16:17:56,103][0m Trial 4 finished with value: 0.6213372291057673 and parameters: {'n_estimators': 10000, 'learning_rate': 0.13402646178552152, 'num_leaves': 1440, 'max_depth': 8, 'min_data_in_leaf': 1900, 'lambda_l1': 0, 'lambda_l2': 10, 'min_gain_to_split': 14.28410094224418, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 0.5921783786120465.[0m
[32m[I 2022-10-16 16:17:56,129][0m Trial 5 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2022-10-16 16:17:56,171][0m Trial 6 pruned. Trial was pruned at iteration 36.[0m
[32m[I 2022-10-16 16:17:56,197][0m Trial 7 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2022-10-16 16:17:56,223][0m Trial 8 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2022-10-16 16:17:56,248][0m Trial 9 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2022-10-16 16:17:56,298][0m Trial 10 pruned. Trial was pruned at iteration 0.[0m




[32m[I 2022-10-16 16:17:56,404][0m Trial 11 pruned. Trial was pruned at iteration 101.[0m




[32m[I 2022-10-16 16:17:57,491][0m Trial 12 finished with value: 0.5924553684135794 and parameters: {'n_estimators': 10000, 'learning_rate': 0.18000209378810605, 'num_leaves': 660, 'max_depth': 6, 'min_data_in_leaf': 2400, 'lambda_l1': 5, 'lambda_l2': 35, 'min_gain_to_split': 3.1635296405581785, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 0.5921783786120465.[0m




[32m[I 2022-10-16 16:17:57,602][0m Trial 13 pruned. Trial was pruned at iteration 101.[0m
[32m[I 2022-10-16 16:17:57,651][0m Trial 14 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2022-10-16 16:17:57,802][0m Trial 15 pruned. Trial was pruned at iteration 197.[0m
[32m[I 2022-10-16 16:17:57,847][0m Trial 16 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2022-10-16 16:17:57,894][0m Trial 17 pruned. Trial was pruned at iteration 1.[0m




[32m[I 2022-10-16 16:17:59,510][0m Trial 18 finished with value: 0.5917993127167316 and parameters: {'n_estimators': 10000, 'learning_rate': 0.1462871442900454, 'num_leaves': 2360, 'max_depth': 10, 'min_data_in_leaf': 1400, 'lambda_l1': 10, 'lambda_l2': 50, 'min_gain_to_split': 1.7876363427558635, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 18 with value: 0.5917993127167316.[0m
[32m[I 2022-10-16 16:17:59,560][0m Trial 19 pruned. Trial was pruned at iteration 0.[0m




In [9]:
print(f"\tBest value (rmse): {study.best_value:.5f}")
print(f"\tBest params:")
param_set = {}
for key, value in study.best_params.items():
    param_set[key] = value

param_set['verbose'] = -1
param_set

	Best value (rmse): 0.59180
	Best params:


{'n_estimators': 10000,
 'learning_rate': 0.1462871442900454,
 'num_leaves': 2360,
 'max_depth': 10,
 'min_data_in_leaf': 1400,
 'lambda_l1': 10,
 'lambda_l2': 50,
 'min_gain_to_split': 1.7876363427558635,
 'bagging_fraction': 0.4,
 'bagging_freq': 1,
 'feature_fraction': 0.4,
 'verbose': -1}

In [19]:
model = LGBMClassifier(**param_set)
model.fit(X_train, y_train)
y_hat = model.predict(X_test)
roc2 = roc_auc_score(y_test, y_hat)



In [20]:
(roc2-roc1)*100

0.99698408643073

In [22]:
from flaml import AutoML
settings = {'estimator_list': ['lgbm'],
            'task': 'classification',
            'time_budget' : 300
           }
automl = AutoML()
automl.fit(X_train, y_train, **settings)

[flaml.automl: 10-16 16:27:54] {2600} INFO - task = classification
[flaml.automl: 10-16 16:27:54] {2602} INFO - Data split method: stratified
[flaml.automl: 10-16 16:27:54] {2605} INFO - Evaluation method: cv
[flaml.automl: 10-16 16:27:54] {2727} INFO - Minimizing error metric: 1-roc_auc
[flaml.automl: 10-16 16:27:54] {2869} INFO - List of ML learners in AutoML Run: ['lgbm']
[flaml.automl: 10-16 16:27:54] {3164} INFO - iteration 0, current learner lgbm
[flaml.automl: 10-16 16:27:54] {3297} INFO - Estimated sufficient time budget=822s. Estimated necessary time budget=1s.
[flaml.automl: 10-16 16:27:54] {3344} INFO -  at 0.1s,	estimator lgbm's best error=0.3372,	best estimator lgbm's best error=0.3372
[flaml.automl: 10-16 16:27:54] {3164} INFO - iteration 1, current learner lgbm
[flaml.automl: 10-16 16:27:54] {3344} INFO -  at 0.2s,	estimator lgbm's best error=0.3372,	best estimator lgbm's best error=0.3372
[flaml.automl: 10-16 16:27:54] {3164} INFO - iteration 2, current learner lgbm
[fl

[flaml.automl: 10-16 16:28:03] {3164} INFO - iteration 37, current learner lgbm
[flaml.automl: 10-16 16:28:04] {3344} INFO -  at 10.1s,	estimator lgbm's best error=0.3015,	best estimator lgbm's best error=0.3015
[flaml.automl: 10-16 16:28:04] {3164} INFO - iteration 38, current learner lgbm
[flaml.automl: 10-16 16:28:04] {3344} INFO -  at 10.3s,	estimator lgbm's best error=0.3015,	best estimator lgbm's best error=0.3015
[flaml.automl: 10-16 16:28:04] {3164} INFO - iteration 39, current learner lgbm
[flaml.automl: 10-16 16:28:05] {3344} INFO -  at 11.2s,	estimator lgbm's best error=0.3015,	best estimator lgbm's best error=0.3015
[flaml.automl: 10-16 16:28:05] {3164} INFO - iteration 40, current learner lgbm
[flaml.automl: 10-16 16:28:05] {3344} INFO -  at 11.3s,	estimator lgbm's best error=0.3015,	best estimator lgbm's best error=0.3015
[flaml.automl: 10-16 16:28:05] {3164} INFO - iteration 41, current learner lgbm
[flaml.automl: 10-16 16:28:05] {3344} INFO -  at 11.5s,	estimator lgbm's

[flaml.automl: 10-16 16:28:17] {3164} INFO - iteration 76, current learner lgbm
[flaml.automl: 10-16 16:28:17] {3344} INFO -  at 23.5s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:17] {3164} INFO - iteration 77, current learner lgbm
[flaml.automl: 10-16 16:28:17] {3344} INFO -  at 23.6s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:17] {3164} INFO - iteration 78, current learner lgbm
[flaml.automl: 10-16 16:28:18] {3344} INFO -  at 24.2s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:18] {3164} INFO - iteration 79, current learner lgbm
[flaml.automl: 10-16 16:28:18] {3344} INFO -  at 24.6s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:18] {3164} INFO - iteration 80, current learner lgbm
[flaml.automl: 10-16 16:28:18] {3344} INFO -  at 24.8s,	estimator lgbm's

[flaml.automl: 10-16 16:28:31] {3164} INFO - iteration 115, current learner lgbm
[flaml.automl: 10-16 16:28:31] {3344} INFO -  at 37.2s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:31] {3164} INFO - iteration 116, current learner lgbm
[flaml.automl: 10-16 16:28:31] {3344} INFO -  at 37.7s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:31] {3164} INFO - iteration 117, current learner lgbm
[flaml.automl: 10-16 16:28:32] {3344} INFO -  at 38.6s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:32] {3164} INFO - iteration 118, current learner lgbm
[flaml.automl: 10-16 16:28:32] {3344} INFO -  at 38.7s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:32] {3164} INFO - iteration 119, current learner lgbm
[flaml.automl: 10-16 16:28:33] {3344} INFO -  at 39.1s,	estimator l

[flaml.automl: 10-16 16:28:43] {3164} INFO - iteration 154, current learner lgbm
[flaml.automl: 10-16 16:28:43] {3344} INFO -  at 49.8s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:43] {3164} INFO - iteration 155, current learner lgbm
[flaml.automl: 10-16 16:28:44] {3344} INFO -  at 50.4s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:44] {3164} INFO - iteration 156, current learner lgbm
[flaml.automl: 10-16 16:28:44] {3344} INFO -  at 50.6s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:44] {3164} INFO - iteration 157, current learner lgbm
[flaml.automl: 10-16 16:28:44] {3344} INFO -  at 50.7s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:44] {3164} INFO - iteration 158, current learner lgbm
[flaml.automl: 10-16 16:28:45] {3344} INFO -  at 51.3s,	estimator l

[flaml.automl: 10-16 16:28:54] {3164} INFO - iteration 193, current learner lgbm
[flaml.automl: 10-16 16:28:55] {3344} INFO -  at 61.2s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:55] {3164} INFO - iteration 194, current learner lgbm
[flaml.automl: 10-16 16:28:55] {3344} INFO -  at 61.4s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:55] {3164} INFO - iteration 195, current learner lgbm
[flaml.automl: 10-16 16:28:55] {3344} INFO -  at 61.6s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:55] {3164} INFO - iteration 196, current learner lgbm
[flaml.automl: 10-16 16:28:55] {3344} INFO -  at 61.9s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:28:55] {3164} INFO - iteration 197, current learner lgbm
[flaml.automl: 10-16 16:28:56] {3344} INFO -  at 62.3s,	estimator l

[flaml.automl: 10-16 16:29:04] {3164} INFO - iteration 232, current learner lgbm
[flaml.automl: 10-16 16:29:04] {3344} INFO -  at 70.5s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:29:04] {3164} INFO - iteration 233, current learner lgbm
[flaml.automl: 10-16 16:29:04] {3344} INFO -  at 70.7s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:29:04] {3164} INFO - iteration 234, current learner lgbm
[flaml.automl: 10-16 16:29:04] {3344} INFO -  at 70.9s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:29:04] {3164} INFO - iteration 235, current learner lgbm
[flaml.automl: 10-16 16:29:05] {3344} INFO -  at 71.2s,	estimator lgbm's best error=0.3012,	best estimator lgbm's best error=0.3012
[flaml.automl: 10-16 16:29:05] {3164} INFO - iteration 236, current learner lgbm
[flaml.automl: 10-16 16:29:05] {3344} INFO -  at 71.4s,	estimator l

[flaml.automl: 10-16 16:29:20] {3164} INFO - iteration 271, current learner lgbm
[flaml.automl: 10-16 16:29:20] {3344} INFO -  at 86.5s,	estimator lgbm's best error=0.3011,	best estimator lgbm's best error=0.3011
[flaml.automl: 10-16 16:29:20] {3164} INFO - iteration 272, current learner lgbm
[flaml.automl: 10-16 16:29:22] {3344} INFO -  at 88.0s,	estimator lgbm's best error=0.3011,	best estimator lgbm's best error=0.3011
[flaml.automl: 10-16 16:29:22] {3164} INFO - iteration 273, current learner lgbm
[flaml.automl: 10-16 16:29:23] {3344} INFO -  at 89.2s,	estimator lgbm's best error=0.3011,	best estimator lgbm's best error=0.3011
[flaml.automl: 10-16 16:29:23] {3164} INFO - iteration 274, current learner lgbm
[flaml.automl: 10-16 16:29:23] {3344} INFO -  at 89.4s,	estimator lgbm's best error=0.3011,	best estimator lgbm's best error=0.3011
[flaml.automl: 10-16 16:29:23] {3164} INFO - iteration 275, current learner lgbm
[flaml.automl: 10-16 16:29:24] {3344} INFO -  at 90.2s,	estimator l

[flaml.automl: 10-16 16:29:44] {3344} INFO -  at 110.8s,	estimator lgbm's best error=0.3011,	best estimator lgbm's best error=0.3011
[flaml.automl: 10-16 16:29:44] {3164} INFO - iteration 310, current learner lgbm
[flaml.automl: 10-16 16:29:44] {3344} INFO -  at 111.0s,	estimator lgbm's best error=0.3011,	best estimator lgbm's best error=0.3011
[flaml.automl: 10-16 16:29:44] {3164} INFO - iteration 311, current learner lgbm
[flaml.automl: 10-16 16:29:45] {3344} INFO -  at 111.6s,	estimator lgbm's best error=0.3011,	best estimator lgbm's best error=0.3011
[flaml.automl: 10-16 16:29:45] {3164} INFO - iteration 312, current learner lgbm
[flaml.automl: 10-16 16:29:45] {3344} INFO -  at 111.9s,	estimator lgbm's best error=0.3011,	best estimator lgbm's best error=0.3011
[flaml.automl: 10-16 16:29:45] {3164} INFO - iteration 313, current learner lgbm
[flaml.automl: 10-16 16:29:46] {3344} INFO -  at 112.1s,	estimator lgbm's best error=0.3011,	best estimator lgbm's best error=0.3011
[flaml.auto

[flaml.automl: 10-16 16:29:55] {3164} INFO - iteration 348, current learner lgbm
[flaml.automl: 10-16 16:29:55] {3344} INFO -  at 121.6s,	estimator lgbm's best error=0.3010,	best estimator lgbm's best error=0.3010
[flaml.automl: 10-16 16:29:55] {3164} INFO - iteration 349, current learner lgbm
[flaml.automl: 10-16 16:29:55] {3344} INFO -  at 121.8s,	estimator lgbm's best error=0.3010,	best estimator lgbm's best error=0.3010
[flaml.automl: 10-16 16:29:55] {3164} INFO - iteration 350, current learner lgbm
[flaml.automl: 10-16 16:29:56] {3344} INFO -  at 122.1s,	estimator lgbm's best error=0.3010,	best estimator lgbm's best error=0.3010
[flaml.automl: 10-16 16:29:56] {3164} INFO - iteration 351, current learner lgbm
[flaml.automl: 10-16 16:29:56] {3344} INFO -  at 122.3s,	estimator lgbm's best error=0.3010,	best estimator lgbm's best error=0.3010
[flaml.automl: 10-16 16:29:56] {3164} INFO - iteration 352, current learner lgbm
[flaml.automl: 10-16 16:29:56] {3344} INFO -  at 122.5s,	estima

[flaml.automl: 10-16 16:30:06] {3344} INFO -  at 132.7s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:06] {3164} INFO - iteration 387, current learner lgbm
[flaml.automl: 10-16 16:30:07] {3344} INFO -  at 133.0s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:07] {3164} INFO - iteration 388, current learner lgbm
[flaml.automl: 10-16 16:30:07] {3344} INFO -  at 133.2s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:07] {3164} INFO - iteration 389, current learner lgbm
[flaml.automl: 10-16 16:30:07] {3344} INFO -  at 133.4s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:07] {3164} INFO - iteration 390, current learner lgbm
[flaml.automl: 10-16 16:30:07] {3344} INFO -  at 133.7s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.auto

[flaml.automl: 10-16 16:30:18] {3164} INFO - iteration 425, current learner lgbm
[flaml.automl: 10-16 16:30:18] {3344} INFO -  at 144.6s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:18] {3164} INFO - iteration 426, current learner lgbm
[flaml.automl: 10-16 16:30:18] {3344} INFO -  at 144.8s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:18] {3164} INFO - iteration 427, current learner lgbm
[flaml.automl: 10-16 16:30:18] {3344} INFO -  at 145.0s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:18] {3164} INFO - iteration 428, current learner lgbm
[flaml.automl: 10-16 16:30:19] {3344} INFO -  at 145.3s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:19] {3164} INFO - iteration 429, current learner lgbm
[flaml.automl: 10-16 16:30:19] {3344} INFO -  at 145.7s,	estima

[flaml.automl: 10-16 16:30:31] {3344} INFO -  at 157.3s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:31] {3164} INFO - iteration 464, current learner lgbm
[flaml.automl: 10-16 16:30:31] {3344} INFO -  at 157.5s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:31] {3164} INFO - iteration 465, current learner lgbm
[flaml.automl: 10-16 16:30:31] {3344} INFO -  at 157.6s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:31] {3164} INFO - iteration 466, current learner lgbm
[flaml.automl: 10-16 16:30:32] {3344} INFO -  at 158.6s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:32] {3164} INFO - iteration 467, current learner lgbm
[flaml.automl: 10-16 16:30:32] {3344} INFO -  at 158.9s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.auto

[flaml.automl: 10-16 16:30:42] {3164} INFO - iteration 502, current learner lgbm
[flaml.automl: 10-16 16:30:43] {3344} INFO -  at 169.1s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:43] {3164} INFO - iteration 503, current learner lgbm
[flaml.automl: 10-16 16:30:43] {3344} INFO -  at 169.2s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:43] {3164} INFO - iteration 504, current learner lgbm
[flaml.automl: 10-16 16:30:43] {3344} INFO -  at 169.8s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:43] {3164} INFO - iteration 505, current learner lgbm
[flaml.automl: 10-16 16:30:43] {3344} INFO -  at 170.0s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:43] {3164} INFO - iteration 506, current learner lgbm
[flaml.automl: 10-16 16:30:44] {3344} INFO -  at 170.3s,	estima

[flaml.automl: 10-16 16:30:55] {3344} INFO -  at 181.6s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:55] {3164} INFO - iteration 541, current learner lgbm
[flaml.automl: 10-16 16:30:56] {3344} INFO -  at 182.6s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:56] {3164} INFO - iteration 542, current learner lgbm
[flaml.automl: 10-16 16:30:56] {3344} INFO -  at 182.7s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:56] {3164} INFO - iteration 543, current learner lgbm
[flaml.automl: 10-16 16:30:57] {3344} INFO -  at 183.0s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:30:57] {3164} INFO - iteration 544, current learner lgbm
[flaml.automl: 10-16 16:30:57] {3344} INFO -  at 183.3s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.auto

[flaml.automl: 10-16 16:31:09] {3164} INFO - iteration 579, current learner lgbm
[flaml.automl: 10-16 16:31:09] {3344} INFO -  at 195.5s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:31:09] {3164} INFO - iteration 580, current learner lgbm
[flaml.automl: 10-16 16:31:09] {3344} INFO -  at 195.7s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:31:09] {3164} INFO - iteration 581, current learner lgbm
[flaml.automl: 10-16 16:31:09] {3344} INFO -  at 195.9s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:31:09] {3164} INFO - iteration 582, current learner lgbm
[flaml.automl: 10-16 16:31:10] {3344} INFO -  at 196.4s,	estimator lgbm's best error=0.3009,	best estimator lgbm's best error=0.3009
[flaml.automl: 10-16 16:31:10] {3164} INFO - iteration 583, current learner lgbm
[flaml.automl: 10-16 16:31:10] {3344} INFO -  at 196.5s,	estima

[flaml.automl: 10-16 16:31:19] {3344} INFO -  at 205.2s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:19] {3164} INFO - iteration 618, current learner lgbm
[flaml.automl: 10-16 16:31:19] {3344} INFO -  at 205.5s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:19] {3164} INFO - iteration 619, current learner lgbm
[flaml.automl: 10-16 16:31:19] {3344} INFO -  at 205.9s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:19] {3164} INFO - iteration 620, current learner lgbm
[flaml.automl: 10-16 16:31:20] {3344} INFO -  at 206.0s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:20] {3164} INFO - iteration 621, current learner lgbm
[flaml.automl: 10-16 16:31:20] {3344} INFO -  at 206.2s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.auto

[flaml.automl: 10-16 16:31:27] {3164} INFO - iteration 656, current learner lgbm
[flaml.automl: 10-16 16:31:28] {3344} INFO -  at 214.0s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:28] {3164} INFO - iteration 657, current learner lgbm
[flaml.automl: 10-16 16:31:28] {3344} INFO -  at 214.1s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:28] {3164} INFO - iteration 658, current learner lgbm
[flaml.automl: 10-16 16:31:28] {3344} INFO -  at 214.5s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:28] {3164} INFO - iteration 659, current learner lgbm
[flaml.automl: 10-16 16:31:28] {3344} INFO -  at 214.6s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:28] {3164} INFO - iteration 660, current learner lgbm
[flaml.automl: 10-16 16:31:28] {3344} INFO -  at 214.9s,	estima

[flaml.automl: 10-16 16:31:37] {3344} INFO -  at 223.8s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:37] {3164} INFO - iteration 695, current learner lgbm
[flaml.automl: 10-16 16:31:37] {3344} INFO -  at 223.9s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:37] {3164} INFO - iteration 696, current learner lgbm
[flaml.automl: 10-16 16:31:38] {3344} INFO -  at 224.5s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:38] {3164} INFO - iteration 697, current learner lgbm
[flaml.automl: 10-16 16:31:38] {3344} INFO -  at 224.7s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:38] {3164} INFO - iteration 698, current learner lgbm
[flaml.automl: 10-16 16:31:38] {3344} INFO -  at 224.8s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.auto

[flaml.automl: 10-16 16:31:50] {3164} INFO - iteration 733, current learner lgbm
[flaml.automl: 10-16 16:31:51] {3344} INFO -  at 237.1s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:51] {3164} INFO - iteration 734, current learner lgbm
[flaml.automl: 10-16 16:31:51] {3344} INFO -  at 237.7s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:51] {3164} INFO - iteration 735, current learner lgbm
[flaml.automl: 10-16 16:31:51] {3344} INFO -  at 237.9s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:51] {3164} INFO - iteration 736, current learner lgbm
[flaml.automl: 10-16 16:31:52] {3344} INFO -  at 238.1s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:31:52] {3164} INFO - iteration 737, current learner lgbm
[flaml.automl: 10-16 16:31:52] {3344} INFO -  at 238.4s,	estima

[flaml.automl: 10-16 16:32:00] {3344} INFO -  at 246.8s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:00] {3164} INFO - iteration 772, current learner lgbm
[flaml.automl: 10-16 16:32:01] {3344} INFO -  at 247.0s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:01] {3164} INFO - iteration 773, current learner lgbm
[flaml.automl: 10-16 16:32:01] {3344} INFO -  at 247.1s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:01] {3164} INFO - iteration 774, current learner lgbm
[flaml.automl: 10-16 16:32:01] {3344} INFO -  at 247.5s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:01] {3164} INFO - iteration 775, current learner lgbm
[flaml.automl: 10-16 16:32:01] {3344} INFO -  at 247.6s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.auto

[flaml.automl: 10-16 16:32:12] {3164} INFO - iteration 810, current learner lgbm
[flaml.automl: 10-16 16:32:12] {3344} INFO -  at 258.5s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:12] {3164} INFO - iteration 811, current learner lgbm
[flaml.automl: 10-16 16:32:12] {3344} INFO -  at 258.7s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:12] {3164} INFO - iteration 812, current learner lgbm
[flaml.automl: 10-16 16:32:13] {3344} INFO -  at 259.0s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:13] {3164} INFO - iteration 813, current learner lgbm
[flaml.automl: 10-16 16:32:13] {3344} INFO -  at 259.2s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:13] {3164} INFO - iteration 814, current learner lgbm
[flaml.automl: 10-16 16:32:13] {3344} INFO -  at 259.5s,	estima

[flaml.automl: 10-16 16:32:21] {3344} INFO -  at 267.1s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:21] {3164} INFO - iteration 849, current learner lgbm
[flaml.automl: 10-16 16:32:21] {3344} INFO -  at 267.3s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:21] {3164} INFO - iteration 850, current learner lgbm
[flaml.automl: 10-16 16:32:21] {3344} INFO -  at 267.6s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:21] {3164} INFO - iteration 851, current learner lgbm
[flaml.automl: 10-16 16:32:21] {3344} INFO -  at 267.7s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:21] {3164} INFO - iteration 852, current learner lgbm
[flaml.automl: 10-16 16:32:22] {3344} INFO -  at 268.0s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.auto

[flaml.automl: 10-16 16:32:30] {3164} INFO - iteration 887, current learner lgbm
[flaml.automl: 10-16 16:32:30] {3344} INFO -  at 276.3s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:30] {3164} INFO - iteration 888, current learner lgbm
[flaml.automl: 10-16 16:32:30] {3344} INFO -  at 276.7s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:30] {3164} INFO - iteration 889, current learner lgbm
[flaml.automl: 10-16 16:32:31] {3344} INFO -  at 277.0s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:31] {3164} INFO - iteration 890, current learner lgbm
[flaml.automl: 10-16 16:32:31] {3344} INFO -  at 277.2s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:31] {3164} INFO - iteration 891, current learner lgbm
[flaml.automl: 10-16 16:32:31] {3344} INFO -  at 277.3s,	estima

[flaml.automl: 10-16 16:32:39] {3344} INFO -  at 285.5s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:39] {3164} INFO - iteration 926, current learner lgbm
[flaml.automl: 10-16 16:32:39] {3344} INFO -  at 285.7s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:39] {3164} INFO - iteration 927, current learner lgbm
[flaml.automl: 10-16 16:32:39] {3344} INFO -  at 286.0s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:39] {3164} INFO - iteration 928, current learner lgbm
[flaml.automl: 10-16 16:32:40] {3344} INFO -  at 286.1s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:40] {3164} INFO - iteration 929, current learner lgbm
[flaml.automl: 10-16 16:32:40] {3344} INFO -  at 286.3s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.auto

[flaml.automl: 10-16 16:32:49] {3164} INFO - iteration 964, current learner lgbm
[flaml.automl: 10-16 16:32:50] {3344} INFO -  at 296.0s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:50] {3164} INFO - iteration 965, current learner lgbm
[flaml.automl: 10-16 16:32:50] {3344} INFO -  at 296.3s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:50] {3164} INFO - iteration 966, current learner lgbm
[flaml.automl: 10-16 16:32:50] {3344} INFO -  at 296.4s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:50] {3164} INFO - iteration 967, current learner lgbm
[flaml.automl: 10-16 16:32:50] {3344} INFO -  at 297.0s,	estimator lgbm's best error=0.3008,	best estimator lgbm's best error=0.3008
[flaml.automl: 10-16 16:32:50] {3164} INFO - iteration 968, current learner lgbm
[flaml.automl: 10-16 16:32:51] {3344} INFO -  at 297.1s,	estima

In [26]:
model = LGBMClassifier(**automl.best_config)
model.fit(X_train, y_train)
y_hat = model.predict(X_test)
roc3 = roc_auc_score(y_test, y_hat)
roc1, roc2, roc3



(0.6131027184680355, 0.6230725593323428, 0.6187247137179586)

In [24]:
y_train.value_counts()

1    12730
0     7270
Name: MonkeyPox, dtype: int64