In [1]:
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
from flaml.automl import AutoML
from collections import Counter
# from lazyqsar.models import LazyXGBoostBinaryClassifier as LQ_XGB
# from lazyqsar.models import LazyZSRFBinaryClassifier as LQ_zsRF
import sys
import os
sys.path.insert(0, os.path.abspath("../../lazy-qsar/lazyqsar/models"))
from xgboost_binary_classifier import LazyXGBoostBinaryClassifier as LQ_XGB
from zsRF_binary_classifier import LazyZSRFBinaryClassifier as LQ_zsRF
import pandas as pd
import numpy as np
import collections
import joblib

In [None]:
# Baseline 3-fold stratified cross-validation of the LazyQSAR XGBoost and zsRF
# binary classifiers on every task of every selected pathogen. The per-fold
# AUROCs of each model are written as one comma-separated line to
# LQ_XGB_CV.csv / LQ_zsRF_CV.csv inside the task's output directory.

# Get all pathogens i.e. {pathogen}_{target}
# NOTE(review): the [:1] slice restricts the run to the first pathogen only —
# confirm whether this is intentional or a leftover from debugging.
PATHOGENS = sorted(os.listdir(os.path.join("..", "data")))[:1]

# Define some paths
PATH_TO_FEATURES = os.path.join("..", "output", "02_features")
PATH_TO_OUTPUT = os.path.join("..", "output", "03_baseline_models")

for pathogen in PATHOGENS:

    print(f"----------------------- PATHOGEN: {pathogen} ---------------------------")

    # Get list of tasks
    tasks = sorted(os.listdir(os.path.join("..", "data", pathogen)))

    # Nothing to model for this pathogen: skip before touching its feature files
    if not tasks:
        continue

    # Get IK to MFP. The feature files depend only on the pathogen, so they are
    # loaded once here rather than once per task. Both files are opened in a
    # `with` block so their handles are closed as soon as the data is in memory.
    with open(os.path.join(PATH_TO_FEATURES, pathogen, 'IKS.txt')) as f:
        IKs = f.read().splitlines()
    with np.load(os.path.join(PATH_TO_FEATURES, pathogen, "X.npz")) as npz:
        MFPs = npz['X']
    # Row i of MFPs is paired with line i of IKS.txt.
    # NOTE(review): zip() silently truncates if the two lengths differ — confirm
    # the feature step always writes them with matching lengths.
    IK_TO_MFP = dict(zip(IKs, MFPs))

    # For each task
    for task in tasks:

        # # if task is not done yet
        # if os.path.exists(os.path.join(PATH_TO_OUTPUT, pathogen, task.replace(".csv", ""), "LQ_optuna_CV.csv")) == True:
        #     continue

        print(f"TASK: {task}")

        # Create output_dir (task file name with every ".csv" occurrence removed)
        output_dir = os.path.join(PATH_TO_OUTPUT, pathogen, task.replace(".csv", ""))
        os.makedirs(output_dir, exist_ok=True)

        # Load data
        df = pd.read_csv(os.path.join("..", "data", pathogen, task))
        cols = df.columns.tolist()
        # The label is read from the third column of the task file.
        # NOTE(review): positional lookup — confirm every task CSV keeps the
        # activity label in column index 2.
        X, Y = [], []
        for ik, act in zip(df['inchikey'], df[cols[2]]):
            # Compounds without a precomputed feature vector are dropped
            if ik in IK_TO_MFP:
                X.append(IK_TO_MFP[ik])
                Y.append(act)

        # To np.array
        X = np.array(X)
        Y = np.array(Y)

        # Cross-validations (fixed seed so the folds are reproducible)
        skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
        aurocs_zsRF, aurocs_XGB = [], []
        for train_index, test_index in skf.split(X, Y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]

            # XGB
            # NOTE(review): roc_curve expects continuous scores — confirm that
            # predict() of the LazyQSAR models returns scores, not hard labels.
            model_XGB = LQ_XGB(reducer_method='pca', max_reducer_dim=100, base_num_splits=1, base_test_size=0.2)
            model_XGB.fit(X=X_train, y=y_train)
            fpr, tpr, _ = roc_curve(y_test, model_XGB.predict(X_test))
            auroc = auc(fpr, tpr)
            aurocs_XGB.append(auroc)

            # zsRF
            model_zsRF = LQ_zsRF(reducer_method='pca', max_reducer_dim=100, base_num_splits=1, base_test_size=0.2)
            model_zsRF.fit(X=X_train, y=y_train)
            fpr, tpr, _ = roc_curve(y_test, model_zsRF.predict(X_test))
            auroc = auc(fpr, tpr)
            aurocs_zsRF.append(auroc)

        # Save AUROC CVs zsRF (one line, fold AUROCs rounded to 4 decimals)
        with open(os.path.join(output_dir, "LQ_zsRF_CV.csv"), "w") as f:
            f.write(",".join([str(round(i, 4)) for i in aurocs_zsRF]))

        # Save AUROC CVs XGB (same format)
        with open(os.path.join(output_dir, "LQ_XGB_CV.csv"), "w") as f:
            f.write(",".join([str(round(i, 4)) for i in aurocs_XGB]))

----------------------- PATHOGEN: abaumannii_organism ---------------------------
TASK: 1_assay_CHEMBL4296188_Inhibition_percentage_activity_percentile_50_ORGANISM_2.csv
Total samples: 35812, positive samples: 7162, negative samples: 28650
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 7162, negative samples per round: 28650
Desired positive proportion: 0.19998883055958896 Actual positive proportion:  0.19998883055958896


[I 2025-06-05 16:28:20,584] A new study created in memory with name: no-name-30f0a387-a214-494a-bead-b60db53286bc


Fitting model on 35812 samples, positive samples: 7162, negative samples: 28650, number of features 100
Fitting...


[I 2025-06-05 16:28:20,840] Trial 0 finished with value: 0.9145740699468645 and parameters: {'booster': 'gblinear', 'lambda': 0.0006155564318973012, 'alpha': 1.77071686435378e-07, 'subsample': 0.32479561626896214, 'colsample_bytree': 0.24646688973455957}. Best is trial 0 with value: 0.9145740699468645.
[I 2025-06-05 16:28:21,205] Trial 1 finished with value: 0.8760070831034614 and parameters: {'booster': 'gbtree', 'lambda': 1.4610865886287176e-08, 'alpha': 0.574485163632042, 'subsample': 0.8659541126403374, 'colsample_bytree': 0.36987128854262097, 'max_depth': 3, 'min_child_weight': 3, 'eta': 2.716051144654844e-06, 'gamma': 0.00015777981883364995, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.9145740699468645.
[I 2025-06-05 16:28:21,523] Trial 2 finished with value: 0.7783084827958285 and parameters: {'booster': 'gbtree', 'lambda': 8.528933855762793e-06, 'alpha': 4.452048365748842e-05, 'subsample': 0.8281407691144109, 'colsample_bytree': 0.3597390257266878, 'max_depth': 7,

Best AUROC: 0.9306
Model fitted.
Total samples: 35812, positive samples: 7162, negative samples: 28650
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 7162, negative samples per round: 28650
Desired positive proportion: 0.19998883055958896 Actual positive proportion:  0.19998883055958896
Fitting model on 35812 samples, positive samples: 7162, negative samples: 28650, number of features 100
Model fitted.
Total samples: 35813, positive samples: 7163, negative samples: 28650
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 7163, negative samples per round: 28650
Desired positive proportion: 0.20001116912852876 Actual positive proportion:  0.20001116912852876


[I 2025-06-05 16:37:53,507] A new study created in memory with name: no-name-a175133e-50f3-443c-a77c-33a870fcab4a


Fitting model on 35813 samples, positive samples: 7163, negative samples: 28650, number of features 100
Fitting...


[I 2025-06-05 16:37:53,781] Trial 0 finished with value: 0.9200956267681879 and parameters: {'booster': 'gblinear', 'lambda': 0.0006155564318973012, 'alpha': 1.77071686435378e-07, 'subsample': 0.32479561626896214, 'colsample_bytree': 0.24646688973455957}. Best is trial 0 with value: 0.9200956267681879.
[I 2025-06-05 16:37:53,997] Trial 1 finished with value: 0.8778585425321122 and parameters: {'booster': 'gbtree', 'lambda': 1.4610865886287176e-08, 'alpha': 0.574485163632042, 'subsample': 0.8659541126403374, 'colsample_bytree': 0.36987128854262097, 'max_depth': 3, 'min_child_weight': 3, 'eta': 2.716051144654844e-06, 'gamma': 0.00015777981883364995, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.9200956267681879.
[I 2025-06-05 16:37:54,333] Trial 2 finished with value: 0.8124627789976728 and parameters: {'booster': 'gbtree', 'lambda': 8.528933855762793e-06, 'alpha': 4.452048365748842e-05, 'subsample': 0.8281407691144109, 'colsample_bytree': 0.3597390257266878, 'max_depth': 7,

Best AUROC: 0.9354
Model fitted.
Total samples: 35813, positive samples: 7163, negative samples: 28650
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 7163, negative samples per round: 28650
Desired positive proportion: 0.20001116912852876 Actual positive proportion:  0.20001116912852876
Fitting model on 35813 samples, positive samples: 7163, negative samples: 28650, number of features 100
Model fitted.
Total samples: 35813, positive samples: 7163, negative samples: 28650
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 7163, negative samples per round: 28650
Desired positive proportion: 0.20001116912852876 Actual positive proportion:  0.20001116912852876


[I 2025-06-05 16:47:14,592] A new study created in memory with name: no-name-cc2316d8-5567-4225-919f-60fde4396c17


Fitting model on 35813 samples, positive samples: 7163, negative samples: 28650, number of features 100
Fitting...


[I 2025-06-05 16:47:14,921] Trial 0 finished with value: 0.9156756289359879 and parameters: {'booster': 'gblinear', 'lambda': 0.0006155564318973012, 'alpha': 1.77071686435378e-07, 'subsample': 0.32479561626896214, 'colsample_bytree': 0.24646688973455957}. Best is trial 0 with value: 0.9156756289359879.
[I 2025-06-05 16:47:15,455] Trial 1 finished with value: 0.8749554565837179 and parameters: {'booster': 'gbtree', 'lambda': 1.4610865886287176e-08, 'alpha': 0.574485163632042, 'subsample': 0.8659541126403374, 'colsample_bytree': 0.36987128854262097, 'max_depth': 3, 'min_child_weight': 3, 'eta': 2.716051144654844e-06, 'gamma': 0.00015777981883364995, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.9156756289359879.
[I 2025-06-05 16:47:16,549] Trial 2 finished with value: 0.7542109512866135 and parameters: {'booster': 'gbtree', 'lambda': 8.528933855762793e-06, 'alpha': 4.452048365748842e-05, 'subsample': 0.8281407691144109, 'colsample_bytree': 0.3597390257266878, 'max_depth': 7,

Best AUROC: 0.9289
Model fitted.
Total samples: 35813, positive samples: 7163, negative samples: 28650
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 7163, negative samples per round: 28650
Desired positive proportion: 0.20001116912852876 Actual positive proportion:  0.20001116912852876
Fitting model on 35813 samples, positive samples: 7163, negative samples: 28650, number of features 100
Model fitted.
TASK: 1_assay_CHEMBL4296188_Inhibition_percentage_activity_percentile_5_ORGANISM_1.csv
Total samples: 14321, positive samples: 714, negative samples: 13607
Positive proportion: 0.05
Sampling rounds: 1, positive samples per round: 714, negative samples per round: 13607
Desired positive proportion: 0.049856853571677955 Actual positive proportion:  0.049856853571677955


[I 2025-06-05 16:58:53,111] A new study created in memory with name: no-name-35ceb1dc-ac1b-42c6-a614-6a2b60af0d19
[I 2025-06-05 16:58:53,253] Trial 0 finished with value: 0.7432266484434009 and parameters: {'booster': 'gblinear', 'lambda': 0.0006155564318973012, 'alpha': 1.77071686435378e-07, 'subsample': 0.32479561626896214, 'colsample_bytree': 0.24646688973455957}. Best is trial 0 with value: 0.7432266484434009.


Fitting model on 14321 samples, positive samples: 714, negative samples: 13607, number of features 100
Fitting...


[I 2025-06-05 16:58:54,150] Trial 1 finished with value: 0.6869280609177744 and parameters: {'booster': 'gbtree', 'lambda': 1.4610865886287176e-08, 'alpha': 0.574485163632042, 'subsample': 0.8659541126403374, 'colsample_bytree': 0.36987128854262097, 'max_depth': 3, 'min_child_weight': 3, 'eta': 2.716051144654844e-06, 'gamma': 0.00015777981883364995, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.7432266484434009.
[I 2025-06-05 16:58:54,274] Trial 2 finished with value: 0.5 and parameters: {'booster': 'gbtree', 'lambda': 8.528933855762793e-06, 'alpha': 4.452048365748842e-05, 'subsample': 0.8281407691144109, 'colsample_bytree': 0.3597390257266878, 'max_depth': 7, 'min_child_weight': 7, 'eta': 2.3528990899815284e-08, 'gamma': 0.0007250347382396634, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.7432266484434009.
[I 2025-06-05 16:58:54,792] Trial 3 finished with value: 0.7434707100394096 and parameters: {'booster': 'gblinear', 'lambda': 2.734702913886802e-06, 'alpha

Best AUROC: 0.7449
Model fitted.
Total samples: 14321, positive samples: 714, negative samples: 13607
Positive proportion: 0.05
Sampling rounds: 1, positive samples per round: 714, negative samples per round: 13607
Desired positive proportion: 0.049856853571677955 Actual positive proportion:  0.049856853571677955
Fitting model on 14321 samples, positive samples: 714, negative samples: 13607, number of features 100
Model fitted.
Total samples: 14321, positive samples: 715, negative samples: 13606
Positive proportion: 0.05
Sampling rounds: 1, positive samples per round: 715, negative samples per round: 13606
Desired positive proportion: 0.04992668109768871 Actual positive proportion:  0.04992668109768871


[I 2025-06-05 17:00:38,924] A new study created in memory with name: no-name-2d3ea260-3342-4438-9eee-9f73239f1376


Fitting model on 14321 samples, positive samples: 715, negative samples: 13606, number of features 100
Fitting...


[I 2025-06-05 17:00:39,509] Trial 0 finished with value: 0.7488657558459175 and parameters: {'booster': 'gblinear', 'lambda': 0.0006155564318973012, 'alpha': 1.77071686435378e-07, 'subsample': 0.32479561626896214, 'colsample_bytree': 0.24646688973455957}. Best is trial 0 with value: 0.7488657558459175.
[I 2025-06-05 17:00:40,896] Trial 1 finished with value: 0.7042795558592766 and parameters: {'booster': 'gbtree', 'lambda': 1.4610865886287176e-08, 'alpha': 0.574485163632042, 'subsample': 0.8659541126403374, 'colsample_bytree': 0.36987128854262097, 'max_depth': 3, 'min_child_weight': 3, 'eta': 2.716051144654844e-06, 'gamma': 0.00015777981883364995, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.7488657558459175.
[I 2025-06-05 17:00:41,104] Trial 2 finished with value: 0.5 and parameters: {'booster': 'gbtree', 'lambda': 8.528933855762793e-06, 'alpha': 4.452048365748842e-05, 'subsample': 0.8281407691144109, 'colsample_bytree': 0.3597390257266878, 'max_depth': 7, 'min_child_wei

Best AUROC: 0.7591
Model fitted.
Total samples: 14321, positive samples: 715, negative samples: 13606
Positive proportion: 0.05
Sampling rounds: 1, positive samples per round: 715, negative samples per round: 13606
Desired positive proportion: 0.04992668109768871 Actual positive proportion:  0.04992668109768871
Fitting model on 14321 samples, positive samples: 715, negative samples: 13606, number of features 100
Model fitted.
Total samples: 14322, positive samples: 715, negative samples: 13607
Positive proportion: 0.05
Sampling rounds: 1, positive samples per round: 715, negative samples per round: 13607
Desired positive proportion: 0.04992319508448541 Actual positive proportion:  0.04992319508448541


[I 2025-06-05 17:02:31,443] A new study created in memory with name: no-name-df2f942b-ade0-47b7-8e2e-265c59f066de


Fitting model on 14322 samples, positive samples: 715, negative samples: 13607, number of features 100
Fitting...


[I 2025-06-05 17:02:31,753] Trial 0 finished with value: 0.7325367505382201 and parameters: {'booster': 'gblinear', 'lambda': 0.0006155564318973012, 'alpha': 1.77071686435378e-07, 'subsample': 0.32479561626896214, 'colsample_bytree': 0.24646688973455957}. Best is trial 0 with value: 0.7325367505382201.
[I 2025-06-05 17:02:32,026] Trial 1 finished with value: 0.723064591543651 and parameters: {'booster': 'gbtree', 'lambda': 1.4610865886287176e-08, 'alpha': 0.574485163632042, 'subsample': 0.8659541126403374, 'colsample_bytree': 0.36987128854262097, 'max_depth': 3, 'min_child_weight': 3, 'eta': 2.716051144654844e-06, 'gamma': 0.00015777981883364995, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.7325367505382201.
[I 2025-06-05 17:02:33,119] Trial 2 finished with value: 0.508107983126352 and parameters: {'booster': 'gbtree', 'lambda': 8.528933855762793e-06, 'alpha': 4.452048365748842e-05, 'subsample': 0.8281407691144109, 'colsample_bytree': 0.3597390257266878, 'max_depth': 7, '

Best AUROC: 0.7747
Model fitted.
Total samples: 14322, positive samples: 715, negative samples: 13607
Positive proportion: 0.05
Sampling rounds: 1, positive samples per round: 715, negative samples per round: 13607
Desired positive proportion: 0.04992319508448541 Actual positive proportion:  0.04992319508448541
Fitting model on 14322 samples, positive samples: 715, negative samples: 13607, number of features 100
Model fitted.
TASK: 1_assay_CHEMBL4296188_MIC_pchembl_percentile_10_ORGANISM_3.csv
Total samples: 343, positive samples: 68, negative samples: 275
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 68, negative samples per round: 275
Desired positive proportion: 0.19825072886297376 Actual positive proportion:  0.19825072886297376


[I 2025-06-05 17:05:34,973] A new study created in memory with name: no-name-9e76c5cd-2d58-4f38-9daa-6ca29ac72cbd
[I 2025-06-05 17:05:35,100] Trial 0 finished with value: 0.6649350649350649 and parameters: {'booster': 'gblinear', 'lambda': 0.0006155564318973012, 'alpha': 1.77071686435378e-07, 'subsample': 0.32479561626896214, 'colsample_bytree': 0.24646688973455957}. Best is trial 0 with value: 0.6649350649350649.
[I 2025-06-05 17:05:35,110] Trial 1 finished with value: 0.5285714285714286 and parameters: {'booster': 'gbtree', 'lambda': 1.4610865886287176e-08, 'alpha': 0.574485163632042, 'subsample': 0.8659541126403374, 'colsample_bytree': 0.36987128854262097, 'max_depth': 3, 'min_child_weight': 3, 'eta': 2.716051144654844e-06, 'gamma': 0.00015777981883364995, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.6649350649350649.
[I 2025-06-05 17:05:35,132] Trial 2 finished with value: 0.5 and parameters: {'booster': 'gbtree', 'lambda': 8.528933855762793e-06, 'alpha': 4.4520483657

Fitting model on 343 samples, positive samples: 68, negative samples: 275, number of features 100
Fitting...


[I 2025-06-05 17:05:35,230] Trial 5 finished with value: 0.7207792207792209 and parameters: {'booster': 'gblinear', 'lambda': 0.5710537951126793, 'alpha': 0.01588775693167255, 'subsample': 0.9515991532513512, 'colsample_bytree': 0.915861880342119}. Best is trial 4 with value: 0.725974025974026.
[I 2025-06-05 17:05:35,237] Trial 6 finished with value: 0.6584415584415585 and parameters: {'booster': 'gblinear', 'lambda': 3.697114486625506e-07, 'alpha': 2.300479202014574e-08, 'subsample': 0.4602642646106115, 'colsample_bytree': 0.5109418317515857}. Best is trial 4 with value: 0.725974025974026.
[I 2025-06-05 17:05:35,250] Trial 7 finished with value: 0.6610389610389611 and parameters: {'booster': 'gblinear', 'lambda': 1.7679748286442581e-06, 'alpha': 0.0002195678075127562, 'subsample': 0.31273937997981016, 'colsample_bytree': 0.8417575846032317}. Best is trial 4 with value: 0.725974025974026.
[I 2025-06-05 17:05:35,328] Trial 8 finished with value: 0.6636363636363636 and parameters: {'boos

Best AUROC: 0.8519
Model fitted.
Total samples: 343, positive samples: 68, negative samples: 275
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 68, negative samples per round: 275
Desired positive proportion: 0.19825072886297376 Actual positive proportion:  0.19825072886297376
Fitting model on 343 samples, positive samples: 68, negative samples: 275, number of features 100
Model fitted.
Total samples: 343, positive samples: 69, negative samples: 274
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 69, negative samples per round: 274
Desired positive proportion: 0.20116618075801748 Actual positive proportion:  0.20116618075801748


[I 2025-06-05 17:06:45,367] A new study created in memory with name: no-name-898632d8-4961-4ca0-8e9d-de58ad07905c
[I 2025-06-05 17:06:45,498] Trial 0 finished with value: 0.8441558441558442 and parameters: {'booster': 'gblinear', 'lambda': 0.0006155564318973012, 'alpha': 1.77071686435378e-07, 'subsample': 0.32479561626896214, 'colsample_bytree': 0.24646688973455957}. Best is trial 0 with value: 0.8441558441558442.
[I 2025-06-05 17:06:45,511] Trial 1 finished with value: 0.7103896103896105 and parameters: {'booster': 'gbtree', 'lambda': 1.4610865886287176e-08, 'alpha': 0.574485163632042, 'subsample': 0.8659541126403374, 'colsample_bytree': 0.36987128854262097, 'max_depth': 3, 'min_child_weight': 3, 'eta': 2.716051144654844e-06, 'gamma': 0.00015777981883364995, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.8441558441558442.
[I 2025-06-05 17:06:45,538] Trial 2 finished with value: 0.5 and parameters: {'booster': 'gbtree', 'lambda': 8.528933855762793e-06, 'alpha': 4.4520483657

Fitting model on 343 samples, positive samples: 69, negative samples: 274, number of features 100
Fitting...


[I 2025-06-05 17:06:45,607] Trial 3 finished with value: 0.8337662337662338 and parameters: {'booster': 'gblinear', 'lambda': 2.734702913886802e-06, 'alpha': 6.044730070370796e-08, 'subsample': 0.7473864212097256, 'colsample_bytree': 0.5521219949916811}. Best is trial 0 with value: 0.8441558441558442.
[I 2025-06-05 17:06:45,621] Trial 4 finished with value: 0.8376623376623377 and parameters: {'booster': 'gblinear', 'lambda': 0.1881755597772026, 'alpha': 1.1755466083160747e-06, 'subsample': 0.7300178274831857, 'colsample_bytree': 0.4493688608715288}. Best is trial 0 with value: 0.8441558441558442.
[I 2025-06-05 17:06:45,632] Trial 5 finished with value: 0.6857142857142857 and parameters: {'booster': 'gblinear', 'lambda': 0.5710537951126793, 'alpha': 0.01588775693167255, 'subsample': 0.9515991532513512, 'colsample_bytree': 0.915861880342119}. Best is trial 0 with value: 0.8441558441558442.
[I 2025-06-05 17:06:45,642] Trial 6 finished with value: 0.8324675324675326 and parameters: {'boost

Best AUROC: 0.9052
Model fitted.
Total samples: 343, positive samples: 69, negative samples: 274
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 69, negative samples per round: 274
Desired positive proportion: 0.20116618075801748 Actual positive proportion:  0.20116618075801748
Fitting model on 343 samples, positive samples: 69, negative samples: 274, number of features 100
Model fitted.
Total samples: 344, positive samples: 69, negative samples: 275
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 69, negative samples per round: 275
Desired positive proportion: 0.2005813953488372 Actual positive proportion:  0.2005813953488372


[I 2025-06-05 17:07:48,802] A new study created in memory with name: no-name-fdb191ad-7c19-4983-99df-1e84874b491d


Fitting model on 344 samples, positive samples: 69, negative samples: 275, number of features 100
Fitting...


[I 2025-06-05 17:07:49,097] Trial 0 finished with value: 0.7376623376623377 and parameters: {'booster': 'gblinear', 'lambda': 0.0006155564318973012, 'alpha': 1.77071686435378e-07, 'subsample': 0.32479561626896214, 'colsample_bytree': 0.24646688973455957}. Best is trial 0 with value: 0.7376623376623377.
[I 2025-06-05 17:07:49,106] Trial 1 finished with value: 0.6616883116883117 and parameters: {'booster': 'gbtree', 'lambda': 1.4610865886287176e-08, 'alpha': 0.574485163632042, 'subsample': 0.8659541126403374, 'colsample_bytree': 0.36987128854262097, 'max_depth': 3, 'min_child_weight': 3, 'eta': 2.716051144654844e-06, 'gamma': 0.00015777981883364995, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.7376623376623377.
[I 2025-06-05 17:07:49,118] Trial 2 finished with value: 0.5 and parameters: {'booster': 'gbtree', 'lambda': 8.528933855762793e-06, 'alpha': 4.452048365748842e-05, 'subsample': 0.8281407691144109, 'colsample_bytree': 0.3597390257266878, 'max_depth': 7, 'min_child_wei

Best AUROC: 0.8805
Model fitted.
Total samples: 344, positive samples: 69, negative samples: 275
Positive proportion: 0.20
Sampling rounds: 1, positive samples per round: 69, negative samples per round: 275
Desired positive proportion: 0.2005813953488372 Actual positive proportion:  0.2005813953488372
Fitting model on 344 samples, positive samples: 69, negative samples: 275, number of features 100
Model fitted.


: 