<a href="https://colab.research.google.com/github/epicskills1/Final_yr_Project/blob/main/Finalyr_proj_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from scipy import io
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import warnings
warnings.filterwarnings("ignore")

# --- Load data ---
y_tr = pd.read_csv('/content/tox21_labels_train.csv.gz', index_col=0, compression="gzip")
y_te = pd.read_csv('/content/tox21_labels_test.csv.gz', index_col=0, compression="gzip")
x_tr_dense = pd.read_csv('/content/tox21_dense_train.csv.gz', index_col=0, compression="gzip").values
x_te_dense = pd.read_csv('/content/tox21_dense_test.csv.gz', index_col=0, compression="gzip").values
x_tr_sparse = io.mmread('/content/tox21_sparse_train.mtx.gz').tocsc()
x_te_sparse = io.mmread('/content/tox21_sparse_test.mtx.gz').tocsc()

# --- Combine dense + sparse features ---
sparse_col_idx = np.asarray((x_tr_sparse > 0).mean(0) > 0.05).ravel()
x_tr = np.hstack([x_tr_dense, x_tr_sparse[:, sparse_col_idx].toarray()])
x_te = np.hstack([x_te_dense, x_te_sparse[:, sparse_col_idx].toarray()])

# --- Normalize ---
scaler = StandardScaler()
x_tr = np.tanh(scaler.fit_transform(x_tr))
x_te = np.tanh(scaler.transform(x_te))

# --- Define NN model ---
def create_model(input_dim):
    model = Sequential([
        Dense(256, input_dim=input_dim, kernel_regularizer=l2(1e-5)),
        BatchNormalization(),
        LeakyReLU(alpha=0.2),
        Dropout(0.4),
        Dense(128, kernel_regularizer=l2(1e-5)),
        BatchNormalization(),
        LeakyReLU(alpha=0.2),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    return model

# --- Callbacks ---
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

# --- Store metrics ---
auc_nn, auc_xgb, auc_lgb, auc_weighted, auc_stacking = [], [], [], [], []

for target in y_tr.columns:
    print(f"\n🔬 Training on assay: {target}")
    valid_rows = np.isfinite(y_tr[target]).values
    x_target, y_target = x_tr[valid_rows], y_tr[target][valid_rows]

    # SMOTE balancing
    x_balanced, y_balanced = SMOTE(random_state=42).fit_resample(x_target, y_target)
    x_train, x_val, y_train, y_val = train_test_split(x_balanced, y_balanced, test_size=0.2, random_state=42)

    # --- NN ---
    model = create_model(input_dim=x_tr.shape[1])
    model.compile(optimizer=Adam(learning_rate=1e-3), loss='binary_crossentropy', metrics=['AUC'])
    model.fit(x_train, y_train, validation_data=(x_val, y_val),
              epochs=100, batch_size=32, callbacks=[early_stopping, reduce_lr], verbose=0)

    # --- XGBoost ---
    xgb_model = XGBClassifier(n_estimators=300, max_depth=6, learning_rate=0.05,
                              subsample=0.8, colsample_bytree=0.8,
                              use_label_encoder=False, eval_metric='logloss', random_state=42)
    xgb_model.fit(x_train, y_train)

    # --- LightGBM ---
    lgb_model = LGBMClassifier(n_estimators=300, learning_rate=0.05, max_depth=6, random_state=42)
    lgb_model.fit(x_train, y_train)

    # --- Test set ---
    valid_test_rows = np.isfinite(y_te[target]).values
    x_test_target = x_te[valid_test_rows]
    y_test = y_te[target][valid_test_rows].values

    y_pred_nn = model.predict(x_test_target).ravel()
    y_pred_xgb = xgb_model.predict_proba(x_test_target)[:, 1]
    y_pred_lgb = lgb_model.predict_proba(x_test_target)[:, 1]

    # --- Weighted Ensemble ---
    auc_nn_val = roc_auc_score(y_val, model.predict(x_val).ravel())
    auc_xgb_val = roc_auc_score(y_val, xgb_model.predict_proba(x_val)[:, 1])
    auc_lgb_val = roc_auc_score(y_val, lgb_model.predict_proba(x_val)[:, 1])

    sum_auc = auc_nn_val + auc_xgb_val + auc_lgb_val
    w_nn = auc_nn_val / sum_auc
    w_xgb = auc_xgb_val / sum_auc
    w_lgb = auc_lgb_val / sum_auc

    y_pred_weighted = w_nn * y_pred_nn + w_xgb * y_pred_xgb + w_lgb * y_pred_lgb

    # --- Stacking Ensemble ---
    stack_train = np.vstack([model.predict(x_val).ravel(),
                             xgb_model.predict_proba(x_val)[:, 1],
                             lgb_model.predict_proba(x_val)[:, 1]]).T
    meta_model = LogisticRegression().fit(stack_train, y_val)
    stack_test = np.vstack([y_pred_nn, y_pred_xgb, y_pred_lgb]).T
    y_pred_stack = meta_model.predict_proba(stack_test)[:, 1]

    # --- AUCs ---
    auc_nn.append(roc_auc_score(y_test, y_pred_nn))
    auc_xgb.append(roc_auc_score(y_test, y_pred_xgb))
    auc_lgb.append(roc_auc_score(y_test, y_pred_lgb))
    auc_weighted.append(roc_auc_score(y_test, y_pred_weighted))
    auc_stacking.append(roc_auc_score(y_test, y_pred_stack))

    print(f"{target}:")
    print(f"  ➤ NN AUC         = {auc_nn[-1]:.3f}")
    print(f"  ➤ XGBoost AUC    = {auc_xgb[-1]:.3f}")
    print(f"  ➤ LightGBM AUC   = {auc_lgb[-1]:.3f}")
    print(f"  ➤ Weighted AUC   = {auc_weighted[-1]:.3f}")
    print(f"  ➤ Stacking AUC   = {auc_stacking[-1]:.3f}")

# --- Final Summary ---
print("\n📊 Final Average AUCs:")
print(f"  ➤ Neural Network : {np.mean(auc_nn):.3f}")
print(f"  ➤ XGBoost        : {np.mean(auc_xgb):.3f}")
print(f"  ➤ LightGBM       : {np.mean(auc_lgb):.3f}")
print(f"  ➤ Weighted Ens.  : {np.mean(auc_weighted):.3f}")
print(f"  ➤ Stacking Ens.  : {np.mean(auc_stacking):.3f}")



🔬 Training on assay: NR.AhR


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 5946, number of negative: 5990
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.354879 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 336670
[LightGBM] [Info] Number of data points in the train set: 11936, number of used features: 1629
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498157 -> initscore=-0.007373
[LightGBM] [Info] Start training from score -0.007373
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m78/94[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 1ms/step



[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m 1/94[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 17ms/step



[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step




NR.AhR:
  ➤ NN AUC         = 0.898
  ➤ XGBoost AUC    = 0.905
  ➤ LightGBM AUC   = 0.904
  ➤ Weighted AUC   = 0.911
  ➤ Stacking AUC   = 0.909

🔬 Training on assay: NR.AR


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 7443, number of negative: 7390
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.765685 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 373926
[LightGBM] [Info] Number of data points in the train set: 14833, number of used features: 1619
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501787 -> initscore=0.007146
[LightGBM] [Info] Start training from score 0.007146
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
[1m 80/116[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 1ms/step



[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m  1/116[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 15ms/step



[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step




NR.AR:
  ➤ NN AUC         = 0.717
  ➤ XGBoost AUC    = 0.717
  ➤ LightGBM AUC   = 0.745
  ➤ Weighted AUC   = 0.736
  ➤ Stacking AUC   = 0.735

🔬 Training on assay: NR.AR.LBD


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 6864, number of negative: 6808
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.577818 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 337280
[LightGBM] [Info] Number of data points in the train set: 13672, number of used features: 1629
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.502048 -> initscore=0.008192
[LightGBM] [Info] Start training from score 0.008192
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
[1m 38/107[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m0s[0m 1ms/step 



[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m  1/107[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 23ms/step



[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step




NR.AR.LBD:
  ➤ NN AUC         = 0.867
  ➤ XGBoost AUC    = 0.830
  ➤ LightGBM AUC   = 0.805
  ➤ Weighted AUC   = 0.853
  ➤ Stacking AUC   = 0.854

🔬 Training on assay: NR.Aromatase


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 5627, number of negative: 5672
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.279448 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 383073
[LightGBM] [Info] Number of data points in the train set: 11299, number of used features: 1619
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498009 -> initscore=-0.007965
[LightGBM] [Info] Start training from score -0.007965
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
[1m69/89[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 1ms/step



[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m 1/89[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 16ms/step



[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
NR.Aromatase:
  ➤ NN AUC         = 0.730
  ➤ XGBoost AUC    = 0.790
  ➤ LightGBM AUC   = 0.792
  ➤ Weighted AUC   = 0.795
  ➤ Stacking AUC   = 0.794

🔬 Training on assay: NR.ER


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 5604, number of negative: 5592
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.250000 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 325511
[LightGBM] [Info] Number of data points in the train set: 11196, number of used features: 1629
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500536 -> initscore=0.002144
[LightGBM] [Info] Start training from score 0.002144
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
[1m77/88[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 1ms/step



[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m30/88[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 2ms/step 



[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step




NR.ER:
  ➤ NN AUC         = 0.739
  ➤ XGBoost AUC    = 0.771
  ➤ LightGBM AUC   = 0.775
  ➤ Weighted AUC   = 0.772
  ➤ Stacking AUC   = 0.768

🔬 Training on assay: NR.ER.LBD


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 6892, number of negative: 6842
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.684728 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 350453
[LightGBM] [Info] Number of data points in the train set: 13734, number of used features: 1629
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501820 -> initscore=0.007281
[LightGBM] [Info] Start training from score 0.007281
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
[1m 76/108[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 1ms/step



[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m  1/108[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 16ms/step



[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step




NR.ER.LBD:
  ➤ NN AUC         = 0.713
  ➤ XGBoost AUC    = 0.761
  ➤ LightGBM AUC   = 0.810
  ➤ Weighted AUC   = 0.796
  ➤ Stacking AUC   = 0.794

🔬 Training on assay: NR.PPAR.gamma


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 6575, number of negative: 6567
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.379499 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 386672
[LightGBM] [Info] Number of data points in the train set: 13142, number of used features: 1630
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500304 -> initscore=0.001217
[LightGBM] [Info] Start training from score 0.001217
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m 76/103[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 1ms/step



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m  1/103[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 15ms/step



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step




NR.PPAR.gamma:
  ➤ NN AUC         = 0.842
  ➤ XGBoost AUC    = 0.814
  ➤ LightGBM AUC   = 0.785
  ➤ Weighted AUC   = 0.824
  ➤ Stacking AUC   = 0.826

🔬 Training on assay: SR.ARE


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 5006, number of negative: 5002
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.279956 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 318298
[LightGBM] [Info] Number of data points in the train set: 10008, number of used features: 1619
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500200 -> initscore=0.000799
[LightGBM] [Info] Start training from score 0.000799
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
[1m64/79[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 2ms/step



[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m38/79[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m0s[0m 1ms/step 



[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step




SR.ARE:
  ➤ NN AUC         = 0.756
  ➤ XGBoost AUC    = 0.789
  ➤ LightGBM AUC   = 0.801
  ➤ Weighted AUC   = 0.797
  ➤ Stacking AUC   = 0.795

🔬 Training on assay: SR.ATAD5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 7206, number of negative: 7194
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.499862 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 380829
[LightGBM] [Info] Number of data points in the train set: 14400, number of used features: 1629
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500417 -> initscore=0.001667
[LightGBM] [Info] Start training from score 0.001667
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
[1m 78/113[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 1ms/step



[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m  1/113[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 16ms/step



[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step




SR.ATAD5:
  ➤ NN AUC         = 0.771
  ➤ XGBoost AUC    = 0.834
  ➤ LightGBM AUC   = 0.824
  ➤ Weighted AUC   = 0.829
  ➤ Stacking AUC   = 0.830

🔬 Training on assay: SR.HSE


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 6398, number of negative: 6368
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.449643 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 377662
[LightGBM] [Info] Number of data points in the train set: 12766, number of used features: 1630
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501175 -> initscore=0.004700
[LightGBM] [Info] Start training from score 0.004700
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m 64/100[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m0s[0m 2ms/step



[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m  1/100[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 21ms/step



[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step




SR.HSE:
  ➤ NN AUC         = 0.849
  ➤ XGBoost AUC    = 0.804
  ➤ LightGBM AUC   = 0.810
  ➤ Weighted AUC   = 0.844
  ➤ Stacking AUC   = 0.847

🔬 Training on assay: SR.MMP


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 5082, number of negative: 5122
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.349302 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 314129
[LightGBM] [Info] Number of data points in the train set: 10204, number of used features: 1619
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498040 -> initscore=-0.007840
[LightGBM] [Info] Start training from score -0.007840
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step
[1m77/80[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step



[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m38/80[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m0s[0m 1ms/step 



[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step




SR.MMP:
  ➤ NN AUC         = 0.923
  ➤ XGBoost AUC    = 0.945
  ➤ LightGBM AUC   = 0.938
  ➤ Weighted AUC   = 0.948
  ➤ Stacking AUC   = 0.947

🔬 Training on assay: SR.p53


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 6700, number of negative: 6640
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.477958 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 392839
[LightGBM] [Info] Number of data points in the train set: 13340, number of used features: 1630
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.502249 -> initscore=0.008996
[LightGBM] [Info] Start training from score 0.008996
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
[1m 30/105[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 2ms/step 



[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step




[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
SR.p53:
  ➤ NN AUC         = 0.823
  ➤ XGBoost AUC    = 0.829
  ➤ LightGBM AUC   = 0.846
  ➤ Weighted AUC   = 0.856
  ➤ Stacking AUC   = 0.856

📊 Final Average AUCs:
  ➤ Neural Network : 0.802
  ➤ XGBoost        : 0.816
  ➤ LightGBM       : 0.820
  ➤ Weighted Ens.  : 0.830
  ➤ Stacking Ens.  : 0.830




In [None]:
pip install -U xgboost


Collecting xgboost
  Downloading xgboost-3.0.1-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.1-py3-none-manylinux_2_28_x86_64.whl (253.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.9/253.9 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xgboost
  Attempting uninstall: xgboost
    Found existing installation: xgboost 2.1.4
    Uninstalling xgboost-2.1.4:
      Successfully uninstalled xgboost-2.1.4
Successfully installed xgboost-3.0.1
