In [1]:
# importing libraries 
import pandas as pd
import torch
import random
from tqdm import tqdm
from torch.utils.data import DataLoader
import itertools
from transformers import Trainer, TrainingArguments, pipeline, set_seed, AutoTokenizer, BioGptModel, AutoModelForMaskedLM, DataCollatorForLanguageModeling, AutoModel
from torch.utils.data import TensorDataset
import torch.nn.functional as F
from sklearn.model_selection import StratifiedKFold
import pickle
from sklearn.metrics import roc_auc_score, average_precision_score
import xgboost as xgb
import numpy as np
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
import os
from datasets import Dataset

# tf.test.is_gpu_available() is deprecated; TensorFlow's own warning recommends
# tf.config.list_physical_devices('GPU') instead. The returned list is empty
# (falsy) when TensorFlow sees no GPU. Note this reports TensorFlow's view only.
if tf.config.list_physical_devices('GPU'):
    print('GPU is available')
else:
    print('GPU is not available, running on CPU')

# Load the dataset used by every cell below.
df=pd.read_csv("df.csv")

# Shared Bio_ClinicalBERT tokenizer; get_model() reads this module-level name.
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
# The functions visible in this notebook each create their own local `model`;
# this module-level instance is kept in case other cells rely on it.
model = AutoModelForMaskedLM.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

# preparing the clinical notes for training and fine-tuning
def tokenize(batch):
    """Tokenize one batch of texts for masked-language-model fine-tuning.

    Args:
        batch: mapping with a "text" entry holding the list of strings to encode
            (this is what `Dataset.map(..., batched=True)` passes in).

    Returns:
        The tokenizer output for the batch, padded to the longest sequence in
        the batch and truncated to at most 36 tokens.
    """
    # Reuse the module-level tokenizer (the same one get_model() uses) instead
    # of reloading it from the checkpoint on every call.
    return tokenizer(batch["text"], padding=True, truncation=True, max_length=36)

def prepare_data(df):
    """Build a tokenized, torch-formatted dataset from the `AN_PROC_NAME` column.

    Args:
        df: DataFrame containing an "AN_PROC_NAME" column of texts.

    Returns:
        A `datasets.Dataset` whose "input_ids" and "attention_mask" columns are
        exposed as torch tensors.
    """
    notes = list(df["AN_PROC_NAME"])
    # One single batch spanning every row, so `tokenize` (padding=True) pads all
    # sequences to one common length.
    tokenized = Dataset.from_dict({"text": notes}).map(
        tokenize,
        batched=True,
        batch_size=len(notes),
    )
    # Only the model inputs are returned as tensors.
    tokenized.set_format("torch", columns=["input_ids", "attention_mask"])
    return tokenized


  from .autonotebook import tqdm as notebook_tqdm
2023-10-02 20:15:03.997116: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU is available


2023-10-02 20:15:06.267801: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-02 20:15:06.268034: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-02 20:15:06.269243: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [2]:
# Number of rows in the loaded DataFrame.
len(df)

84875

In [3]:
# Fine-tune the model with the hyperparameters given below and save it under `name`
def get_model(name, train_dataset,val_dataset):
    """Fine-tune Bio_ClinicalBERT with masked language modeling and save it.

    Args:
        name: directory the fine-tuned model and tokenizer are saved to.
        train_dataset: tokenized dataset used for training.
        val_dataset: tokenized dataset handed to the Trainer as `eval_dataset`.
            No evaluation strategy is configured in the arguments below, so
            with library defaults it is presumably never evaluated -- confirm.

    Returns:
        None. Checkpoints go to "./results"; the final model and tokenizer are
        written to `name`.
    """
    hyperparams = dict(
        output_dir="./results",
        num_train_epochs=4,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=2500,
        learning_rate=0.0001,
        logging_dir=None,
        logging_steps=1000,
        save_strategy="epoch",
    )
    args = TrainingArguments(**hyperparams)

    # Start every fine-tuning run from the public checkpoint with its MLM head.
    mlm_model = AutoModelForMaskedLM.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

    # The collator applies the random masking for the MLM objective
    # (mlm_probability=0.15) using the module-level tokenizer.
    collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

    trainer = Trainer(
        model=mlm_model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        data_collator=collator,
    )
    trainer.train()

    # Persist model and tokenizer side by side so `name` can be loaded later
    # with from_pretrained.
    trainer.model.save_pretrained(name)
    trainer.tokenizer.save_pretrained(name)

In [4]:
# Compute embeddings of the texts with the fine-tuned model
def get_bioclinicalBERT_updated(texts, name, max_length=296, batch_size=32, mask_padding=False):
    """Embed texts with the fine-tuned model stored under `name`.

    Each text is tokenized, padded/truncated to `max_length`, run through the
    encoder, and reduced to one vector by averaging the last hidden state over
    the sequence dimension.

    Args:
        texts: iterable of strings to embed.
        name: path or identifier accepted by `from_pretrained` for both the
            tokenizer and the model.
        max_length: fixed sequence length every text is padded/truncated to.
            NOTE: this may need to differ across tasks.
        batch_size: number of texts encoded per forward pass.
        mask_padding: when False (default, original behaviour) the mean is taken
            over all `max_length` positions, padded ones included. When True,
            padded positions are excluded using the attention mask. Keep the
            default when results must stay comparable with embeddings that were
            computed (and cached) earlier.

    Returns:
        numpy array with one row per input text; an empty
        `(0, hidden_size)` array when `texts` is empty.
    """
    notes = list(texts)

    # Load the tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModel.from_pretrained(name)

    # Use the GPU when available, otherwise stay on CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    # Empty input: the original crashed when concatenating an empty tensor list.
    if not notes:
        return np.empty((0, model.config.hidden_size), dtype=np.float32)

    embeddings = []
    for start in tqdm(range(0, len(notes), batch_size)):
        # Tokenize one batch at a time with the tokenizer's __call__; this avoids
        # a per-text Python loop and keeps only the current batch on the device.
        encoded = tokenizer(
            notes[start:start + batch_size],
            max_length=max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        ).to(device)

        with torch.no_grad():
            output = model(input_ids=encoded['input_ids'],
                           attention_mask=encoded['attention_mask'])

        last_hidden_state = output.last_hidden_state

        if mask_padding:
            # Average over real tokens only.
            mask = encoded['attention_mask'].unsqueeze(-1).to(last_hidden_state.dtype)
            pooled_output = (last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
        else:
            # Original pooling: plain mean over every position, padding included.
            pooled_output = torch.mean(last_hidden_state, dim=1)

        # Bring the batch back to host memory before converting to numpy.
        embeddings.append(pooled_output.cpu().numpy())

    return np.concatenate(embeddings, axis=0)


In [5]:
def _ensure_finetuned_model(train, name):
    """Fine-tune and save the fold's model under `name` unless that directory already exists."""
    if os.path.isdir(name):
        # Already fine-tuned: skip the sampling and tokenization entirely.
        return
    # 7/8 of the outer-training rows are used for fine-tuning; the remaining
    # 1/8 is passed to get_model as the validation set. The fixed random_state
    # keeps this split reproducible.
    train_data = train.sample(frac=7/8, random_state=42)
    val_data = train[~train.index.isin(train_data.index)]
    train_dataset = prepare_data(train_data.reset_index(drop=True))
    val_dataset = prepare_data(val_data.reset_index(drop=True))
    get_model(name, train_dataset, val_dataset)


def _load_or_compute_embeddings(train, test, name):
    """Return (X_train, X_test) embeddings for one fold, cached in `{name}.pickle`."""
    cache_path = f"{name}.pickle"
    if os.path.isfile(cache_path):
        # pickle can execute arbitrary code on load; only cache files written
        # by this function should ever be placed at this path.
        with open(cache_path, 'rb') as cache_file:
            cached = pickle.load(cache_file)
        return cached[0], cached[1]

    X_train = get_bioclinicalBERT_updated(train["AN_PROC_NAME"], name)
    X_test = get_bioclinicalBERT_updated(test["AN_PROC_NAME"], name)
    with open(cache_path, 'wb') as cache_file:
        pickle.dump([X_train, X_test], cache_file)
    return X_train, X_test


def K_fold_val(outcome_col, df):
    """Run 5-fold cross-validation of XGBoost on fine-tuned text embeddings.

    For every fold: make sure a fold-specific fine-tuned model exists, embed the
    train and test texts with it (cached on disk), grid-search an XGBoost
    classifier on the training embeddings, and score it on the test fold.

    Args:
        outcome_col: name of the label column in `df` to predict.
        df: DataFrame with the columns "PE", "AN_PROC_NAME" and `outcome_col`.

    Returns:
        A string reporting mean and standard deviation of AUROC and AUPRC over
        the folds.

    Side effects:
        May create the model directories and `{name}.pickle` embedding caches;
        rewrites `update_{outcome_col}.pickle` with the scores after each fold.
    """
    n_splits = 5
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    auroc_scores = []
    auprc_scores = []

    param_grid = {
        'learning_rate': [0.1,0.15,0.3],
        'max_depth': [4,5,6,7,8],
        'min_child_weight':[1,2,4]
        }

    # Folds are always stratified on the "PE" column, whatever `outcome_col` is.
    # Presumably this keeps fold membership identical across outcomes so the
    # shared `fold_{i}` models and embedding caches stay valid -- do not switch
    # to stratifying on `outcome_col` without also changing the cache names.
    folds = tqdm(skf.split(df, df["PE"]), total=n_splits)
    for i, (train_index, test_index) in enumerate(folds, start=1):
        train, test = df.iloc[train_index], df.iloc[test_index]

        # Only "postop_del" gets outcome-specific models/caches; every other
        # outcome shares the `fold_{i}` artifacts.
        name = f"{outcome_col}_fold_{i}" if outcome_col == "postop_del" else f"fold_{i}"

        _ensure_finetuned_model(train, name)
        X_train, X_test = _load_or_compute_embeddings(train, test, name)

        y_train = train[outcome_col]
        y_test = test[outcome_col]

        # Hyperparameter search on the training fold only, then score the best
        # estimator on the held-out fold.
        # NOTE(review): the recorded runs of this notebook show a share of the
        # grid-search fits failing inside XGBoost training (scored as NaN by
        # GridSearchCV); the cause is cut off in the logs -- rerun with
        # error_score='raise' to investigate.
        model = xgb.XGBClassifier(random_state=42,tree_method = "hist", device = "cuda")
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='average_precision',verbose=2)
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        y_pred = best_model.predict_proba(X_test)[:, 1]

        auroc_scores.append(roc_auc_score(y_test, y_pred))
        auprc_scores.append(average_precision_score(y_test, y_pred))

        # Persist the scores after every fold so partial results survive an
        # interrupted run.
        with open(f"update_{outcome_col}.pickle", 'wb') as f:
            pickle.dump({"auprc":auprc_scores, "auroc":auroc_scores}, f)

    mean_auroc = np.mean(auroc_scores)
    std_auroc = np.std(auroc_scores)
    mean_auprc = np.mean(auprc_scores)
    std_auprc = np.std(auprc_scores)

    return(f"metrics: Mean AUROC: {mean_auroc}, SD AUROC: {std_auroc}, Mean AUPRC: {mean_auprc}, SD AUPRC: {std_auprc}")


In [6]:
# Run the 5-fold evaluation with "death_in_30" as the outcome column.
results=K_fold_val("death_in_30", df)

Map: 100%|██████████| 59412/59412 [00:03<00:00, 15385.60 examples/s]
Map: 100%|██████████| 8488/8488 [00:00<00:00, 15931.05 examples/s]
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1000,1.4115
2000,0.9024
3000,0.828
4000,0.7437
5000,0.6808
6000,0.6168
7000,0.5667
8000,0.5209
9000,0.4854
10000,0.4456


Some weights of BertModel were not initialized from the model checkpoint at fold_1 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 2122/2122 [08:51<00:00,  3.99it/s]
Some weights of BertModel were not initialized from the model checkpoint at fold_1 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 531/531 [02:13<00:00,  3.98it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   7.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   3.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   6.4s
[CV] END .learning_rate=0.1,

Map: 100%|██████████| 59412/59412 [00:03<00:00, 14895.96 examples/s]
Map: 100%|██████████| 8488/8488 [00:00<00:00, 8646.20 examples/s]


Step,Training Loss
1000,1.4891
2000,0.9143
3000,0.8334
4000,0.7417
5000,0.681
6000,0.6115
7000,0.5557
8000,0.5176
9000,0.4778
10000,0.4564


Some weights of BertModel were not initialized from the model checkpoint at fold_2 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 2122/2122 [08:53<00:00,  3.98it/s]
Some weights of BertModel were not initialized from the model checkpoint at fold_2 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 531/531 [02:13<00:00,  3.99it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   6.3s
[CV] END .learning_rate=0.1, max_depth=4, min_c

Map: 100%|██████████| 59412/59412 [00:03<00:00, 15143.26 examples/s]
Map: 100%|██████████| 8488/8488 [00:00<00:00, 15794.06 examples/s]


Step,Training Loss
1000,1.4182
2000,0.889
3000,0.816
4000,0.7598
5000,0.651
6000,0.5874
7000,0.5694
8000,0.4977
9000,0.4569
10000,0.4519


Some weights of BertModel were not initialized from the model checkpoint at fold_3 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 2122/2122 [08:49<00:00,  4.01it/s]
Some weights of BertModel were not initialized from the model checkpoint at fold_3 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 531/531 [02:12<00:00,  4.00it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   6.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   5.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   6.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   6.6s
[CV] END .learning_rate=0.1, max_depth=4, min_c

Map: 100%|██████████| 59412/59412 [00:04<00:00, 14553.70 examples/s]
Map: 100%|██████████| 8488/8488 [00:00<00:00, 8539.87 examples/s]


Step,Training Loss
1000,1.3937
2000,0.9082
3000,0.8358
4000,0.7332
5000,0.6852
6000,0.6033
7000,0.5513
8000,0.5052
9000,0.4721
10000,0.4654


Some weights of BertModel were not initialized from the model checkpoint at fold_4 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 2122/2122 [04:57<00:00,  7.14it/s]
Some weights of BertModel were not initialized from the model checkpoint at fold_4 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 531/531 [01:13<00:00,  7.23it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

51 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Step,Training Loss
1000,1.4055
2000,0.8958
3000,0.8606
4000,0.7438
5000,0.6769
6000,0.6211
7000,0.556
8000,0.522
9000,0.4754
10000,0.4512


Some weights of BertModel were not initialized from the model checkpoint at fold_5 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 2122/2122 [04:56<00:00,  7.17it/s]
Some weights of BertModel were not initialized from the model checkpoint at fold_5 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 531/531 [01:13<00:00,  7.26it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

52 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

In [7]:
# Show the summary string returned by K_fold_val for the run above.
print(results) 

metrics: Mean AUROC: 0.8610873655737621, SD AUROC: 0.00850186953989924, Mean AUPRC: 0.16340601972168414, SD AUPRC: 0.015351359736232511


## Part II: DVT

In [8]:
# Run the 5-fold evaluation with "DVT" as the outcome column.
results=K_fold_val("DVT", df)

Map: 100%|██████████| 59412/59412 [00:03<00:00, 16216.11 examples/s]
Map: 100%|██████████| 8488/8488 [00:00<00:00, 12191.75 examples/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

25 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

32 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

28 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

28 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

25 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

In [9]:
# Print the summary held in `results` from the K_fold_val run above
# (mean and SD of AUROC and AUPRC, as shown in the output below).
print(results) 

metrics: Mean AUROC: 0.7629512712309828, SD AUROC: 0.03543834780066562, Mean AUPRC: 0.02202233849185817, SD AUPRC: 0.005522316357953097


## Part 3: PE

In [10]:
# Run K_fold_val (defined in an earlier cell) for the "PE" outcome on `df`.
# NOTE: `results` is reused by every part of this notebook, so after this cell
# it holds only the "PE" summary; the previous outcome's summary is overwritten.
results=K_fold_val("PE", df)

Map: 100%|██████████| 59412/59412 [00:03<00:00, 15990.83 examples/s]
Map: 100%|██████████| 8488/8488 [00:00<00:00, 15561.51 examples/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

11 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

16 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

11 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

12 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

14 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

In [11]:
# Print the "PE" summary returned by K_fold_val in the previous cell
# (mean and SD of AUROC and AUPRC).
# NOTE(review): the trailing "over" marker is unexplained; presumably it means
# the run finished. Confirm or remove.
print(results) # over

metrics: Mean AUROC: 0.7145868468925766, SD AUROC: 0.034104754055222536, Mean AUPRC: 0.011327283606716913, SD AUPRC: 0.0024857416167540663


## Part 4: PNA outcome

In [12]:
# Run K_fold_val (defined in an earlier cell) for the "PNA" outcome on `df`.
# NOTE: this overwrites `results` from the previous part; print or save that
# summary before running this cell if it is still needed.
results=K_fold_val("PNA", df)

Map: 100%|██████████| 59412/59412 [00:03<00:00, 17431.32 examples/s]
Map: 100%|██████████| 8488/8488 [00:00<00:00, 9139.60 examples/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

26 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

22 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

23 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

20 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

22 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

In [13]:
# Print the "PNA" summary returned by K_fold_val in the previous cell
# (mean and SD of AUROC and AUPRC).
print(results) 

# over

metrics: Mean AUROC: 0.808726823138197, SD AUROC: 0.019185846220815112, Mean AUPRC: 0.047891530820670644, SD AUPRC: 0.016561461398690847


## Part 5: post-AKI status

In [14]:
# Run K_fold_val (defined in an earlier cell) for the "post_aki_status"
# outcome on `df`.
# NOTE: this overwrites `results` from the previous part.
results=K_fold_val("post_aki_status", df)

Map: 100%|██████████| 59412/59412 [00:03<00:00, 16010.09 examples/s]
Map: 100%|██████████| 8488/8488 [00:00<00:00, 15529.57 examples/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

62 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

66 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

85 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

90 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

65 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/charles/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/sklearn.py", line 1515, in fit
    self._Booster = train(
  File "/home/charles/.local/lib/python3.8/site-packages/xgboost/core.py", line 729, in inner_f
    return func(**kwargs)
  File "/home/charles/.local/lib/python3.8/

In [15]:
# Print the "post_aki_status" summary returned by K_fold_val in the previous
# cell (mean and SD of AUROC and AUPRC).
print(results)

metrics: Mean AUROC: 0.8357733235047885, SD AUROC: 0.0014214578084946836, Mean AUPRC: 0.477597746347425, SD AUPRC: 0.009435111064072004


## Part 6: Delirium

In [16]:
# Part 6 uses only the rows that have a recorded `postop_del` label.
#
# Build a dedicated frame instead of reassigning `df`: overwriting the shared
# frame makes this cell non-idempotent and silently shrinks the cohort for any
# other outcome cell (e.g. "PE", "PNA", "post_aki_status") that is re-run
# after this one. `df` is left untouched here.
df_delirium = (
    df.dropna(subset=["postop_del"])   # drop rows with a missing delirium label
      .astype({"postop_del": int})     # cast the label column to int
      .reset_index(drop=True)          # contiguous 0..n-1 index after filtering
)

# Same call as for the other outcomes, but on the delirium-only subset.
results = K_fold_val("postop_del", df_delirium)

Map: 100%|██████████| 8524/8524 [00:00<00:00, 12813.31 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 3447.63 examples/s]


Step,Training Loss


Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_1 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 305/305 [00:43<00:00,  7.05it/s]
Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_1 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 77/77 [00:10<00:00,  7.13it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_c

Map: 100%|██████████| 8524/8524 [00:01<00:00, 8478.97 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 3411.93 examples/s]


Step,Training Loss
1000,1.2434
2000,0.7475


Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_2 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 305/305 [00:43<00:00,  7.04it/s]
Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_2 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 77/77 [00:10<00:00,  7.13it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_c

Map: 100%|██████████| 8524/8524 [00:00<00:00, 12206.26 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 1718.46 examples/s]


Step,Training Loss
1000,1.2729
2000,0.7677


Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_3 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 305/305 [00:43<00:00,  7.06it/s]
Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_3 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 77/77 [00:10<00:00,  7.16it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_c

Map: 100%|██████████| 8525/8525 [00:00<00:00, 8708.46 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 3635.37 examples/s]


Step,Training Loss
1000,1.2322
2000,0.7496


Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_4 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 305/305 [00:43<00:00,  7.06it/s]
Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_4 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 77/77 [00:10<00:00,  7.16it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_c

Map: 100%|██████████| 8525/8525 [00:00<00:00, 13257.34 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 3243.41 examples/s]


Step,Training Loss
1000,1.2251
2000,0.7505


Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_5 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 305/305 [00:43<00:00,  7.07it/s]
Some weights of BertModel were not initialized from the model checkpoint at postop_del_fold_5 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 77/77 [00:10<00:00,  7.17it/s]


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   0.4s
[CV] END .learning_rate=0.1, max_depth=4, min_c

5it [58:42, 704.44s/it]


In [17]:
# Print the value returned by K_fold_val("postop_del", df) in the previous
# cell; per the captured output it renders as a single
# "metrics: Mean AUROC ... SD AUPRC ..." line for the delirium outcome.
print(results) 

metrics: Mean AUROC: 0.6906354796374291, SD AUROC: 0.009145536497729072, Mean AUPRC: 0.6638814655539581, SD AUPRC: 0.015404237362584157
