In [1]:
# importing libraries 
import pandas as pd
import torch
import random
from tqdm import tqdm
from torch.utils.data import DataLoader
import itertools
from transformers import Trainer, TrainingArguments, pipeline, set_seed, BioGptTokenizer, BioGptForCausalLM, AutoTokenizer, BioGptModel
from transformers import Trainer, TrainingArguments
from torch.utils.data import TensorDataset
import torch.nn.functional as F
from sklearn.model_selection import StratifiedKFold
import pickle
from sklearn.metrics import roc_auc_score, average_precision_score
import xgboost as xgb
import numpy as np
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
import os
from sklearn.model_selection import KFold
if tf.test.is_gpu_available():
    print('GPU is available')
else:
    print('GPU is not available, running on CPU')

df=pd.read_csv("df.csv")
tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")
model = BioGptForCausalLM.from_pretrained("microsoft/biogpt")
# preparing the clinical notes for training and fine-tuning with BioGPT
class preparing(torch.utils.data.Dataset):
    def __init__(self, notes, tokenizer, max_length=36):
        self.notes = notes
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __getitem__(self, idx):
        note = self.notes[idx]

        # Tokenize the input note
        encoding = self.tokenizer(note, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')

        # Split the input sequence into input and label sequences
        input_ids = encoding['input_ids'][0][:-1]
        label_ids = encoding['input_ids'][0][1:]

        # Return the input and label sequences
        return {
            'input_ids': input_ids,
            'attention_mask': encoding['attention_mask'][0][:-1],
            'labels': label_ids
        }

    def __len__(self):
        return len(self.notes)


  from .autonotebook import tqdm as notebook_tqdm
2023-10-04 17:06:00.749461: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-04 17:06:01.214841: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-10-04 17:06:01.214882: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU is available


In [2]:
len(df)

84875

In [3]:
# this finetunes the model with the given parameters
def get_model(name, train_dataset,val_dataset):
    model = BioGptForCausalLM.from_pretrained("microsoft/biogpt")
    training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=2,      # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,          # number of warmup steps for learning rate scheduler, note increasing does not help
    # weight_decay=0.1,          # strength of weight decay
    learning_rate=0.001,
    logging_dir=None,               # Do not store logs
    logging_steps=1000,         # Effectively disables logging
    save_strategy="steps")  
    # Create the Trainer object
    trainer = Trainer(
        model=model,                               # the instantiated BioGPT model to be trained
        args=training_args,                        # training arguments, defined above
        train_dataset=train_dataset,               # training dataset
        eval_dataset=val_dataset                   # evaluation dataset
    )
    # Fine-tune BioGPT on the training dataset
    trainer.train()
    # saving the newly pre-trained model. 
    trainer.model.save_pretrained(name)    

In [4]:
# this get the emebeddings of the fine-tuned model
def get_biogpt_updated(texts, name):
    batch_size=200
    max_length=296
    texts = list(texts)

    # get the tokenizer and model from BioGPT
    model = BioGptForCausalLM.from_pretrained(name, output_hidden_states=True)  # name of your newly trained gpt embeddings
    tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")

    # Move the model to the GPU if available
    if torch.cuda.is_available():
        model = model.to("cuda")

    # tokenize the input text in batches
    all_embeddings = []
    for i in (range(0, len(texts), batch_size)):
        print("Progress", (i/len(texts))*100)
        batch_texts = texts[i:i + batch_size]
        batch_encoded = tokenizer(batch_texts, padding='max_length', truncation=True, return_tensors="pt",
                                  max_length=max_length)
        input_ids = batch_encoded["input_ids"]
        attention_mask = batch_encoded["attention_mask"]

        # Move the tensors to the GPU if available
        if torch.cuda.is_available():
            input_ids = input_ids.to("cuda")
            attention_mask = attention_mask.to("cuda")

        # process each batch separately
        batch_embeddings = []
        for j in (range(input_ids.shape[0])):
            with torch.no_grad():
                outputs = model(input_ids[j:j + 1], attention_mask=attention_mask[j:j + 1])
                last_hidden_state = outputs.hidden_states[-1]
                masked_hidden_state = last_hidden_state * attention_mask[j:j + 1].unsqueeze(-1)
                embedding = torch.mean(masked_hidden_state, dim=1)
                embedding = F.normalize(embedding, p=2, dim=1)
            
            # If you used GPU, move the embeddings back to CPU for further operations (like converting to numpy array)
            if torch.cuda.is_available():
                embedding = embedding.to("cpu")
            batch_embeddings.append(embedding)

        # concatenate the embeddings from all examples in the batch
        embeddings = torch.cat(batch_embeddings, dim=0)
        all_embeddings.append(embeddings)

    # concatenate the embeddings from all batches
    embeddings = torch.cat(all_embeddings, dim=0)

    # convert the embeddings to a numpy array
    X = embeddings.detach().numpy()

    return (X)



In [5]:
def K_fold_val(outcome_col, df):
    # Initialize the StratifiedKFold class
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # Initialize lists to hold scores
    auroc_scores = []
    auprc_scores = []

    # Define the parameter grid
    param_grid = {
        'learning_rate': [0.1,0.15,0.3],
        'max_depth': [4,5,6,7,8],
        'min_child_weight':[1,2,4]
        }
    i=1
    # Perform 5-fold cross validation
    for train_index, test_index in tqdm(skf.split(df,df["PE"])):
        # Split the data into train/test sets
        train, test = df.iloc[train_index], df.iloc[test_index]
        # Create a new column for embeddings based on the training set only
        if outcome_col=="postop_del":
            name=f"fold_{i}_postopdel"
        else:
            name=f"fold_{i}"
        # name=f"fold_{i}"
        i=i+1
        train_new=((train.sample(frac=7/8,random_state=42)))
        val_new = train[~train.index.isin(train_new.index)]
        train_notes=(train_new.reset_index(drop=True))["AN_PROC_NAME"]
        val_notes=(val_new.reset_index(drop=True))["AN_PROC_NAME"]
        tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")
        train_dataset = preparing(train_notes, tokenizer)
        val_dataset = preparing(val_notes, tokenizer)
        if os.path.isdir(name):
            pass
        else:
            get_model(name, train_dataset,val_dataset)

        if os.path.isfile(f'{name}.pickle'):
            # print("file present")
            with open(f'{name}.pickle', 'rb') as file:
                data = pickle.load(file)
                X_train=data[0]
                X_test=data[1]
        else:
            X_train=get_biogpt_updated(train["AN_PROC_NAME"], name)
            X_test = get_biogpt_updated(test["AN_PROC_NAME"], name)
        
            with open(f'{name}.pickle', 'wb') as f:
                pickle.dump([X_train,X_test], f)


        # Prepare training and testing data
        y_train = train[outcome_col]
        y_test = test[outcome_col]

        if y_train.isna().any() or y_test.isna().any(): 
            na_indices = y_train.isna()  
            X_train = (X_train[~na_indices])
            y_train = (((y_train.dropna())).astype(int)).reset_index(drop=True)
            na_indices = y_test.isna()  
            X_test = X_test[~na_indices]  
            y_test = ((y_test.dropna()).astype(int)).reset_index(drop=True)
        else:
            pass
        

        model = xgb.XGBClassifier(random_state=42,tree_method='gpu_hist')
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='average_precision',verbose=2) # roc_auc
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        y_pred = best_model.predict_proba(X_test)[:, 1]

        # Compute the AUROC and AUPRC
        auroc = roc_auc_score(y_test, y_pred)
        auprc = average_precision_score(y_test, y_pred)

        # Save the scores
        auroc_scores.append(auroc)
        auprc_scores.append(auprc)
        with open(f"update_{outcome_col}.pickle", 'wb') as f:
            pickle.dump({"auprc":auprc_scores, "auroc":auroc_scores}, f)

    # Now you can calculate the mean and standard deviation
    mean_auroc = np.mean(auroc_scores)
    std_auroc = np.std(auroc_scores)
    mean_auprc = np.mean(auprc_scores)
    std_auprc = np.std(auprc_scores)
    
    return(f"metrics: Mean AUROC: {mean_auroc}, SD AUROC: {std_auroc}, Mean AUPRC: {mean_auprc}, SD AUPRC: {std_auprc}")

In [6]:
results=K_fold_val("death_in_30", df) 



Step,Training Loss
1000,1.6209
2000,1.1402
3000,1.0727
4000,1.0177
5000,0.9984
6000,0.9695
7000,0.9663
8000,0.9139
9000,0.9114
10000,0.8907


Progress 0.0
Progress 0.29455081001472755
Progress 0.5891016200294551
Progress 0.8836524300441826
Progress 1.1782032400589102
Progress 1.4727540500736376
Progress 1.7673048600883652
Progress 2.0618556701030926
Progress 2.3564064801178204
Progress 2.6509572901325478
Progress 2.945508100147275
Progress 3.2400589101620034
Progress 3.5346097201767304
Progress 3.829160530191458
Progress 4.123711340206185
Progress 4.418262150220913
Progress 4.712812960235641
Progress 5.007363770250368
Progress 5.3019145802650955
Progress 5.596465390279824
Progress 5.89101620029455
Progress 6.185567010309279
Progress 6.480117820324007
Progress 6.774668630338733
Progress 7.069219440353461
Progress 7.363770250368189
Progress 7.658321060382916
Progress 7.952871870397643
Progress 8.24742268041237
Progress 8.541973490427099
Progress 8.836524300441827
Progress 9.131075110456553
Progress 9.425625920471282
Progress 9.72017673048601
Progress 10.014727540500736
Progress 10.309278350515463
Progress 10.603829160530191
Pr



Step,Training Loss
1000,1.6213
2000,1.1511
3000,1.0657
4000,1.0207
5000,1.0027
6000,0.9608
7000,0.955
8000,0.9226
9000,0.8935
10000,0.8775


Progress 0.0
Progress 0.29455081001472755
Progress 0.5891016200294551
Progress 0.8836524300441826
Progress 1.1782032400589102
Progress 1.4727540500736376
Progress 1.7673048600883652
Progress 2.0618556701030926
Progress 2.3564064801178204
Progress 2.6509572901325478
Progress 2.945508100147275
Progress 3.2400589101620034
Progress 3.5346097201767304
Progress 3.829160530191458
Progress 4.123711340206185
Progress 4.418262150220913
Progress 4.712812960235641
Progress 5.007363770250368
Progress 5.3019145802650955
Progress 5.596465390279824
Progress 5.89101620029455
Progress 6.185567010309279
Progress 6.480117820324007
Progress 6.774668630338733
Progress 7.069219440353461
Progress 7.363770250368189
Progress 7.658321060382916
Progress 7.952871870397643
Progress 8.24742268041237
Progress 8.541973490427099
Progress 8.836524300441827
Progress 9.131075110456553
Progress 9.425625920471282
Progress 9.72017673048601
Progress 10.014727540500736
Progress 10.309278350515463
Progress 10.603829160530191
Pr



Step,Training Loss
1000,1.6245
2000,1.1327
3000,1.0715
4000,1.0243
5000,0.997
6000,0.9692
7000,0.9512
8000,0.9215
9000,0.8949
10000,0.8807


Progress 0.0
Progress 0.29455081001472755
Progress 0.5891016200294551
Progress 0.8836524300441826
Progress 1.1782032400589102
Progress 1.4727540500736376
Progress 1.7673048600883652
Progress 2.0618556701030926
Progress 2.3564064801178204
Progress 2.6509572901325478
Progress 2.945508100147275
Progress 3.2400589101620034
Progress 3.5346097201767304
Progress 3.829160530191458
Progress 4.123711340206185
Progress 4.418262150220913
Progress 4.712812960235641
Progress 5.007363770250368
Progress 5.3019145802650955
Progress 5.596465390279824
Progress 5.89101620029455
Progress 6.185567010309279
Progress 6.480117820324007
Progress 6.774668630338733
Progress 7.069219440353461
Progress 7.363770250368189
Progress 7.658321060382916
Progress 7.952871870397643
Progress 8.24742268041237
Progress 8.541973490427099
Progress 8.836524300441827
Progress 9.131075110456553
Progress 9.425625920471282
Progress 9.72017673048601
Progress 10.014727540500736
Progress 10.309278350515463
Progress 10.603829160530191
Pr



Step,Training Loss
1000,1.6256
2000,1.1344
3000,1.0757
4000,1.0452
5000,0.9894
6000,0.9772
7000,0.9539
8000,0.9242
9000,0.8993
10000,0.8919


Progress 0.0
Progress 0.29455081001472755
Progress 0.5891016200294551
Progress 0.8836524300441826
Progress 1.1782032400589102
Progress 1.4727540500736376
Progress 1.7673048600883652
Progress 2.0618556701030926
Progress 2.3564064801178204
Progress 2.6509572901325478
Progress 2.945508100147275
Progress 3.2400589101620034
Progress 3.5346097201767304
Progress 3.829160530191458
Progress 4.123711340206185
Progress 4.418262150220913
Progress 4.712812960235641
Progress 5.007363770250368
Progress 5.3019145802650955
Progress 5.596465390279824
Progress 5.89101620029455
Progress 6.185567010309279
Progress 6.480117820324007
Progress 6.774668630338733
Progress 7.069219440353461
Progress 7.363770250368189
Progress 7.658321060382916
Progress 7.952871870397643
Progress 8.24742268041237
Progress 8.541973490427099
Progress 8.836524300441827
Progress 9.131075110456553
Progress 9.425625920471282
Progress 9.72017673048601
Progress 10.014727540500736
Progress 10.309278350515463
Progress 10.603829160530191
Pr



Step,Training Loss
1000,1.6261
2000,1.1472
3000,1.0814
4000,1.0294
5000,1.0025
6000,0.9737
7000,0.9599
8000,0.9344
9000,0.8906
10000,0.89


Progress 0.0
Progress 0.29455081001472755
Progress 0.5891016200294551
Progress 0.8836524300441826
Progress 1.1782032400589102
Progress 1.4727540500736376
Progress 1.7673048600883652
Progress 2.0618556701030926
Progress 2.3564064801178204
Progress 2.6509572901325478
Progress 2.945508100147275
Progress 3.2400589101620034
Progress 3.5346097201767304
Progress 3.829160530191458
Progress 4.123711340206185
Progress 4.418262150220913
Progress 4.712812960235641
Progress 5.007363770250368
Progress 5.3019145802650955
Progress 5.596465390279824
Progress 5.89101620029455
Progress 6.185567010309279
Progress 6.480117820324007
Progress 6.774668630338733
Progress 7.069219440353461
Progress 7.363770250368189
Progress 7.658321060382916
Progress 7.952871870397643
Progress 8.24742268041237
Progress 8.541973490427099
Progress 8.836524300441827
Progress 9.131075110456553
Progress 9.425625920471282
Progress 9.72017673048601
Progress 10.014727540500736
Progress 10.309278350515463
Progress 10.603829160530191
Pr

5it [10:14:47, 7377.46s/it]


In [7]:
print(results)

metrics: Mean AUROC: 0.8675332808060843, SD AUROC: 0.008262728883199361, Mean AUPRC: 0.17571917594447797, SD AUPRC: 0.017811667263782017


## Part II: DVT

In [8]:
results=K_fold_val("DVT", df)

0it [00:00, ?it/s]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_c

1it [08:23, 503.40s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

2it [16:45, 502.70s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

3it [25:10, 503.77s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.2s
[CV] END .learning_rate=0.1, max_depth=4, min_c

4it [33:32, 503.15s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

5it [41:53, 502.64s/it]


In [9]:
print(results) 

metrics: Mean AUROC: 0.7814653564307743, SD AUROC: 0.024981010639473118, Mean AUPRC: 0.0272090817812775, SD AUPRC: 0.006873638763628147


## Part III: PE

In [10]:
results=K_fold_val("PE", df)

0it [00:00, ?it/s]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

1it [08:02, 482.40s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_c

2it [15:58, 478.52s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_c

3it [24:02, 481.18s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_c

4it [31:55, 477.89s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_c

5it [39:45, 477.08s/it]


In [11]:
print(results)

metrics: Mean AUROC: 0.7285013527556965, SD AUROC: 0.04405566553594967, Mean AUPRC: 0.011580502938871163, SD AUPRC: 0.0021694812073709477


## Part IV: PNA

In [12]:
results=K_fold_val("PNA", df)

0it [00:00, ?it/s]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_c

1it [08:15, 495.44s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

2it [16:36, 498.89s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_c

3it [25:00, 501.13s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_c

4it [33:24, 502.28s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_c

5it [41:52, 502.51s/it]


In [13]:
print(results)

metrics: Mean AUROC: 0.8202219585685064, SD AUROC: 0.018009449341238413, Mean AUPRC: 0.05174231130861042, SD AUPRC: 0.009312933274666639


## PArt 5: post_

In [14]:
results=K_fold_val("post_aki_status", df)

0it [00:00, ?it/s]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_c

1it [10:29, 629.12s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_c

2it [21:10, 636.11s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.9s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.8s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.6s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   2.2s
[CV] END .learning_rate=0.1, max_depth=4, min_c

3it [31:47, 636.59s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.7s
[CV] END .learning_rate=0.1, max_depth=4, min_c

4it [42:25, 637.29s/it]

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.5s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   2.0s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=1; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.1s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   1.2s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.3s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=2; total time=   2.4s
[CV] END .learning_rate=0.1, max_depth=4, min_child_weight=4; total time=   1.4s
[CV] END .learning_rate=0.1, max_depth=4, min_c

5it [53:12, 638.58s/it]


In [15]:
print(results)

metrics: Mean AUROC: 0.8385658634949216, SD AUROC: 0.0021714534148637526, Mean AUPRC: 0.48795773838087975, SD AUPRC: 0.008199369393836507


## Part 6: delirium

In [16]:
df = df.dropna(subset=['postop_del']).reset_index(drop=True)
df["postop_del"]=df["postop_del"].astype(int)
results=K_fold_val("postop_del", df)



Step,Training Loss


Progress 0.0
Progress 2.0529665366454526
Progress 4.105933073290905
Progress 6.158899609936358
Progress 8.21186614658181
Progress 10.264832683227263
Progress 12.317799219872716
Progress 14.37076575651817
Progress 16.42373229316362
Progress 18.476698829809074
Progress 20.529665366454527
Progress 22.58263190309998
Progress 24.635598439745433
Progress 26.688564976390882
Progress 28.74153151303634
Progress 30.79449804968179
Progress 32.84746458632724
Progress 34.9004311229727
Progress 36.95339765961815
Progress 39.0063641962636
Progress 41.05933073290905
Progress 43.1122972695545
Progress 45.16526380619996
Progress 47.218230342845416
Progress 49.271196879490866
Progress 51.324163416136315
Progress 53.377129952781765
Progress 55.43009648942723
Progress 57.48306302607268
Progress 59.53602956271813
Progress 61.58899609936358
Progress 63.64196263600903
Progress 65.69492917265448
Progress 67.74789570929994
Progress 69.8008622459454
Progress 71.85382878259084
Progress 73.9067953192363
Progress 7



Step,Training Loss
1000,1.6126
2000,1.0712


Progress 0.0
Progress 2.0529665366454526
Progress 4.105933073290905
Progress 6.158899609936358
Progress 8.21186614658181
Progress 10.264832683227263
Progress 12.317799219872716
Progress 14.37076575651817
Progress 16.42373229316362
Progress 18.476698829809074
Progress 20.529665366454527
Progress 22.58263190309998
Progress 24.635598439745433
Progress 26.688564976390882
Progress 28.74153151303634
Progress 30.79449804968179
Progress 32.84746458632724
Progress 34.9004311229727
Progress 36.95339765961815
Progress 39.0063641962636
Progress 41.05933073290905
Progress 43.1122972695545
Progress 45.16526380619996
Progress 47.218230342845416
Progress 49.271196879490866
Progress 51.324163416136315
Progress 53.377129952781765
Progress 55.43009648942723
Progress 57.48306302607268
Progress 59.53602956271813
Progress 61.58899609936358
Progress 63.64196263600903
Progress 65.69492917265448
Progress 67.74789570929994
Progress 69.8008622459454
Progress 71.85382878259084
Progress 73.9067953192363
Progress 7



Step,Training Loss
1000,1.6305
2000,1.0621


Progress 0.0
Progress 2.0529665366454526
Progress 4.105933073290905
Progress 6.158899609936358
Progress 8.21186614658181
Progress 10.264832683227263
Progress 12.317799219872716
Progress 14.37076575651817
Progress 16.42373229316362
Progress 18.476698829809074
Progress 20.529665366454527
Progress 22.58263190309998
Progress 24.635598439745433
Progress 26.688564976390882
Progress 28.74153151303634
Progress 30.79449804968179
Progress 32.84746458632724
Progress 34.9004311229727
Progress 36.95339765961815
Progress 39.0063641962636
Progress 41.05933073290905
Progress 43.1122972695545
Progress 45.16526380619996
Progress 47.218230342845416
Progress 49.271196879490866
Progress 51.324163416136315
Progress 53.377129952781765
Progress 55.43009648942723
Progress 57.48306302607268
Progress 59.53602956271813
Progress 61.58899609936358
Progress 63.64196263600903
Progress 65.69492917265448
Progress 67.74789570929994
Progress 69.8008622459454
Progress 71.85382878259084
Progress 73.9067953192363
Progress 7



Step,Training Loss
1000,1.646
2000,1.0644


Progress 0.0
Progress 2.052755824694653
Progress 4.105511649389306
Progress 6.158267474083957
Progress 8.211023298778612
Progress 10.263779123473263
Progress 12.316534948167915
Progress 14.369290772862566
Progress 16.422046597557223
Progress 18.474802422251873
Progress 20.527558246946526
Progress 22.580314071641176
Progress 24.63306989633583
Progress 26.685825721030483
Progress 28.738581545725133
Progress 30.79133737041979
Progress 32.844093195114446
Progress 34.896849019809096
Progress 36.949604844503746
Progress 39.002360669198396
Progress 41.05511649389305
Progress 43.1078723185877
Progress 45.16062814328235
Progress 47.21338396797701
Progress 49.26613979267166
Progress 51.31889561736631
Progress 53.371651442060966
Progress 55.424407266755615
Progress 57.477163091450265
Progress 59.52991891614492
Progress 61.58267474083958
Progress 63.635430565534236
Progress 65.68818639022889
Progress 67.74094221492354
Progress 69.79369803961819
Progress 71.84645386431285
Progress 73.89920968900749



Step,Training Loss
1000,1.6152
2000,1.0605


Progress 0.0
Progress 2.052755824694653
Progress 4.105511649389306
Progress 6.158267474083957
Progress 8.211023298778612
Progress 10.263779123473263
Progress 12.316534948167915
Progress 14.369290772862566
Progress 16.422046597557223
Progress 18.474802422251873
Progress 20.527558246946526
Progress 22.580314071641176
Progress 24.63306989633583
Progress 26.685825721030483
Progress 28.738581545725133
Progress 30.79133737041979
Progress 32.844093195114446
Progress 34.896849019809096
Progress 36.949604844503746
Progress 39.002360669198396
Progress 41.05511649389305
Progress 43.1078723185877
Progress 45.16062814328235
Progress 47.21338396797701
Progress 49.26613979267166
Progress 51.31889561736631
Progress 53.371651442060966
Progress 55.424407266755615
Progress 57.477163091450265
Progress 59.52991891614492
Progress 61.58267474083958
Progress 63.635430565534236
Progress 65.68818639022889
Progress 67.74094221492354
Progress 69.79369803961819
Progress 71.84645386431285
Progress 73.89920968900749

5it [1:55:33, 1386.60s/it]


In [17]:
print(results)

metrics: Mean AUROC: 0.694036871431746, SD AUROC: 0.008471313423997028, Mean AUPRC: 0.6719362546882006, SD AUPRC: 0.015067294275197688
