# Predicting Diabetes patient risk to develop Heart Failure with Med-BERT

In [1]:
### Required Packages
from termcolor import colored
import math
from sklearn.model_selection import train_test_split
import pandas as pd
import random
import numpy as np
from datetime import datetime
import pickle as pkl
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from torch import optim
import tqdm
import time
import transformers
from sklearn.metrics import roc_auc_score  
from sklearn.metrics import roc_curve 
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
%matplotlib inline
use_cuda = torch.cuda.is_available()
import transformers
from transformers import BertForSequenceClassification


I0520 21:21:25.325698 140179506145024 file_utils.py:39] PyTorch version 1.5.0+cu101 available.


#### Load Data from pickled list

The pickled file holds a list of lists, where each sublist represents one patient record of the form
[pt_id, label, seq_list, segment_list]
where
    label: 1 = patient developed HF (case), 0 = control
    seq_list: list of all medical codes across all visits
    segment_list: the visit number corresponding to each code in seq_list
 

In [3]:
def load_pickled_records(path):
    """Load one pickled list of patient records from `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    the records are read (the previous `pkl.load(open(...))` form left every
    file handle open).

    NOTE(security): unpickling can execute arbitrary code; only load files
    that come from a trusted preprocessing step.
    """
    with open(path, 'rb') as handle:
        return pkl.load(handle, encoding='bytes')


train_f = load_pickled_records('../Diab_HF/preprocessed_data/lr_dhf.combined_BertFT.train')
valid_f = load_pickled_records('../Diab_HF/preprocessed_data/lr_dhf.combined_BertFT.valid')
test_f = load_pickled_records('../Diab_HF/preprocessed_data/lr_dhf.combined_BertFT.test')
test_f2 = load_pickled_records('../Diab_HF/preprocessed_data/lr_dhf_s5k.combined_BertFT.test')

In [5]:
### Below are the key functions for data preparation, formatting input data into features, and model definition

class PaddingInputExample(object):
    """Placeholder example used to pad the example list to a multiple of the batch size.

    The idea comes from TPU eval/predict code, where the batch size is fixed:
    padding with fake examples avoids dropping the last, incomplete batch
    (which would lose part of the output).

    A dedicated marker class is used instead of `None`, because treating
    `None` as a padding batch could hide errors silently.
    `convert_singleEHR_example` recognises instances of this class and turns
    them into an all-zero feature flagged `is_real_example=False`.
    """

class InputFeatures(object):
    """Plain container for the features of a single example.

    Attributes:
        input_ids: token ids of the example.
        input_mask: mask aligned with `input_ids`.
        segment_ids: segment ids aligned with `input_ids`.
        label_id: label of the example.
        is_real_example: False for padding examples, True otherwise.
    """

    def __init__(self, input_ids, input_mask, segment_ids, label_id, is_real_example=True):
        # Token-level fields.
        self.input_ids, self.input_mask, self.segment_ids = input_ids, input_mask, segment_ids
        # Example-level fields.
        self.label_id, self.is_real_example = label_id, is_real_example
    

    
def convert_EHRexamples_to_features(examples,max_seq_length):
    """Build one feature per example by calling `convert_singleEHR_example`.

    Each example is passed along with its position in `examples` and the
    requested `max_seq_length`; the results are returned as a list in the
    same order as the input.
    """
    return [
        convert_singleEHR_example(position, record, max_seq_length)
        for position, record in enumerate(examples)
    ]

### This is the EHR version

def convert_singleEHR_example(ex_index, example, max_seq_length):
    """Convert one patient record into fixed-length model inputs.

    Args:
        ex_index: position of the example in its list (not used here).
        example: either a `PaddingInputExample`, or a record indexed as
            example[1] = label, example[2] = code sequence,
            example[3] = segment (visit) ids.
        max_seq_length: length every returned sequence is cut/padded to.

    Returns:
        For a real record, the list
        [input_ids, input_mask, segment_ids, label_id, True].
        For a `PaddingInputExample`, an all-zero `InputFeatures` object with
        `is_real_example=False`.
        NOTE(review): the two branches return different types; `my_collate`
        indexes features positionally, so it only works with the list form.

    Raises:
        AssertionError: if the resulting sequences do not all have length
            `max_seq_length` (e.g. codes and segment ids are misaligned).

    The record is never modified: the sequences are copied before being
    truncated or padded. Previously padding was appended to the record's own
    lists, so converting the same data again with a different
    `max_seq_length` treated the old padding as real tokens.
    """
    if isinstance(example, PaddingInputExample):
        return InputFeatures(
            input_ids=[0] * max_seq_length,
            input_mask=[0] * max_seq_length,
            segment_ids=[0] * max_seq_length,
            label_id=0,
            is_real_example=False)

    # Work on copies so the caller's record stays untouched.
    input_ids = list(example[2])
    segment_ids = list(example[3])
    label_id = example[1]

    # The mask has 1 for real tokens and 0 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [1] * len(input_ids)

    # Left-truncate longer sequences, i.e. keep the last `max_seq_length`
    # entries. Explicit start offsets are used instead of `x[-n:]`, because
    # `x[-0:]` is the whole list (which made the old `while` loop spin
    # forever for max_seq_length == 0).
    if len(input_ids) > max_seq_length:
        input_ids = input_ids[len(input_ids) - max_seq_length:]
        input_mask = input_mask[len(input_mask) - max_seq_length:]
        segment_ids = segment_ids[max(len(segment_ids) - max_seq_length, 0):]

    # Zero-pad up to the sequence length (no-op when nothing is missing).
    padding = [0] * (max_seq_length - len(input_ids))
    input_ids += padding
    input_mask += padding
    segment_ids += padding

    assert len(input_ids) == max_seq_length
    assert len(input_mask) == max_seq_length
    assert len(segment_ids) == max_seq_length

    feature = [input_ids, input_mask, segment_ids, label_id, True]
    return feature




In [6]:
class BERTdataEHR(Dataset):
    """Dataset wrapper around an indexable collection of features.

    `Features` is stored as-is in `self.data`; indexing and `len()` are
    delegated to it.
    """

    def __init__(self, Features):
        self.data = Features

    def __getitem__(self, idx, seeDescription = False):
        # `seeDescription` is accepted but has no effect.
        return self.data[idx]

    def __len__(self):
        return len(self.data)

         
#customized parts for EHRdataloader
def my_collate(batch):
        """Regroup a batch of features column-wise.

        Each feature is indexed positionally: 0 = input ids, 1 = input mask,
        2 = segment ids, 3 = label id. Returns
        [all_input_ids, all_input_mask, all_segment_ids, all_label_ids],
        each a list with one entry per feature in `batch`.
        """
        # Only the first four positions of every feature are collected.
        return [[feature[column] for feature in batch] for column in range(4)]
            

class BERTdataEHRloader(DataLoader):
    """DataLoader whose defaults suit the EHR features (batch_size=128, `my_collate`).

    All constructor arguments are forwarded to `DataLoader`. Previously only
    `dataset` and `batch_size` were honoured: `shuffle`, `sampler`,
    `batch_sampler`, `num_workers`, `pin_memory`, `drop_last`, `timeout` and
    `worker_init_fn` were replaced by hard-coded values, so passing e.g.
    `shuffle=True` silently had no effect. Calls that pass only `dataset`
    and `batch_size` behave exactly as before.

    NOTE: `DataLoader` treats `batch_sampler` as mutually exclusive with
    `batch_size`, `shuffle`, `sampler` and `drop_last`; when supplying a
    `batch_sampler`, pass `batch_size=1` explicitly.
    """

    def __init__(self, dataset, batch_size=128, shuffle=False, sampler=None, batch_sampler=None,
                 num_workers=0, collate_fn=my_collate, pin_memory=False, drop_last=False,
                 timeout=0, worker_init_fn=None):
        super(BERTdataEHRloader, self).__init__(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            sampler=sampler,
            batch_sampler=batch_sampler,
            num_workers=num_workers,
            collate_fn=collate_fn,
            pin_memory=pin_memory,
            drop_last=drop_last,
            timeout=timeout,
            worker_init_fn=worker_init_fn)

 

##### Model Definition

In [7]:
class EHR_BERT_LR(nn.Module):
    """Pretrained BERT encoder followed by a single linear unit and a sigmoid.

    The encoder is loaded with `BertForSequenceClassification.from_pretrained`
    from "pretrained_py_models/45M_chk"; only its `.bert` sub-model is used in
    `forward`. The linear layer maps the encoder's hidden size to one output.

    Constructor arguments are stored as attributes for reference; none of
    them changes the architecture, which is fully determined by the loaded
    checkpoint (`input_size` is accepted for interface compatibility only).
    """

    def __init__(self, input_size,embed_dim, hidden_size, n_layers=1,dropout_r=0.1,cell_type='LSTM',bi=False ,time=False, preTrainEmb=''):
        super(EHR_BERT_LR, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embed_dim = embed_dim
        self.dropout_r = dropout_r
        self.cell_type = cell_type
        self.preTrainEmb=preTrainEmb
        self.time=time

        if bi: self.bi=2
        else: self.bi=1

        self.PreBERTmodel=BertForSequenceClassification.from_pretrained("pretrained_py_models/45M_chk")
        if use_cuda:
           self.PreBERTmodel.cuda()
        # Size of the encoder output that feeds the linear layer.
        self.in_size= self.PreBERTmodel.bert.config.hidden_size

        # NOTE: `dropout` and `softmax` are created but not applied in forward().
        self.dropout = nn.Dropout(p=self.dropout_r)
        self.out = nn.Linear(self.in_size,1)
        self.sigmoid = nn.Sigmoid()
        self.softmax=nn.Softmax()
        if use_cuda:
            self.flt_typ=torch.cuda.FloatTensor
            self.lnt_typ=torch.cuda.LongTensor
        else:
            self.lnt_typ=torch.LongTensor
            self.flt_typ=torch.FloatTensor

    def forward(self, sequence):
        """Score one minibatch.

        Args:
            sequence: indexable batch with sequence[0] = input ids,
                sequence[1] = attention mask, sequence[2] = segment ids and
                sequence[3] = labels (the layout returned by `my_collate`).

        Returns:
            (probabilities, labels): sigmoid outputs with the trailing
            size-1 dimension removed, and the labels as a float tensor.
        """
        token_t=torch.from_numpy(np.asarray(sequence[0],dtype=int)).type(self.lnt_typ)
        mask_t=torch.from_numpy(np.asarray(sequence[1],dtype=int)).type(self.lnt_typ)
        seg_t=torch.from_numpy(np.asarray(sequence[2],dtype=int)).type(self.lnt_typ)
        Label_t=torch.from_numpy(np.asarray(sequence[3],dtype=int)).type(self.lnt_typ)
        Bert_out=self.PreBERTmodel.bert(input_ids=token_t, attention_mask=mask_t,
                                    token_type_ids=seg_t)
        # Bert_out[1] is the second element returned by the encoder
        # (presumably the pooled sequence representation — confirm against
        # the installed transformers version).
        output=self.sigmoid(self.out(Bert_out[1]))
        # squeeze(-1), not squeeze(): a bare squeeze() also removes the batch
        # dimension when a minibatch holds a single patient, producing a
        # 0-dim tensor that no longer matches the (1,)-shaped label tensor.
        return output.squeeze(-1),Label_t.type(self.flt_typ)

In [8]:
def timeSince(since):
    """Format the time elapsed since `since` (a `time.time()` value) as '<m>m <s>s'."""
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

def trainsample(sample, model, optimizer, criterion = nn.BCELoss()):
    """Run one optimisation step on a single minibatch.

    The model is put in training mode, its gradients are cleared, and
    `model(sample)` is expected to return `(output, label_tensor)`. The loss
    from `criterion(output, label_tensor)` is back-propagated before
    `optimizer.step()`.

    Returns:
        (output, loss) where `loss` is the Python float from `loss.item()`.
    """
    model.train()
    model.zero_grad()

    predictions, targets = model(sample)
    batch_loss = criterion(predictions, targets)

    batch_loss.backward()
    optimizer.step()
    return predictions, batch_loss.item()


#train with loaders

def trainbatches(mbs_list, model, optimizer, shuffle = True):
    """Train on every minibatch in `mbs_list` once.

    When `shuffle` is true the list is shuffled in place first. Each batch is
    trained with `trainsample` using a fresh `nn.BCELoss()`. Losses are
    averaged over consecutive windows of 5 batches.

    Returns:
        (leftover, window_means): `window_means` holds one average per
        completed 5-batch window; `leftover` is the summed loss of the
        trailing batches that did not fill a window (0 if none).
    """
    window = 5
    running_total = 0
    window_means = []

    if shuffle:
        random.shuffle(mbs_list)

    for seen, minibatch in enumerate(mbs_list, start=1):
        _, batch_loss = trainsample(minibatch, model, optimizer, criterion = nn.BCELoss())
        running_total += batch_loss
        if seen % window == 0:
            # Window complete: record its mean and start a new one.
            window_means.append(running_total / window)
            running_total = 0

    return running_total, window_means


def calculate_auc(model, mbs_list, shuffle = True):
    """Score every minibatch with `model` and compute the ROC AUC.

    Args:
        model: module whose call returns `(output, label_tensor)` per batch.
        mbs_list: list of minibatches. Shuffled in place when `shuffle` is
            true (the order does not influence the AUC).
        shuffle: whether to shuffle `mbs_list` before scoring.

    Returns:
        (auc, y_real, y_hat): the `roc_auc_score` value plus the flattened
        labels and predictions collected over all batches.

    The model is switched to eval mode and the forward passes run under
    `torch.no_grad()`: evaluation previously ran with autograd enabled, which
    recorded an unused computation graph for every batch.
    """
    model.eval()
    y_real =[]
    y_hat= []
    if shuffle:
        random.shuffle(mbs_list)
    with torch.no_grad():
        for batch in mbs_list:
            output,label_tensor = model(batch)
            y_hat.extend(output.detach().cpu().reshape(-1).numpy())
            y_real.extend(label_tensor.detach().cpu().reshape(-1).numpy())
    auc = roc_auc_score(y_real, y_hat)
    return auc, y_real, y_hat

    
#define the final epochs running, use the different names

def epochs_run(epochs, train, valid, test1,test2, model, optimizer, shuffle = True,  patience = 20, output_dir = '../models/', model_prefix = 'dhf.train', model_customed= ''):
    """Train for up to `epochs` epochs with early stopping on validation AUC.

    Args:
        epochs: maximum number of epochs.
        train, valid: lists of minibatches for training and validation.
        test1, test2: lists of minibatches for two test sets. A set that is
            None or empty is skipped and its AUC is reported as 0.0.
        model, optimizer: passed to `trainbatches` / `calculate_auc`.
        shuffle: forwarded to `calculate_auc` only; training always uses the
            default shuffling of `trainbatches`.
        patience: stop once more than `patience` epochs have passed since
            the best validation AUC.
        output_dir, model_prefix, model_customed: only used for the closing
            message; `output_dir` is currently unused because model saving is
            commented out.

    Returns:
        (bestTrainAuc, bestValidAuc, bestTestAuc1, bestTestAuc2, bestValidEpoch)
        where the train/test AUCs are the ones measured at the epoch with
        the best validation AUC.

    Fixes relative to the previous version:
      * test evaluation was guarded by `if test:`, a notebook-level global,
        so the function failed (or misbehaved) without that variable; the
        decision now depends on the `test1` / `test2` arguments;
      * `bestTrainAuc` is initialised, so it is defined even if the
        validation AUC never exceeds 0;
      * the same 5-tuple is returned whether or not test sets were given
        (the no-test path used to return None);
      * the average loss no longer becomes NaN when an epoch has fewer
        batches than one averaging window of `trainbatches`.
    """
    def _has_batches(batches):
        # True for a non-empty list of minibatches.
        return batches is not None and len(batches) > 0

    run_test1 = _has_batches(test1)
    run_test2 = _has_batches(test2)
    has_test = run_test1 or run_test2

    bestTrainAuc = 0.0
    bestValidAuc = 0.0
    bestTestAuc1 = 0.0
    bestTestAuc2 = 0.0
    bestValidEpoch = 0
    #header = 'BestValidAUC|TestAUC|atEpoch'
    #logFile = output_dir + model_prefix + model_customed +'EHRmodel.log'
    #print2file(header, logFile)
    #writer = SummaryWriter(output_dir+'/tsb_runs/') ## LR added 9/27 for tensorboard integration
    for ep in range(epochs):
        print (ep)
        start = time.time()
        current_loss, train_loss = trainbatches(mbs_list = train, model= model, optimizer = optimizer)
        train_time = timeSince(start)
        if len(train_loss) > 0:
            avg_loss = np.mean(train_loss)
        elif len(train) > 0:
            # No averaging window was completed, so `current_loss` is the
            # summed loss over all batches of this epoch.
            avg_loss = current_loss / len(train)
        else:
            avg_loss = float('nan')
        #writer.add_scalar('Loss/train', avg_loss, ep) ## LR added 9/27
        valid_start = time.time()
        train_auc, _, _ = calculate_auc(model = model, mbs_list = train, shuffle = shuffle)
        valid_auc, _, _ = calculate_auc(model = model, mbs_list = valid, shuffle = shuffle)
        valid_time = timeSince(valid_start)
        #writer.add_scalar('train_auc', train_auc, ep) ## LR added 9/27
        #writer.add_scalar('valid_auc', valid_auc, ep) ## LR added 9/27
        print(colored('\n Epoch (%s): Train_auc (%s), Valid_auc (%s) ,Training Average_loss (%s), Train_time (%s), Eval_time (%s)'%(ep, train_auc, valid_auc , avg_loss,train_time, valid_time), 'green'))
        if valid_auc > bestValidAuc:
            bestValidAuc = valid_auc
            bestValidEpoch = ep
            # NOTE: this is a reference to the live model, not a snapshot;
            # the test AUCs are therefore measured right away, before any
            # further training changes the weights.
            best_model = model
            bestTrainAuc = train_auc
            if has_test:
                testeval_start = time.time()
                if run_test1:
                    bestTestAuc1, _, _ = calculate_auc(model = best_model, mbs_list = test1,  shuffle = shuffle)
                if run_test2:
                    bestTestAuc2, _, _ = calculate_auc(model = best_model, mbs_list = test2,  shuffle = shuffle)
                #writer.add_scalar('test_auc', valid_auc, ep) ## LR added 9/27
                print(colored('\n Test_AUC1 (%s) ,Test_AUC2 (%s) , Test_eval_time (%s) '%(bestTestAuc1,bestTestAuc2, timeSince(testeval_start)), 'yellow'))
        if ep - bestValidEpoch > patience:
            break

    #writer.close()
    #if not os.path.exists(output_dir):
    #    os.makedirs(output_dir)
    ###save model & parameters
    #torch.save(best_model, output_dir + model_prefix + model_customed + 'EHRmodel.pth')
    #torch.save(best_model.state_dict(), output_dir + model_prefix + model_customed + 'EHRmodel.st')

    if has_test:
        print(colored('BestValidAuc %f has a TestAuc of %f at epoch %d ' % (bestValidAuc, bestTestAuc1, bestValidEpoch),'green'))
    else:
        print(colored('BestValidAuc %f at epoch %d ' % (bestValidAuc,  bestValidEpoch),'green'))
        print('No Test Accuracy')
        print(colored('Details see ../models/%sEHRmodel.log' %(model_prefix + model_customed),'green'))

    return bestTrainAuc,bestValidAuc, bestTestAuc1, bestTestAuc2, bestValidEpoch
        


In [9]:
# Run configuration.
MAX_SEQ_LENGTH = 64
BATCH_SIZE = 100
# Fixed name: this constant was spelled `EARNING_RATE`, so the optimizer
# below raised NameError on a fresh kernel (LEARNING_RATE was never defined).
LEARNING_RATE = 1e-5
bert_config_file= "config.json"

# One row per run: model type, run index, data set sizes, AUCs and best epoch.
results=[]

#### Data Preparation
train_features = convert_EHRexamples_to_features(train_f, MAX_SEQ_LENGTH)
test_features = convert_EHRexamples_to_features(test_f, MAX_SEQ_LENGTH)
test_features2 = convert_EHRexamples_to_features(test_f2, MAX_SEQ_LENGTH)
valid_features = convert_EHRexamples_to_features(valid_f, MAX_SEQ_LENGTH)
train = BERTdataEHR(train_features)
test = BERTdataEHR(test_features)
test2 = BERTdataEHR(test_features2)
valid = BERTdataEHR(valid_features)
# The loaders are materialised into lists so the same minibatches can be
# reused (and shuffled at batch level) across epochs and runs.
print (' creating the list of training minibatches')
train_mbs = list(BERTdataEHRloader(train, batch_size = BATCH_SIZE))
print (' creating the list of test minibatches')
test_mbs = list(BERTdataEHRloader(test, batch_size = BATCH_SIZE))
print (' creating the list of test2 minibatches')
test_mbs2 = list(BERTdataEHRloader(test2, batch_size = BATCH_SIZE))
print (' creating the list of valid minibatches')
valid_mbs = list(BERTdataEHRloader(valid, batch_size = BATCH_SIZE))

for run in range(10):### to average the results on 10 runs
    for model_type in ['Bert only']:
        # A fresh model (re-loaded from the pretrained checkpoint) and a
        # fresh optimizer are created for every run.
        ehr_model = EHR_BERT_LR(input_size= 90000, embed_dim=192, hidden_size=192)
        if use_cuda:
            ehr_model.cuda()
        optimizer = optim.Adam(ehr_model.parameters(), lr=LEARNING_RATE)
        out_dir_name='test_LR_Bert_BiGRU_FC'#+ str(i)
        trauc,vauc,testauc1,testauc2,bep=epochs_run(500,train = train_mbs,
                              valid = valid_mbs,
                              test1 = test_mbs,test2=test_mbs2,
                              model = ehr_model,
                              optimizer = optimizer,
                              shuffle = True,
                              #batch_size = args.batch_size,
                              patience = 20,
                              output_dir = out_dir_name,
                              model_prefix = 'first_run')
        results.append([model_type,run,len(train_features),len(test_features),len(valid_features),trauc,vauc,testauc1,testauc2,bep])


 creating the list of training minibatches


I0520 21:22:05.684562 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 21:22:05.686757 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 21:22:05.687871 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


 creating the list of test minibatches
 creating the list of test2 minibatches
 creating the list of valid minibatches


I0520 21:22:06.574445 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 21:22:06.575920 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.8344208078615551), Valid_auc (0.8184748806805653) ,Training Average_loss (0.6015739386081695), Train_time (0m 23s), Eval_time (0m 5s)[0m
[33m
 Test_AUC1 (0.813739276864951) ,Test_AUC2 (0.8129291783088921) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8689255080828232), Valid_auc (0.8328357126090946) ,Training Average_loss (0.5050303135514259), Train_time (0m 22s), Eval_time (0m 5s)[0m
[33m
 Test_AUC1 (0.827368265712256) ,Test_AUC2 (0.8259503103980095) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.8945819412477846), Valid_auc (0.8348941240068568) ,Training Average_loss (0.46658780950307843), Train_time (0m 21s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8300147372468025) ,Test_AUC2 (0.8279952299876525) , Test_eval_time (0m 1s) [0m
3
[32m
 Epoch (3): Train_auc (0.919262215204665), Valid_auc (0.8296378450661146) ,Training Average_loss (0.4308578492999077), Train_time (0m 22s), Eval_time (0m 5s)[0m
4
[32m
 Epoch (4)

I0520 21:33:03.378924 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 21:33:03.380889 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 21:33:03.381677 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


[32m
 Epoch (23): Train_auc (0.9989485929027963), Valid_auc (0.7634038936189886) ,Training Average_loss (0.08579230152210222), Train_time (0m 21s), Eval_time (0m 4s)[0m
[32mBestValidAuc 0.834894 has a TestAuc of 0.830015 at epoch 2 [0m


I0520 21:33:03.975311 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 21:33:03.976801 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.8350525299439123), Valid_auc (0.819059243610809) ,Training Average_loss (0.5963910191655158), Train_time (0m 20s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8146679703468753) ,Test_AUC2 (0.8143491257990754) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8694366997728258), Valid_auc (0.8323152593730662) ,Training Average_loss (0.5031238191723824), Train_time (0m 22s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8282307458502527) ,Test_AUC2 (0.8270044908069418) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.8966716240842965), Valid_auc (0.8316490621599129) ,Training Average_loss (0.4666782378256321), Train_time (0m 21s), Eval_time (0m 4s)[0m
3
[32m
 Epoch (3): Train_auc (0.9195386161673412), Valid_auc (0.826901713505324) ,Training Average_loss (0.4309304870665073), Train_time (0m 17s), Eval_time (0m 4s)[0m
4
[32m
 Epoch (4): Train_auc (0.9370681588984008), Valid_auc (0.8214501845805859) ,Training Average_loss (0.394378570

I0520 21:42:45.312570 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 21:42:45.314456 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 21:42:45.315254 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


[32m
 Epoch (22): Train_auc (0.9985865488926994), Valid_auc (0.7576536571139989) ,Training Average_loss (0.09320290236780421), Train_time (0m 20s), Eval_time (0m 4s)[0m
[32mBestValidAuc 0.832315 has a TestAuc of 0.828231 at epoch 1 [0m


I0520 21:42:45.903815 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 21:42:45.904995 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.8330959640179064), Valid_auc (0.8176760272852073) ,Training Average_loss (0.5994077806472777), Train_time (0m 21s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8140763702522191) ,Test_AUC2 (0.8134981493241707) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8656440647107173), Valid_auc (0.8324247629615404) ,Training Average_loss (0.507281952381134), Train_time (0m 19s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8275206301810785) ,Test_AUC2 (0.8253920595902737) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.8916863324212285), Valid_auc (0.8335593467483231) ,Training Average_loss (0.47075086289644247), Train_time (0m 21s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8290833237644428) ,Test_AUC2 (0.8260134316200362) , Test_eval_time (0m 1s) [0m
3
[32m
 Epoch (3): Train_auc (0.9170662550155178), Valid_auc (0.8287508624430076) ,Training Average_loss (0.43584743118286134), Train_time (0m 21s), Eval_time (0m 4s)[0m
4
[32m
 Epoch 

I0520 21:53:26.369896 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 21:53:26.372846 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 21:53:26.373793 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


[32m
 Epoch (23): Train_auc (0.9989646444737132), Valid_auc (0.7651444636493608) ,Training Average_loss (0.08868849254818632), Train_time (0m 23s), Eval_time (0m 4s)[0m
[32mBestValidAuc 0.833559 has a TestAuc of 0.829083 at epoch 2 [0m


I0520 21:53:26.927020 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 21:53:26.928144 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.8348611087802745), Valid_auc (0.8182579006892431) ,Training Average_loss (0.597093949317932), Train_time (0m 22s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8136349657371501) ,Test_AUC2 (0.8130792612144971) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8685117242984548), Valid_auc (0.8311971420239137) ,Training Average_loss (0.5056465235352516), Train_time (0m 22s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8283004080836209) ,Test_AUC2 (0.8263982390699084) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.8954337473846572), Valid_auc (0.8325071307143518) ,Training Average_loss (0.4667284547388554), Train_time (0m 22s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8298485772202169) ,Test_AUC2 (0.8264550401695777) , Test_eval_time (0m 1s) [0m
3
[32m
 Epoch (3): Train_auc (0.91975767294692), Valid_auc (0.8284334122868788) ,Training Average_loss (0.4315815242826938), Train_time (0m 22s), Eval_time (0m 4s)[0m
4
[32m
 Epoch (4):

I0520 22:03:30.106102 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 22:03:30.107589 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 22:03:30.108331 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


[32m
 Epoch (23): Train_auc (0.9988704656036285), Valid_auc (0.7587428603944832) ,Training Average_loss (0.08895108831021935), Train_time (0m 16s), Eval_time (0m 4s)[0m
[32mBestValidAuc 0.832507 has a TestAuc of 0.829849 at epoch 2 [0m


I0520 22:03:30.662605 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 22:03:30.663679 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.8379883161012923), Valid_auc (0.8203654980119354) ,Training Average_loss (0.5848884753584861), Train_time (0m 15s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8146202192281239) ,Test_AUC2 (0.8131664629027219) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8683454193669756), Valid_auc (0.8311485962628655) ,Training Average_loss (0.5035987617969513), Train_time (0m 16s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8265046300185186) ,Test_AUC2 (0.822951132333922) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.8949668453254593), Valid_auc (0.8327145438121047) ,Training Average_loss (0.4661092875003814), Train_time (0m 16s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8288906037336077) ,Test_AUC2 (0.8251059740516576) , Test_eval_time (0m 1s) [0m
3
[32m
 Epoch (3): Train_auc (0.9174870545116629), Valid_auc (0.8298718249649688) ,Training Average_loss (0.4338305563330651), Train_time (0m 16s), Eval_time (0m 4s)[0m
4
[32m
 Epoch (4

I0520 22:11:48.597309 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 22:11:48.598996 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 22:11:48.599742 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


[32m
 Epoch (23): Train_auc (0.9989565950865331), Valid_auc (0.7632044470050999) ,Training Average_loss (0.08301712638000026), Train_time (0m 16s), Eval_time (0m 4s)[0m
[32mBestValidAuc 0.832715 has a TestAuc of 0.828891 at epoch 2 [0m


I0520 22:11:49.154232 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 22:11:49.155282 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.838662646091777), Valid_auc (0.8208756019318723) ,Training Average_loss (0.5861842641830445), Train_time (0m 16s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8158333927555651) ,Test_AUC2 (0.8144028068383404) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8706608994747661), Valid_auc (0.8324763317186977) ,Training Average_loss (0.5030413975119591), Train_time (0m 16s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8276131368625465) ,Test_AUC2 (0.8246087644256792) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.8970667649087132), Valid_auc (0.833283720632482) ,Training Average_loss (0.46614590415358537), Train_time (0m 16s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8274166657199998) ,Test_AUC2 (0.824605084354433) , Test_eval_time (0m 1s) [0m
3
[32m
 Epoch (3): Train_auc (0.9198462946116224), Valid_auc (0.8288082993690828) ,Training Average_loss (0.4292477672100067), Train_time (0m 16s), Eval_time (0m 4s)[0m
4
[32m
 Epoch (4)

I0520 22:20:55.465837 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 22:20:55.467652 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 22:20:55.468709 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


[32m
 Epoch (23): Train_auc (0.998796325795337), Valid_auc (0.7590093819573367) ,Training Average_loss (0.08781277851620689), Train_time (0m 17s), Eval_time (0m 4s)[0m
[32mBestValidAuc 0.833284 has a TestAuc of 0.827417 at epoch 2 [0m


I0520 22:20:56.063505 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 22:20:56.064536 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.8343471704899152), Valid_auc (0.81815529664483) ,Training Average_loss (0.6024545248150825), Train_time (0m 21s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8123951788721175) ,Test_AUC2 (0.8119798799304754) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8674745446391027), Valid_auc (0.8327873802360071) ,Training Average_loss (0.5085749540925025), Train_time (0m 20s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8288468615043867) ,Test_AUC2 (0.827998750055801) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.8918132864821717), Valid_auc (0.8338979578772165) ,Training Average_loss (0.4682834231257438), Train_time (0m 20s), Eval_time (0m 5s)[0m
[33m
 Test_AUC1 (0.8304875817669021) ,Test_AUC2 (0.8298523059406429) , Test_eval_time (0m 1s) [0m
3
[32m
 Epoch (3): Train_auc (0.9163713387233201), Valid_auc (0.82762125770864) ,Training Average_loss (0.43542053022980687), Train_time (0m 21s), Eval_time (0m 5s)[0m
4
[32m
 Epoch (4): 

I0520 22:31:19.639662 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 22:31:19.641436 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 22:31:19.642180 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


[32m
 Epoch (23): Train_auc (0.9987877571702797), Valid_auc (0.7511952570969279) ,Training Average_loss (0.08451054384559392), Train_time (0m 20s), Eval_time (0m 4s)[0m
[32mBestValidAuc 0.833898 has a TestAuc of 0.830488 at epoch 2 [0m


I0520 22:31:20.218342 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 22:31:20.219385 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.8339869994164438), Valid_auc (0.8210636322898661) ,Training Average_loss (0.5941779354810715), Train_time (0m 20s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.812851623389593) ,Test_AUC2 (0.8121201226455744) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8660068647759662), Valid_auc (0.8330637887743708) ,Training Average_loss (0.5046506046652794), Train_time (0m 20s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.826829127848216) ,Test_AUC2 (0.8243316390605321) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.8916466855290988), Valid_auc (0.835479980652825) ,Training Average_loss (0.47032388734817504), Train_time (0m 21s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8287144437054221) ,Test_AUC2 (0.8253458986965988) , Test_eval_time (0m 1s) [0m
3
[32m
 Epoch (3): Train_auc (0.9155322487140534), Valid_auc (0.830075005868169) ,Training Average_loss (0.43308531907200803), Train_time (0m 22s), Eval_time (0m 4s)[0m
4
[32m
 Epoch (4)

I0520 22:41:55.789855 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 22:41:55.791754 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 22:41:55.792600 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


[32m
 Epoch (23): Train_auc (0.9983602491847694), Valid_auc (0.7588791441720192) ,Training Average_loss (0.0879961883528158), Train_time (0m 20s), Eval_time (0m 4s)[0m
[32mBestValidAuc 0.835480 has a TestAuc of 0.828714 at epoch 2 [0m


I0520 22:41:56.449581 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 22:41:56.450545 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.8301259289621652), Valid_auc (0.818085518781697) ,Training Average_loss (0.6035055741071701), Train_time (0m 20s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8115126720642497) ,Test_AUC2 (0.8108060972060419) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8634336498671435), Valid_auc (0.834423674682941) ,Training Average_loss (0.5069363422989845), Train_time (0m 21s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8277637324421974) ,Test_AUC2 (0.8258984693943674) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.8900960220125369), Valid_auc (0.8371665990938124) ,Training Average_loss (0.4710833481550217), Train_time (0m 20s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8305882662274559) ,Test_AUC2 (0.8276773038326022) , Test_eval_time (0m 1s) [0m
3
[32m
 Epoch (3): Train_auc (0.9150307129284221), Valid_auc (0.831788653450839) ,Training Average_loss (0.43605574980378153), Train_time (0m 21s), Eval_time (0m 4s)[0m
4
[32m
 Epoch (4)

I0520 22:52:06.101148 140179506145024 configuration_utils.py:283] loading configuration file pretrained_py_models/orig_45M_chkp_transcli/config.json
I0520 22:52:06.103366 140179506145024 configuration_utils.py:321] Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 192,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 6,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "type_vocab_size": 1000,
  "vocab_size": 82603
}

I0520 22:52:06.104133 140179506145024 modeling_utils.py:615] loading weights file pretrained_py_models/orig_45M_chkp_transcli/pytorch_model.bin


[32m
 Epoch (23): Train_auc (0.9986690437104654), Valid_auc (0.763382021353022) ,Training Average_loss (0.0882242513142992), Train_time (0m 21s), Eval_time (0m 4s)[0m
[32mBestValidAuc 0.837167 has a TestAuc of 0.830588 at epoch 2 [0m


I0520 22:52:06.758632 140179506145024 modeling_utils.py:708] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
I0520 22:52:06.759719 140179506145024 modeling_utils.py:714] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']


0
[32m
 Epoch (0): Train_auc (0.8376651413265975), Valid_auc (0.8220339429116077) ,Training Average_loss (0.600064018368721), Train_time (0m 17s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8151119259734637) ,Test_AUC2 (0.8135775108606104) , Test_eval_time (0m 1s) [0m
1
[32m
 Epoch (1): Train_auc (0.8723321461875344), Valid_auc (0.8351496560897368) ,Training Average_loss (0.5025597510933876), Train_time (0m 19s), Eval_time (0m 4s)[0m
[33m
 Test_AUC1 (0.8295651371748665) ,Test_AUC2 (0.8254028597993658) , Test_eval_time (0m 1s) [0m
2
[32m
 Epoch (2): Train_auc (0.9000102316263685), Valid_auc (0.8342374581226126) ,Training Average_loss (0.4621628828644754), Train_time (0m 19s), Eval_time (0m 4s)[0m
3
[32m
 Epoch (3): Train_auc (0.9241180102213152), Valid_auc (0.8307160944312855) ,Training Average_loss (0.42433583843708045), Train_time (0m 18s), Eval_time (0m 4s)[0m
4
[32m
 Epoch (4): Train_auc (0.9435316327907235), Valid_auc (0.8228363527729765) ,Training Average_loss (0.3839782

In [11]:
# Column labels for the per-run records collected in `results`.
result_columns = [
    'Model', 'Run', 'Train_size', 'Test_size', 'Valid_size',
    'Train_AUC', 'Valid_AUC', 'Test_AUC1', 'Test_AUC2', 'Best_Epoch',
]

# Build the frame from the raw records first, then relabel its columns.
df = pd.DataFrame(results)
df.columns = result_columns

In [12]:
# Show the results table built from `results` (rendered below as the cell output).
df

Unnamed: 0,Model,Run,Train_size,Test_size,Valid_size,Train_AUC,Valid_AUC,Test_AUC1,Test_AUC2,Best_Epoch
0,Bert only,0,49999,15000,7500,0.894582,0.834894,0.830015,0.827995,2
1,Bert only,1,49999,15000,7500,0.869437,0.832315,0.828231,0.827004,1
2,Bert only,2,49999,15000,7500,0.891686,0.833559,0.829083,0.826013,2
3,Bert only,3,49999,15000,7500,0.895434,0.832507,0.829849,0.826455,2
4,Bert only,4,49999,15000,7500,0.894967,0.832715,0.828891,0.825106,2
5,Bert only,5,49999,15000,7500,0.897067,0.833284,0.827417,0.824605,2
6,Bert only,6,49999,15000,7500,0.891813,0.833898,0.830488,0.829852,2
7,Bert only,7,49999,15000,7500,0.891647,0.83548,0.828714,0.825346,2
8,Bert only,8,49999,15000,7500,0.890096,0.837167,0.830588,0.827677,2
9,Bert only,9,49999,15000,7500,0.872332,0.83515,0.829565,0.825403,1


In [None]:
# Optional CSV export of the results table; intentionally left disabled.
# NOTE(review): the filename mentions "RNN" while the Model column shown in this
# notebook is 'Bert only' -- confirm the intended filename before enabling.
#df.to_csv('DHF_RNN_multirun_shuffled_1.csv')

In [14]:
# Summary statistics (count/mean/std/quantiles) of each test AUC column,
# grouped by model name and training-set size.
group_keys = ['Model', 'Train_size']
desc2 = df[group_keys + ['Test_AUC1']].groupby(group_keys).describe()
desc3 = df[group_keys + ['Test_AUC2']].groupby(group_keys).describe()

In [15]:
# Show the grouped describe() summary for Test_AUC2.
desc3

Unnamed: 0_level_0,Unnamed: 1_level_0,Test_AUC2,Test_AUC2,Test_AUC2,Test_AUC2,Test_AUC2,Test_AUC2,Test_AUC2,Test_AUC2
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
Model,Train_size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Bert only,49999,10.0,0.826546,0.001613,0.824605,0.82536,0.826234,0.827509,0.829852
