This notebook was created to support the data preparation required to support our CS 598 DLH project.  The paper we have chosen for the reproducibility project is:
***Ensembling Classical Machine Learning and Deep Learning Approaches for Morbidity Identification from Clinical Notes ***



 

The data cannot be shared publicly due to the agreements required to obtain the data so we are storing the data locally and not putting in GitHub.

In [183]:
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
import datetime
from datetime import timedelta
from tqdm import tqdm
import torchtext
from torch.utils.data import SubsetRandomSampler
from sklearn.model_selection import KFold

# set seed
seed = 24
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
# define data path
DATA_PATH = './obesity_data/'
RESULTS_PATH = './results/'
MODELS_PATH = './models/'

if os.path.exists(RESULTS_PATH) == False:
    os.mkdir(RESULTS_PATH)
if os.path.exists(MODELS_PATH) == False:
    os.mkdir(MODELS_PATH)


#test_df = pd.read_pickle(DATA_PATH + '/test.pkl') 
#train_df = pd.read_pickle(DATA_PATH + '/train.pkl') 
#corpus = pd.read_pickle(DATA_PATH + '/corpus.pkl')
all_df = pd.read_pickle(DATA_PATH + '/all_df.pkl') 
all_df_expanded = pd.read_pickle(DATA_PATH + '/all_df_expanded.pkl')

#Get this from the create embeddings file
max_tokens = 1416

disease_list = test_df['disease'].unique().tolist()
embedding_list = ['GloVe', "FastText"]

In [184]:
#rebuild the vocabulary
#import torchtext, torch, torch.nn.functional as F
#from torchtext.data.utils import get_tokenizer
#from torchtext.vocab import build_vocab_from_iterator

#tokenizer = get_tokenizer("basic_english")
#tokens = [tokenizer(doc) for doc in corpus]

#voc = build_vocab_from_iterator(tokens, specials = ['<pad>'])

**Deep Learning - Bag of Words - All Feature Selections - Averaged**

![DL BagOfWords AllFeatures Averaged](images\DL-BagOfWords-ByFeatureSelection.gif)

****DL Model using word embeddings****

First we start by creating a dataset.  Note this will have to take the disease as part of the init and filter just for those records.

In [185]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class ClinicalNoteDataset(Dataset):

    def __init__(self, dataframe, disease, dataformat):
        """
        TODO: init the Dataset instance.  datafomat is just the column to use from the dataframe 'vector_tokenized' , 'one_hot'
        """
        # your code here
        self.disease = disease
        self.dataformat = dataformat
        self.df = dataframe[dataframe['disease'] == disease].copy()
        self.df = self.df.reset_index()

    def __len__(self):
        """
        TODO: Denotes the total number of samples
        """
        return len(self.df)

    def __getitem__(self, i):
        """
        TODO: Generates one sample of data
            return X, y for the i-th data.
        """
        #Cannot make tensors yet, will need to happen in collate
        Y = self.df.iloc[i]['judgment']
        X = self.df.iloc[i][self.dataformat]

        return X,Y
        
def vectorize_batch_GloVe(batch):
    embedding_size_used = 300
    vec = torchtext.vocab.GloVe(name='6B', dim=embedding_size_used)    
    Xi, Yi = batch[0]
    batch_size = len(batch)

    X = torch.zeros(batch_size, len(Xi), embedding_size_used, dtype=torch.float)
    Y = torch.zeros((batch_size), dtype=torch.long)
    
    for i in range(len(batch)):
        x, y = batch[i]
        #vectors = vec.get_vecs_by_tokens(voc.lookup_tokens(x.tolist()))
        vectors = vec.get_vecs_by_tokens(x)

        X[i] = vectors.float()
        Y[i] = torch.tensor(float(y == True))

    return X,Y

def vectorize_batch_FastText(batch):
    embedding_size_used = 300
    vec = torchtext.vocab.FastText()
    Xi, Yi = batch[0]
    batch_size = len(batch)

    X = torch.zeros(batch_size, len(Xi), embedding_size_used, dtype=torch.float)
    Y = torch.zeros((batch_size), dtype=torch.long)

    for i in range(len(batch)):
        x, y = batch[i]
        #vectors = vec.get_vecs_by_tokens(voc.lookup_tokens(x.tolist()))
        vectors = vec.get_vecs_by_tokens(x)

        X[i] = vectors.float()
        Y[i] = torch.tensor(float(y == True))

    return X,Y 
 
          


In [186]:
##Test DataLoader
#batch_size = 128
#train_loader = torch.utils.data.DataLoader(ClinicalNoteDataset(train_df, 'Asthma', 'vector_tokenized'), batch_size = batch_size, shuffle=True, collate_fn=vectorize_batch_FastText)
#val_loader = torch.utils.data.DataLoader(ClinicalNoteDataset(test_df, 'Asthma', 'vector_tokenized'), batch_size = batch_size, shuffle=False, collate_fn=vectorize_batch_FastText)

#print("# of train batches:", len(train_loader))
#print("# of val batches:", len(val_loader))

#train_iter = iter(train_loader)
#x,y = next(train_iter)

#print(x.shape)
#print(y.shape)


In [187]:
class ClincalNoteEmbeddingNet(nn.Module):
    def __init__(self, embedding_type, max_tokens):
        super(ClincalNoteEmbeddingNet, self).__init__()
        
        self.embedding_type = embedding_type
        self.max_tokens = max_tokens

        if(embedding_type == 'USE'):
            self.embedding_dimension = 512
        else:
            self.embedding_dimension = 300

        self.hidden_dim1 = 128
        self.hidden_dim2 = 64
        self.num_layers = 1

        #Because it is bidirectional, the output from LTSM is coming in twice the size of the hidden states required.
        #input is (batch, #of tokens * embedding_dimension)
        self.bilstm1 = nn.LSTM(input_size = self.embedding_dimension, hidden_size = self.hidden_dim1, bidirectional = True, batch_first = True, num_layers = self.num_layers, bias = False) 
        self.bilstm2 = nn.LSTM(input_size = self.hidden_dim1 * 2, hidden_size = self.hidden_dim2, bidirectional = True, batch_first = True, num_layers=self.num_layers, bias = False)
 
        self.fc1 = nn.Linear(self.hidden_dim2 * self.max_tokens * 2, 2)
        #self.fc2 = nn.Linear(self.hidden_dim2 , 2)
 
    def forward(self, x):
        x, states = self.bilstm1(x)
        x, states = self.bilstm2(x)
        
        batch_size = x.shape[0]
        x = x.reshape(batch_size, -1)
        x = self.fc1(x)
        ##x = self.fc2(x)
        #x = F.softmax(x, dim=1) #Used with NLLLLoss()

        return x #F.sigmoid(x).squeeze(dim=-1)
        #return torch.nan_to_num(x) #F.sigmoid(x).squeeze(dim=-1)



In [188]:
eps=1e-10

def train_model(tmodel, train_dataloader, n_epoch=5, lr=0.003, device=None, model_name='unk'):
    import torch.optim as optim
    
    device = device or torch.device('cpu')
    tmodel.train()

    loss_history = []

    # your code here
    optimizer = optim.Adam(tmodel.parameters(), lr=lr)
    # want to decay the learning rate as teh number of epochs get larger
    #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma = 0.1)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
        factor=0.1, patience=10, threshold=0.0001, threshold_mode='abs')

    #loss_func = nn.BCELoss()
    loss_func = nn.CrossEntropyLoss()
    #loss_func = nn.NLLLoss()

    for epoch in range(n_epoch):
        epoch = epoch+1
        curr_epoch_loss = []
        start = time.time()
        for X, Y in tqdm(train_dataloader,desc=f"Training {model_name}-Lr{str(lr)}-Epoch{epoch}..."):
            # your code here
            optimizer.zero_grad()

            y_hat = tmodel(X)

            loss = loss_func(y_hat, Y)
            #loss = loss_func(torch.log(y_hat+ eps), Y)
            
            loss.backward()
            optimizer.step()
            scheduler.step(loss)
            
            curr_epoch_loss.append(loss.cpu().data.numpy())


        end = time.time()
        #if epoch % 10 == 0:
        print(f"epoch{epoch}: curr_epoch_loss={np.mean(curr_epoch_loss)},execution_time={str(datetime.timedelta(seconds = (end-start)))},lr={optimizer.param_groups[0]['lr']}")

        #scheduler.step()
        loss_history += curr_epoch_loss
    return tmodel, loss_history

def eval_model(emodel, dataloader, device=None, model_name='unk'):
    """
    :return:
        pred_all: prediction of model on the dataloder.
        Y_test: truth labels. Should be an numpy array of ints
    TODO:
        evaluate the model using on the data in the dataloder.
        Add all the prediction and truth to the corresponding list
        Convert pred_all and Y_test to numpy arrays 
    """
    device = device or torch.device('cpu')
    emodel.eval()
    pred_all = []
    Y_test = []
    for X, Y in tqdm(dataloader, desc=f"Evaluating {model_name}..."):
        # your code here
        y_hat = emodel(X)
        
        pred_all.append(y_hat.detach().to('cpu'))
        Y_test.append(Y.detach().to('cpu'))
        
    pred_all = np.concatenate(pred_all, axis=0)
    Y_test = np.concatenate(Y_test, axis=0)

    return pred_all, Y_test

In [189]:
def evaluate_predictions(truth, pred):
    """
    TODO: Evaluate the performance of the predictoin via AUROC, and F1 score
    each prediction in pred is a vector representing [p_0, p_1].
    When defining the scores we are interesed in detecting class 1 only
    (Hint: use roc_auc_score and f1_score from sklearn.metrics, be sure to read their documentation)
    return: auroc, f1
    """
    from sklearn.metrics import roc_auc_score, f1_score

    # your code here
    auroc = roc_auc_score(truth, pred[:,1])
    f1 = f1_score(truth, np.argmax(pred,axis=1))
    f1_macro = f1_score(truth, np.argmax(pred,axis=1),average='macro')
    f1_micro = f1_score(truth, np.argmax(pred,axis=1),average='micro')

    return auroc, f1, f1_macro, f1_micro

Need to create a loop to train and evaluate

In [190]:
#Results will be stored as rows:diseases, cols:embedding,results(4 metrics), runtime
result_cols = ['Batch','Disease','Embedding','AUROC','F1','F1_MACRO', 'F1_MICRO', 'Exec Time', 'Total Run (secs)','Epochs', 'LR']
result_time = datetime.datetime.now()
#result_name = result_time.strftime("%Y-%m-%d-%H-%M-%S")
results_file = f'{RESULTS_PATH}DL_embedding_results.csv'
#results_file = f'{RESULTS_PATH}DL_embedding_results_{result_name}.csv'

def trainAndEvaluate(train_loader, val_loader, model, model_desc, batch_name, disease, lr,  dataformat, embedding, device, n_epoch):
            
    return_val = False

    start_train = time.time()
    model, loss_history = train_model(model, train_loader, n_epoch=n_epoch, lr = lr, device=device, model_name=model_desc)
    end_train = time.time()

    try:
        #Evaluate model
        start_eval = time.time()
        pred, truth = eval_model(model, val_loader, device=device, model_name=model_desc)
        end_eval = time.time()

        auroc, f1, f1_macro, f1_micro = evaluate_predictions(truth, pred)
        runtime = f"Trn,Eval,Ttl={str(datetime.timedelta(seconds = (end_train-start_train)))},{str(datetime.timedelta(seconds = (end_eval-start_eval)))},{str(datetime.timedelta(seconds = (end_eval-start_train)))}"
        runtime_sec = end_eval-start_train

        return_val = True

    except:
        auroc = -1
        f1=-1
        f1_macro = -1
        f1_micro = -1
        runtime_sec = end_train-start_train
        runtime = 'Failure'
        print("Failure!")


    #Append to results
    if os.path.exists(results_file):
        results = pd.read_csv(results_file)
    else:
        results = pd.DataFrame(columns=result_cols)

    result = pd.DataFrame(columns=result_cols,data=[[batch_name, disease,embedding,auroc,f1,f1_macro,f1_micro,runtime,runtime_sec,n_epoch,lr]])
    results = pd.concat([results,result])

    #Save results - overwrite so we can see progress
    results.to_csv(results_file, index=False)

    return return_val

def iterateTrainAndEvaluate(df, k, disease_list, embedding_list, lr_list, 
                            batch_name, dataformat, device, max_tokens, n_epoch):

    for _,disease in enumerate(disease_list):
        for _,embedding in enumerate(embedding_list):
            for _,lr in enumerate(lr_list):
                #Create a name for the model
                model_name = f"{disease}_{embedding}_{batch_name}"

                #Create model
                model = ClincalNoteEmbeddingNet(embedding, max_tokens = max_tokens)
                model = model.to(device)

                ds = ClinicalNoteDataset(df, disease, dataformat)

                splits=KFold(n_splits=k,shuffle=True,random_state=seed)

                for fold, (train_idx,val_idx) in enumerate(splits.split(np.arange(len(ds)))):
                    #for now, let's keep the results at the fold level
                    
                    train_sampler = SubsetRandomSampler(train_idx)
                    val_sampler = SubsetRandomSampler(val_idx)
                    #Load Data 
                    if embedding == 'GloVe':
                        custom_collate=vectorize_batch_GloVe
                    if embedding == 'FastText':
                        custom_collate=vectorize_batch_FastText
                    train_loader = torch.utils.data.DataLoader(ds, batch_size = batch_size, sampler=train_sampler, collate_fn=custom_collate)
                    val_loader = torch.utils.data.DataLoader(ds, batch_size = batch_size, sampler=val_sampler, collate_fn=custom_collate)
                    
                    model_desc = f"{disease}_{embedding}_Fold{fold+1}"

                    trainAndEvaluate(train_loader, val_loader, model, model_desc, batch_name, disease, lr, dataformat, embedding, device, n_epoch)

                #Save model
                torch.save(model.state_dict(), f'{MODELS_PATH}{model_name}.pkl')

                #Delete model
                del model
                

In [193]:
#device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'
print(f'Using device: {device}')

#Override these if need be
#disease_list = ['Asthma', 'CAD', 'CHF', 'Depression', 'Diabetes', 'Gallstones', 'GERD', 'Gout', 'Hypercholesterolemia', 'Hypertension', 'Hypertriglyceridemia', 'OA', 'OSA', 'PVD', 'Venous Insufficiency', 'Obesity']
disease_list = ['Asthma']
embedding_list = ['GloVe']
#0.01 seems to be the most effective, added decay logic - starting at 0.1 seems to cause NaNs, if fix those it gets "stuck"
lr_list = [0.01]

#training parameters
n_epoch = 10
batch_size = 128
k = 10

#These should not change
dataformat = 'vector_tokenized'

result_time = datetime.datetime.now()
result_name = result_time.strftime("%Y-%m-%d-%H-%M-%S")
batch_name = f'DL_embedding_results_{result_name}'

iterateTrainAndEvaluate(all_df_expanded, k, disease_list, embedding_list, lr_list, batch_name, dataformat, device, max_tokens, n_epoch)



Using device: cpu


Training Asthma_GloVe_Fold1-Lr0.01-Epoch1...: 100%|██████████| 9/9 [02:24<00:00, 16.10s/it]


epoch1: curr_epoch_loss=71.01609802246094,execution_time=0:02:24.888387,lr=0.01


Training Asthma_GloVe_Fold1-Lr0.01-Epoch2...: 100%|██████████| 9/9 [02:00<00:00, 13.41s/it]


epoch2: curr_epoch_loss=8.45515251159668,execution_time=0:02:00.689727,lr=0.001


Training Asthma_GloVe_Fold1-Lr0.01-Epoch3...: 100%|██████████| 9/9 [01:26<00:00,  9.62s/it]


epoch3: curr_epoch_loss=1.0363293886184692,execution_time=0:01:26.557317,lr=0.001


Training Asthma_GloVe_Fold1-Lr0.01-Epoch4...: 100%|██████████| 9/9 [01:26<00:00,  9.64s/it]


epoch4: curr_epoch_loss=0.6811869740486145,execution_time=0:01:26.804046,lr=0.001


Training Asthma_GloVe_Fold1-Lr0.01-Epoch5...: 100%|██████████| 9/9 [01:27<00:00,  9.75s/it]


epoch5: curr_epoch_loss=0.48714861273765564,execution_time=0:01:27.780100,lr=0.001


Training Asthma_GloVe_Fold1-Lr0.01-Epoch6...: 100%|██████████| 9/9 [01:28<00:00,  9.78s/it]


epoch6: curr_epoch_loss=0.40845373272895813,execution_time=0:01:28.005036,lr=0.001


Training Asthma_GloVe_Fold1-Lr0.01-Epoch7...: 100%|██████████| 9/9 [01:29<00:00,  9.95s/it]


epoch7: curr_epoch_loss=0.33146846294403076,execution_time=0:01:29.592533,lr=0.001


Training Asthma_GloVe_Fold1-Lr0.01-Epoch8...: 100%|██████████| 9/9 [01:36<00:00, 10.73s/it]


epoch8: curr_epoch_loss=0.28315895795822144,execution_time=0:01:36.557632,lr=0.001


Training Asthma_GloVe_Fold1-Lr0.01-Epoch9...: 100%|██████████| 9/9 [01:31<00:00, 10.19s/it]


epoch9: curr_epoch_loss=0.25402531027793884,execution_time=0:01:31.703923,lr=0.001


Training Asthma_GloVe_Fold1-Lr0.01-Epoch10...: 100%|██████████| 9/9 [01:31<00:00, 10.19s/it]


epoch10: curr_epoch_loss=0.21906030178070068,execution_time=0:01:31.682800,lr=0.001


Evaluating Asthma_GloVe_Fold1...: 100%|██████████| 1/1 [00:04<00:00,  4.23s/it]
Training Asthma_GloVe_Fold2-Lr0.01-Epoch1...: 100%|██████████| 9/9 [02:20<00:00, 15.59s/it]


epoch1: curr_epoch_loss=45.212608337402344,execution_time=0:02:20.271214,lr=0.01


Training Asthma_GloVe_Fold2-Lr0.01-Epoch2...: 100%|██████████| 9/9 [02:23<00:00, 15.90s/it]


epoch2: curr_epoch_loss=4.893838405609131,execution_time=0:02:23.098831,lr=0.001


Training Asthma_GloVe_Fold2-Lr0.01-Epoch3...: 100%|██████████| 9/9 [01:45<00:00, 11.75s/it]


epoch3: curr_epoch_loss=1.4456185102462769,execution_time=0:01:45.766044,lr=0.0001


Training Asthma_GloVe_Fold2-Lr0.01-Epoch4...: 100%|██████████| 9/9 [01:44<00:00, 11.64s/it]


epoch4: curr_epoch_loss=0.9366708397865295,execution_time=0:01:44.786191,lr=1e-05


Training Asthma_GloVe_Fold2-Lr0.01-Epoch5...: 100%|██████████| 9/9 [01:43<00:00, 11.48s/it]


epoch5: curr_epoch_loss=0.8043111562728882,execution_time=0:01:43.284419,lr=1.0000000000000002e-06


Training Asthma_GloVe_Fold2-Lr0.01-Epoch6...: 100%|██████████| 9/9 [01:47<00:00, 11.98s/it]


epoch6: curr_epoch_loss=0.8137482404708862,execution_time=0:01:47.810153,lr=1.0000000000000002e-06


Training Asthma_GloVe_Fold2-Lr0.01-Epoch7...: 100%|██████████| 9/9 [01:45<00:00, 11.77s/it]


epoch7: curr_epoch_loss=0.7807859182357788,execution_time=0:01:45.940063,lr=1.0000000000000002e-07


Training Asthma_GloVe_Fold2-Lr0.01-Epoch8...: 100%|██████████| 9/9 [01:46<00:00, 11.86s/it]


epoch8: curr_epoch_loss=0.8101566433906555,execution_time=0:01:46.755535,lr=1.0000000000000004e-08


Training Asthma_GloVe_Fold2-Lr0.01-Epoch9...: 100%|██████████| 9/9 [01:45<00:00, 11.72s/it]


epoch9: curr_epoch_loss=0.8027894496917725,execution_time=0:01:45.479833,lr=1.0000000000000004e-08


Training Asthma_GloVe_Fold2-Lr0.01-Epoch10...: 100%|██████████| 9/9 [01:47<00:00, 11.98s/it]


epoch10: curr_epoch_loss=0.813107967376709,execution_time=0:01:47.799401,lr=1.0000000000000004e-08


Evaluating Asthma_GloVe_Fold2...: 100%|██████████| 1/1 [00:03<00:00,  3.94s/it]
Training Asthma_GloVe_Fold3-Lr0.01-Epoch1...: 100%|██████████| 9/9 [02:22<00:00, 15.82s/it]


epoch1: curr_epoch_loss=21.693382263183594,execution_time=0:02:22.385430,lr=0.01


Training Asthma_GloVe_Fold3-Lr0.01-Epoch2...: 100%|██████████| 9/9 [03:24<00:00, 22.71s/it]


epoch2: curr_epoch_loss=5.9086174964904785,execution_time=0:03:24.431940,lr=0.001


Training Asthma_GloVe_Fold3-Lr0.01-Epoch3...: 100%|██████████| 9/9 [01:44<00:00, 11.56s/it]


epoch3: curr_epoch_loss=1.397965908050537,execution_time=0:01:44.020682,lr=0.001


Training Asthma_GloVe_Fold3-Lr0.01-Epoch4...: 100%|██████████| 9/9 [01:38<00:00, 10.94s/it]


epoch4: curr_epoch_loss=0.8790816068649292,execution_time=0:01:38.429315,lr=0.001


Training Asthma_GloVe_Fold3-Lr0.01-Epoch5...: 100%|██████████| 9/9 [01:39<00:00, 11.11s/it]


epoch5: curr_epoch_loss=0.380769819021225,execution_time=0:01:39.968047,lr=0.0001


Training Asthma_GloVe_Fold3-Lr0.01-Epoch6...: 100%|██████████| 9/9 [01:38<00:00, 10.97s/it]


epoch6: curr_epoch_loss=0.28922200202941895,execution_time=0:01:38.770595,lr=1e-05


Training Asthma_GloVe_Fold3-Lr0.01-Epoch7...: 100%|██████████| 9/9 [01:38<00:00, 10.99s/it]


epoch7: curr_epoch_loss=0.26320740580558777,execution_time=0:01:38.923374,lr=1e-05


Training Asthma_GloVe_Fold3-Lr0.01-Epoch8...: 100%|██████████| 9/9 [01:38<00:00, 10.96s/it]


epoch8: curr_epoch_loss=0.26850980520248413,execution_time=0:01:38.660562,lr=1e-05


Training Asthma_GloVe_Fold3-Lr0.01-Epoch9...: 100%|██████████| 9/9 [01:39<00:00, 11.01s/it]


epoch9: curr_epoch_loss=0.25710684061050415,execution_time=0:01:39.060937,lr=1.0000000000000002e-06


Training Asthma_GloVe_Fold3-Lr0.01-Epoch10...: 100%|██████████| 9/9 [01:39<00:00, 11.03s/it]


epoch10: curr_epoch_loss=0.25960177183151245,execution_time=0:01:39.247111,lr=1.0000000000000002e-07


Evaluating Asthma_GloVe_Fold3...: 100%|██████████| 1/1 [00:04<00:00,  4.27s/it]
Training Asthma_GloVe_Fold4-Lr0.01-Epoch1...: 100%|██████████| 9/9 [02:22<00:00, 15.84s/it]


epoch1: curr_epoch_loss=4.6411638259887695,execution_time=0:02:22.518221,lr=0.01


Training Asthma_GloVe_Fold4-Lr0.01-Epoch2...: 100%|██████████| 9/9 [02:06<00:00, 14.05s/it]


epoch2: curr_epoch_loss=0.35415923595428467,execution_time=0:02:06.419914,lr=0.001


Training Asthma_GloVe_Fold4-Lr0.01-Epoch3...: 100%|██████████| 9/9 [01:56<00:00, 12.99s/it]


epoch3: curr_epoch_loss=0.17330637574195862,execution_time=0:01:56.950918,lr=0.001


Training Asthma_GloVe_Fold4-Lr0.01-Epoch4...: 100%|██████████| 9/9 [01:55<00:00, 12.84s/it]


epoch4: curr_epoch_loss=0.09950761497020721,execution_time=0:01:55.601566,lr=0.001


Training Asthma_GloVe_Fold4-Lr0.01-Epoch5...: 100%|██████████| 9/9 [01:59<00:00, 13.25s/it]


epoch5: curr_epoch_loss=0.07807744294404984,execution_time=0:01:59.276290,lr=0.001


Training Asthma_GloVe_Fold4-Lr0.01-Epoch6...: 100%|██████████| 9/9 [01:55<00:00, 12.82s/it]


epoch6: curr_epoch_loss=0.05767979845404625,execution_time=0:01:55.409633,lr=0.001


Training Asthma_GloVe_Fold4-Lr0.01-Epoch7...: 100%|██████████| 9/9 [01:55<00:00, 12.88s/it]


epoch7: curr_epoch_loss=0.04008650407195091,execution_time=0:01:55.896577,lr=0.001


Training Asthma_GloVe_Fold4-Lr0.01-Epoch8...: 100%|██████████| 9/9 [01:57<00:00, 13.10s/it]


epoch8: curr_epoch_loss=0.03139793127775192,execution_time=0:01:57.872068,lr=0.001


Training Asthma_GloVe_Fold4-Lr0.01-Epoch9...: 100%|██████████| 9/9 [02:02<00:00, 13.61s/it]


epoch9: curr_epoch_loss=0.026281826198101044,execution_time=0:02:02.517981,lr=0.001


Training Asthma_GloVe_Fold4-Lr0.01-Epoch10...: 100%|██████████| 9/9 [02:05<00:00, 13.89s/it]


epoch10: curr_epoch_loss=0.022954968735575676,execution_time=0:02:05.007643,lr=0.001


Evaluating Asthma_GloVe_Fold4...: 100%|██████████| 1/1 [00:04<00:00,  4.25s/it]
Training Asthma_GloVe_Fold5-Lr0.01-Epoch1...: 100%|██████████| 9/9 [01:08<00:00,  7.61s/it]


epoch1: curr_epoch_loss=2.5760397911071777,execution_time=0:01:08.529227,lr=0.01


Training Asthma_GloVe_Fold5-Lr0.01-Epoch2...: 100%|██████████| 9/9 [01:25<00:00,  9.49s/it]


epoch2: curr_epoch_loss=0.07633360475301743,execution_time=0:01:25.458908,lr=0.01


Training Asthma_GloVe_Fold5-Lr0.01-Epoch3...: 100%|██████████| 9/9 [02:14<00:00, 14.96s/it]


epoch3: curr_epoch_loss=0.35409343242645264,execution_time=0:02:14.668126,lr=0.001


Training Asthma_GloVe_Fold5-Lr0.01-Epoch4...: 100%|██████████| 9/9 [02:16<00:00, 15.16s/it]


epoch4: curr_epoch_loss=0.02381928265094757,execution_time=0:02:16.410319,lr=0.001


Training Asthma_GloVe_Fold5-Lr0.01-Epoch5...: 100%|██████████| 9/9 [02:03<00:00, 13.68s/it]


epoch5: curr_epoch_loss=0.0071824598126113415,execution_time=0:02:03.153841,lr=0.001


Training Asthma_GloVe_Fold5-Lr0.01-Epoch6...: 100%|██████████| 9/9 [02:03<00:00, 13.74s/it]


epoch6: curr_epoch_loss=0.004649631213396788,execution_time=0:02:03.668820,lr=0.001


Training Asthma_GloVe_Fold5-Lr0.01-Epoch7...: 100%|██████████| 9/9 [02:13<00:00, 14.85s/it]


epoch7: curr_epoch_loss=0.0028192491736263037,execution_time=0:02:13.676764,lr=0.0001


Training Asthma_GloVe_Fold5-Lr0.01-Epoch8...: 100%|██████████| 9/9 [02:06<00:00, 14.00s/it]


epoch8: curr_epoch_loss=0.0026380433700978756,execution_time=0:02:06.005140,lr=1e-05


Training Asthma_GloVe_Fold5-Lr0.01-Epoch9...: 100%|██████████| 9/9 [02:06<00:00, 14.07s/it]


epoch9: curr_epoch_loss=0.0026664482429623604,execution_time=0:02:06.643783,lr=1e-05


Training Asthma_GloVe_Fold5-Lr0.01-Epoch10...: 100%|██████████| 9/9 [02:04<00:00, 13.79s/it]


epoch10: curr_epoch_loss=0.002618715399876237,execution_time=0:02:04.156503,lr=1.0000000000000002e-06


Evaluating Asthma_GloVe_Fold5...: 100%|██████████| 1/1 [00:04<00:00,  4.47s/it]
Training Asthma_GloVe_Fold6-Lr0.01-Epoch1...: 100%|██████████| 9/9 [02:30<00:00, 16.77s/it]


epoch1: curr_epoch_loss=2.6602578163146973,execution_time=0:02:30.953775,lr=0.01


Training Asthma_GloVe_Fold6-Lr0.01-Epoch2...: 100%|██████████| 9/9 [03:07<00:00, 20.84s/it]


epoch2: curr_epoch_loss=0.29885295033454895,execution_time=0:03:07.546874,lr=0.01


Training Asthma_GloVe_Fold6-Lr0.01-Epoch3...: 100%|██████████| 9/9 [03:18<00:00, 22.09s/it]


epoch3: curr_epoch_loss=0.1552000343799591,execution_time=0:03:18.792862,lr=0.01


Training Asthma_GloVe_Fold6-Lr0.01-Epoch4...: 100%|██████████| 9/9 [02:47<00:00, 18.57s/it]


epoch4: curr_epoch_loss=0.27278828620910645,execution_time=0:02:47.131604,lr=0.01


Training Asthma_GloVe_Fold6-Lr0.01-Epoch5...: 100%|██████████| 9/9 [02:38<00:00, 17.63s/it]


epoch5: curr_epoch_loss=0.062701515853405,execution_time=0:02:38.694330,lr=0.001


Training Asthma_GloVe_Fold6-Lr0.01-Epoch6...: 100%|██████████| 9/9 [02:31<00:00, 16.86s/it]


epoch6: curr_epoch_loss=0.011702585965394974,execution_time=0:02:31.760983,lr=0.0001


Training Asthma_GloVe_Fold6-Lr0.01-Epoch7...: 100%|██████████| 9/9 [02:32<00:00, 16.94s/it]


epoch7: curr_epoch_loss=0.008369348011910915,execution_time=0:02:32.433235,lr=1e-05


Training Asthma_GloVe_Fold6-Lr0.01-Epoch8...: 100%|██████████| 9/9 [02:31<00:00, 16.85s/it]


epoch8: curr_epoch_loss=0.007181448396295309,execution_time=0:02:31.647842,lr=1.0000000000000002e-06


Training Asthma_GloVe_Fold6-Lr0.01-Epoch9...: 100%|██████████| 9/9 [02:30<00:00, 16.68s/it]


epoch9: curr_epoch_loss=0.006192769855260849,execution_time=0:02:30.150136,lr=1.0000000000000002e-06


Training Asthma_GloVe_Fold6-Lr0.01-Epoch10...: 100%|██████████| 9/9 [02:22<00:00, 15.84s/it]


epoch10: curr_epoch_loss=0.008717436343431473,execution_time=0:02:22.600117,lr=1.0000000000000002e-07


Evaluating Asthma_GloVe_Fold6...: 100%|██████████| 1/1 [00:02<00:00,  2.67s/it]
Training Asthma_GloVe_Fold7-Lr0.01-Epoch1...: 100%|██████████| 9/9 [03:24<00:00, 22.67s/it]


epoch1: curr_epoch_loss=0.1423616111278534,execution_time=0:03:24.042566,lr=0.01


Training Asthma_GloVe_Fold7-Lr0.01-Epoch2...: 100%|██████████| 9/9 [03:14<00:00, 21.60s/it]


epoch2: curr_epoch_loss=0.05426415801048279,execution_time=0:03:14.375432,lr=0.001


Training Asthma_GloVe_Fold7-Lr0.01-Epoch3...: 100%|██████████| 9/9 [03:31<00:00, 23.49s/it]


epoch3: curr_epoch_loss=0.008602012880146503,execution_time=0:03:31.375959,lr=0.0001


Training Asthma_GloVe_Fold7-Lr0.01-Epoch4...: 100%|██████████| 9/9 [03:35<00:00, 23.98s/it]


epoch4: curr_epoch_loss=0.002024332992732525,execution_time=0:03:35.819896,lr=0.0001


Training Asthma_GloVe_Fold7-Lr0.01-Epoch5...: 100%|██████████| 9/9 [03:29<00:00, 23.28s/it]


epoch5: curr_epoch_loss=0.0010438247118145227,execution_time=0:03:29.515556,lr=1e-05


Training Asthma_GloVe_Fold7-Lr0.01-Epoch6...: 100%|██████████| 9/9 [03:32<00:00, 23.62s/it]


epoch6: curr_epoch_loss=0.001054993481375277,execution_time=0:03:32.562196,lr=1.0000000000000002e-06


Training Asthma_GloVe_Fold7-Lr0.01-Epoch7...: 100%|██████████| 9/9 [03:30<00:00, 23.34s/it]


epoch7: curr_epoch_loss=0.0009487658389844,execution_time=0:03:30.107804,lr=1.0000000000000002e-07


Training Asthma_GloVe_Fold7-Lr0.01-Epoch8...: 100%|██████████| 9/9 [03:32<00:00, 23.66s/it]


epoch8: curr_epoch_loss=0.0011624816106632352,execution_time=0:03:32.956838,lr=1.0000000000000002e-07


Training Asthma_GloVe_Fold7-Lr0.01-Epoch9...: 100%|██████████| 9/9 [03:32<00:00, 23.57s/it]


epoch9: curr_epoch_loss=0.0010731513611972332,execution_time=0:03:32.109352,lr=1.0000000000000004e-08


Training Asthma_GloVe_Fold7-Lr0.01-Epoch10...: 100%|██████████| 9/9 [03:33<00:00, 23.70s/it]


epoch10: curr_epoch_loss=0.000945442239753902,execution_time=0:03:33.308323,lr=1.0000000000000004e-08


Evaluating Asthma_GloVe_Fold7...: 100%|██████████| 1/1 [00:04<00:00,  4.58s/it]
Training Asthma_GloVe_Fold8-Lr0.01-Epoch1...: 100%|██████████| 9/9 [03:21<00:00, 22.42s/it]


epoch1: curr_epoch_loss=0.03969817981123924,execution_time=0:03:21.763937,lr=0.01


Training Asthma_GloVe_Fold8-Lr0.01-Epoch2...: 100%|██████████| 9/9 [03:05<00:00, 20.57s/it]


epoch2: curr_epoch_loss=0.032251954078674316,execution_time=0:03:05.175698,lr=0.01


Training Asthma_GloVe_Fold8-Lr0.01-Epoch3...: 100%|██████████| 9/9 [03:31<00:00, 23.45s/it]


epoch3: curr_epoch_loss=0.029665105044841766,execution_time=0:03:31.057213,lr=0.01


Training Asthma_GloVe_Fold8-Lr0.01-Epoch4...: 100%|██████████| 9/9 [03:34<00:00, 23.83s/it]


epoch4: curr_epoch_loss=0.0734778642654419,execution_time=0:03:34.494299,lr=0.001


Training Asthma_GloVe_Fold8-Lr0.01-Epoch5...: 100%|██████████| 9/9 [03:47<00:00, 25.26s/it]


epoch5: curr_epoch_loss=0.005172102712094784,execution_time=0:03:47.379024,lr=0.0001


Training Asthma_GloVe_Fold8-Lr0.01-Epoch6...: 100%|██████████| 9/9 [03:51<00:00, 25.67s/it]


epoch6: curr_epoch_loss=0.0034219962544739246,execution_time=0:03:51.025369,lr=1e-05


Training Asthma_GloVe_Fold8-Lr0.01-Epoch7...: 100%|██████████| 9/9 [03:46<00:00, 25.14s/it]


epoch7: curr_epoch_loss=0.0015488622011616826,execution_time=0:03:46.273649,lr=1e-05


Training Asthma_GloVe_Fold8-Lr0.01-Epoch8...: 100%|██████████| 9/9 [03:44<00:00, 24.99s/it]


epoch8: curr_epoch_loss=0.002057749778032303,execution_time=0:03:44.932025,lr=1.0000000000000002e-06


Training Asthma_GloVe_Fold8-Lr0.01-Epoch9...: 100%|██████████| 9/9 [03:45<00:00, 25.09s/it]


epoch9: curr_epoch_loss=0.0011950017651543021,execution_time=0:03:45.768919,lr=1.0000000000000002e-07


Training Asthma_GloVe_Fold8-Lr0.01-Epoch10...: 100%|██████████| 9/9 [03:53<00:00, 25.91s/it]


epoch10: curr_epoch_loss=0.00204699975438416,execution_time=0:03:53.203476,lr=1.0000000000000004e-08


Evaluating Asthma_GloVe_Fold8...: 100%|██████████| 1/1 [00:04<00:00,  4.45s/it]
Training Asthma_GloVe_Fold9-Lr0.01-Epoch1...: 100%|██████████| 9/9 [04:05<00:00, 27.24s/it]


epoch1: curr_epoch_loss=0.05793356895446777,execution_time=0:04:05.177924,lr=0.01


Training Asthma_GloVe_Fold9-Lr0.01-Epoch2...: 100%|██████████| 9/9 [03:58<00:00, 26.54s/it]


epoch2: curr_epoch_loss=0.053879182785749435,execution_time=0:03:58.834944,lr=0.001


Training Asthma_GloVe_Fold9-Lr0.01-Epoch3...: 100%|██████████| 9/9 [03:48<00:00, 25.34s/it]


epoch3: curr_epoch_loss=0.0207254309207201,execution_time=0:03:48.072902,lr=0.0001


Training Asthma_GloVe_Fold9-Lr0.01-Epoch4...: 100%|██████████| 9/9 [03:52<00:00, 25.78s/it]


epoch4: curr_epoch_loss=0.0012652085861191154,execution_time=0:03:52.029174,lr=0.0001


Training Asthma_GloVe_Fold9-Lr0.01-Epoch5...: 100%|██████████| 9/9 [03:48<00:00, 25.42s/it]


epoch5: curr_epoch_loss=0.0007483389344997704,execution_time=0:03:48.786752,lr=1e-05


Training Asthma_GloVe_Fold9-Lr0.01-Epoch6...: 100%|██████████| 9/9 [03:47<00:00, 25.25s/it]


epoch6: curr_epoch_loss=0.0007495496538467705,execution_time=0:03:47.264775,lr=1.0000000000000002e-06


Training Asthma_GloVe_Fold9-Lr0.01-Epoch7...: 100%|██████████| 9/9 [03:49<00:00, 25.45s/it]


epoch7: curr_epoch_loss=0.0007239136029966176,execution_time=0:03:49.053655,lr=1.0000000000000002e-07


Training Asthma_GloVe_Fold9-Lr0.01-Epoch8...: 100%|██████████| 9/9 [03:51<00:00, 25.71s/it]


epoch8: curr_epoch_loss=0.0007446889649145305,execution_time=0:03:51.395345,lr=1.0000000000000002e-07


Training Asthma_GloVe_Fold9-Lr0.01-Epoch9...: 100%|██████████| 9/9 [03:46<00:00, 25.16s/it]


epoch9: curr_epoch_loss=0.000721665273886174,execution_time=0:03:46.442793,lr=1.0000000000000004e-08


Training Asthma_GloVe_Fold9-Lr0.01-Epoch10...: 100%|██████████| 9/9 [03:47<00:00, 25.24s/it]


epoch10: curr_epoch_loss=0.0007281989674083889,execution_time=0:03:47.138356,lr=1.0000000000000004e-08


Evaluating Asthma_GloVe_Fold9...: 100%|██████████| 1/1 [00:04<00:00,  4.37s/it]
Training Asthma_GloVe_Fold10-Lr0.01-Epoch1...: 100%|██████████| 9/9 [04:09<00:00, 27.77s/it]


epoch1: curr_epoch_loss=0.04182969033718109,execution_time=0:04:09.949060,lr=0.01


Training Asthma_GloVe_Fold10-Lr0.01-Epoch2...: 100%|██████████| 9/9 [04:14<00:00, 28.25s/it]


epoch2: curr_epoch_loss=0.04186101630330086,execution_time=0:04:14.213107,lr=0.001


Training Asthma_GloVe_Fold10-Lr0.01-Epoch3...: 100%|██████████| 9/9 [04:17<00:00, 28.65s/it]


epoch3: curr_epoch_loss=0.010446008294820786,execution_time=0:04:17.861351,lr=0.0001


Training Asthma_GloVe_Fold10-Lr0.01-Epoch4...: 100%|██████████| 9/9 [04:06<00:00, 27.39s/it]


epoch4: curr_epoch_loss=5.3112406021682546e-05,execution_time=0:04:06.489917,lr=0.0001


Training Asthma_GloVe_Fold10-Lr0.01-Epoch5...: 100%|██████████| 9/9 [04:09<00:00, 27.75s/it]


epoch5: curr_epoch_loss=6.542445771628991e-05,execution_time=0:04:09.767438,lr=1e-05


Training Asthma_GloVe_Fold10-Lr0.01-Epoch6...: 100%|██████████| 9/9 [04:08<00:00, 27.62s/it]


epoch6: curr_epoch_loss=5.501125269802287e-05,execution_time=0:04:08.601711,lr=1.0000000000000002e-06


Training Asthma_GloVe_Fold10-Lr0.01-Epoch7...: 100%|██████████| 9/9 [04:07<00:00, 27.49s/it]


epoch7: curr_epoch_loss=5.344785677152686e-05,execution_time=0:04:07.387125,lr=1.0000000000000002e-07


Training Asthma_GloVe_Fold10-Lr0.01-Epoch8...: 100%|██████████| 9/9 [04:06<00:00, 27.33s/it]


epoch8: curr_epoch_loss=5.296784365782514e-05,execution_time=0:04:06.015519,lr=1.0000000000000004e-08


Training Asthma_GloVe_Fold10-Lr0.01-Epoch9...: 100%|██████████| 9/9 [04:10<00:00, 27.86s/it]


epoch9: curr_epoch_loss=5.709937249775976e-05,execution_time=0:04:10.768100,lr=1.0000000000000004e-08


Training Asthma_GloVe_Fold10-Lr0.01-Epoch10...: 100%|██████████| 9/9 [04:07<00:00, 27.54s/it]


epoch10: curr_epoch_loss=5.316020906320773e-05,execution_time=0:04:07.841782,lr=1.0000000000000004e-08


Evaluating Asthma_GloVe_Fold10...: 100%|██████████| 1/1 [00:04<00:00,  4.26s/it]


In [192]:
results = pd.read_csv(results_file)
results

Unnamed: 0,Batch,Disease,Embedding,AUROC,F1,F1_MACRO,F1_MICRO,Exec Time,Total Run (secs),Epochs,LR
0,DL_embedding_results_2023-04-02-10-35-44,Asthma,GloVe,0.469660,0.266667,0.135489,0.155393,"Trn,Eval,Ttl=0:01:25.259418,0:00:18.889262,0:0...",104.148680,1,0.0100
1,DL_embedding_results_2023-04-02-10-35-44,Asthma,GloVe,0.556387,0.000000,0.458416,0.846435,"Trn,Eval,Ttl=0:00:52.649260,0:00:18.946454,0:0...",71.595714,1,0.0010
2,DL_embedding_results_2023-04-02-10-35-44,Asthma,GloVe,0.549393,0.000000,0.458416,0.846435,"Trn,Eval,Ttl=0:00:53.392456,0:00:19.083064,0:0...",72.475520,1,0.0001
3,DL_embedding_results_2023-04-02-10-52-18,Asthma,GloVe,-1.000000,-1.000000,-1.000000,-1.000000,Failure,40.544514,1,0.0500
4,DL_embedding_results_2023-04-02-10-52-18,Asthma,GloVe,0.487144,0.265372,0.155795,0.170018,"Trn,Eval,Ttl=0:01:35.918692,0:00:19.401176,0:0...",115.319869,1,0.0100
...,...,...,...,...,...,...,...,...,...,...,...
145,DL_embedding_results_2023-04-04-09-08-23_fold1...,Asthma,FastText,0.443993,0.188277,0.235426,0.238333,"Trn,Eval,Ttl=0:02:55.939435,0:00:40.595950,0:0...",216.535385,2,0.0100
146,DL_embedding_results_2023-04-04-09-33-07,Asthma,GloVe,0.539820,0.000000,0.455535,0.836667,"Trn,Eval,Ttl=0:03:25.741429,0:00:21.904964,0:0...",227.646393,2,0.0100
147,DL_embedding_results_2023-04-04-09-33-07,Asthma,GloVe,0.541209,0.000000,0.467140,0.876667,"Trn,Eval,Ttl=0:03:05.441430,0:00:21.173208,0:0...",206.614637,2,0.0100
148,DL_embedding_results_2023-04-04-09-33-07,Asthma,FastText,0.493841,0.000000,0.455535,0.836667,"Trn,Eval,Ttl=0:02:20.059131,0:00:40.920382,0:0...",180.979512,2,0.0100


**Deep Learning - Word Embeddings - All Features - With Stop Words**

![DL BagOfWords AllFeatures Averaged](images\dl-we-swyes.gif)