In [1]:
!pip install -q transformers

[K     |████████████████████████████████| 2.6 MB 15.2 MB/s 
[K     |████████████████████████████████| 3.3 MB 58.5 MB/s 
[K     |████████████████████████████████| 636 kB 46.2 MB/s 
[K     |████████████████████████████████| 895 kB 53.4 MB/s 
[?25h

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%matplotlib inline
import os
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
from sklearn.metrics import classification_report
import re
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer
from transformers import AutoTokenizer, AutoModel
from transformers import BertForSequenceClassification, AdamW, BertConfig
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.data.sampler import SubsetRandomSampler
import transformers
from transformers import RobertaTokenizer, BertTokenizer, RobertaModel, BertModel, AdamW# get_linear_schedule_with_warmup
from transformers import get_linear_schedule_with_warmup
import time

!cp drive/MyDrive/Colab\ Notebooks/MSc-Individual-Project/utils.py .
from utils import *
!cp drive/MyDrive/Colab\ Notebooks/MSc-Individual-Project/Custom_Dataset_Class.py .
from Custom_Dataset_Class import CustomDataset
!cp drive/MyDrive/Colab\ Notebooks/MSc-Individual-Project/pytorchtools.py .
from pytorchtools import EarlyStopping
#from Bert_Classification import Bert_Classification_Model
#from RoBERT import RoBERT_Model

#from BERT_Hierarchical import BERT_Hierarchical_Model
import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import LabelBinarizer

In [4]:
import torch
# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")


There are 1 GPU(s) available.
We will use the GPU: Tesla V100-SXM2-16GB


In [5]:
np.random.seed(123)
torch.manual_seed(123)
torch.cuda.manual_seed_all(123)

In [9]:
#change to where you store mimic3 data
MIMIC_3_DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/datasets_date'

train_df = pd.read_csv('%s/train_50_week.csv' % MIMIC_3_DIR)
eval_df = pd.read_csv('%s/dev_50_week.csv' % MIMIC_3_DIR)
test_df = pd.read_csv('%s/test_50_week.csv' % MIMIC_3_DIR)

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,LABELS,length
0,70220,163685,dnr,511.9;414.01;038.9;276.0;285.1;995.92;424.0;51...,1
1,61500,145120,title,414.01;038.9;599.0;244.9;427.31,1
2,73970,130239,title,285.9,1
3,29969,129547,title,276.0;424.1;486;272.0;403.91,1
4,2830,193970,title,401.9;285.9;276.2;518.81;244.9;276.1;584.9;427.89,1


In [10]:
full_df = pd.concat([train_df, eval_df, test_df], ignore_index=True)

In [11]:
# split labels by ";", then convert to list
def split_lab (x):
    #print(x)
    return x.split(";")

full_df['LABELS'] = full_df['LABELS'].apply(split_lab)
#full_df['TEXT'] = full_df['TEXT'].apply(split_lab)

full_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,LABELS,length
0,70220,163685,dnr,"[511.9, 414.01, 038.9, 276.0, 285.1, 995.92, 4...",1
1,61500,145120,title,"[414.01, 038.9, 599.0, 244.9, 427.31]",1
2,73970,130239,title,[285.9],1
3,29969,129547,title,"[276.0, 424.1, 486, 272.0, 403.91]",1
4,2830,193970,title,"[401.9, 285.9, 276.2, 518.81, 244.9, 276.1, 58...",1


In [12]:
#load multi label binarizer for one-hot encoding
mlb = MultiLabelBinarizer(sparse_output=True)

#labels_onehot = mlb.fit_transform(train_df.pop('LABELS'))
#labels_onehot[0][1]

In [13]:
#change label to one-hot encoding per code
full_df = full_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(full_df.pop('LABELS')),
                columns=mlb.classes_))

full_df

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,length,038.9,244.9,250.00,272.0,272.4,276.0,276.1,276.2,285.1,285.9,287.5,305.1,311,327.23,401.9,403.90,403.91,410.71,412,414.01,424.0,424.1,427.31,427.89,428.0,486,493.90,496,507.0,511.9,518.0,518.81,530.81,584.5,584.9,585.9,599.0,774.2,785.52,995.92,997.1,V05.3,V15.82,V29.0,V30.00,V30.01,V45.81,V45.82,V58.61,V58.67
0,70220,163685,dnr,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,61500,145120,title,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,73970,130239,title,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,29969,129547,title,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2830,193970,title,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128733,91588,185914,chief complaint altered mental status hpi y o ...,1984,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
128734,95705,184353,admission date discharge date date of birth se...,2008,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0
128735,90780,111145,admission date discharge date date of birth se...,2028,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
128736,99339,142289,chief complaint chief complaint increasing ple...,2086,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [14]:
full_df.HADM_ID.unique().shape

(5280,)

In [15]:
# Convert columns to list of one hot encoding
icd_classes_50 = mlb.classes_

full_df['labels'] = full_df[icd_classes_50].values.tolist()
#train_df.sort_values(['length'], ascending=False, inplace=True)
full_df.head()


Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,length,038.9,244.9,250.00,272.0,272.4,276.0,276.1,276.2,285.1,285.9,287.5,305.1,311,327.23,401.9,403.90,403.91,410.71,412,414.01,424.0,424.1,427.31,427.89,428.0,486,493.90,496,507.0,511.9,518.0,518.81,530.81,584.5,584.9,585.9,599.0,774.2,785.52,995.92,997.1,V05.3,V15.82,V29.0,V30.00,V30.01,V45.81,V45.82,V58.61,V58.67,labels
0,70220,163685,dnr,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, ..."
1,61500,145120,title,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,73970,130239,title,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
3,29969,129547,title,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,2830,193970,title,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, ..."


In [16]:
#full_df = full_df.drop(full_df[full_df['length']<300].index)

In [17]:
#full_df

In [18]:
train_df, test_df = train_test_split(full_df, test_size=0.2)

In [19]:
train_df, eval_df = train_test_split(train_df, test_size=0.2)

In [20]:
dev_df, test_df = train_test_split(test_df, test_size=0.5)

In [21]:
train_df.sort_values(['length'], inplace=True)
eval_df.sort_values(['length'], inplace=True)
dev_df.sort_values(['length'], inplace=True)
test_df.sort_values(['length'], inplace=True)


In [22]:
#convert into 2 columns dataframe
train_df = pd.DataFrame(train_df, columns=['TEXT', 'labels'])
train_df.columns=['text', 'labels']
train_df.head()

eval_df = pd.DataFrame(eval_df, columns=['TEXT', 'labels'])
eval_df.columns=['text', 'labels']
eval_df.head()

dev_df = pd.DataFrame(dev_df, columns=['HADM_ID', 'TEXT', 'labels'])
dev_df.columns=['id', 'text', 'labels']
dev_df.head()

test_df = pd.DataFrame(test_df, columns=['HADM_ID', 'TEXT', 'labels'])
test_df.columns=['id', 'text', 'labels']
test_df.head()

Unnamed: 0,id,text,labels
2,130239,title,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
5,135298,npn,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
110153,103440,dnr dni,"[1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, ..."
21,163685,dnr mrsa,"[1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, ..."
28,139183,sinus tachycardia,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ..."


In [23]:
train_df.reset_index(drop=True, inplace=True)
eval_df.reset_index(drop=True, inplace=True)
dev_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)
test_df.head()

Unnamed: 0,id,text,labels
0,130239,title,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
1,135298,npn,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,103440,dnr dni,"[1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, ..."
3,163685,dnr mrsa,"[1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, ..."
4,139183,sinus tachycardia,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ..."


In [24]:
# Creating the customized model, by adding a drop out and a dense layer on top of distil bert to get the final output for the model.

class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        '''
            Load Pretrained model here
            Use return_dict=False for compatibility for 4.x

        '''
        self.l1 = transformers.AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT", return_dict=False)
        #self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)


        self.l2 = torch.nn.Dropout(0.3)

        '''
            Changed Linear Output layer to 50 based on the class
        '''
        self.l3 = torch.nn.Linear(768, 50)

    def forward(self, ids, mask, token_type_ids):
#        print("ids: ", ids.size(), "mask: ", mask.size(), "token type ids: ", token_type_ids.size())
        _, output_1= self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

model = BERTClass()
model.to(device)

Downloading:   0%|          | 0.00/385 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [25]:
# Defining some key variables to configure model training
MAX_LEN = 512
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 8
TEST_BATCH_SIZE = 8
EPOCHS = 10
LEARNING_RATE = 3e-05

#set tokenizer
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

#custom dataset for BERT class
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        
        '''
            set text as training data
            set labels as targets
        '''
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.labels
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }



Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

In [26]:
#load df to dataset

training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
valid_set = CustomDataset(eval_df, tokenizer, MAX_LEN)
dev_set = CustomDataset(dev_df, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_df, tokenizer, MAX_LEN)

In [27]:
#data loader
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': False
                }

val_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': False
                }

dev_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

training_loader = DataLoader(training_set, **train_params)
valid_loader = DataLoader(valid_set, **val_params)
dev_loader = DataLoader(dev_set, **dev_params)
testing_loader = DataLoader(testing_set, **test_params)

In [28]:
#loss function
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

#optimizer
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [29]:
def train(epoch):
    model.train()
    for _,data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        
    print(f'Epoch: {epoch}, Training Loss:  {loss.item()}')

In [30]:
# Evaluate the model

def validation(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(valid_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    print(f'Epoch: {epoch}, Validation Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [None]:
start_epoch=0
DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/'
resume = True     
if resume:
    if os.path.isfile(f"%s/models/model_firstweek_epoch{start_epoch}.pth" % DIR):
        print("Resume from checkpoint...")
        checkpoint = torch.load(f"%s/models/model_firstweek_epoch{start_epoch}.pth" % DIR)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        initepoch = checkpoint['epoch']
        print("====>loaded checkpoint (epoch{})".format(checkpoint['epoch']))
    else:
        print("====>no checkpoint found.")
        initepoch = 0

#patience = 3
#early_stopping = EarlyStopping(patience, verbose=True)


for epoch in tqdm(range(EPOCHS)):
    train(epoch)
    validation(epoch)

    if (epoch+start_epoch+1)%5 == 0:
        checkpoint = {"model_state_dict": model.state_dict(),
                      "optimizer_state_dict": optimizer.state_dict(),
                      "epoch": epoch+start_epoch+1}
        path_checkpoint = f"%s/models/model_firstweek_epoch{epoch+start_epoch+1}.pth" % DIR
        torch.save(checkpoint, path_checkpoint)

#

====>no checkpoint found.


  0%|          | 0/10 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Training Loss:  0.34688708186149597


 10%|█         | 1/10 [46:41<7:00:12, 2801.43s/it]

Epoch: 0, Validation Loss:  0.27
Epoch: 1, Training Loss:  0.3064427971839905


 20%|██        | 2/10 [1:33:15<6:12:56, 2797.11s/it]

Epoch: 1, Validation Loss:  0.25
Epoch: 2, Training Loss:  0.2517176866531372


 30%|███       | 3/10 [2:19:36<5:25:29, 2789.88s/it]

Epoch: 2, Validation Loss:  0.23
Epoch: 3, Training Loss:  0.1817236840724945


 40%|████      | 4/10 [3:05:48<4:38:15, 2782.57s/it]

Epoch: 3, Validation Loss:  0.22
Epoch: 4, Training Loss:  0.15562181174755096
Epoch: 4, Validation Loss:  0.21


 50%|█████     | 5/10 [3:52:02<3:51:38, 2779.61s/it]

Epoch: 5, Training Loss:  0.12540245056152344


 60%|██████    | 6/10 [4:38:11<3:05:03, 2775.94s/it]

Epoch: 5, Validation Loss:  0.20
Epoch: 6, Training Loss:  0.11689731478691101


 70%|███████   | 7/10 [5:24:24<2:18:45, 2775.10s/it]

Epoch: 6, Validation Loss:  0.20
Epoch: 7, Training Loss:  0.061554379761219025


 80%|████████  | 8/10 [6:10:38<1:32:29, 2774.66s/it]

Epoch: 7, Validation Loss:  0.20
Epoch: 8, Training Loss:  0.04829096794128418


 90%|█████████ | 9/10 [6:56:51<46:14, 2774.06s/it]  

Epoch: 8, Validation Loss:  0.19
Epoch: 9, Training Loss:  0.06523396819829941
Epoch: 9, Validation Loss:  0.20


100%|██████████| 10/10 [7:43:08<00:00, 2778.90s/it]


In [31]:
DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/'

checkpoint = torch.load(f"%s/models/model_firstweek_epoch10.pth" % DIR)
model.load_state_dict(checkpoint['model_state_dict'])



<All keys matched successfully>

In [29]:
# Evaluate the model

def evaluation():
    model.eval()

    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(dev_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy())
    print(f'Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [None]:
dev_out, dev_tar, losses = evaluation()

Loss:  0.20


### Normal evaluation

In [None]:
outputs = np.array(dev_out) >= 0.5
targets = dev_tar
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.29905235358086063
F1 Score (Micro) = 0.6555369192963295
F1 Score (Macro) = 0.6143248347914421


In [None]:
print(classification_report(targets, outputs, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.7885    0.5548    0.6514      1559
       244.9     0.7446    0.5350    0.6226      1215
      250.00     0.5819    0.5883    0.5851      2373
       272.0     0.6439    0.3704    0.4703      1069
       272.4     0.5978    0.6756    0.6343      2004
       276.0     0.7603    0.5636    0.6473      1306
       276.1     0.8103    0.4791    0.6022      1079
       276.2     0.7223    0.5617    0.6319      1695
       285.1     0.7207    0.5876    0.6474      1370
       285.9     0.7778    0.5014    0.6097      1773
       287.5     0.6865    0.5884    0.6337      1176
       305.1     0.8424    0.4691    0.6026       889
         311     0.7983    0.5094    0.6219       746
      327.23     0.8427    0.6096    0.7074       835
       401.9     0.7110    0.7009    0.7059      4611
      403.90     0.7246    0.6066    0.6604      1093
      403.91     0.7484    0.5066    0.6042       681
      410.71     0.6365    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, outputs, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, outputs, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.7835662561893544
RUC AUC Score (Macro) = 0.7562855212812982


In [None]:
dev_df['prediction'] = dev_out
dev_df['tar'] = dev_tar

In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar
0,182054,title,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, ..."
1,188200,title,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
2,194698,clinician attending,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.2794689, 0.19813865, 0.19990268, 0.02825292...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,148371,wt 1510g,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.01163709, 0.0028088212, 0.00066481216, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,149035,title image002 gif,"[0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, ...","[0.025671, 0.049724683, 0.038684838, 0.0492159...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ..."
...,...,...,...,...,...
12869,117599,chief complaint septic shock hpi yo m w st mal...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.012328103, 0.0025165812, 0.011107826, 0.003...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ..."
12870,150186,admission date discharge date date of birth se...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0035550706, 0.022705248, 0.000101734935, 3....","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
12871,190940,chief complaint tachypnea hypoxia hpi the pati...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.008461342, 0.004148343, 0.0069068624, 0.000...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
12872,168029,chief complaint s p fall and point hct drop hp...,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0.0117470585, 0.02852733, 0.0011915335, 0.017...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, ..."


### Avg outputs

In [None]:
out_mean_dict = dev_df.groupby('id').prediction.apply(np.mean).to_dict()
dev_df['out_mean'] = dev_df['id'].map(out_mean_dict)
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean
0,182054,title,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, ...","[0.02840265, 0.8170211, 0.011177352, 0.0099834..."
1,188200,title,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.027963744, 0.10101249, 0.0676346, 0.0386931..."
2,194698,clinician attending,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.2794689, 0.19813865, 0.19990268, 0.02825292...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.025536604, 0.029549748, 0.021597313, 0.0034..."
3,148371,wt 1510g,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.01163709, 0.0028088212, 0.00066481216, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.006541031, 0.0021932223, 0.00036609953, 0.0..."
4,149035,title image002 gif,"[0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, ...","[0.025671, 0.049724683, 0.038684838, 0.0492159...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...","[0.01408495, 0.01960959, 0.11027789, 0.0166721..."
...,...,...,...,...,...,...
12869,117599,chief complaint septic shock hpi yo m w st mal...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.012328103, 0.0025165812, 0.011107826, 0.003...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.12551169, 0.095337026, 0.032988783, 0.00443..."
12870,150186,admission date discharge date date of birth se...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0035550706, 0.022705248, 0.000101734935, 3....","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.008083608, 0.3247492, 0.000543274, 0.013413..."
12871,190940,chief complaint tachypnea hypoxia hpi the pati...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.008461342, 0.004148343, 0.0069068624, 0.000...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.07133718, 0.008270531, 0.0045067696, 0.0003..."
12872,168029,chief complaint s p fall and point hct drop hp...,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0.0117470585, 0.02852733, 0.0011915335, 0.017...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","[0.0075687943, 0.02179546, 0.01719817, 0.03478..."


In [None]:
loss_mean = [nn.BCELoss()(torch.tensor(dev_df['out_mean'][i]), torch.tensor(dev_df['tar'][i])) for i in dev_df.index]
np.mean(loss_mean)

0.1455823

In [None]:
out_mean = np.vstack([dev_df['out_mean'][i]>=0.5 for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_mean)
f1_score_micro = metrics.f1_score(targets, out_mean, average='micro')
f1_score_macro = metrics.f1_score(targets, out_mean, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.39016622650302935
F1 Score (Micro) = 0.7550193034633396
F1 Score (Macro) = 0.7013807282544029


In [None]:
print(classification_report(targets, out_mean, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.9492    0.6106    0.7432      1559
       244.9     0.9297    0.5984    0.7281      1215
      250.00     0.7589    0.6684    0.7107      2373
       272.0     0.8617    0.4079    0.5537      1069
       272.4     0.7846    0.7690    0.7767      2004
       276.0     0.9276    0.6378    0.7559      1306
       276.1     0.9382    0.5209    0.6698      1079
       276.2     0.9165    0.6218    0.7409      1695
       285.1     0.8807    0.6679    0.7597      1370
       285.9     0.9274    0.5691    0.7053      1773
       287.5     0.9203    0.6675    0.7738      1176
       305.1     0.9290    0.5006    0.6506       889
         311     0.9347    0.5952    0.7273       746
      327.23     0.9725    0.7210    0.8281       835
       401.9     0.8264    0.7849    0.8051      4611
      403.90     0.9035    0.7109    0.7957      1093
      403.91     0.9175    0.5228    0.6660       681
      410.71     0.8438    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_mean, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_mean, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.825463753865351
RUC AUC Score (Macro) = 0.7931480284713704


### Most freq 5 labels

In [None]:
out_sum_dict = dev_df.groupby('id').prediction.apply(np.sum).to_dict()
dev_df['out_sum'] = dev_df['id'].map(out_sum_dict)
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum
0,182054,title,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, ...","[0.02840265, 0.8170211, 0.011177352, 0.0099834...","[0.2840265, 8.170211, 0.11177352, 0.09983445, ..."
1,188200,title,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.027963744, 0.10101249, 0.0676346, 0.0386931...","[0.22370996, 0.8080999, 0.5410768, 0.30954507,..."
2,194698,clinician attending,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.2794689, 0.19813865, 0.19990268, 0.02825292...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.025536604, 0.029549748, 0.021597313, 0.0034...","[0.28090265, 0.32504722, 0.23757045, 0.0375854..."
3,148371,wt 1510g,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.01163709, 0.0028088212, 0.00066481216, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.006541031, 0.0021932223, 0.00036609953, 0.0...","[0.032705154, 0.010966111, 0.0018304976, 0.001..."
4,149035,title image002 gif,"[0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, ...","[0.025671, 0.049724683, 0.038684838, 0.0492159...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...","[0.01408495, 0.01960959, 0.11027789, 0.0166721...","[0.04225485, 0.058828767, 0.33083367, 0.050016..."
...,...,...,...,...,...,...,...
12869,117599,chief complaint septic shock hpi yo m w st mal...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.012328103, 0.0025165812, 0.011107826, 0.003...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.12551169, 0.095337026, 0.032988783, 0.00443...","[1.0040935, 0.7626962, 0.26391026, 0.035511784..."
12870,150186,admission date discharge date date of birth se...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0035550706, 0.022705248, 0.000101734935, 3....","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.008083608, 0.3247492, 0.000543274, 0.013413...","[0.016167216, 0.6494984, 0.001086548, 0.026827..."
12871,190940,chief complaint tachypnea hypoxia hpi the pati...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.008461342, 0.004148343, 0.0069068624, 0.000...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.07133718, 0.008270531, 0.0045067696, 0.0003...","[0.21401154, 0.024811594, 0.013520309, 0.00109..."
12872,168029,chief complaint s p fall and point hct drop hp...,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0.0117470585, 0.02852733, 0.0011915335, 0.017...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","[0.0075687943, 0.02179546, 0.01719817, 0.03478...","[0.030275177, 0.08718184, 0.06879268, 0.139158..."


In [None]:
def freq_5(df, column): # column: out_sum
    df['freq_5'] = df[column]
    for idx in df.index:
      sorted = np.sort(df[column][idx])
      thres = sorted[-5] # position 5
      df['freq_5'][idx] = df[column][idx]>= thres


In [None]:
freq_5(dev_df, 'out_sum')

In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum,freq_5
0,182054,title,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, ...","[0.02840265, 0.8170211, 0.011177352, 0.0099834...","[0.2840265, 8.170211, 0.11177352, 0.09983445, ...","[False, False, False, False, False, True, Fals..."
1,188200,title,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.027963744, 0.10101249, 0.0676346, 0.0386931...","[0.22370996, 0.8080999, 0.5410768, 0.30954507,...","[False, False, False, False, False, False, Fal..."
2,194698,clinician attending,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.2794689, 0.19813865, 0.19990268, 0.02825292...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.025536604, 0.029549748, 0.021597313, 0.0034...","[0.28090265, 0.32504722, 0.23757045, 0.0375854...","[False, False, False, False, True, False, Fals..."
3,148371,wt 1510g,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.01163709, 0.0028088212, 0.00066481216, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.006541031, 0.0021932223, 0.00036609953, 0.0...","[0.032705154, 0.010966111, 0.0018304976, 0.001...","[False, False, False, False, False, False, Fal..."
4,149035,title image002 gif,"[0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, ...","[0.025671, 0.049724683, 0.038684838, 0.0492159...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...","[0.01408495, 0.01960959, 0.11027789, 0.0166721...","[0.04225485, 0.058828767, 0.33083367, 0.050016...","[False, False, False, False, True, False, Fals..."
...,...,...,...,...,...,...,...,...
12869,117599,chief complaint septic shock hpi yo m w st mal...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.012328103, 0.0025165812, 0.011107826, 0.003...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.12551169, 0.095337026, 0.032988783, 0.00443...","[1.0040935, 0.7626962, 0.26391026, 0.035511784...","[False, False, False, False, False, False, Fal..."
12870,150186,admission date discharge date date of birth se...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0035550706, 0.022705248, 0.000101734935, 3....","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.008083608, 0.3247492, 0.000543274, 0.013413...","[0.016167216, 0.6494984, 0.001086548, 0.026827...","[False, True, False, False, False, True, False..."
12871,190940,chief complaint tachypnea hypoxia hpi the pati...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.008461342, 0.004148343, 0.0069068624, 0.000...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.07133718, 0.008270531, 0.0045067696, 0.0003...","[0.21401154, 0.024811594, 0.013520309, 0.00109...","[False, False, False, False, False, False, Fal..."
12872,168029,chief complaint s p fall and point hct drop hp...,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0.0117470585, 0.02852733, 0.0011915335, 0.017...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","[0.0075687943, 0.02179546, 0.01719817, 0.03478...","[0.030275177, 0.08718184, 0.06879268, 0.139158...","[False, False, False, False, True, False, Fals..."


In [None]:
out_freq_5 = np.vstack([dev_df['freq_5'][i] for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_freq_5)
f1_score_micro = metrics.f1_score(targets, out_freq_5, average='micro')
f1_score_macro = metrics.f1_score(targets, out_freq_5, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.05111076588472891
F1 Score (Micro) = 0.6223212501202893
F1 Score (Macro) = 0.5546045873902407


In [None]:
print(classification_report(targets, out_freq_5, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.7931    0.4426    0.5681      1559
       244.9     0.6991    0.4724    0.5639      1215
      250.00     0.4977    0.6481    0.5631      2373
       272.0     0.4794    0.3592    0.4107      1069
       272.4     0.5816    0.7769    0.6652      2004
       276.0     0.7181    0.4311    0.5388      1306
       276.1     0.6543    0.3753    0.4770      1079
       276.2     0.6171    0.5021    0.5537      1695
       285.1     0.5887    0.6007    0.5947      1370
       285.9     0.7056    0.4055    0.5150      1773
       287.5     0.7140    0.5264    0.6060      1176
       305.1     0.7038    0.4409    0.5422       889
         311     0.6867    0.3673    0.4786       746
      327.23     0.8436    0.4910    0.6207       835
       401.9     0.6476    0.8573    0.7378      4611
      403.90     0.7576    0.4346    0.5523      1093
      403.91     0.7668    0.5022    0.6069       681
      410.71     0.5863    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_freq_5, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_freq_5, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.7776987106781915
RUC AUC Score (Macro) = 0.742212619277367


In [None]:
precision_micro = metrics.average_precision_score(targets, out_freq_5, average='micro')
precision_macro = metrics.average_precision_score(targets, out_freq_5, average='macro')

print(f"Average Precision Score (Micro) = {precision_micro}")
print(f"Average Precision Score (Macro) = {precision_macro}")

Average Precision Score (Micro) = 0.43270584657382694
Average Precision Score (Macro) = 0.39210169679023116


### Predicted percentage

In [None]:
note_count_dict = dev_df.groupby('id').size().to_dict()
dev_df['note_count'] = dev_df['id'].map(note_count_dict)

In [None]:
dev_df['out_bool'] = [(dev_df['prediction'][i]>=0.5).astype(int) for i in dev_df.index]

In [None]:

out_freq_dict = dev_df.groupby('id').out_bool.apply(np.sum).to_dict()
dev_df['num_pred'] = dev_df['id'].map(out_freq_dict)
dev_df['num_pred'] = [(dev_df['num_pred'][i]>=0.4*dev_df['note_count'][i]).astype(int) for i in dev_df.index]

In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum,freq_5,note_count,out_bool,num_pred
0,182054,title,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, ...","[0.02840265, 0.8170211, 0.011177352, 0.0099834...","[0.2840265, 8.170211, 0.11177352, 0.09983445, ...","[False, False, False, False, False, True, Fals...",10,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."
1,188200,title,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.027963744, 0.10101249, 0.0676346, 0.0386931...","[0.22370996, 0.8080999, 0.5410768, 0.30954507,...","[False, False, False, False, False, False, Fal...",8,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ..."
2,194698,clinician attending,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.2794689, 0.19813865, 0.19990268, 0.02825292...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.025536604, 0.029549748, 0.021597313, 0.0034...","[0.28090265, 0.32504722, 0.23757045, 0.0375854...","[False, False, False, False, True, False, Fals...",11,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
3,148371,wt 1510g,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.01163709, 0.0028088212, 0.00066481216, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.006541031, 0.0021932223, 0.00036609953, 0.0...","[0.032705154, 0.010966111, 0.0018304976, 0.001...","[False, False, False, False, False, False, Fal...",5,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,149035,title image002 gif,"[0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, ...","[0.025671, 0.049724683, 0.038684838, 0.0492159...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...","[0.01408495, 0.01960959, 0.11027789, 0.0166721...","[0.04225485, 0.058828767, 0.33083367, 0.050016...","[False, False, False, False, True, False, Fals...",3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
...,...,...,...,...,...,...,...,...,...,...,...
12869,117599,chief complaint septic shock hpi yo m w st mal...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.012328103, 0.0025165812, 0.011107826, 0.003...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.12551169, 0.095337026, 0.032988783, 0.00443...","[1.0040935, 0.7626962, 0.26391026, 0.035511784...","[False, False, False, False, False, False, Fal...",8,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ..."
12870,150186,admission date discharge date date of birth se...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0035550706, 0.022705248, 0.000101734935, 3....","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.008083608, 0.3247492, 0.000543274, 0.013413...","[0.016167216, 0.6494984, 0.001086548, 0.026827...","[False, True, False, False, False, True, False...",2,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
12871,190940,chief complaint tachypnea hypoxia hpi the pati...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.008461342, 0.004148343, 0.0069068624, 0.000...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.07133718, 0.008270531, 0.0045067696, 0.0003...","[0.21401154, 0.024811594, 0.013520309, 0.00109...","[False, False, False, False, False, False, Fal...",3,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ..."
12872,168029,chief complaint s p fall and point hct drop hp...,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0.0117470585, 0.02852733, 0.0011915335, 0.017...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","[0.0075687943, 0.02179546, 0.01719817, 0.03478...","[0.030275177, 0.08718184, 0.06879268, 0.139158...","[False, False, False, False, True, False, Fals...",4,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ..."


In [None]:
out_freq = np.vstack([dev_df['num_pred'][i] for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_freq)
f1_score_micro = metrics.f1_score(targets, out_freq, average='micro')
f1_score_macro = metrics.f1_score(targets, out_freq, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.3884573559111387
F1 Score (Micro) = 0.7555981088523227
F1 Score (Macro) = 0.716626217417402


In [None]:
print(classification_report(targets, out_freq, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.8818    0.6844    0.7707      1559
       244.9     0.8549    0.6642    0.7476      1215
      250.00     0.6530    0.7320    0.6902      2373
       272.0     0.7549    0.4640    0.5747      1069
       272.4     0.6642    0.8034    0.7272      2004
       276.0     0.8606    0.7090    0.7775      1306
       276.1     0.8880    0.5876    0.7072      1079
       276.2     0.8311    0.6879    0.7527      1695
       285.1     0.7936    0.7212    0.7556      1370
       285.9     0.8567    0.6340    0.7287      1773
       287.5     0.8179    0.7219    0.7669      1176
       305.1     0.8998    0.5759    0.7023       889
         311     0.8707    0.6408    0.7382       746
      327.23     0.9314    0.7641    0.8395       835
       401.9     0.7404    0.8380    0.7862      4611
      403.90     0.8227    0.7557    0.7878      1093
      403.91     0.8443    0.6211    0.7157       681
      410.71     0.7162    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_freq, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_freq, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.8484387511369199
RUC AUC Score (Macro) = 0.8182555925341727


In [None]:
precision_micro = metrics.average_precision_score(targets, out_freq, average='micro')
precision_macro = metrics.average_precision_score(targets, out_freq, average='macro')

print(f"Average Precision Score (Micro) = {precision_micro}")
print(f"Average Precision Score (Macro) = {precision_macro}")

Average Precision Score (Micro) = 0.6031368370812864
Average Precision Score (Macro) = 0.5711892284893095


### Exponential moving average

In [None]:
def ewma(sub_df, window=3):
#    print(sub_df)
    alpha = 2 / (window + 1)
#    print(sub_df['ewma'])
    sub_df['ewma'] = sub_df['out_bool']
    for r in range(len(sub_df)):
        if r == 0:
            sub_df['ewma'].iloc[r] = sub_df['prediction'].iloc[r]
        else:
            sub_df['ewma'].iloc[r] = alpha*sub_df['prediction'].iloc[r] + (1-alpha)*sub_df['prediction'].iloc[r-1]
 #   print(type(sub_df['ewma']))
    return sub_df['ewma']

In [None]:

out_ewma_dict = dev_df.groupby('id', group_keys=False).apply(ewma).to_dict()
dev_df['out_ewma'] = pd.Series(dev_df.index, index=dev_df.index).map(out_ewma_dict)



In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum,freq_5,note_count,out_bool,num_pred,out_ewma
0,182054,title,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, ...","[0.02840265, 0.8170211, 0.011177352, 0.0099834...","[0.2840265, 8.170211, 0.11177352, 0.09983445, ...","[False, False, False, False, False, True, Fals...",10,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337..."
1,188200,title,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.027963744, 0.10101249, 0.0676346, 0.0386931...","[0.22370996, 0.8080999, 0.5410768, 0.30954507,...","[False, False, False, False, False, False, Fal...",8,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337..."
2,194698,clinician attending,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.2794689, 0.19813865, 0.19990268, 0.02825292...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.025536604, 0.029549748, 0.021597313, 0.0034...","[0.28090265, 0.32504722, 0.23757045, 0.0375854...","[False, False, False, False, True, False, Fals...",11,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.2794689, 0.19813865, 0.19990268, 0.02825292..."
3,148371,wt 1510g,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.01163709, 0.0028088212, 0.00066481216, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.006541031, 0.0021932223, 0.00036609953, 0.0...","[0.032705154, 0.010966111, 0.0018304976, 0.001...","[False, False, False, False, False, False, Fal...",5,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.01163709, 0.0028088212, 0.00066481216, 0.00..."
4,149035,title image002 gif,"[0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, ...","[0.025671, 0.049724683, 0.038684838, 0.0492159...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...","[0.01408495, 0.01960959, 0.11027789, 0.0166721...","[0.04225485, 0.058828767, 0.33083367, 0.050016...","[False, False, False, False, True, False, Fals...",3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.025671, 0.049724683, 0.038684838, 0.0492159..."
...,...,...,...,...,...,...,...,...,...,...,...,...
12869,117599,chief complaint septic shock hpi yo m w st mal...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.012328103, 0.0025165812, 0.011107826, 0.003...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.12551169, 0.095337026, 0.032988783, 0.00443...","[1.0040935, 0.7626962, 0.26391026, 0.035511784...","[False, False, False, False, False, False, Fal...",8,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.006912789, 0.0028108452, 0.091313206, 0.003..."
12870,150186,admission date discharge date date of birth se...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0035550706, 0.022705248, 0.000101734935, 3....","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.008083608, 0.3247492, 0.000543274, 0.013413...","[0.016167216, 0.6494984, 0.001086548, 0.026827...","[False, True, False, False, False, True, False...",2,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.008083608, 0.3247492, 0.000543274, 0.013413..."
12871,190940,chief complaint tachypnea hypoxia hpi the pati...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.008461342, 0.004148343, 0.0069068624, 0.000...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.07133718, 0.008270531, 0.0045067696, 0.0003...","[0.21401154, 0.024811594, 0.013520309, 0.00109...","[False, False, False, False, False, False, Fal...",3,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.03044403, 0.009985008, 0.0051079635, 0.0003..."
12872,168029,chief complaint s p fall and point hct drop hp...,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0.0117470585, 0.02852733, 0.0011915335, 0.017...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","[0.0075687943, 0.02179546, 0.01719817, 0.03478...","[0.030275177, 0.08718184, 0.06879268, 0.139158...","[False, False, False, False, True, False, Fals...",4,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0.0066120722, 0.017200183, 0.02455203, 0.0408..."


In [None]:
out_ewma = np.vstack([dev_df['out_ewma'][i]>0.5 for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_ewma)
f1_score_micro = metrics.f1_score(targets, out_ewma, average='micro')
f1_score_macro = metrics.f1_score(targets, out_ewma, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.2934596861892186
F1 Score (Micro) = 0.6778684278684278
F1 Score (Macro) = 0.6275370592911146


In [None]:
print(classification_report(targets, out_ewma, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.8519    0.5497    0.6682      1559
       244.9     0.8399    0.5267    0.6474      1215
      250.00     0.6571    0.5967    0.6254      2373
       272.0     0.7355    0.3564    0.4802      1069
       272.4     0.6731    0.6831    0.6781      2004
       276.0     0.8431    0.5597    0.6728      1306
       276.1     0.8798    0.4680    0.6110      1079
       276.2     0.8043    0.5528    0.6552      1695
       285.1     0.8064    0.5927    0.6832      1370
       285.9     0.8577    0.5065    0.6369      1773
       287.5     0.7984    0.5893    0.6781      1176
       305.1     0.8938    0.4353    0.5855       889
         311     0.8767    0.5054    0.6412       746
      327.23     0.9162    0.6024    0.7269       835
       401.9     0.7486    0.7116    0.7296      4611
      403.90     0.8106    0.6029    0.6915      1093
      403.91     0.8226    0.4699    0.5981       681
      410.71     0.7173    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_ewma, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_ewma, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.7865051309092173
RUC AUC Score (Macro) = 0.7553030330468673


In [None]:
precision_micro = metrics.average_precision_score(targets, out_ewma, average='micro')
precision_macro = metrics.average_precision_score(targets, out_ewma, average='macro')

print(f"Average Precision Score (Micro) = {precision_micro}")
print(f"Average Precision Score (Macro) = {precision_macro}")

Average Precision Score (Micro) = 0.5141572875751793
Average Precision Score (Macro) = 0.47640283921549764


### Most frequent prediction

In [None]:

most_freq_dict = dev_df.groupby('id')['out_bool'].apply(lambda x: x.value_counts().index[0]).to_dict()
dev_df['most_freq'] = dev_df['id'].map(most_freq_dict)

In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum,freq_5,note_count,out_bool,num_pred,out_ewma,most_freq
0,182054,title,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, ...","[0.02840265, 0.8170211, 0.011177352, 0.0099834...","[0.2840265, 8.170211, 0.11177352, 0.09983445, ...","[False, False, False, False, False, True, Fals...",10,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."
1,188200,title,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.027963744, 0.10101249, 0.0676346, 0.0386931...","[0.22370996, 0.8080999, 0.5410768, 0.30954507,...","[False, False, False, False, False, False, Fal...",8,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0.08706212, 0.29305393, 0.04824462, 0.0873337...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ..."
2,194698,clinician attending,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.2794689, 0.19813865, 0.19990268, 0.02825292...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.025536604, 0.029549748, 0.021597313, 0.0034...","[0.28090265, 0.32504722, 0.23757045, 0.0375854...","[False, False, False, False, True, False, Fals...",11,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.2794689, 0.19813865, 0.19990268, 0.02825292...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
3,148371,wt 1510g,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.01163709, 0.0028088212, 0.00066481216, 0.00...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.006541031, 0.0021932223, 0.00036609953, 0.0...","[0.032705154, 0.010966111, 0.0018304976, 0.001...","[False, False, False, False, False, False, Fal...",5,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.01163709, 0.0028088212, 0.00066481216, 0.00...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,149035,title image002 gif,"[0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, ...","[0.025671, 0.049724683, 0.038684838, 0.0492159...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...","[0.01408495, 0.01960959, 0.11027789, 0.0166721...","[0.04225485, 0.058828767, 0.33083367, 0.050016...","[False, False, False, False, True, False, Fals...",3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.025671, 0.049724683, 0.038684838, 0.0492159...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12869,117599,chief complaint septic shock hpi yo m w st mal...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.012328103, 0.0025165812, 0.011107826, 0.003...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.12551169, 0.095337026, 0.032988783, 0.00443...","[1.0040935, 0.7626962, 0.26391026, 0.035511784...","[False, False, False, False, False, False, Fal...",8,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.006912789, 0.0028108452, 0.091313206, 0.003...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ..."
12870,150186,admission date discharge date date of birth se...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0035550706, 0.022705248, 0.000101734935, 3....","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.008083608, 0.3247492, 0.000543274, 0.013413...","[0.016167216, 0.6494984, 0.001086548, 0.026827...","[False, True, False, False, False, True, False...",2,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.008083608, 0.3247492, 0.000543274, 0.013413...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
12871,190940,chief complaint tachypnea hypoxia hpi the pati...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.008461342, 0.004148343, 0.0069068624, 0.000...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.07133718, 0.008270531, 0.0045067696, 0.0003...","[0.21401154, 0.024811594, 0.013520309, 0.00109...","[False, False, False, False, False, False, Fal...",3,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.03044403, 0.009985008, 0.0051079635, 0.0003...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ..."
12872,168029,chief complaint s p fall and point hct drop hp...,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0.0117470585, 0.02852733, 0.0011915335, 0.017...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","[0.0075687943, 0.02179546, 0.01719817, 0.03478...","[0.030275177, 0.08718184, 0.06879268, 0.139158...","[False, False, False, False, True, False, Fals...",4,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ...","[0.0066120722, 0.017200183, 0.02455203, 0.0408...","[0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, ..."


In [None]:
out_most_freq = np.vstack([dev_df['most_freq'][i] for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_most_freq)
f1_score_micro = metrics.f1_score(targets, out_most_freq, average='micro')
f1_score_macro = metrics.f1_score(targets, out_most_freq, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.32049091191548856
F1 Score (Micro) = 0.6781921079153422
F1 Score (Macro) = 0.6382888095446764


In [None]:
print(classification_report(targets, out_most_freq, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.8335    0.5843    0.6870      1559
       244.9     0.7620    0.5613    0.6464      1215
      250.00     0.6353    0.6380    0.6367      2373
       272.0     0.6573    0.3714    0.4746      1069
       272.4     0.6169    0.6861    0.6497      2004
       276.0     0.7844    0.5766    0.6646      1306
       276.1     0.8388    0.5014    0.6276      1079
       276.2     0.7438    0.6183    0.6753      1695
       285.1     0.7520    0.6131    0.6755      1370
       285.9     0.8254    0.5014    0.6239      1773
       287.5     0.6995    0.6573    0.6778      1176
       305.1     0.8172    0.4927    0.6147       889
         311     0.7662    0.4920    0.5992       746
      327.23     0.8328    0.6323    0.7189       835
       401.9     0.7266    0.7103    0.7184      4611
      403.90     0.7747    0.6231    0.6907      1093
      403.91     0.7937    0.5198    0.6282       681
      410.71     0.6450    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_most_freq, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_most_freq, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.7965648896440649
RUC AUC Score (Macro) = 0.7700780638254362


In [None]:
precision_micro = metrics.average_precision_score(targets, out_most_freq, average='micro')
precision_macro = metrics.average_precision_score(targets, out_most_freq, average='macro')

print(f"Average Precision Score (Micro) = {precision_micro}")
print(f"Average Precision Score (Macro) = {precision_macro}")

Average Precision Score (Micro) = 0.5061865340810336
Average Precision Score (Macro) = 0.47669828707714834


In [None]:
def precision_at_5(df, column): # column: prediction
    df['p@5'] = 0
    for idx in df.index:
        idx_sorted = np.argsort(df[column][idx])
        idcs = idx_sorted[-3:]
        perc = (3 - sum(df['tar'][idx][idcs])) / 3
        df['p@5'][idx] = perc

### Testing

In [32]:
# Evaluate the model

# Evaluate the model

def testing():
    model.eval()

    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy())
    print(f'Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [33]:
test_out, targets, losses = testing()
outputs = np.array(test_out) >= 0.5
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Loss:  0.20
F1 Score (Micro) = 0.6514807094820467
F1 Score (Macro) = 0.605931181537368


In [34]:
print(classification_report(targets, outputs, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.7672    0.5587    0.6466      1516
       244.9     0.7313    0.5230    0.6098      1176
      250.00     0.5920    0.5792    0.5856      2360
       272.0     0.6841    0.3753    0.4847      1079
       272.4     0.6074    0.6734    0.6387      2070
       276.0     0.7773    0.5715    0.6587      1307
       276.1     0.7974    0.4591    0.5828      1089
       276.2     0.7340    0.5495    0.6285      1607
       285.1     0.7176    0.5865    0.6455      1352
       285.9     0.7627    0.4805    0.5896      1692
       287.5     0.6851    0.5647    0.6191      1229
       305.1     0.8242    0.4441    0.5772       876
         311     0.8026    0.4766    0.5980       768
      327.23     0.8295    0.6133    0.7052       825
       401.9     0.7163    0.6958    0.7059      4626
      403.90     0.7198    0.5844    0.6451      1090
      403.91     0.7535    0.4888    0.5929       669
      410.71     0.6733    

In [35]:

ruc_auc_score_micro = metrics.roc_auc_score(targets, outputs, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, outputs, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.7806303676118947
RUC AUC Score (Macro) = 0.7509955382766232


In [36]:

test_df['prediction'] = test_out
test_df['tar'] = targets

In [37]:
note_count_dict = test_df.groupby('id').size().to_dict()
test_df['note_count'] = test_df['id'].map(note_count_dict)

In [38]:
test_df['out_bool'] = [(test_df['prediction'][i]>=0.5).astype(int) for i in test_df.index]

In [39]:

out_freq_dict = test_df.groupby('id').out_bool.apply(np.sum).to_dict()
test_df['num_pred'] = test_df['id'].map(out_freq_dict)
test_df['num_pred'] = [(test_df['num_pred'][i]>=0.4*test_df['note_count'][i]).astype(int) for i in test_df.index]

In [40]:
df_freq = test_df.drop_duplicates('id')

In [41]:
out_freq = np.vstack([df_freq['num_pred'][i] for i in df_freq.index])
targets = np.vstack([df_freq['tar'][i] for i in df_freq.index])
#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_freq)
f1_score_micro = metrics.f1_score(targets, out_freq, average='micro')
f1_score_macro = metrics.f1_score(targets, out_freq, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.22522969952818475
F1 Score (Micro) = 0.6241882496752998
F1 Score (Macro) = 0.5734793566579839


In [42]:
print(classification_report(targets, out_freq, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.7213    0.5254    0.6079       394
       244.9     0.6870    0.4681    0.5568       361
      250.00     0.5157    0.5922    0.5513       721
       272.0     0.6493    0.3703    0.4716       370
       272.4     0.5469    0.6809    0.6066       677
       276.0     0.6825    0.5165    0.5880       333
       276.1     0.7189    0.3844    0.5009       346
       276.2     0.6588    0.5068    0.5729       438
       285.1     0.6542    0.5622    0.6047       434
       285.9     0.7232    0.4122    0.5251       507
       287.5     0.6022    0.4925    0.5419       335
       305.1     0.8369    0.4111    0.5514       287
         311     0.7214    0.3992    0.5140       253
      327.23     0.8000    0.5421    0.6462       214
       401.9     0.6779    0.7575    0.7155      1567
      403.90     0.6255    0.5473    0.5838       296
      403.91     0.7153    0.4317    0.5385       227
      410.71     0.6113    

In [43]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_freq, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_freq, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.7751293387553989
RUC AUC Score (Macro) = 0.7401569440848231
