In [1]:
!pip install -q transformers

[K     |████████████████████████████████| 2.6 MB 15.6 MB/s 
[K     |████████████████████████████████| 3.3 MB 53.9 MB/s 
[K     |████████████████████████████████| 895 kB 51.6 MB/s 
[K     |████████████████████████████████| 636 kB 62.5 MB/s 
[?25h

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%matplotlib inline
import os
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
from sklearn.metrics import classification_report
import re
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer
from transformers import AutoTokenizer, AutoModel
from transformers import BertForSequenceClassification, AdamW, BertConfig
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.data.sampler import SubsetRandomSampler
import transformers
from transformers import RobertaTokenizer, BertTokenizer, RobertaModel, BertModel, AdamW# get_linear_schedule_with_warmup
from transformers import get_linear_schedule_with_warmup
import time

!cp drive/MyDrive/Colab\ Notebooks/MSc-Individual-Project/utils.py .
from utils import *
!cp drive/MyDrive/Colab\ Notebooks/MSc-Individual-Project/Custom_Dataset_Class.py .
from Custom_Dataset_Class import CustomDataset
!cp drive/MyDrive/Colab\ Notebooks/MSc-Individual-Project/pytorchtools.py .
from pytorchtools import EarlyStopping
#from Bert_Classification import Bert_Classification_Model
#from RoBERT import RoBERT_Model

#from BERT_Hierarchical import BERT_Hierarchical_Model
import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import LabelBinarizer

In [4]:
import torch
# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")


There are 1 GPU(s) available.
We will use the GPU: Tesla V100-SXM2-16GB


In [5]:
np.random.seed(123)
torch.manual_seed(123)
torch.cuda.manual_seed_all(123)

In [6]:
#change to where you store mimic3 data
MIMIC_3_DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/datasets_date'

train_df = pd.read_csv('%s/train_50_first.csv' % MIMIC_3_DIR)
eval_df = pd.read_csv('%s/dev_50_first.csv' % MIMIC_3_DIR)
test_df = pd.read_csv('%s/test_50_first.csv' % MIMIC_3_DIR)

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,LABELS,length
0,72671,188200,title,311;276.2;518.81;486;427.31,1
1,10502,145440,title,414.01;412;403.90;496;530.81;518.81;486;584.9;...,1
2,58515,125506,full code,414.01;038.9;585.9;403.90;276.2;995.92;518.81;...,2
3,12801,182897,sinus rhythm,401.9,2
4,15807,173079,atrial fibrillation,427.31;410.71,2


In [7]:
full_df = pd.concat([train_df, eval_df, test_df], ignore_index=True)

In [8]:
# split labels by ";", then convert to list
def split_lab (x):
    #print(x)
    return x.split(";")

full_df['LABELS'] = full_df['LABELS'].apply(split_lab)
#full_df['TEXT'] = full_df['TEXT'].apply(split_lab)

full_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,LABELS,length
0,72671,188200,title,"[311, 276.2, 518.81, 486, 427.31]",1
1,10502,145440,title,"[414.01, 412, 403.90, 496, 530.81, 518.81, 486...",1
2,58515,125506,full code,"[414.01, 038.9, 585.9, 403.90, 276.2, 995.92, ...",2
3,12801,182897,sinus rhythm,[401.9],2
4,15807,173079,atrial fibrillation,"[427.31, 410.71]",2


In [9]:
#load multi label binarizer for one-hot encoding
mlb = MultiLabelBinarizer(sparse_output=True)

#labels_onehot = mlb.fit_transform(train_df.pop('LABELS'))
#labels_onehot[0][1]

In [10]:
#change label to one-hot encoding per code
full_df = full_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(full_df.pop('LABELS')),
                columns=mlb.classes_))

full_df

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,length,038.9,244.9,250.00,272.0,272.4,276.0,276.1,276.2,285.1,285.9,287.5,305.1,311,327.23,401.9,403.90,403.91,410.71,412,414.01,424.0,424.1,427.31,427.89,428.0,486,493.90,496,507.0,511.9,518.0,518.81,530.81,584.5,584.9,585.9,599.0,774.2,785.52,995.92,997.1,V05.3,V15.82,V29.0,V30.00,V30.01,V45.81,V45.82,V58.61,V58.67
0,72671,188200,title,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,10502,145440,title,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,58515,125506,full code,2,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0
3,12801,182897,sinus rhythm,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,15807,173079,atrial fibrillation,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19253,99339,142289,chief complaint chief complaint increasing ple...,1741,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
19254,95889,113433,title chief complaint s p cardiopulmonary arre...,1756,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
19255,95889,113433,title chief complaint s p cardiopulmonary arre...,1760,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
19256,90165,144543,chief complaint malaise hpi 47m with a history...,1789,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [11]:
# Convert columns to list of one hot encoding
icd_classes_50 = mlb.classes_

full_df['labels'] = full_df[icd_classes_50].values.tolist()
#train_df.sort_values(['length'], ascending=False, inplace=True)
full_df.head()


Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,length,038.9,244.9,250.00,272.0,272.4,276.0,276.1,276.2,285.1,285.9,287.5,305.1,311,327.23,401.9,403.90,403.91,410.71,412,414.01,424.0,424.1,427.31,427.89,428.0,486,493.90,496,507.0,511.9,518.0,518.81,530.81,584.5,584.9,585.9,599.0,774.2,785.52,995.92,997.1,V05.3,V15.82,V29.0,V30.00,V30.01,V45.81,V45.82,V58.61,V58.67,labels
0,72671,188200,title,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ..."
1,10502,145440,title,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,58515,125506,full code,2,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."
3,12801,182897,sinus rhythm,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
4,15807,173079,atrial fibrillation,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [12]:
full_df.HADM_ID.unique().shape

(4857,)

In [13]:
train_df, test_df = train_test_split(full_df, test_size=0.2)

In [14]:
train_df, eval_df = train_test_split(train_df, test_size=0.2)

In [15]:
dev_df, test_df = train_test_split(test_df, test_size=0.5)

In [16]:
train_df.sort_values(['length'], inplace=True)
eval_df.sort_values(['length'], inplace=True)
dev_df.sort_values(['length'], inplace=True)
test_df.sort_values(['length'], inplace=True)


In [17]:
#convert into 2 columns dataframe
train_df = pd.DataFrame(train_df, columns=['TEXT', 'labels'])
train_df.columns=['text', 'labels']
train_df.head()

eval_df = pd.DataFrame(eval_df, columns=['TEXT', 'labels'])
eval_df.columns=['text', 'labels']
eval_df.head()

dev_df = pd.DataFrame(dev_df, columns=['HADM_ID', 'TEXT', 'labels'])
dev_df.columns=['id', 'text', 'labels']
dev_df.head()

test_df = pd.DataFrame(test_df, columns=['HADM_ID', 'TEXT', 'labels'])
test_df.columns=['id', 'text', 'labels']
test_df.head()

Unnamed: 0,id,text,labels
9,148371,wt 1510g,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
20,148761,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."
51,104512,addendum hc is cm length is cm,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
67,142209,sinus tachycardia st junctional depression is ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
63,169259,sinus rhythm inferior lateral st t changes,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [18]:
train_df.reset_index(drop=True, inplace=True)
eval_df.reset_index(drop=True, inplace=True)
dev_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)
test_df.head()

Unnamed: 0,id,text,labels
0,148371,wt 1510g,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,148761,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."
2,104512,addendum hc is cm length is cm,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,142209,sinus tachycardia st junctional depression is ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,169259,sinus rhythm inferior lateral st t changes,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [19]:
# Creating the customized model, by adding a drop out and a dense layer on top of distil bert to get the final output for the model.

class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        '''
            Load Pretrained model here
            Use return_dict=False for compatibility for 4.x

        '''
        self.l1 = transformers.AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT", return_dict=False)
        #self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)


        self.l2 = torch.nn.Dropout(0.3)

        '''
            Changed Linear Output layer to 50 based on the class
        '''
        self.l3 = torch.nn.Linear(768, 50)

    def forward(self, ids, mask, token_type_ids):
#        print("ids: ", ids.size(), "mask: ", mask.size(), "token type ids: ", token_type_ids.size())
        _, output_1= self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

model = BERTClass()
model.to(device)

Downloading:   0%|          | 0.00/385 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [20]:
# Defining some key variables to configure model training
MAX_LEN = 512
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 8
TEST_BATCH_SIZE = 8
EPOCHS = 10
LEARNING_RATE = 3e-05

#set tokenizer
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

#custom dataset for BERT class
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        
        '''
            set text as training data
            set labels as targets
        '''
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.labels
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }



Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

In [21]:
#load df to dataset

training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
valid_set = CustomDataset(eval_df, tokenizer, MAX_LEN)
dev_set = CustomDataset(dev_df, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_df, tokenizer, MAX_LEN)

In [22]:
#data loader
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': False
                }

val_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': False
                }

dev_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

training_loader = DataLoader(training_set, **train_params)
valid_loader = DataLoader(valid_set, **val_params)
dev_loader = DataLoader(dev_set, **dev_params)
testing_loader = DataLoader(testing_set, **test_params)

In [23]:
#loss function
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

#optimizer
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [24]:
def train(epoch):
    model.train()
    for _,data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        
    print(f'Epoch: {epoch}, Training Loss:  {loss.item()}')

In [25]:
# Evaluate the model

def validation(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(valid_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    print(f'Epoch: {epoch}, Validation Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [None]:
start_epoch=0
DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/'
resume = True     
if resume:
    if os.path.isfile(f"%s/models/model_firstday_epoch{start_epoch}.pth" % DIR):
        print("Resume from checkpoint...")
        checkpoint = torch.load(f"%s/models/model_firstday_epoch{start_epoch}.pth" % DIR)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        initepoch = checkpoint['epoch']
        print("====>loaded checkpoint (epoch{})".format(checkpoint['epoch']))
    else:
        print("====>no checkpoint found.")
        initepoch = 0

#patience = 3
#early_stopping = EarlyStopping(patience, verbose=True)


for epoch in tqdm(range(EPOCHS)):
    train(epoch)
    validation(epoch)

    if (epoch+start_epoch+1)%5 == 0:
        checkpoint = {"model_state_dict": model.state_dict(),
                      "optimizer_state_dict": optimizer.state_dict(),
                      "epoch": epoch+start_epoch+1}
        path_checkpoint = f"%s/models/model_firstday_epoch{epoch+start_epoch+1}.pth" % DIR
        torch.save(checkpoint, path_checkpoint)

#

====>no checkpoint found.


  0%|          | 0/10 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Training Loss:  0.2965371310710907


 10%|█         | 1/10 [06:53<1:01:59, 413.27s/it]

Epoch: 0, Validation Loss:  0.29
Epoch: 1, Training Loss:  0.23691722750663757


 20%|██        | 2/10 [13:46<55:03, 412.97s/it]  

Epoch: 1, Validation Loss:  0.28
Epoch: 2, Training Loss:  0.24114012718200684


 30%|███       | 3/10 [20:39<48:11, 413.10s/it]

Epoch: 2, Validation Loss:  0.28
Epoch: 3, Training Loss:  0.22386956214904785


 40%|████      | 4/10 [27:30<41:14, 412.39s/it]

Epoch: 3, Validation Loss:  0.28
Epoch: 4, Training Loss:  0.2236071228981018
Epoch: 4, Validation Loss:  0.28


 50%|█████     | 5/10 [34:27<34:30, 414.08s/it]

Epoch: 5, Training Loss:  0.20151127874851227


 60%|██████    | 6/10 [41:18<27:32, 413.10s/it]

Epoch: 5, Validation Loss:  0.28
Epoch: 6, Training Loss:  0.17354914546012878


 70%|███████   | 7/10 [48:10<20:38, 412.72s/it]

Epoch: 6, Validation Loss:  0.28
Epoch: 7, Training Loss:  0.16346170008182526


 80%|████████  | 8/10 [55:02<13:44, 412.49s/it]

Epoch: 7, Validation Loss:  0.28
Epoch: 8, Training Loss:  0.15545330941677094


 90%|█████████ | 9/10 [1:01:53<06:51, 411.98s/it]

Epoch: 8, Validation Loss:  0.28
Epoch: 9, Training Loss:  0.13488472998142242
Epoch: 9, Validation Loss:  0.28


100%|██████████| 10/10 [1:08:50<00:00, 413.07s/it]


In [26]:
DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/'

checkpoint = torch.load(f"%s/models/model_firstday_epoch10.pth" % DIR)
model.load_state_dict(checkpoint['model_state_dict'])



<All keys matched successfully>

In [None]:
# Evaluate the model

def evaluation():
    model.eval()

    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(dev_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy())
    print(f'Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [None]:
dev_out, dev_tar, losses = evaluation()

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Loss:  0.29


### Normal evaluation

In [None]:
outputs = np.array(dev_out) >= 0.5
targets = dev_tar
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.024402907580477674
F1 Score (Micro) = 0.33344338065368107
F1 Score (Macro) = 0.21581363574187162


In [None]:
print(classification_report(targets, outputs, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.4191    0.2740    0.3314       208
       244.9     0.4286    0.0355    0.0656       169
      250.00     0.1818    0.0184    0.0334       326
       272.0     0.0000    0.0000    0.0000       178
       272.4     0.3687    0.2466    0.2955       296
       276.0     0.3905    0.2228    0.2837       184
       276.1     0.7333    0.0556    0.1033       198
       276.2     0.3902    0.1270    0.1916       252
       285.1     0.5603    0.2826    0.3757       230
       285.9     0.3214    0.0378    0.0677       238
       287.5     0.2812    0.0600    0.0989       150
       305.1     0.6000    0.0176    0.0343       170
         311     0.0000    0.0000    0.0000       141
      327.23     1.0000    0.0331    0.0640       121
       401.9     0.5382    0.4689    0.5012       691
      403.90     0.3882    0.2012    0.2651       164
      403.91     0.7500    0.1748    0.2835       103
      410.71     0.3636    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, outputs, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, outputs, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6099432255953408
RUC AUC Score (Macro) = 0.5791104348051366


In [None]:
dev_df['prediction'] = dev_out
dev_df['tar'] = dev_tar

In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar
0,125506,full code,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.028138086, 0.06421181, 0.17812476, 0.030674...","[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
1,101685,fellow note,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.040344145, 0.031939287, 0.17625813, 0.08802...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,148308,ventricular paced rhythm,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.066668995, 0.035746764, 0.1727332, 0.165264...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
3,171359,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.021901356, 0.014694277, 0.047177106, 0.0742...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,166415,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.021901356, 0.014694277, 0.047177106, 0.0742...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...,...
1921,181458,title chief complaint hypotension nausea hpi m...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, ...","[0.8666995, 0.27741352, 0.1201146, 0.029148852...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ..."
1922,153703,chief complaint a fib with rvr and hypotension...,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.84345543, 0.16997926, 0.26213166, 0.0281212...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
1923,127867,chief complaint expressive aphasia hpi history...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0.003747494, 0.040779486, 0.106747955, 0.0660...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1924,183600,chief complaint hypotension brbpr hpi yo f wit...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0.085624255, 0.059087086, 0.07579275, 0.01742...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ..."


### Avg outputs

In [None]:
out_mean_dict = dev_df.groupby('id').prediction.apply(np.mean).to_dict()
dev_df['out_mean'] = dev_df['id'].map(out_mean_dict)
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean
0,125506,full code,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.028138086, 0.06421181, 0.17812476, 0.030674...","[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.02669367, 0.13270368, 0.24077472, 0.0381074..."
1,101685,fellow note,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.040344145, 0.031939287, 0.17625813, 0.08802...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.023675542, 0.020369032, 0.09173657, 0.04929..."
2,148308,ventricular paced rhythm,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.066668995, 0.035746764, 0.1727332, 0.165264...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.066668995, 0.035746764, 0.1727332, 0.165264..."
3,171359,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.021901356, 0.014694277, 0.047177106, 0.0742...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.021901356, 0.014694277, 0.047177106, 0.0742..."
4,166415,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.021901356, 0.014694277, 0.047177106, 0.0742...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.100293815, 0.037792653, 0.12295477, 0.05390..."
...,...,...,...,...,...,...
1921,181458,title chief complaint hypotension nausea hpi m...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, ...","[0.8666995, 0.27741352, 0.1201146, 0.029148852...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.8666995, 0.27741352, 0.1201146, 0.029148852..."
1922,153703,chief complaint a fib with rvr and hypotension...,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.84345543, 0.16997926, 0.26213166, 0.0281212...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.6747156, 0.12843497, 0.2416979, 0.01945784,..."
1923,127867,chief complaint expressive aphasia hpi history...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0.003747494, 0.040779486, 0.106747955, 0.0660...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.003408874, 0.038831823, 0.092087805, 0.0598..."
1924,183600,chief complaint hypotension brbpr hpi yo f wit...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0.085624255, 0.059087086, 0.07579275, 0.01742...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[0.04563705, 0.059322618, 0.17068402, 0.086488..."


In [None]:
loss_mean = [nn.BCELoss()(torch.tensor(dev_df['out_mean'][i]), torch.tensor(dev_df['tar'][i])) for i in dev_df.index]
np.mean(loss_mean)

0.27652425

In [None]:
out_mean = np.vstack([dev_df['out_mean'][i]>=0.5 for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_mean)
f1_score_micro = metrics.f1_score(targets, out_mean, average='micro')
f1_score_macro = metrics.f1_score(targets, out_mean, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.02232606438213915
F1 Score (Micro) = 0.32684131120559595
F1 Score (Macro) = 0.20189372592734223


In [None]:
print(classification_report(targets, out_mean, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.4902    0.2404    0.3226       208
       244.9     0.3333    0.0118    0.0229       169
      250.00     0.1481    0.0123    0.0227       326
       272.0     0.0000    0.0000    0.0000       178
       272.4     0.4025    0.2162    0.2813       296
       276.0     0.4762    0.2174    0.2985       184
       276.1     0.8333    0.0505    0.0952       198
       276.2     0.3824    0.1032    0.1625       252
       285.1     0.5918    0.2522    0.3537       230
       285.9     0.2222    0.0168    0.0312       238
       287.5     0.3077    0.0533    0.0909       150
       305.1     0.0000    0.0000    0.0000       170
         311     0.0000    0.0000    0.0000       141
      327.23     0.0000    0.0000    0.0000       121
       401.9     0.5554    0.4645    0.5059       691
      403.90     0.4478    0.1829    0.2597       164
      403.91     0.7619    0.1553    0.2581       103
      410.71     0.3571    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_mean, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_mean, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6054030276000599
RUC AUC Score (Macro) = 0.5742378245962798


### Most freq 5 labels

In [None]:
out_sum_dict = dev_df.groupby('id').prediction.apply(np.sum).to_dict()
dev_df['out_sum'] = dev_df['id'].map(out_sum_dict)
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum,freq_5,note_count,out_bool,num_pred,out_ewma,most_freq
0,125506,full code,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.02813817, 0.06421183, 0.17812477, 0.0306744...","[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.026693698, 0.13270357, 0.2407746, 0.0381074...","[0.053387396, 0.26540715, 0.4815492, 0.0762148...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.10012276, 0.08435567, 0.13384493, 0.0472759...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,101685,fellow note,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0403443, 0.031939324, 0.17625839, 0.0880279...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02367562, 0.020369051, 0.091736704, 0.04929...","[0.04735124, 0.040738102, 0.18347341, 0.098596...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.17042506, 0.124258764, 0.25400022, 0.071789...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,148308,ventricular paced rhythm,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.04019823, 0.071467124, 0.20583339, 0.078409...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,171359,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.03139344, 0.04348312, 0.08821601, 0.0978097...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,166415,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.10029378, 0.03779263, 0.12295471, 0.0539075...","[0.20058756, 0.07558526, 0.24590942, 0.1078150...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.03139344, 0.04348312, 0.08821601, 0.0978097...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1921,181458,title chief complaint hypotension nausea hpi m...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.43703988, 0.23295334, 0.24010858, 0.0469193...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1922,153703,chief complaint a fib with rvr and hypotension...,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.84345543, 0.1699792, 0.26213166, 0.02812122...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.674716, 0.12843506, 0.24169803, 0.019457856...","[2.024148, 0.38530517, 0.7250941, 0.05837357, ...","[False, False, False, False, False, False, Fal...",3,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.45932263, 0.13441414, 0.19458751, 0.0321773...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1923,127867,chief complaint expressive aphasia hpi history...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0.003747494, 0.040779464, 0.10674791, 0.06609...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0034088728, 0.038831808, 0.09208786, 0.0598...","[0.0068177455, 0.077663615, 0.18417571, 0.1197...","[False, False, True, False, False, False, Fals...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.009916944, 0.053836778, 0.119768426, 0.0987...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1924,183600,chief complaint hypotension brbpr hpi yo f wit...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0.08562414, 0.05908706, 0.07579271, 0.0174266...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[0.04563699, 0.059322633, 0.1706842, 0.0864884...","[0.09127398, 0.118645266, 0.3413684, 0.1729769...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.041732088, 0.05788386, 0.11370826, 0.054489...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."


In [None]:
def freq_5(df, column): # column: out_sum
    df['freq_5'] = df[column]
    for idx in df.index:
      sorted = np.sort(df[column][idx])
      thres = sorted[-5] # position 5
      df['freq_5'][idx] = df[column][idx]>= thres


In [None]:
freq_5(dev_df, 'out_sum')

In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum,freq_5,note_count,out_bool,num_pred,out_ewma,most_freq
0,125506,full code,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.02813817, 0.06421183, 0.17812477, 0.0306744...","[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.026693698, 0.13270357, 0.2407746, 0.0381074...","[0.053387396, 0.26540715, 0.4815492, 0.0762148...","[False, False, False, False, True, False, Fals...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.10012276, 0.08435567, 0.13384493, 0.0472759...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,101685,fellow note,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0403443, 0.031939324, 0.17625839, 0.0880279...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02367562, 0.020369051, 0.091736704, 0.04929...","[0.04735124, 0.040738102, 0.18347341, 0.098596...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.17042506, 0.124258764, 0.25400022, 0.071789...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,148308,ventricular paced rhythm,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.04019823, 0.071467124, 0.20583339, 0.078409...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,171359,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.03139344, 0.04348312, 0.08821601, 0.0978097...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,166415,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.10029378, 0.03779263, 0.12295471, 0.0539075...","[0.20058756, 0.07558526, 0.24590942, 0.1078150...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.03139344, 0.04348312, 0.08821601, 0.0978097...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1921,181458,title chief complaint hypotension nausea hpi m...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[True, False, False, False, False, False, Fals...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.43703988, 0.23295334, 0.24010858, 0.0469193...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1922,153703,chief complaint a fib with rvr and hypotension...,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.84345543, 0.1699792, 0.26213166, 0.02812122...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.674716, 0.12843506, 0.24169803, 0.019457856...","[2.024148, 0.38530517, 0.7250941, 0.05837357, ...","[True, False, False, False, False, False, Fals...",3,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.45932263, 0.13441414, 0.19458751, 0.0321773...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1923,127867,chief complaint expressive aphasia hpi history...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0.003747494, 0.040779464, 0.10674791, 0.06609...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0034088728, 0.038831808, 0.09208786, 0.0598...","[0.0068177455, 0.077663615, 0.18417571, 0.1197...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.009916944, 0.053836778, 0.119768426, 0.0987...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1924,183600,chief complaint hypotension brbpr hpi yo f wit...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0.08562414, 0.05908706, 0.07579271, 0.0174266...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[0.04563699, 0.059322633, 0.1706842, 0.0864884...","[0.09127398, 0.118645266, 0.3413684, 0.1729769...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.041732088, 0.05788386, 0.11370826, 0.054489...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."


In [None]:
out_freq_5 = np.vstack([dev_df['freq_5'][i] for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_freq_5)
f1_score_micro = metrics.f1_score(targets, out_freq_5, average='micro')
f1_score_macro = metrics.f1_score(targets, out_freq_5, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.004153686396677051
F1 Score (Micro) = 0.3834107900585977
F1 Score (Macro) = 0.28094070735296556


In [None]:
print(classification_report(targets, out_freq_5, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.3886    0.3269    0.3551       208
       244.9     0.3061    0.0888    0.1376       169
      250.00     0.2234    0.1871    0.2037       326
       272.0     0.2586    0.0843    0.1271       178
       272.4     0.3015    0.4054    0.3458       296
       276.0     0.2899    0.3261    0.3069       184
       276.1     0.2857    0.1212    0.1702       198
       276.2     0.2263    0.2183    0.2222       252
       285.1     0.3697    0.4565    0.4086       230
       285.9     0.1827    0.0798    0.1111       238
       287.5     0.1954    0.1133    0.1435       150
       305.1     0.2832    0.1882    0.2261       170
         311     0.4000    0.0709    0.1205       141
      327.23     0.0000    0.0000    0.0000       121
       401.9     0.4745    0.6990    0.5652       691
      403.90     0.4231    0.2683    0.3284       164
      403.91     0.6136    0.2621    0.3673       103
      410.71     0.3125    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_freq_5, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_freq_5, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6527799418500501
RUC AUC Score (Macro) = 0.6171937563601021


In [None]:
precision_micro = metrics.average_precision_score(targets, out_freq_5, average='micro')
precision_macro = metrics.average_precision_score(targets, out_freq_5, average='macro')

print(f"Average Precision Score (Micro) = {precision_micro}")
print(f"Average Precision Score (Macro) = {precision_macro}")

Average Precision Score (Micro) = 0.21326952444049752
Average Precision Score (Macro) = 0.19916278826382666


### Predicted percentage

In [None]:
note_count_dict = dev_df.groupby('id').size().to_dict()
dev_df['note_count'] = dev_df['id'].map(note_count_dict)

In [None]:
dev_df['out_bool'] = [(dev_df['prediction'][i]>=0.5).astype(int) for i in dev_df.index]

In [None]:

out_freq_dict = dev_df.groupby('id').out_bool.apply(np.sum).to_dict()
dev_df['num_pred'] = dev_df['id'].map(out_freq_dict)
dev_df['num_pred'] = [(dev_df['num_pred'][i]>=0.4*dev_df['note_count'][i]).astype(int) for i in dev_df.index]

In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum,freq_5,note_count,out_bool,num_pred,out_ewma,most_freq
0,125506,full code,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.02813817, 0.06421183, 0.17812477, 0.0306744...","[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.026693698, 0.13270357, 0.2407746, 0.0381074...","[0.053387396, 0.26540715, 0.4815492, 0.0762148...","[False, False, False, False, True, False, Fals...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.10012276, 0.08435567, 0.13384493, 0.0472759...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,101685,fellow note,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0403443, 0.031939324, 0.17625839, 0.0880279...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02367562, 0.020369051, 0.091736704, 0.04929...","[0.04735124, 0.040738102, 0.18347341, 0.098596...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.17042506, 0.124258764, 0.25400022, 0.071789...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,148308,ventricular paced rhythm,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.04019823, 0.071467124, 0.20583339, 0.078409...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,171359,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.03139344, 0.04348312, 0.08821601, 0.0978097...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,166415,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.10029378, 0.03779263, 0.12295471, 0.0539075...","[0.20058756, 0.07558526, 0.24590942, 0.1078150...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.03139344, 0.04348312, 0.08821601, 0.0978097...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1921,181458,title chief complaint hypotension nausea hpi m...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[True, False, False, False, False, False, Fals...",1,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.43703988, 0.23295334, 0.24010858, 0.0469193...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1922,153703,chief complaint a fib with rvr and hypotension...,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.84345543, 0.1699792, 0.26213166, 0.02812122...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.674716, 0.12843506, 0.24169803, 0.019457856...","[2.024148, 0.38530517, 0.7250941, 0.05837357, ...","[True, False, False, False, False, False, Fals...",3,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.45932263, 0.13441414, 0.19458751, 0.0321773...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1923,127867,chief complaint expressive aphasia hpi history...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0.003747494, 0.040779464, 0.10674791, 0.06609...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0034088728, 0.038831808, 0.09208786, 0.0598...","[0.0068177455, 0.077663615, 0.18417571, 0.1197...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.009916944, 0.053836778, 0.119768426, 0.0987...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1924,183600,chief complaint hypotension brbpr hpi yo f wit...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0.08562414, 0.05908706, 0.07579271, 0.0174266...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[0.04563699, 0.059322633, 0.1706842, 0.0864884...","[0.09127398, 0.118645266, 0.3413684, 0.1729769...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.041732088, 0.05788386, 0.11370826, 0.054489...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."


In [None]:
out_freq = np.vstack([dev_df['num_pred'][i] for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_freq)
f1_score_micro = metrics.f1_score(targets, out_freq, average='micro')
f1_score_macro = metrics.f1_score(targets, out_freq, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.024402907580477674
F1 Score (Micro) = 0.3621564084903888
F1 Score (Macro) = 0.23273552701903955


In [None]:
print(classification_report(targets, out_freq, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.4261    0.3606    0.3906       208
       244.9     0.3158    0.0355    0.0638       169
      250.00     0.1750    0.0215    0.0383       326
       272.0     0.0000    0.0000    0.0000       178
       272.4     0.3750    0.2939    0.3295       296
       276.0     0.3770    0.2500    0.3007       184
       276.1     0.7143    0.0758    0.1370       198
       276.2     0.4057    0.1706    0.2402       252
       285.1     0.5468    0.3304    0.4119       230
       285.9     0.2941    0.0420    0.0735       238
       287.5     0.2941    0.0667    0.1087       150
       305.1     0.6000    0.0176    0.0343       170
         311     0.0000    0.0000    0.0000       141
      327.23     1.0000    0.0579    0.1094       121
       401.9     0.5178    0.5051    0.5114       691
      403.90     0.3762    0.2317    0.2868       164
      403.91     0.7742    0.2330    0.3582       103
      410.71     0.2083    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_freq, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_freq, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6253291364921242
RUC AUC Score (Macro) = 0.5885372686054821


### Exponential moving average

In [None]:
def ewma(sub_df, window=3):
#    print(sub_df)
    alpha = 2 / (window + 1)
#    print(sub_df['ewma'])
    sub_df['ewma'] = sub_df['out_bool']
    for r in range(len(sub_df)):
        if r == 0:
            sub_df['ewma'].iloc[r] = sub_df['prediction'].iloc[r]
        else:
            sub_df['ewma'].iloc[r] = alpha*sub_df['prediction'].iloc[r] + (1-alpha)*sub_df['prediction'].iloc[r-1]
 #   print(type(sub_df['ewma']))
    return sub_df['ewma']

In [None]:

out_ewma_dict = dev_df.groupby('id', group_keys=False).apply(ewma).to_dict()
dev_df['out_ewma'] = pd.Series(dev_df.index, index=dev_df.index).map(out_ewma_dict)



In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum,freq_5,note_count,out_bool,num_pred,out_ewma,most_freq
0,125506,full code,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.02813817, 0.06421183, 0.17812477, 0.0306744...","[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.026693698, 0.13270357, 0.2407746, 0.0381074...","[0.053387396, 0.26540715, 0.4815492, 0.0762148...","[False, False, False, False, True, False, Fals...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.02813817, 0.06421183, 0.17812477, 0.0306744...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,101685,fellow note,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0403443, 0.031939324, 0.17625839, 0.0880279...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02367562, 0.020369051, 0.091736704, 0.04929...","[0.04735124, 0.040738102, 0.18347341, 0.098596...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0403443, 0.031939324, 0.17625839, 0.0880279...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,148308,ventricular paced rhythm,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,171359,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,166415,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.10029378, 0.03779263, 0.12295471, 0.0539075...","[0.20058756, 0.07558526, 0.24590942, 0.1078150...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1921,181458,title chief complaint hypotension nausea hpi m...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[True, False, False, False, False, False, Fals...",1,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1922,153703,chief complaint a fib with rvr and hypotension...,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.84345543, 0.1699792, 0.26213166, 0.02812122...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.674716, 0.12843506, 0.24169803, 0.019457856...","[2.024148, 0.38530517, 0.7250941, 0.05837357, ...","[True, False, False, False, False, False, Fals...",3,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.8417505, 0.1576569, 0.29371238, 0.024810847...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1923,127867,chief complaint expressive aphasia hpi history...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0.003747494, 0.040779464, 0.10674791, 0.06609...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0034088728, 0.038831808, 0.09208786, 0.0598...","[0.0068177455, 0.077663615, 0.18417571, 0.1197...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0034088728, 0.038831808, 0.09208786, 0.0598...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1924,183600,chief complaint hypotension brbpr hpi yo f wit...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0.08562414, 0.05908706, 0.07579271, 0.0174266...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[0.04563699, 0.059322633, 0.1706842, 0.0864884...","[0.09127398, 0.118645266, 0.3413684, 0.1729769...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.04563699, 0.059322633, 0.1706842, 0.0864884...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."


In [None]:
out_ewma = np.vstack([dev_df['out_ewma'][i]>0.5 for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_ewma)
f1_score_micro = metrics.f1_score(targets, out_ewma, average='micro')
f1_score_macro = metrics.f1_score(targets, out_ewma, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.020768431983385256
F1 Score (Micro) = 0.3250134916351862
F1 Score (Macro) = 0.2033889358419901


In [None]:
print(classification_report(targets, out_ewma, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.4310    0.2404    0.3086       208
       244.9     0.4444    0.0237    0.0449       169
      250.00     0.1667    0.0153    0.0281       326
       272.0     0.0000    0.0000    0.0000       178
       272.4     0.3764    0.2264    0.2827       296
       276.0     0.4066    0.2011    0.2691       184
       276.1     0.7500    0.0455    0.0857       198
       276.2     0.3662    0.1032    0.1610       252
       285.1     0.5644    0.2478    0.3444       230
       285.9     0.2273    0.0210    0.0385       238
       287.5     0.2857    0.0533    0.0899       150
       305.1     0.3333    0.0059    0.0116       170
         311     0.0000    0.0000    0.0000       141
      327.23     1.0000    0.0083    0.0164       121
       401.9     0.5304    0.4674    0.4969       691
      403.90     0.4189    0.1890    0.2605       164
      403.91     0.7600    0.1845    0.2969       103
      410.71     0.3750    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_ewma, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_ewma, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6054279397143371
RUC AUC Score (Macro) = 0.5741954045079664


In [None]:
precision_micro = metrics.average_precision_score(targets, out_ewma, average='micro')
precision_macro = metrics.average_precision_score(targets, out_ewma, average='macro')

print(f"Average Precision Score (Micro) = {precision_micro}")
print(f"Average Precision Score (Macro) = {precision_macro}")

Average Precision Score (Micro) = 0.20310338137486977
Average Precision Score (Macro) = 0.17994501289314654


### Most frequent prediction

In [None]:

most_freq_dict = dev_df.groupby('id')['out_bool'].apply(lambda x: x.value_counts().index[0]).to_dict()
dev_df['most_freq'] = dev_df['id'].map(most_freq_dict)

In [None]:
dev_df

Unnamed: 0,id,text,labels,prediction,tar,out_mean,out_sum,freq_5,note_count,out_bool,num_pred,out_ewma,most_freq
0,125506,full code,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.02813817, 0.06421183, 0.17812477, 0.0306744...","[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.026693698, 0.13270357, 0.2407746, 0.0381074...","[0.053387396, 0.26540715, 0.4815492, 0.0762148...","[False, False, False, False, True, False, Fals...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.02813817, 0.06421183, 0.17812477, 0.0306744...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,101685,fellow note,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0403443, 0.031939324, 0.17625839, 0.0880279...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02367562, 0.020369051, 0.091736704, 0.04929...","[0.04735124, 0.040738102, 0.18347341, 0.098596...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0403443, 0.031939324, 0.17625839, 0.0880279...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,148308,ventricular paced rhythm,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.06666902, 0.035746805, 0.17273325, 0.165264...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,171359,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[False, False, False, False, False, False, Fal...",1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,166415,sinus rhythm normal ecg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.10029378, 0.03779263, 0.12295471, 0.0539075...","[0.20058756, 0.07558526, 0.24590942, 0.1078150...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.02190135, 0.014694269, 0.04717716, 0.074233...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1921,181458,title chief complaint hypotension nausea hpi m...,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[True, False, False, False, False, False, Fals...",1,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.86669964, 0.2774134, 0.12011458, 0.02914882...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1922,153703,chief complaint a fib with rvr and hypotension...,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0.84345543, 0.1699792, 0.26213166, 0.02812122...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.674716, 0.12843506, 0.24169803, 0.019457856...","[2.024148, 0.38530517, 0.7250941, 0.05837357, ...","[True, False, False, False, False, False, Fals...",3,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0.8417505, 0.1576569, 0.29371238, 0.024810847...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1923,127867,chief complaint expressive aphasia hpi history...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0.003747494, 0.040779464, 0.10674791, 0.06609...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0034088728, 0.038831808, 0.09208786, 0.0598...","[0.0068177455, 0.077663615, 0.18417571, 0.1197...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0034088728, 0.038831808, 0.09208786, 0.0598...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1924,183600,chief complaint hypotension brbpr hpi yo f wit...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0.08562414, 0.05908706, 0.07579271, 0.0174266...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[0.04563699, 0.059322633, 0.1706842, 0.0864884...","[0.09127398, 0.118645266, 0.3413684, 0.1729769...","[False, False, False, False, False, False, Fal...",2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.04563699, 0.059322633, 0.1706842, 0.0864884...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."


In [None]:
out_most_freq = np.vstack([dev_df['most_freq'][i] for i in dev_df.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_most_freq)
f1_score_micro = metrics.f1_score(targets, out_most_freq, average='micro')
f1_score_macro = metrics.f1_score(targets, out_most_freq, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.021287642782969886
F1 Score (Micro) = 0.3245683930942895
F1 Score (Macro) = 0.2087266102251819


In [None]:
print(classification_report(targets, out_most_freq, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.3926    0.2548    0.3090       208
       244.9     0.4000    0.0237    0.0447       169
      250.00     0.2059    0.0215    0.0389       326
       272.0     0.0000    0.0000    0.0000       178
       272.4     0.3606    0.2534    0.2976       296
       276.0     0.4000    0.2174    0.2817       184
       276.1     0.7647    0.0657    0.1209       198
       276.2     0.3699    0.1071    0.1662       252
       285.1     0.5741    0.2696    0.3669       230
       285.9     0.1600    0.0168    0.0304       238
       287.5     0.2500    0.0533    0.0879       150
       305.1     0.6000    0.0176    0.0343       170
         311     0.0000    0.0000    0.0000       141
      327.23     1.0000    0.0413    0.0794       121
       401.9     0.4992    0.4573    0.4773       691
      403.90     0.4051    0.1951    0.2634       164
      403.91     0.7500    0.2039    0.3206       103
      410.71     0.3125    

In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_most_freq, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_most_freq, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6059825774352542
RUC AUC Score (Macro) = 0.5753013599203782


In [None]:
precision_micro = metrics.average_precision_score(targets, out_most_freq, average='micro')
precision_macro = metrics.average_precision_score(targets, out_most_freq, average='macro')

print(f"Average Precision Score (Micro) = {precision_micro}")
print(f"Average Precision Score (Macro) = {precision_macro}")

Average Precision Score (Micro) = 0.20024414970513496
Average Precision Score (Macro) = 0.18006117048313608


In [None]:
def precision_at_5(df, column): # column: prediction
    df['p@5'] = 0
    for idx in df.index:
        idx_sorted = np.argsort(df[column][idx])
        idcs = idx_sorted[-3:]
        perc = (3 - sum(df['tar'][idx][idcs])) / 3
        df['p@5'][idx] = perc

### Testing

In [30]:
# Evaluate the model

# Evaluate the model

def testing():
    model.eval()

    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy())
    print(f'Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [31]:
test_out, targets, losses = testing()
outputs = np.array(test_out) >= 0.5
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Loss:  0.28
Accuracy Score = 0.021287642782969886
F1 Score (Micro) = 0.3470724445914654
F1 Score (Macro) = 0.2289088698725901


In [32]:
print(classification_report(targets, outputs, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.3862    0.2963    0.3353       189
       244.9     0.5385    0.0422    0.0782       166
      250.00     0.3243    0.0395    0.0704       304
       272.0     0.0000    0.0000    0.0000       168
       272.4     0.3961    0.2662    0.3184       308
       276.0     0.3636    0.2198    0.2740       182
       276.1     0.5263    0.0538    0.0976       186
       276.2     0.3626    0.1441    0.2062       229
       285.1     0.4592    0.2239    0.3010       201
       285.9     0.6552    0.0709    0.1279       268
       287.5     0.3714    0.0739    0.1232       176
       305.1     0.8333    0.0307    0.0592       163
         311     0.0000    0.0000    0.0000       124
      327.23     1.0000    0.0333    0.0645        90
       401.9     0.5594    0.5123    0.5348       689
      403.90     0.3737    0.2606    0.3071       142
      403.91     0.6842    0.1287    0.2167       101
      410.71     0.4583    

In [33]:

ruc_auc_score_micro = metrics.roc_auc_score(targets, outputs, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, outputs, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6165182189149855
RUC AUC Score (Macro) = 0.5850755954121366


In [34]:

test_df['prediction'] = test_out
test_df['tar'] = targets

In [35]:
note_count_dict = test_df.groupby('id').size().to_dict()
test_df['note_count'] = test_df['id'].map(note_count_dict)

In [36]:
test_df['out_bool'] = [(test_df['prediction'][i]>=0.5).astype(int) for i in test_df.index]

In [37]:

out_freq_dict = test_df.groupby('id').out_bool.apply(np.sum).to_dict()
test_df['num_pred'] = test_df['id'].map(out_freq_dict)
test_df['num_pred'] = [(test_df['num_pred'][i]>=0.4*test_df['note_count'][i]).astype(int) for i in test_df.index]

In [38]:
df_freq = test_df.drop_duplicates('id')

In [39]:
out_freq = np.vstack([df_freq['num_pred'][i] for i in df_freq.index])
targets = np.vstack([df_freq['tar'][i] for i in df_freq.index])
#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_freq)
f1_score_micro = metrics.f1_score(targets, out_freq, average='micro')
f1_score_macro = metrics.f1_score(targets, out_freq, average='macro')

print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

F1 Score (Micro) = 0.3466227347611203
F1 Score (Macro) = 0.22571267197487888


In [40]:
print(classification_report(targets, out_freq, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

       038.9     0.3571    0.3147    0.3346       143
       244.9     0.4000    0.0317    0.0588       126
      250.00     0.2727    0.0357    0.0632       252
       272.0     0.0000    0.0000    0.0000       135
       272.4     0.3810    0.2903    0.3295       248
       276.0     0.3214    0.1971    0.2443       137
       276.1     0.4706    0.0593    0.1053       135
       276.2     0.3026    0.1299    0.1818       177
       285.1     0.5000    0.2469    0.3306       162
       285.9     0.6087    0.0651    0.1176       215
       287.5     0.2857    0.0597    0.0988       134
       305.1     0.8333    0.0400    0.0763       125
         311     0.0000    0.0000    0.0000        99
      327.23     1.0000    0.0274    0.0533        73
       401.9     0.5570    0.5480    0.5525       562
      403.90     0.3529    0.2542    0.2956       118
      403.91     0.7333    0.1310    0.2222        84
      410.71     0.2778    

In [41]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_freq, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_freq, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6178045033366941
RUC AUC Score (Macro) = 0.5849465837869322
