In [1]:
!pip install -q transformers

[K     |████████████████████████████████| 2.8 MB 6.4 MB/s 
[K     |████████████████████████████████| 3.3 MB 32.1 MB/s 
[K     |████████████████████████████████| 636 kB 67.6 MB/s 
[K     |████████████████████████████████| 50 kB 5.9 MB/s 
[K     |████████████████████████████████| 895 kB 53.6 MB/s 
[?25h

In [2]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.model_selection import train_test_split
import transformers
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from transformers import AutoTokenizer, AutoModel

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report


In [3]:
# Setting up the device for GPU usage

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
np.random.seed(123)
torch.manual_seed(123)
torch.cuda.manual_seed_all(123)

## Load data

In [6]:
#change to where you store mimic3 data
MIMIC_3_DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/datasets'

train_df = pd.read_csv('%s/train_10.csv' % MIMIC_3_DIR)
eval_df = pd.read_csv('%s/dev_10.csv' % MIMIC_3_DIR)
test_df = pd.read_csv('%s/test_10.csv' % MIMIC_3_DIR)

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,LABELS,length
0,17341,151110,Nursing/other,rsbi,584.9;427.31,1
1,61638,103816,Nursing,title,414.01,1
2,61638,103816,General,title,414.01,1
3,23706,186321,Nursing/other,npn,401.9;428.0;530.81,1
4,55265,191108,General,title,530.81;584.9;427.31,1


In [7]:
full_df = pd.concat([train_df, eval_df, test_df], ignore_index=True)


 ## Preprocess Data

In [8]:
# split labels by ";", then convert to list
def split_lab (x):
    #print(x)
    return x.split(";")

full_df['LABELS'] = full_df['LABELS'].apply(split_lab)
#full_df['TEXT'] = full_df['TEXT'].apply(split_lab)

full_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,LABELS,length
0,17341,151110,Nursing/other,rsbi,"[584.9, 427.31]",1
1,61638,103816,Nursing,title,[414.01],1
2,61638,103816,General,title,[414.01],1
3,23706,186321,Nursing/other,npn,"[401.9, 428.0, 530.81]",1
4,55265,191108,General,title,"[530.81, 584.9, 427.31]",1


In [9]:
#load multi label binarizer for one-hot encoding
mlb = MultiLabelBinarizer(sparse_output=True)



In [10]:
#change label to one-hot encoding per code
full_df = full_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(full_df.pop('LABELS')),
                columns=mlb.classes_))

full_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,length,250.00,272.4,401.9,414.01,427.31,428.0,518.81,530.81,584.9,599.0
0,17341,151110,Nursing/other,rsbi,1,0,0,0,0,1,0,0,0,1,0
1,61638,103816,Nursing,title,1,0,0,0,1,0,0,0,0,0,0
2,61638,103816,General,title,1,0,0,0,1,0,0,0,0,0,0
3,23706,186321,Nursing/other,npn,1,0,0,1,0,0,1,0,1,0,0
4,55265,191108,General,title,1,0,0,0,0,1,0,0,1,1,0


In [11]:
# Convert columns to list of one hot encoding
icd_classes_50 = mlb.classes_

full_df['labels'] = full_df[icd_classes_50].values.tolist()
#train_df.sort_values(['length'], ascending=False, inplace=True)
full_df


Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,length,250.00,272.4,401.9,414.01,427.31,428.0,518.81,530.81,584.9,599.0,labels
0,17341,151110,Nursing/other,rsbi,1,0,0,0,0,1,0,0,0,1,0,"[0, 0, 0, 0, 1, 0, 0, 0, 1, 0]"
1,61638,103816,Nursing,title,1,0,0,0,1,0,0,0,0,0,0,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"
2,61638,103816,General,title,1,0,0,0,1,0,0,0,0,0,0,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"
3,23706,186321,Nursing/other,npn,1,0,0,1,0,0,1,0,1,0,0,"[0, 0, 1, 0, 0, 1, 0, 1, 0, 0]"
4,55265,191108,General,title,1,0,0,0,0,1,0,0,1,1,0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294460,97158,152158,Discharge summary,admission date discharge date date of birth se...,4644,0,0,1,0,0,0,1,0,0,0,"[0, 0, 1, 0, 0, 0, 1, 0, 0, 0]"
294461,99650,199859,Discharge summary,admission date discharge date date of birth se...,5126,0,0,0,1,1,1,1,0,1,1,"[0, 0, 0, 1, 1, 1, 1, 0, 1, 1]"
294462,93623,187232,Discharge summary,admission date discharge date date of birth se...,5171,0,1,1,0,0,0,0,0,0,0,"[0, 1, 1, 0, 0, 0, 0, 0, 0, 0]"
294463,96260,110058,Discharge summary,admission date discharge date date of birth se...,5173,0,0,0,0,0,0,1,0,0,1,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1]"


In [12]:
full_df.HADM_ID.unique().shape

(9446,)

In [13]:
full_df = full_df.drop(full_df[full_df['length']<300].index)

In [15]:
train_df, test_df = train_test_split(full_df, test_size=0.2)
train_df, eval_df = train_test_split(train_df, test_size=0.2)

In [16]:
train_df

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,length,250.00,272.4,401.9,414.01,427.31,428.0,518.81,530.81,584.9,599.0,labels
280946,94202,158722,Nursing,heart failure chf systolic chronic assessment ...,413,0,0,0,0,1,1,1,0,0,0,"[0, 0, 0, 0, 1, 1, 1, 0, 0, 0]"
166354,7960,192744,Nursing/other,micu nsg note 7a 7p neuro pt confused to date ...,365,0,0,0,0,0,1,0,0,0,1,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 1]"
159821,82217,183943,Radiology,am ct abdomen w contrast ct pelvis w contrast ...,324,0,1,1,0,0,1,0,0,0,0,"[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]"
241805,87552,117940,Physician,sicu hpi date hd ab ceftriaxone vanco ppx h2b ...,639,0,0,0,0,0,0,1,0,0,0,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]"
290033,94229,146659,Physician,chief complaint hour events ultrasound at am l...,812,0,1,0,1,1,1,1,0,1,0,"[0, 1, 0, 1, 1, 1, 1, 0, 1, 0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156942,76327,114950,Physician,chief complaint hour events events mental stat...,309,0,0,0,0,0,0,1,0,0,0,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]"
235553,88508,166232,Nutrition,subjective pt has had a poor to fair appetite ...,388,0,0,0,0,0,0,0,1,0,0,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
170570,52379,161174,Echo,patient test information indication endocardit...,396,0,1,1,0,0,0,1,0,0,0,"[0, 1, 1, 0, 0, 0, 1, 0, 0, 0]"
177638,78388,180667,Physician,chief complaint hemoptysis dyspnea patient see...,469,0,0,1,0,1,0,0,1,0,0,"[0, 0, 1, 0, 1, 0, 0, 1, 0, 0]"


In [17]:
train_df.sort_values(['length'], inplace=True)
eval_df.sort_values(['length'], inplace=True)
test_df.sort_values(['length'], inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [18]:
train_df = train_df.drop(train_df[train_df['CATEGORY']!='Discharge summary'].index)
eval_df = eval_df.drop(eval_df[eval_df['CATEGORY']!='Discharge summary'].index)
test_df = test_df.drop(test_df[test_df['CATEGORY']!='Discharge summary'].index)

In [19]:
train_df

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,length,250.00,272.4,401.9,414.01,427.31,428.0,518.81,530.81,584.9,599.0,labels
155332,2245,136166,Discharge summary,name known lastname known firstname unit no nu...,301,0,0,0,0,0,0,0,0,1,0,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]"
155375,11995,132213,Discharge summary,admission date discharge date date of birth se...,301,0,0,0,0,0,0,0,1,0,0,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
232351,85998,164722,Discharge summary,name known lastname known firstname unit no nu...,302,0,1,1,1,1,1,0,1,1,0,"[0, 1, 1, 1, 1, 1, 0, 1, 1, 0]"
155563,1588,172764,Discharge summary,admission date discharge date date of birth se...,302,0,0,1,0,0,0,0,0,0,0,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0]"
155750,18965,103819,Discharge summary,admission date discharge date date of birth se...,303,0,0,1,1,0,0,0,0,0,0,"[0, 0, 1, 1, 0, 0, 0, 0, 0, 0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207415,80342,160825,Discharge summary,admission date discharge date date of birth se...,6120,0,0,0,0,0,0,1,0,1,0,"[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]"
248321,86752,143537,Discharge summary,admission date discharge date date of birth se...,6331,1,0,0,0,0,0,1,0,0,0,"[1, 0, 0, 0, 0, 0, 1, 0, 0, 0]"
207416,65176,173812,Discharge summary,admission date discharge date date of birth se...,6487,0,0,0,0,0,0,1,0,0,0,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]"
207417,51821,197028,Discharge summary,admission date discharge date date of birth se...,6787,0,0,0,0,0,0,1,0,0,0,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]"


In [20]:
train_df.HADM_ID.unique().shape

(6168,)

In [21]:
#convert into 2 columns dataframe
train_df = pd.DataFrame(train_df, columns=['TEXT', 'labels'])
train_df.columns=['text', 'labels']
train_df.head()

eval_df = pd.DataFrame(eval_df, columns=['TEXT', 'labels'])
eval_df.columns=['text', 'labels']
eval_df.head()

test_df = pd.DataFrame(test_df, columns=['HADM_ID', 'TEXT', 'labels'])
test_df.columns=['id', 'text', 'labels']


In [22]:
train_df.reset_index(drop=True, inplace=True)
eval_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)
test_df.head()

Unnamed: 0,id,text,labels
0,136704,name known lastname known firstname unit no nu...,"[0, 0, 1, 0, 1, 1, 0, 0, 1, 1]"
1,137272,admission date discharge date date of birth se...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0]"
2,192096,name known lastname known firstname unit no nu...,"[0, 0, 0, 1, 0, 0, 0, 0, 1, 0]"
3,111599,name known lastname known firstname unit no nu...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]"
4,164436,name known lastname known firstname unit no nu...,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 1]"


### Set Model Parameters

In [23]:
# Defining some key variables to configure model training
MAX_LEN = 512
TRAIN_BATCH_SIZE = 12
VALID_BATCH_SIZE = 8
TEST_BATCH_SIZE = 8
EPOCHS = 5
LEARNING_RATE = 3e-05

#set tokenizer
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

Downloading:   0%|          | 0.00/385 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

### Preparing Dataloader

In [24]:
#custom dataset for BERT class
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        
        '''
            set text as training data
            set labels as targets
        '''
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.labels
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }

In [25]:
#load df to dataset

training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
valid_set = CustomDataset(eval_df, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_df, tokenizer, MAX_LEN)

In [26]:
#data loader
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': False
                }

val_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

training_loader = DataLoader(training_set, **train_params)
valid_loader = DataLoader(valid_set, **val_params)
testing_loader = DataLoader(testing_set, **test_params)

### Create model class from pretrained model

In [27]:
# Creating the customized model, by adding a drop out and a dense layer on top of distil bert to get the final output for the model. 

class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()

        self.l1 = transformers.AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT", return_dict=False)
        #self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)
        
        
        self.l2 = torch.nn.Dropout(0.3)
        

        self.l3 = torch.nn.Linear(768, 10)
    
    def forward(self, ids, mask, token_type_ids):
#        print("ids: ", ids.size(), "mask: ", mask.size(), "token type ids: ", token_type_ids.size())
        _, output_1= self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

model = BERTClass()
model.to(device)

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [28]:
#loss function
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

In [29]:
#optimizer
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

### Train fine-tuning model

In [30]:
def train(epoch):
    model.train()
    for _,data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        
    print(f'Epoch: {epoch}, Training Loss:  {loss.item()}')

In [31]:
# Evaluate the model

def validation(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(valid_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    print(f'Epoch: {epoch}, Validation Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [None]:
start_epoch=0
DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/'
resume = True     
if resume:
    if os.path.isfile(f"%s/models/models_disch_epoch{start_epoch}.pth" % DIR):
        print("Resume from checkpoint...")
        checkpoint = torch.load(f"%s/models/models_disch_epoch{start_epoch}.pth" % DIR)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        initepoch = checkpoint['epoch']
        print("====>loaded checkpoint (epoch{})".format(checkpoint['epoch']))
    else:
        print("====>no checkpoint found.")
        initepoch = 0

for epoch in tqdm(range(EPOCHS)):
    train(epoch)
    validation(epoch)

    if (epoch+start_epoch+1) >3:
        checkpoint = {"model_state_dict": model.state_dict(),
                      "optimizer_state_dict": optimizer.state_dict(),
                      "epoch": epoch+start_epoch+1}
        path_checkpoint = f"%s/models/models_disch_epoch{epoch+start_epoch+1}.pth" % DIR
        torch.save(checkpoint, path_checkpoint)



====>no checkpoint found.


  0%|          | 0/5 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Training Loss:  0.3734656870365143


 20%|██        | 1/5 [06:54<27:39, 414.88s/it]

Epoch: 0, Validation Loss:  0.49
Epoch: 1, Training Loss:  0.3493160903453827


 40%|████      | 2/5 [13:50<20:45, 415.18s/it]

Epoch: 1, Validation Loss:  0.45
Epoch: 2, Training Loss:  0.2909400761127472


 60%|██████    | 3/5 [20:45<13:50, 415.17s/it]

Epoch: 2, Validation Loss:  0.43
Epoch: 3, Training Loss:  0.26042255759239197
Epoch: 3, Validation Loss:  0.43


 80%|████████  | 4/5 [27:45<06:57, 417.27s/it]

Epoch: 4, Training Loss:  0.19291900098323822
Epoch: 4, Validation Loss:  0.43


100%|██████████| 5/5 [35:00<00:00, 420.19s/it]


In [42]:

DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/'

checkpoint = torch.load(f"%s/models/models_disch_epoch5.pth" % DIR)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>


### Model Evaluation

In [None]:
# Evaluate the model

def evaluation():
    model.eval()

    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(valid_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy())
    print(f'Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [None]:
dev_out, dev_tar, losses = evaluation()



Loss:  0.06


In [33]:
# Evaluate the model
def testing():
    model.eval()

    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy())
    print(f'Loss:  {np.mean(losses):.2f}')

    return fin_outputs, fin_targets, losses

In [43]:

test_out, targets, losses = testing()
outputs = np.array(test_out) >= 0.5
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")



Loss:  0.44
F1 Score (Micro) = 0.6364289379168805
F1 Score (Macro) = 0.5818949896831037


In [44]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, outputs, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, outputs, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.7527155827583878
RUC AUC Score (Macro) = 0.7223888593427055


In [45]:
print(classification_report(targets, outputs, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

      250.00     0.6636    0.7360    0.6980       394
       272.4     0.7251    0.6119    0.6637       487
       401.9     0.6847    0.6804    0.6825       948
      414.01     0.7341    0.8194    0.7744       576
      427.31     0.6599    0.7430    0.6990       572
       428.0     0.5754    0.8328    0.6805       628
      518.81     0.5916    0.4493    0.5107       345
      530.81     0.8098    0.4244    0.5570       311
       584.9     0.5167    0.2831    0.3658       438
       599.0     0.4022    0.1221    0.1873       303

   micro avg     0.6538    0.6200    0.6364      5002
   macro avg     0.6363    0.5703    0.5819      5002
weighted avg     0.6456    0.6200    0.6167      5002
 samples avg     0.6253    0.6269    0.5895      5002



  _warn_prf(average, modifier, msg_start, len(result))


In [46]:
test_df['prediction'] = test_out
test_df['tar'] = targets

In [47]:
test_df

Unnamed: 0,id,text,labels,prediction,tar
0,136704,name known lastname known firstname unit no nu...,"[0, 0, 1, 0, 1, 1, 0, 0, 1, 1]","[0.2433057, 0.7839641, 0.8741178, 0.9079335, 0...","[0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, ..."
1,137272,admission date discharge date date of birth se...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0]","[0.13865775, 0.20852491, 0.88169754, 0.4461171...","[1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,192096,name known lastname known firstname unit no nu...,"[0, 0, 0, 1, 0, 0, 0, 0, 1, 0]","[0.253961, 0.20044975, 0.13838394, 0.97548646,...","[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, ..."
3,111599,name known lastname known firstname unit no nu...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]","[0.040394865, 0.03748464, 0.66826504, 0.011982...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
4,164436,name known lastname known firstname unit no nu...,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 1]","[0.07230637, 0.02127394, 0.4830181, 0.264972, ...","[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...,...
1929,150956,admission date discharge date date of birth se...,"[0, 0, 0, 0, 1, 1, 1, 0, 0, 0]","[0.71409523, 0.09763, 0.38058394, 0.023902949,...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, ..."
1930,160172,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[0.07593462, 0.067108564, 0.5778489, 0.0065728...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ..."
1931,161772,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[0.041289747, 0.07474115, 0.076118074, 0.17866...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ..."
1932,197451,admission date discharge date date of birth se...,"[0, 0, 1, 1, 0, 0, 0, 0, 1, 0]","[0.043296892, 0.023522004, 0.20441736, 0.05445...","[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, ..."


In [48]:
note_count_dict = test_df.groupby('id').size().to_dict()
test_df['note_count'] = test_df['id'].map(note_count_dict)

In [49]:
test_df['out_bool'] = [(test_df['prediction'][i]>=0.5).astype(int) for i in test_df.index]

In [50]:
out_freq_dict = test_df.groupby('id').out_bool.apply(np.sum).to_dict()
test_df['num_pred'] = test_df['id'].map(out_freq_dict)
test_df['num_pred'] = [(test_df['num_pred'][i]>=0.4*test_df['note_count'][i]).astype(int) for i in test_df.index]

In [51]:
df_freq = test_df.drop_duplicates('id')

In [52]:
out_freq = np.vstack([df_freq['num_pred'][i] for i in df_freq.index])
targets = np.vstack([df_freq['tar'][i] for i in df_freq.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_freq)
f1_score_micro = metrics.f1_score(targets, out_freq, average='micro')
f1_score_macro = metrics.f1_score(targets, out_freq, average='macro')

print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

F1 Score (Micro) = 0.6375644994840041
F1 Score (Macro) = 0.5829270855897458


In [53]:
print(classification_report(targets, out_freq, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

      250.00     0.6651    0.7398    0.7005       392
       272.4     0.7244    0.6124    0.6637       485
       401.9     0.6862    0.6818    0.6840       946
      414.01     0.7328    0.8185    0.7733       573
      427.31     0.6620    0.7482    0.7025       568
       428.0     0.5743    0.8328    0.6798       622
      518.81     0.5923    0.4529    0.5133       340
      530.81     0.8075    0.4235    0.5556       307
       584.9     0.5167    0.2851    0.3674       435
       599.0     0.4022    0.1237    0.1893       299

   micro avg     0.6540    0.6219    0.6376      4967
   macro avg     0.6363    0.5719    0.5829      4967
weighted avg     0.6460    0.6219    0.6181      4967
 samples avg     0.6265    0.6292    0.5912      4967



  _warn_prf(average, modifier, msg_start, len(result))


In [54]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_freq, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_freq, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.7534696987169424
RUC AUC Score (Macro) = 0.7228953832411719


In [55]:
out_mean_dict = test_df.groupby('id').prediction.apply(np.mean).to_dict()
test_df['out_mean'] = test_df['id'].map(out_mean_dict)
test_df

Unnamed: 0,id,text,labels,prediction,tar,note_count,out_bool,num_pred,out_mean
0,136704,name known lastname known firstname unit no nu...,"[0, 0, 1, 0, 1, 1, 0, 0, 1, 1]","[0.2433057, 0.7839641, 0.8741178, 0.9079335, 0...","[0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, ...",1,"[0, 1, 1, 1, 0, 1, 0, 0, 0, 0]","[0, 1, 1, 1, 0, 1, 0, 0, 0, 0]","[0.2433057, 0.7839641, 0.8741178, 0.9079335, 0..."
1,137272,admission date discharge date date of birth se...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0]","[0.13865775, 0.20852491, 0.88169754, 0.4461171...","[1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0]","[0.13865775, 0.20852491, 0.88169754, 0.4461171..."
2,192096,name known lastname known firstname unit no nu...,"[0, 0, 0, 1, 0, 0, 0, 0, 1, 0]","[0.253961, 0.20044975, 0.13838394, 0.97548646,...","[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...",2,"[0, 0, 0, 1, 0, 1, 0, 0, 0, 0]","[0, 0, 0, 1, 0, 1, 0, 0, 0, 0]","[0.2588029, 0.14538816, 0.15233836, 0.8497767,..."
3,111599,name known lastname known firstname unit no nu...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]","[0.040394865, 0.03748464, 0.66826504, 0.011982...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",1,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 0]","[0, 0, 1, 0, 1, 0, 0, 0, 0, 0]","[0.040394865, 0.03748464, 0.66826504, 0.011982..."
4,164436,name known lastname known firstname unit no nu...,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 1]","[0.07230637, 0.02127394, 0.4830181, 0.264972, ...","[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",1,"[0, 0, 0, 0, 1, 1, 0, 0, 0, 0]","[0, 0, 0, 0, 1, 1, 0, 0, 0, 0]","[0.07230637, 0.02127394, 0.4830181, 0.264972, ..."
...,...,...,...,...,...,...,...,...,...
1929,150956,admission date discharge date date of birth se...,"[0, 0, 0, 0, 1, 1, 1, 0, 0, 0]","[0.71409523, 0.09763, 0.38058394, 0.023902949,...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, ...",1,"[1, 0, 0, 0, 1, 1, 0, 0, 1, 0]","[1, 0, 0, 0, 1, 1, 0, 0, 1, 0]","[0.71409523, 0.09763, 0.38058394, 0.023902949,..."
1930,160172,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[0.07593462, 0.067108564, 0.5778489, 0.0065728...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",1,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 1]","[0, 0, 1, 0, 0, 0, 0, 0, 0, 1]","[0.07593462, 0.067108564, 0.5778489, 0.0065728..."
1931,161772,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[0.041289747, 0.07474115, 0.076118074, 0.17866...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",1,"[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]","[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]","[0.041289747, 0.07474115, 0.076118074, 0.17866..."
1932,197451,admission date discharge date date of birth se...,"[0, 0, 1, 1, 0, 0, 0, 0, 1, 0]","[0.043296892, 0.023522004, 0.20441736, 0.05445...","[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...",1,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[0.043296892, 0.023522004, 0.20441736, 0.05445..."


In [56]:
df_mean = test_df.drop_duplicates('id')

In [57]:
out_mean = np.vstack([df_mean['out_mean'][i]>=0.5 for i in df_mean.index])
targets = np.vstack([df_mean['tar'][i] for i in df_mean.index])
#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_mean)
f1_score_micro = metrics.f1_score(targets, out_mean, average='micro')
f1_score_macro = metrics.f1_score(targets, out_mean, average='macro')

print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

F1 Score (Micro) = 0.6372559148672383
F1 Score (Macro) = 0.5820898078109746


In [58]:
print(classification_report(targets, out_mean, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

      250.00     0.6636    0.7347    0.6973       392
       272.4     0.7237    0.6103    0.6622       485
       401.9     0.6869    0.6818    0.6844       946
      414.01     0.7340    0.8185    0.7739       573
      427.31     0.6620    0.7482    0.7025       568
       428.0     0.5749    0.8328    0.6802       622
      518.81     0.5930    0.4500    0.5117       340
      530.81     0.8075    0.4235    0.5556       307
       584.9     0.5210    0.2851    0.3685       435
       599.0     0.3956    0.1204    0.1846       299

   micro avg     0.6545    0.6209    0.6373      4967
   macro avg     0.6362    0.5705    0.5821      4967
weighted avg     0.6462    0.6209    0.6176      4967
 samples avg     0.6267    0.6280    0.5907      4967



  _warn_prf(average, modifier, msg_start, len(result))


In [59]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_mean, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_mean, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.7531774511607752
RUC AUC Score (Macro) = 0.7224492173717927
