In [None]:
!pip install -q transformers

[K     |████████████████████████████████| 2.8 MB 11.0 MB/s 
[K     |████████████████████████████████| 3.3 MB 29.4 MB/s 
[K     |████████████████████████████████| 895 kB 46.3 MB/s 
[K     |████████████████████████████████| 636 kB 54.0 MB/s 
[K     |████████████████████████████████| 50 kB 6.6 MB/s 
[?25h

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.model_selection import train_test_split
import transformers
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from transformers import AutoTokenizer, AutoModel

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report


In [None]:
# Setting up the device for GPU usage

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
np.random.seed(123)
torch.manual_seed(123)
torch.cuda.manual_seed_all(123)

## Load data

In [None]:
#change to where you store mimic3 data
MIMIC_3_DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/datasets'

train_df = pd.read_csv('%s/train_10.csv' % MIMIC_3_DIR)
eval_df = pd.read_csv('%s/dev_10.csv' % MIMIC_3_DIR)
test_df = pd.read_csv('%s/test_10.csv' % MIMIC_3_DIR)

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,LABELS,length
0,17341,151110,Nursing/other,rsbi,584.9;427.31,1
1,61638,103816,Nursing,title,414.01,1
2,61638,103816,General,title,414.01,1
3,23706,186321,Nursing/other,npn,401.9;428.0;530.81,1
4,55265,191108,General,title,530.81;584.9;427.31,1


In [None]:
full_df = pd.concat([train_df, eval_df, test_df], ignore_index=True)


 ## Preprocess Data

In [None]:
# split labels by ";", then convert to list
def split_lab (x):
    #print(x)
    return x.split(";")

full_df['LABELS'] = full_df['LABELS'].apply(split_lab)
#full_df['TEXT'] = full_df['TEXT'].apply(split_lab)

full_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,LABELS,length
0,17341,151110,Nursing/other,rsbi,"[584.9, 427.31]",1
1,61638,103816,Nursing,title,[414.01],1
2,61638,103816,General,title,[414.01],1
3,23706,186321,Nursing/other,npn,"[401.9, 428.0, 530.81]",1
4,55265,191108,General,title,"[530.81, 584.9, 427.31]",1


In [None]:
#load multi label binarizer for one-hot encoding
mlb = MultiLabelBinarizer(sparse_output=True)



In [None]:
#change label to one-hot encoding per code
full_df = full_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(full_df.pop('LABELS')),
                columns=mlb.classes_))

full_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,length,250.00,272.4,401.9,414.01,427.31,428.0,518.81,530.81,584.9,599.0
0,17341,151110,Nursing/other,rsbi,1,0,0,0,0,1,0,0,0,1,0
1,61638,103816,Nursing,title,1,0,0,0,1,0,0,0,0,0,0
2,61638,103816,General,title,1,0,0,0,1,0,0,0,0,0,0
3,23706,186321,Nursing/other,npn,1,0,0,1,0,0,1,0,1,0,0
4,55265,191108,General,title,1,0,0,0,0,1,0,0,1,1,0


In [None]:
# Convert columns to list of one hot encoding
icd_classes_50 = mlb.classes_

full_df['labels'] = full_df[icd_classes_50].values.tolist()
#train_df.sort_values(['length'], ascending=False, inplace=True)
full_df


Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,length,250.00,272.4,401.9,414.01,427.31,428.0,518.81,530.81,584.9,599.0,labels
0,17341,151110,Nursing/other,rsbi,1,0,0,0,0,1,0,0,0,1,0,"[0, 0, 0, 0, 1, 0, 0, 0, 1, 0]"
1,61638,103816,Nursing,title,1,0,0,0,1,0,0,0,0,0,0,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"
2,61638,103816,General,title,1,0,0,0,1,0,0,0,0,0,0,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"
3,23706,186321,Nursing/other,npn,1,0,0,1,0,0,1,0,1,0,0,"[0, 0, 1, 0, 0, 1, 0, 1, 0, 0]"
4,55265,191108,General,title,1,0,0,0,0,1,0,0,1,1,0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294460,97158,152158,Discharge summary,admission date discharge date date of birth se...,4644,0,0,1,0,0,0,1,0,0,0,"[0, 0, 1, 0, 0, 0, 1, 0, 0, 0]"
294461,99650,199859,Discharge summary,admission date discharge date date of birth se...,5126,0,0,0,1,1,1,1,0,1,1,"[0, 0, 0, 1, 1, 1, 1, 0, 1, 1]"
294462,93623,187232,Discharge summary,admission date discharge date date of birth se...,5171,0,1,1,0,0,0,0,0,0,0,"[0, 1, 1, 0, 0, 0, 0, 0, 0, 0]"
294463,96260,110058,Discharge summary,admission date discharge date date of birth se...,5173,0,0,0,0,0,0,1,0,0,1,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1]"


In [None]:
full_df.HADM_ID.unique().shape

(9446,)

In [None]:
full_df = full_df.drop(full_df[full_df['length']<300].index)

In [None]:
train_df, test_df = train_test_split(full_df, test_size=0.2)
train_df, eval_df = train_test_split(train_df, test_size=0.2)

In [None]:
train_df.sort_values(['length'], inplace=True)
eval_df.sort_values(['length'], inplace=True)
test_df.sort_values(['length'], inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
train_df = train_df.drop(train_df[train_df['CATEGORY']!='Nursing/other'].index)
eval_df = eval_df.drop(eval_df[eval_df['CATEGORY']!='Nursing/other'].index)
test_df = test_df.drop(test_df[test_df['CATEGORY']!='Nursing/other'].index)

In [None]:
train_df

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,TEXT,length,250.00,272.4,401.9,414.01,427.31,428.0,518.81,530.81,584.9,599.0,labels
155070,12711,133912,Nursing/other,ccu micu border nursing progress note 7p 7a ne...,300,0,0,0,0,1,1,0,0,0,0,"[0, 0, 0, 0, 1, 1, 0, 0, 0, 0]"
155216,419,148312,Nursing/other,7am 7pm nursing note see carevue for objective...,300,0,0,1,0,1,0,1,0,0,1,"[0, 0, 1, 0, 1, 0, 1, 0, 0, 1]"
155125,12368,106089,Nursing/other,admission note pt with mvp x yrs pt noted to h...,300,0,0,0,0,0,0,0,1,0,0,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
155204,19334,183582,Nursing/other,7p 7a see admission note cv initially hr to bu...,300,0,0,0,0,0,1,0,0,0,0,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0]"
155237,5102,119479,Nursing/other,condition update d see carevue flowsheet for s...,300,1,0,0,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185042,27176,164610,Nursing/other,focus nursing progress year old male with alle...,574,0,0,0,0,1,1,0,0,1,0,"[0, 0, 0, 0, 1, 1, 0, 0, 1, 0]"
185400,17339,191167,Nursing/other,adm note mr known lastname is a 68yo man adm f...,580,0,0,0,1,1,1,1,0,0,0,"[0, 0, 0, 1, 1, 1, 1, 0, 0, 0]"
185772,31760,187564,Nursing/other,nsg aadm note mrs known lastname is an 82yo wo...,586,0,0,0,0,0,1,0,1,1,0,"[0, 0, 0, 0, 0, 1, 0, 1, 1, 0]"
186107,13705,187523,Nursing/other,npn micu mr known lastname has had an eventful...,592,0,0,0,0,0,0,1,0,0,0,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]"


In [None]:
train_df.HADM_ID.unique().shape

(1644,)

In [None]:
#convert into 2 columns dataframe
train_df = pd.DataFrame(train_df, columns=['TEXT', 'labels'])
train_df.columns=['text', 'labels']
train_df.head()

eval_df = pd.DataFrame(eval_df, columns=['TEXT', 'labels'])
eval_df.columns=['text', 'labels']
eval_df.head()

test_df = pd.DataFrame(test_df, columns=['HADM_ID', 'TEXT', 'labels'])
test_df.columns=['id', 'text', 'labels']


In [None]:
train_df.reset_index(drop=True, inplace=True)
eval_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)
test_df.head()

Unnamed: 0,id,text,labels
0,132213,pmicu nursing progress 7a 7p review of systems...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
1,111602,nursing progress note neuro psych intact a o x...,"[0, 0, 1, 1, 1, 0, 0, 1, 0, 0]"
2,118699,neuro pt extremely agitated t o shift has hall...,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 0]"
3,165840,t sicu shift report see above admission note f...,"[1, 0, 1, 1, 0, 0, 0, 0, 0, 0]"
4,111514,nursing progress note neuro pt awake most of t...,"[0, 0, 1, 0, 1, 0, 0, 0, 1, 0]"


### Set Model Parameters

In [None]:
# Defining some key variables to configure model training
MAX_LEN = 512
TRAIN_BATCH_SIZE = 12
VALID_BATCH_SIZE = 8
TEST_BATCH_SIZE = 8
EPOCHS = 5
LEARNING_RATE = 3e-05

#set tokenizer
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

Downloading:   0%|          | 0.00/385 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

### Preparing Dataloader

In [None]:
#custom dataset for BERT class
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        
        '''
            set text as training data
            set labels as targets
        '''
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.labels
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }

In [None]:
#load df to dataset

training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
valid_set = CustomDataset(eval_df, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_df, tokenizer, MAX_LEN)

In [None]:
#data loader
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': False
                }

val_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

training_loader = DataLoader(training_set, **train_params)
valid_loader = DataLoader(valid_set, **val_params)
testing_loader = DataLoader(testing_set, **test_params)

### Create model class from pretrained model

In [None]:
# Creating the customized model, by adding a drop out and a dense layer on top of distil bert to get the final output for the model. 

class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()

        self.l1 = transformers.AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT", return_dict=False)
        #self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)
        
        
        self.l2 = torch.nn.Dropout(0.3)
        

        self.l3 = torch.nn.Linear(768, 10)
    
    def forward(self, ids, mask, token_type_ids):
#        print("ids: ", ids.size(), "mask: ", mask.size(), "token type ids: ", token_type_ids.size())
        _, output_1= self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

model = BERTClass()
model.to(device)

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [None]:
#loss function
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

In [None]:
#optimizer
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

### Train fine-tuning model

In [None]:
def train(epoch):
    model.train()
    for _,data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        
    print(f'Epoch: {epoch}, Training Loss:  {loss.item()}')

In [None]:
# Evaluate the model

def validation(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(valid_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    print(f'Epoch: {epoch}, Validation Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [None]:
start_epoch=5
DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/'
resume = True     
if resume:
    if os.path.isfile(f"%s/models/models_nurs_oth_epoch{start_epoch}.pth" % DIR):
        print("Resume from checkpoint...")
        checkpoint = torch.load(f"%s/models/models_nurs_oth_epoch{start_epoch}.pth" % DIR)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        initepoch = checkpoint['epoch']
        print("====>loaded checkpoint (epoch{})".format(checkpoint['epoch']))
    else:
        print("====>no checkpoint found.")
        initepoch = 0

for epoch in tqdm(range(EPOCHS)):
    train(epoch)
    validation(epoch)

    if (epoch+start_epoch+1) >8:
        checkpoint = {"model_state_dict": model.state_dict(),
                      "optimizer_state_dict": optimizer.state_dict(),
                      "epoch": epoch+start_epoch+1}
        path_checkpoint = f"%s/models/models_nurs_oth_epoch{epoch+start_epoch+1}.pth" % DIR
        torch.save(checkpoint, path_checkpoint)



Resume from checkpoint...
====>loaded checkpoint (epoch5)




Epoch: 0, Training Loss:  0.37910225987434387


 20%|██        | 1/5 [02:41<10:47, 161.78s/it]

Epoch: 0, Validation Loss:  0.48
Epoch: 1, Training Loss:  0.32141098380088806


 40%|████      | 2/5 [05:23<08:05, 161.89s/it]

Epoch: 1, Validation Loss:  0.50
Epoch: 2, Training Loss:  0.3010225296020508


 60%|██████    | 3/5 [08:05<05:23, 161.93s/it]

Epoch: 2, Validation Loss:  0.52
Epoch: 3, Training Loss:  0.2707223892211914
Epoch: 3, Validation Loss:  0.53


 80%|████████  | 4/5 [10:53<02:44, 164.21s/it]

Epoch: 4, Training Loss:  0.21421736478805542
Epoch: 4, Validation Loss:  0.55


100%|██████████| 5/5 [14:02<00:00, 168.44s/it]


In [None]:

DIR = '/content/drive/MyDrive/Colab Notebooks/MSc-Individual-Project/'

checkpoint = torch.load(f"%s/models/models_nurs_oth_epoch10.pth" % DIR)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>


### Model Evaluation

In [None]:
# Evaluate the model

def evaluation():
    model.eval()

    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(valid_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy())
    print(f'Loss:  {np.mean(losses):.2f}')
    return fin_outputs, fin_targets, losses

In [None]:
dev_out, dev_tar, losses = evaluation()



Loss:  0.06


In [None]:
# Evaluate the model
def testing():
    model.eval()

    fin_targets=[]
    fin_outputs=[]
    losses=[]
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            fin_targets.extend(targets.cpu().detach().numpy())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy())
    print(f'Loss:  {np.mean(losses):.2f}')

    return fin_outputs, fin_targets, losses

In [None]:

test_out, targets, losses = testing()
outputs = np.array(test_out) >= 0.5
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Loss:  0.56
F1 Score (Micro) = 0.5238784370477568
F1 Score (Macro) = 0.41986309069143923


In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, outputs, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, outputs, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6776263346093735
RUC AUC Score (Macro) = 0.6277952205856064


In [None]:
print(classification_report(targets, outputs, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

      250.00     0.4231    0.1086    0.1728       304
       272.4     0.2667    0.0488    0.0825        82
       401.9     0.5814    0.3676    0.4505       544
      414.01     0.6754    0.4542    0.5432       284
      427.31     0.7582    0.6192    0.6817       562
       428.0     0.6609    0.6066    0.6326       694
      518.81     0.6561    0.5374    0.5908       575
      530.81     0.6667    0.0714    0.1290       112
       584.9     0.5369    0.4638    0.4977       470
       599.0     0.4011    0.4361    0.4179       321

   micro avg     0.6111    0.4585    0.5239      3948
   macro avg     0.5626    0.3714    0.4199      3948
weighted avg     0.6019    0.4585    0.5073      3948
 samples avg     0.5723    0.4548    0.4707      3948



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
test_df['prediction'] = test_out
test_df['tar'] = targets

In [None]:
test_df

Unnamed: 0,id,text,labels,prediction,tar
0,132213,pmicu nursing progress 7a 7p review of systems...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]","[0.021737553, 0.0034165808, 0.016645126, 0.005...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
1,111602,nursing progress note neuro psych intact a o x...,"[0, 0, 1, 1, 1, 0, 0, 1, 0, 0]","[0.29699025, 0.033649553, 0.36466125, 0.151887...","[0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, ..."
2,118699,neuro pt extremely agitated t o shift has hall...,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 0]","[0.06353807, 0.0144188, 0.50151366, 0.10083547...","[0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
3,165840,t sicu shift report see above admission note f...,"[1, 0, 1, 1, 0, 0, 0, 0, 0, 0]","[0.023780648, 0.0035994381, 0.26997888, 0.0086...","[1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,111514,nursing progress note neuro pt awake most of t...,"[0, 0, 1, 0, 1, 0, 0, 0, 1, 0]","[0.06104754, 0.009263264, 0.33866292, 0.006309...","[0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, ..."
...,...,...,...,...,...
1506,124844,micu npn pt is now a candidate for organ donat...,"[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]","[0.0052432325, 0.004724769, 0.011368385, 0.004...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, ..."
1507,196983,npn code status full all imipenum nsaids asa p...,"[1, 0, 0, 0, 1, 1, 0, 0, 0, 0]","[0.2901349, 0.019403586, 0.01800509, 0.0213513...","[1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, ..."
1508,140674,nursing progress note please refer to carevue ...,"[1, 0, 1, 0, 0, 1, 0, 0, 0, 1]","[0.07580252, 0.0014430246, 0.5505351, 0.019751...","[1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ..."
1509,186995,npn micu mr known lastname has had an eventful...,"[1, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[0.025808105, 0.0029470506, 0.019024633, 0.009...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ..."


In [None]:
note_count_dict = test_df.groupby('id').size().to_dict()
test_df['note_count'] = test_df['id'].map(note_count_dict)

In [None]:
test_df['out_bool'] = [(test_df['prediction'][i]>=0.5).astype(int) for i in test_df.index]

In [None]:
out_freq_dict = test_df.groupby('id').out_bool.apply(np.sum).to_dict()
test_df['num_pred'] = test_df['id'].map(out_freq_dict)
test_df['num_pred'] = [(test_df['num_pred'][i]>=0.4*test_df['note_count'][i]).astype(int) for i in test_df.index]

In [None]:
df_freq = test_df.drop_duplicates('id')

In [None]:
out_freq = np.vstack([df_freq['num_pred'][i] for i in df_freq.index])
targets = np.vstack([df_freq['tar'][i] for i in df_freq.index])

#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_freq)
f1_score_micro = metrics.f1_score(targets, out_freq, average='micro')
f1_score_macro = metrics.f1_score(targets, out_freq, average='macro')

print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

F1 Score (Micro) = 0.5367802585193889
F1 Score (Macro) = 0.43531213169366667


In [None]:
print(classification_report(targets, out_freq, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

      250.00     0.3725    0.1080    0.1674       176
       272.4     0.3077    0.0580    0.0976        69
       401.9     0.5873    0.4217    0.4909       351
      414.01     0.6824    0.4951    0.5739       204
      427.31     0.7714    0.6708    0.7176       322
       428.0     0.6200    0.6458    0.6327       384
      518.81     0.6617    0.5914    0.6246       301
      530.81     0.5556    0.0704    0.1250        71
       584.9     0.5133    0.5019    0.5075       269
       599.0     0.3877    0.4490    0.4161       196

   micro avg     0.5973    0.4874    0.5368      2343
   macro avg     0.5460    0.4012    0.4353      2343
weighted avg     0.5853    0.4874    0.5184      2343
 samples avg     0.5660    0.4764    0.4810      2343



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_freq, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_freq, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6861302540695182
RUC AUC Score (Macro) = 0.6371996400378321


In [None]:
out_mean_dict = test_df.groupby('id').prediction.apply(np.mean).to_dict()
test_df['out_mean'] = test_df['id'].map(out_mean_dict)
test_df

Unnamed: 0,id,text,labels,prediction,tar,note_count,out_bool,num_pred,out_mean
0,132213,pmicu nursing progress 7a 7p review of systems...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]","[0.021737553, 0.0034165808, 0.016645126, 0.005...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...",1,"[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]","[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]","[0.021737553, 0.0034165808, 0.016645126, 0.005..."
1,111602,nursing progress note neuro psych intact a o x...,"[0, 0, 1, 1, 1, 0, 0, 1, 0, 0]","[0.29699025, 0.033649553, 0.36466125, 0.151887...","[0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...",1,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]","[0.29699025, 0.033649553, 0.36466125, 0.151887..."
2,118699,neuro pt extremely agitated t o shift has hall...,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 0]","[0.06353807, 0.0144188, 0.50151366, 0.10083547...","[0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",2,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 1]","[0, 0, 1, 0, 1, 1, 0, 0, 0, 1]","[0.050417833, 0.007966412, 0.2674269, 0.061344..."
3,165840,t sicu shift report see above admission note f...,"[1, 0, 1, 1, 0, 0, 0, 0, 0, 0]","[0.023780648, 0.0035994381, 0.26997888, 0.0086...","[1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",2,"[0, 0, 0, 0, 0, 1, 0, 0, 1, 0]","[0, 0, 1, 0, 0, 1, 0, 0, 1, 1]","[0.097935475, 0.0045407326, 0.40836653, 0.0060..."
4,111514,nursing progress note neuro pt awake most of t...,"[0, 0, 1, 0, 1, 0, 0, 0, 1, 0]","[0.06104754, 0.009263264, 0.33866292, 0.006309...","[0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, ...",3,"[0, 0, 0, 0, 1, 0, 1, 0, 0, 0]","[0, 0, 0, 0, 1, 0, 1, 0, 0, 0]","[0.12190374, 0.011870359, 0.40131068, 0.063366..."
...,...,...,...,...,...,...,...,...,...
1506,124844,micu npn pt is now a candidate for organ donat...,"[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]","[0.0052432325, 0.004724769, 0.011368385, 0.004...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, ...",1,"[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]","[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]","[0.0052432325, 0.004724769, 0.011368385, 0.004..."
1507,196983,npn code status full all imipenum nsaids asa p...,"[1, 0, 0, 0, 1, 1, 0, 0, 0, 0]","[0.2901349, 0.019403586, 0.01800509, 0.0213513...","[1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, ...",12,"[0, 0, 0, 0, 1, 1, 0, 0, 0, 0]","[1, 0, 0, 0, 1, 1, 0, 0, 0, 0]","[0.40834463, 0.032400593, 0.04112446, 0.035844..."
1508,140674,nursing progress note please refer to carevue ...,"[1, 0, 1, 0, 0, 1, 0, 0, 0, 1]","[0.07580252, 0.0014430246, 0.5505351, 0.019751...","[1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",2,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 1, 0, 0, 1, 0, 0, 0, 0]","[0.08018428, 0.0015512323, 0.5158353, 0.025774..."
1509,186995,npn micu mr known lastname has had an eventful...,"[1, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[0.025808105, 0.0029470506, 0.019024633, 0.009...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",4,"[0, 0, 0, 0, 0, 1, 1, 0, 1, 0]","[0, 0, 0, 0, 0, 1, 1, 0, 0, 0]","[0.12644278, 0.005286597, 0.15537414, 0.094197..."


In [None]:
df_mean = test_df.drop_duplicates('id')

In [None]:
out_mean = np.vstack([df_mean['out_mean'][i]>=0.5 for i in df_mean.index])
targets = np.vstack([df_mean['tar'][i] for i in df_mean.index])
#targets = dev_tar
accuracy = metrics.accuracy_score(targets, out_mean)
f1_score_micro = metrics.f1_score(targets, out_mean, average='micro')
f1_score_macro = metrics.f1_score(targets, out_mean, average='macro')

print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

F1 Score (Micro) = 0.5185554171855542
F1 Score (Macro) = 0.41546168462034927


In [None]:
print(classification_report(targets, out_mean, target_names=icd_classes_50, digits=4))

              precision    recall  f1-score   support

      250.00     0.3429    0.0682    0.1137       176
       272.4     0.4444    0.0580    0.1026        69
       401.9     0.6157    0.3789    0.4691       351
      414.01     0.6815    0.4510    0.5428       204
      427.31     0.7829    0.6273    0.6966       322
       428.0     0.6497    0.5990    0.6233       384
      518.81     0.6777    0.5449    0.6041       301
      530.81     0.6667    0.0563    0.1039        71
       584.9     0.5240    0.4461    0.4819       269
       599.0     0.4255    0.4082    0.4167       196

   micro avg     0.6226    0.4443    0.5186      2343
   macro avg     0.5811    0.3638    0.4155      2343
weighted avg     0.6075    0.4443    0.4979      2343
 samples avg     0.5726    0.4412    0.4647      2343



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
ruc_auc_score_micro = metrics.roc_auc_score(targets, out_mean, average='micro')
ruc_auc_score_macro = metrics.roc_auc_score(targets, out_mean, average='macro')

print(f"RUC AUC Score (Micro) = {ruc_auc_score_micro}")
print(f"RUC AUC Score (Macro) = {ruc_auc_score_macro}")

RUC AUC Score (Micro) = 0.6749699906961105
RUC AUC Score (Macro) = 0.6298157408701714
