## Transformers for ICD Prediction from MIMIC III

Using Transformers pre-trained model for medical code predictions using MIMIC III Clinical notes data

- Data preprocessing based on CAML: https://github.com/jamesmullenbach/caml-mimic
- Pytorch training code based on : https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_multi_label_classification.ipynb
- Pytorch early stopping from : https://github.com/Bjarten/early-stopping-pytorch

In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from transformers import AutoTokenizer, AutoModel

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import LabelBinarizer


In [2]:
# Setting up the device for GPU usage

!nvidia-smi

from torch import cuda
print(cuda.is_available())
device = 'cuda' if cuda.is_available() else 'cpu'

Mon Apr 26 22:52:09 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 466.11       Driver Version: 466.11       CUDA Version: 11.3     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   51C    P0    18W /  N/A |    121MiB /  6144MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Load data

In [3]:
#change to where you store mimic3 data
MIMIC_3_DIR = '~/OneDrive/Academic/CS598-DLH/caml-mimic/mimicdata/mimic3'

train_df = pd.read_csv('%s/train_50.csv' % MIMIC_3_DIR)

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,LABELS,length
0,7908,182396,admission date discharge date date of birth se...,287.5;584.9;45.13,105
1,11231,183363,admission date discharge date date of birth se...,96.71;401.9;272.4,106
2,3184,144347,admission date discharge date date of birth se...,530.81,117
3,24427,177066,admission date discharge date date of birth se...,96.71;V58.61;276.2;96.04,148
4,1262,183373,admission date discharge date service neurolog...,V58.61;244.9;414.01;401.9;96.71;427.31,156


 ## Preprocess Data

In [4]:
# split labels by ";", then convert to list
def split_lab (x):
    #print(x)
    return x.split(";")

train_df['LABELS'] = train_df['LABELS'].apply(split_lab)

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,LABELS,length
0,7908,182396,admission date discharge date date of birth se...,"[287.5, 584.9, 45.13]",105
1,11231,183363,admission date discharge date date of birth se...,"[96.71, 401.9, 272.4]",106
2,3184,144347,admission date discharge date date of birth se...,[530.81],117
3,24427,177066,admission date discharge date date of birth se...,"[96.71, V58.61, 276.2, 96.04]",148
4,1262,183373,admission date discharge date service neurolog...,"[V58.61, 244.9, 414.01, 401.9, 96.71, 427.31]",156


In [5]:
#check top 50 code
top_50 = pd.read_csv('%s/TOP_50_CODES.csv' % MIMIC_3_DIR)

top_50.head().values

array([['38.93'],
       ['428.0'],
       ['427.31'],
       ['414.01'],
       ['96.04']], dtype=object)

In [6]:
#load multi label binarizer for one-hot encoding
mlb = MultiLabelBinarizer(sparse_output=True)

#change label to one-hot encoding per code
train_df = train_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(train_df.pop('LABELS')),
                index=train_df.index,
                columns=mlb.classes_))

# Convert columns to list of one hot encoding
icd_classes_50 = mlb.classes_

train_df['labels'] = train_df[icd_classes_50].values.tolist()

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,length,038.9,244.9,250.00,272.0,272.4,276.1,...,96.6,96.71,96.72,99.04,99.15,995.92,V15.82,V45.81,V58.61,labels
0,7908,182396,admission date discharge date date of birth se...,105,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
1,11231,183363,admission date discharge date date of birth se...,106,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,3184,144347,admission date discharge date date of birth se...,117,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,24427,177066,admission date discharge date date of birth se...,148,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1262,183373,admission date discharge date service neurolog...,156,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,1,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [7]:
#check if one-hot encoding is correct
len(train_df.labels[0])

50

In [8]:
#convert into 2 columns dataframe
train_df = pd.DataFrame(train_df, columns=['TEXT', 'labels'])
train_df.columns=['text', 'labels']
train_df.head()

Unnamed: 0,text,labels
0,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
1,admission date discharge date date of birth se...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,admission date discharge date service neurolog...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


### Prepare Eval data

In [9]:
#same as train data preparation, but for evaluation
eval_df = pd.read_csv('%s/dev_50.csv' % MIMIC_3_DIR)

eval_df['LABELS'] = eval_df['LABELS'].apply(split_lab)

eval_df = eval_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(eval_df.pop('LABELS')),
                index=eval_df.index,
                columns=icd_classes_50))

eval_df['labels'] = eval_df[icd_classes_50].values.tolist()
eval_df = pd.DataFrame(eval_df, columns=['TEXT', 'labels'])
eval_df.columns=['text', 'labels']

print(len(eval_df.labels[0]))
eval_df.describe


50


<bound method NDFrame.describe of                                                    text  \
0     admission date discharge date date of birth se...   
1     admission date discharge date service neurosur...   
2     admission date discharge date date of birth se...   
3     admission date discharge date date of birth se...   
4     admission date discharge date date of birth se...   
...                                                 ...   
1568  admission date discharge date date of birth se...   
1569  admission date discharge date date of birth se...   
1570  admission date discharge date date of birth se...   
1571  admission date discharge date date of birth se...   
1572  admission date discharge date date of birth se...   

                                                 labels  
0     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
1     [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
2     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
3     [0, 0, 0, 0, 1, 0, 

In [10]:
#same as train data preparation, but for evaluation
test_df = pd.read_csv('%s/test_50.csv' % MIMIC_3_DIR)

test_df['LABELS'] = test_df['LABELS'].apply(split_lab)

test_df = test_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(test_df.pop('LABELS')),
                index=test_df.index,
                columns=icd_classes_50))

test_df['labels'] = test_df[icd_classes_50].values.tolist()
test_df = pd.DataFrame(test_df, columns=['TEXT', 'labels'])
test_df.columns=['text', 'labels']

print(len(test_df.labels[0]))
test_df.describe

50


<bound method NDFrame.describe of                                                    text  \
0     admission date discharge date date of birth se...   
1     admission date discharge date date of birth se...   
2     admission date discharge date date of birth se...   
3     admission date discharge date date of birth se...   
4     admission date discharge date date of birth se...   
...                                                 ...   
1724  admission date discharge date date of birth se...   
1725  admission date discharge date date of birth se...   
1726  admission date discharge date date of birth se...   
1727  admission date discharge date date of birth se...   
1728  admission date discharge date date of birth se...   

                                                 labels  
0     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
1     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
2     [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
3     [0, 0, 0, 0, 0, 0, 

### Set Model Parameters

In [21]:
# Defining some key variables to configure model training

#Change max length to 512 for bert-base
MAX_LEN = 300
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
EPOCHS = 10
LEARNING_RATE = 1e-05

In [12]:
#set tokenizer
#tokenizer = AutoTokenizer.from_pretrained("bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

### Preparing Dataloader

In [13]:
#custom dataset for BERT class
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        
        '''
            set text as training data
            set labels as targets
        '''
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.labels
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }

In [14]:
#load df to dataset

print("TRAIN Dataset: {}".format(train_df.shape))
print("EVAL Dataset: {}".format(eval_df.shape))
print("TEST Dataset: {}".format(test_df.shape))

training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
evaluation_set = CustomDataset(eval_df, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_df, tokenizer, MAX_LEN)

TRAIN Dataset: (8066, 2)
EVAL Dataset: (1573, 2)
TEST Dataset: (1729, 2)


In [15]:
#data loader
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

eval_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(training_set, **train_params)
validation_loader = DataLoader(evaluation_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

In [26]:
#create dataset

def create_datasets(batch_size):
    
    training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
    evaluation_set = CustomDataset(eval_df, tokenizer, MAX_LEN)
    testing_set = CustomDataset(test_df, tokenizer, MAX_LEN)
    
    #data loader
    train_params = {'batch_size': batch_size,
                    'shuffle': True,
                    'num_workers': 0
                    }

    eval_params = {'batch_size': batch_size,
                    'shuffle': True,
                    'num_workers': 0
                    }

    test_params = {'batch_size': batch_size,
                    'shuffle': True,
                    'num_workers': 0
                    }

    training_loader = DataLoader(training_set, **train_params)
    validation_loader = DataLoader(evaluation_set, **train_params)
    testing_loader = DataLoader(testing_set, **test_params)

    return training_loader, testing_loader, validation_loader

### Create model class from pretrained model

In [16]:
# Creating the customized model, by adding a drop out and a dense layer on top of distil bert to get the final output for the model. 

class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        '''
            Load Pretrained model here
            Use return_dict=False for compatibility for 4.x
        
        '''
        #self.l1 = transformers.AutoModel.from_pretrained("bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12", return_dict=False)
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)
        
        self.l2 = torch.nn.Dropout(0.3)
        
        '''
            Changed Linear Output layer to 50 based on the class
        '''
        self.l3 = torch.nn.Linear(768, 50)
    
    def forward(self, ids, mask, token_type_ids):
        _, output_1= self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

model = BERTClass()
model.to(device)

BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [17]:
#loss function
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

In [18]:
#optimizer
optimizer = torch.optim.Adam(params = model.parameters(), lr=LEARNING_RATE)

### Implement Early Stopping

In [32]:
'''
    from https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py
'''

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print            
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

### Train fine-tuning model

In [19]:
'''
def train(epoch):
    model.train()
    for _,data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        if _%5000==0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
def validation(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(validation_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets
'''

In [25]:
#train model with early stopping
def train_model(model, batch_size, patience, n_epochs):
    
    # to track the training loss as the model trains
    train_losses = []
    # to track the validation loss as the model trains
    valid_losses = []
    # to track the average training loss per epoch as the model trains
    avg_train_losses = []
    # to track the average validation loss per epoch as the model trains
    avg_valid_losses = [] 
    
    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    
    for epoch in range(1, n_epochs + 1):

        ###################
        # train the model #
        ###################
        model.train() # prep model for training
        
        for _,data in enumerate(training_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)

            #outputs = model(ids, mask, token_type_ids)
        
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            outputs = model(ids, mask, token_type_ids)
            
            # calculate the loss
            loss = loss_fn(outputs, targets)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # record training loss
            train_losses.append(loss.item())
            
        ######################    
        # validate the model #
        ######################
        model.eval() # prep model for evaluation
        for _, data in enumerate(validation_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            
            outputs = model(ids, mask, token_type_ids)
            # calculate the loss
            loss = loss_fn(outputs, targets)
            # record validation loss
            valid_losses.append(loss.item())

        # print training/validation statistics 
        # calculate average loss over an epoch
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)
        
        epoch_len = len(str(n_epochs))
        
        print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} ' +
                     f'valid_loss: {valid_loss:.5f}')
        
        print(print_msg)
        
        # clear lists to track next epoch
        train_losses = []
        valid_losses = []
        
        # early_stopping needs the validation loss to check if it has decresed, 
        # and if it has, it will make a checkpoint of the current model
        early_stopping(valid_loss, model)
        
        if early_stopping.early_stop:
            print("Early stopping")
            break
        
    # load the last checkpoint with the best model
    model.load_state_dict(torch.load('checkpoint.pt'))

    return  model, avg_train_losses, avg_valid_losses
        

In [22]:
'''
for epoch in tqdm(range(EPOCHS)):
    train(epoch)
    
    outputs, targets = validation(epoch)
    outputs = np.array(outputs) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    precision_score_micro = metrics.precision_score(targets, outputs, average='micro')
    precision_score_macro = metrics.precision_score(targets, outputs, average='macro')
    recall_score_micro = metrics.recall_score(targets, outputs, average='micro')
    recall_score_macro = metrics.recall_score(targets, outputs, average='macro')
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    auc_score_micro = metrics.roc_auc_score(targets,outputs, average='micro')
    auc_score_macro = metrics.roc_auc_score(targets,outputs, average='macro')
    print(f"Accuracy Score = {accuracy}")
    print(f"Precision Score (Micro) = {precision_score_micro}")
    print(f"Precision Score (Macro) = {precision_score_macro}")
    print(f"Recall Score (Micro) = {recall_score_micro}")
    print(f"Recall Score (Macro) = {recall_score_macro}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")
    print(f"AUC Score (Micro) = {auc_score_micro}")
    print(f"AUC Score (Macro) = {auc_score_macro}")
'''

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Epoch: 0, Loss:  0.5553519129753113


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.0
Precision Score (Micro) = 0.6559877955758963
Precision Score (Macro) = 0.12816516840092876
Recall Score (Micro) = 0.09264246472045674
Recall Score (Macro) = 0.05602258187586171
F1 Score (Micro) = 0.1623560505946762
F1 Score (Macro) = 0.06645325825380709
AUC Score (Micro) = 0.543070407039831
AUC Score (Macro) = 0.5240776910523874
Epoch: 1, Loss:  0.31213676929473877


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.0012714558169103624
Precision Score (Micro) = 0.7317961165048543
Precision Score (Macro) = 0.18726580243850527
Recall Score (Micro) = 0.1299148982010126
Recall Score (Macro) = 0.08513329766519558
F1 Score (Micro) = 0.22065684749794165
F1 Score (Macro) = 0.10272434164248265
AUC Score (Micro) = 0.5617714961257488
AUC Score (Macro) = 0.5385420391639579
Epoch: 2, Loss:  0.33922165632247925


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.007628734901462174
Precision Score (Micro) = 0.7372262773722628
Precision Score (Macro) = 0.23529531441459667
Recall Score (Micro) = 0.17408165463750944
Recall Score (Macro) = 0.11800058319931442
F1 Score (Micro) = 0.2816557734204793
F1 Score (Macro) = 0.1420754509045563
AUC Score (Micro) = 0.5828889971978038
AUC Score (Macro) = 0.5538938070518263
Epoch: 3, Loss:  0.2490013688802719


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.009535918626827717
Precision Score (Micro) = 0.710948905109489
Precision Score (Macro) = 0.2983627808639793
Recall Score (Micro) = 0.2098459549714532
Recall Score (Macro) = 0.1463363982225209
F1 Score (Micro) = 0.32404557930632955
F1 Score (Macro) = 0.1704543683767146
AUC Score (Micro) = 0.5992142110694191
AUC Score (Macro) = 0.5661764899334187
Epoch: 4, Loss:  0.24243684113025665


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.014621741894469168
Precision Score (Micro) = 0.7233891064435742
Precision Score (Macro) = 0.35258934850021484
Recall Score (Micro) = 0.21889475385112572
Recall Score (Macro) = 0.15903495326952144
F1 Score (Micro) = 0.33608997684419456
F1 Score (Macro) = 0.19232314066525838
AUC Score (Micro) = 0.6038467310853217
AUC Score (Macro) = 0.5727496822401283
Epoch: 5, Loss:  0.16622722148895264


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.017800381436745075
Precision Score (Micro) = 0.6894446059901134
Precision Score (Macro) = 0.3826217971901922
Recall Score (Micro) = 0.2554131207583755
Recall Score (Macro) = 0.18943927808314073
F1 Score (Micro) = 0.3727401351988681
F1 Score (Macro) = 0.2175784541156585
AUC Score (Micro) = 0.620008375363258
AUC Score (Macro) = 0.5855942689475472
Epoch: 6, Loss:  0.27711036801338196


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.015257469802924348
Precision Score (Micro) = 0.683191551774714
Precision Score (Macro) = 0.3691223541158294
Recall Score (Micro) = 0.2508887213185393
Recall Score (Macro) = 0.18730802716092987
F1 Score (Micro) = 0.36700283643239834
F1 Score (Macro) = 0.2221546470084548
AUC Score (Micro) = 0.6176596791824868
AUC Score (Macro) = 0.5843980147051038
Epoch: 7, Loss:  0.23411443829536438


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.014621741894469168
Precision Score (Micro) = 0.6730716442492451
Precision Score (Macro) = 0.4242900569083946
Recall Score (Micro) = 0.2641387482494883
Recall Score (Macro) = 0.20609938937413216
F1 Score (Micro) = 0.37939037598638403
F1 Score (Macro) = 0.25080166407574966
AUC Score (Micro) = 0.6234846003850697
AUC Score (Macro) = 0.5932336998370132
Epoch: 8, Loss:  0.1802912801504135


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.02097902097902098
Precision Score (Micro) = 0.6672535211267606
Precision Score (Macro) = 0.40293345145878795
Recall Score (Micro) = 0.2857912312829904
Recall Score (Macro) = 0.2045434133607786
F1 Score (Micro) = 0.4001810091258768
F1 Score (Macro) = 0.2404649916874617
AUC Score (Micro) = 0.6333593808324361
AUC Score (Macro) = 0.5910069687330667
Epoch: 9, Loss:  0.2240721881389618


  _warn_prf(average, modifier, msg_start, len(result))
100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [43:56<00:00, 263.68s/it]

Accuracy Score = 0.020343293070565798
Precision Score (Micro) = 0.6800197823936697
Precision Score (Macro) = 0.4943858012238653
Recall Score (Micro) = 0.2962404395130884
Recall Score (Macro) = 0.2302746571942668
F1 Score (Micro) = 0.4126960306145419
F1 Score (Macro) = 0.2697759320276721
AUC Score (Micro) = 0.6387930180612136
AUC Score (Macro) = 0.6042057714265181





In [None]:
batch_size = 100
n_epochs = 50

train_loader, test_loader, valid_loader = create_datasets(batch_size)

# early stopping patience; how long to wait after last time validation loss improved.
patience = 3

model, train_loss, valid_loss = train_model(model, batch_size, patience, n_epochs)


### Examine the loss and Early Stopping

In [None]:

# visualize the loss as the network trained
fig = plt.figure(figsize=(10,8))
plt.plot(range(1,len(train_loss)+1),train_loss, label='Training Loss')
plt.plot(range(1,len(valid_loss)+1),valid_loss,label='Validation Loss')

# find position of lowest validation loss
minposs = valid_loss.index(min(valid_loss))+1 
plt.axvline(minposs, linestyle='--', color='r',label='Early Stopping Checkpoint')

plt.xlabel('epochs')
plt.ylabel('loss')
plt.ylim(0, 0.5) # consistent scale
plt.xlim(0, len(train_loss)+1) # consistent scale
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
fig.savefig('loss_plot.png', bbox_inches='tight')

### Model Evaluation

In [23]:
# Evaluate the model

def evaluation(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

In [24]:
outputs, targets = validation(epoch)
outputs = np.array(outputs) >= 0.5
accuracy = metrics.accuracy_score(targets, outputs)
precision_score_micro = metrics.precision_score(targets, outputs, average='micro')
precision_score_macro = metrics.precision_score(targets, outputs, average='macro')
recall_score_micro = metrics.recall_score(targets, outputs, average='micro')
recall_score_macro = metrics.recall_score(targets, outputs, average='macro')
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
auc_score_micro = metrics.roc_auc_score(targets,outputs, average='micro')
auc_score_macro = metrics.roc_auc_score(targets,outputs, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"Precision Score (Micro) = {precision_score_micro}")
print(f"Precision Score (Macro) = {precision_score_macro}")
print(f"Recall Score (Micro) = {recall_score_micro}")
print(f"Recall Score (Macro) = {recall_score_macro}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")
print(f"AUC Score (Micro) = {auc_score_micro}")
print(f"AUC Score (Macro) = {auc_score_macro}")



Accuracy Score = 0.020242914979757085
Precision Score (Micro) = 0.6851524524966858
Precision Score (Macro) = 0.48969806542249095
Recall Score (Micro) = 0.29598167414336163
Recall Score (Macro) = 0.238942451097267
F1 Score (Micro) = 0.4133839898686929
F1 Score (Macro) = 0.2835162247637371
AUC Score (Micro) = 0.6386125052959184
AUC Score (Macro) = 0.6084614744581104


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
torch.save(model.state_dict(), "bert_state_dict_model_v2.pt")

In [None]:
torch.save(model, "bert_model_v2.pt")