## Transformers for ICD Prediction from MIMIC III

Using Transformers pre-trained model for medical code predictions using MIMIC III Clinical notes data

- Data preprocessing based on CAML: https://github.com/jamesmullenbach/caml-mimic
- Pytorch training code based on : https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_multi_label_classification.ipynb

In [1]:
!pip install -q transformers
!nvidia-smi

Tue Apr 20 08:31:19 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.14       Driver Version: 470.14       CUDA Version: 11.3     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0  On |                  N/A |
| 50%   54C    P0    35W / 200W |    566MiB /  8192MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from transformers import AutoTokenizer, AutoModel

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import LabelBinarizer


In [3]:
# Setting up the device for GPU usage

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

## Load data

In [4]:
#change to where you store mimic3 data
MIMIC_3_DIR = 'D:/OneDrive/Academic/CS598-DLH/caml-mimic/mimicdata/mimic3'

train_df = pd.read_csv('%s/train_50.csv' % MIMIC_3_DIR)

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,LABELS,length
0,7908,182396,admission date discharge date date of birth se...,287.5;584.9;45.13,105
1,11231,183363,admission date discharge date date of birth se...,96.71;401.9;272.4,106
2,3184,144347,admission date discharge date date of birth se...,530.81,117
3,24427,177066,admission date discharge date date of birth se...,96.71;V58.61;276.2;96.04,148
4,1262,183373,admission date discharge date service neurolog...,V58.61;244.9;414.01;401.9;96.71;427.31,156


 ## Preprocess Data

In [5]:
# split labels by ";", then convert to list
def split_lab (x):
    #print(x)
    return x.split(";")

train_df['LABELS'] = train_df['LABELS'].apply(split_lab)

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,LABELS,length
0,7908,182396,admission date discharge date date of birth se...,"[287.5, 584.9, 45.13]",105
1,11231,183363,admission date discharge date date of birth se...,"[96.71, 401.9, 272.4]",106
2,3184,144347,admission date discharge date date of birth se...,[530.81],117
3,24427,177066,admission date discharge date date of birth se...,"[96.71, V58.61, 276.2, 96.04]",148
4,1262,183373,admission date discharge date service neurolog...,"[V58.61, 244.9, 414.01, 401.9, 96.71, 427.31]",156


In [6]:
#check top 50 code
top_50 = pd.read_csv('%s/TOP_50_CODES.csv' % MIMIC_3_DIR)

top_50.head().values

array([['38.93'],
       ['428.0'],
       ['427.31'],
       ['414.01'],
       ['96.04']], dtype=object)

In [7]:
#load multi label binarizer for one-hot encoding
mlb = MultiLabelBinarizer(sparse_output=True)

#labels_onehot = mlb.fit_transform(train_df.pop('LABELS'))
#labels_onehot[0][1]

In [8]:
#change label to one-hot encoding per code
train_df = train_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(train_df.pop('LABELS')),
                index=train_df.index,
                columns=mlb.classes_))

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,length,038.9,244.9,250.00,272.0,272.4,276.1,...,96.04,96.6,96.71,96.72,99.04,99.15,995.92,V15.82,V45.81,V58.61
0,7908,182396,admission date discharge date date of birth se...,105,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,11231,183363,admission date discharge date date of birth se...,106,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
2,3184,144347,admission date discharge date date of birth se...,117,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,24427,177066,admission date discharge date date of birth se...,148,0,0,0,0,0,0,...,1,0,1,0,0,0,0,0,0,1
4,1262,183373,admission date discharge date service neurolog...,156,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1


In [9]:
# Convert columns to list of one hot encoding
icd_classes_50 = mlb.classes_

train_df['labels'] = train_df[icd_classes_50].values.tolist()

train_df.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,TEXT,length,038.9,244.9,250.00,272.0,272.4,276.1,...,96.6,96.71,96.72,99.04,99.15,995.92,V15.82,V45.81,V58.61,labels
0,7908,182396,admission date discharge date date of birth se...,105,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
1,11231,183363,admission date discharge date date of birth se...,106,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,3184,144347,admission date discharge date date of birth se...,117,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,24427,177066,admission date discharge date date of birth se...,148,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1262,183373,admission date discharge date service neurolog...,156,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,1,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [10]:
#check if one-hot encoding is correct
len(train_df.labels[0])

50

In [11]:
#convert into 2 columns dataframe
train_df = pd.DataFrame(train_df, columns=['TEXT', 'labels'])
train_df.columns=['text', 'labels']
train_df.head()

Unnamed: 0,text,labels
0,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
1,admission date discharge date date of birth se...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,admission date discharge date date of birth se...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,admission date discharge date service neurolog...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


### Prepare Eval data

In [12]:
#same as train data preparation, but for evaluation
eval_df = pd.read_csv('%s/dev_50.csv' % MIMIC_3_DIR)

eval_df['LABELS'] = eval_df['LABELS'].apply(split_lab)

eval_df = eval_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(eval_df.pop('LABELS')),
                index=eval_df.index,
                columns=icd_classes_50))

eval_df['labels'] = eval_df[icd_classes_50].values.tolist()
eval_df = pd.DataFrame(eval_df, columns=['TEXT', 'labels'])
eval_df.columns=['text', 'labels']

print(len(eval_df.labels[0]))
eval_df.describe


50


<bound method NDFrame.describe of                                                    text  \
0     admission date discharge date date of birth se...   
1     admission date discharge date service neurosur...   
2     admission date discharge date date of birth se...   
3     admission date discharge date date of birth se...   
4     admission date discharge date date of birth se...   
...                                                 ...   
1568  admission date discharge date date of birth se...   
1569  admission date discharge date date of birth se...   
1570  admission date discharge date date of birth se...   
1571  admission date discharge date date of birth se...   
1572  admission date discharge date date of birth se...   

                                                 labels  
0     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
1     [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
2     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
3     [0, 0, 0, 0, 1, 0, 

In [13]:
#same as train data preparation, but for evaluation
test_df = pd.read_csv('%s/test_50.csv' % MIMIC_3_DIR)

test_df['LABELS'] = test_df['LABELS'].apply(split_lab)

test_df = test_df.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(test_df.pop('LABELS')),
                index=test_df.index,
                columns=icd_classes_50))

test_df['labels'] = test_df[icd_classes_50].values.tolist()
test_df = pd.DataFrame(test_df, columns=['TEXT', 'labels'])
test_df.columns=['text', 'labels']

print(len(test_df.labels[0]))
test_df.describe

50


<bound method NDFrame.describe of                                                    text  \
0     admission date discharge date date of birth se...   
1     admission date discharge date date of birth se...   
2     admission date discharge date date of birth se...   
3     admission date discharge date date of birth se...   
4     admission date discharge date date of birth se...   
...                                                 ...   
1724  admission date discharge date date of birth se...   
1725  admission date discharge date date of birth se...   
1726  admission date discharge date date of birth se...   
1727  admission date discharge date date of birth se...   
1728  admission date discharge date date of birth se...   

                                                 labels  
0     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
1     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
2     [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
3     [0, 0, 0, 0, 0, 0, 

### Set Model Parameters

In [18]:
# Defining some key variables to configure model training
MAX_LEN = 200
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
EPOCHS = 200
LEARNING_RATE = 1e-05

#set tokenizer
#tokenizer = AutoTokenizer.from_pretrained("bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12")
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

### Preparing Dataloader

In [19]:
#custom dataset for BERT class
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        
        '''
            set text as training data
            set labels as targets
        '''
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.labels
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }

In [20]:
#load df to dataset

print("TRAIN Dataset: {}".format(train_df.shape))
print("EVAL Dataset: {}".format(eval_df.shape))
print("TEST Dataset: {}".format(test_df.shape))

training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
evaluation_set = CustomDataset(eval_df, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_df, tokenizer, MAX_LEN)

TRAIN Dataset: (8066, 2)
EVAL Dataset: (1573, 2)
TEST Dataset: (1729, 2)


In [21]:
#data loader
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

eval_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(training_set, **train_params)
evaluation_loader = DataLoader(evaluation_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

### Create model class from pretrained model

In [22]:
# Creating the customized model, by adding a drop out and a dense layer on top of distil bert to get the final output for the model. 

class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        '''
            Load Pretrained model here
            Use return_dict=False for compatibility for 4.x
        
        '''
        #self.l1 = transformers.AutoModel.from_pretrained("bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12", return_dict=False)
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)
        
        
        self.l2 = torch.nn.Dropout(0.3)
        
        '''
            Changed Linear Output layer to 50 based on the class
        '''
        self.l3 = torch.nn.Linear(768, 50)
    
    def forward(self, ids, mask, token_type_ids):
        _, output_1= self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

model = BERTClass()
model.to(device)

BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [23]:
#loss function
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

In [24]:
#optimizer
optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

### Train fine-tuning model

In [25]:
def train(epoch):
    model.train()
    for _,data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        if _%5000==0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def validation(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(evaluation_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets


In [None]:
for epoch in tqdm(range(EPOCHS)):
    train(epoch)
    
    outputs, targets = validation(epoch)
    outputs = np.array(outputs) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    precision_score_micro = metrics.precision_score(targets, outputs, average='micro')
    precision_score_macro = metrics.precision_score(targets, outputs, average='macro')
    recall_score_micro = metrics.recall_score(targets, outputs, average='micro')
    recall_score_macro = metrics.recall_score(targets, outputs, average='macro')
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    auc_score_micro = metrics.roc_auc_score(targets,outputs, average='micro')
    auc_score_macro = metrics.roc_auc_score(targets,outputs, average='macro')
    print(f"Accuracy Score = {accuracy}")
    print(f"Precision Score (Micro) = {precision_score_micro}")
    print(f"Precision Score (Macro) = {precision_score_macro}")
    print(f"Recall Score (Micro) = {recall_score_micro}")
    print(f"Recall Score (Macro) = {recall_score_macro}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")
    print(f"AUC Score (Micro) = {auc_score_micro}")
    print(f"AUC Score (Macro) = {auc_score_macro}")

  0%|          | 0/200 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Loss:  0.698462724685669


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.0012714558169103624
Precision Score (Micro) = 0.7552334943639292
Precision Score (Macro) = 0.04743456704912767
Recall Score (Micro) = 0.050522460411504905
Recall Score (Macro) = 0.02500154571970054
F1 Score (Micro) = 0.09470920840064621
F1 Score (Macro) = 0.032313116059400605
AUC Score (Micro) = 0.5241656083682793
AUC Score (Macro) = 0.5109281711731051
Epoch: 1, Loss:  0.32429251074790955


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.0012714558169103624
Precision Score (Micro) = 0.6692708333333334
Precision Score (Macro) = 0.1918537973305856
Recall Score (Micro) = 0.16611009371970267
Recall Score (Macro) = 0.11378439168680664
F1 Score (Micro) = 0.2661603521187538
F1 Score (Macro) = 0.13056247278487038
AUC Score (Micro) = 0.5775625215956768
AUC Score (Macro) = 0.5504422891208182
Epoch: 2, Loss:  0.32411664724349976


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.01144310235219326
Precision Score (Micro) = 0.6750298685782556
Precision Score (Macro) = 0.18547533165454694
Recall Score (Micro) = 0.18259183453624905
Recall Score (Macro) = 0.12065325380759308
F1 Score (Micro) = 0.2874342886213329
F1 Score (Macro) = 0.13518727826346558
AUC Score (Micro) = 0.5854141579301108
AUC Score (Macro) = 0.5531321950120339
Epoch: 3, Loss:  0.27115920186042786


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.013350286077558804
Precision Score (Micro) = 0.6735578526175392
Precision Score (Macro) = 0.2739751791767203
Recall Score (Micro) = 0.21760206829688677
Recall Score (Macro) = 0.14919513283468563
F1 Score (Micro) = 0.3289366552678717
F1 Score (Macro) = 0.1697066429798739
AUC Score (Micro) = 0.6017443645505077
AUC Score (Macro) = 0.565725876543854
Epoch: 4, Loss:  0.26220160722732544


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.01080737444373808
Precision Score (Micro) = 0.7127152803224625
Precision Score (Macro) = 0.2851671718876496
Recall Score (Micro) = 0.20952278358289347
Recall Score (Macro) = 0.1444475415709677
F1 Score (Micro) = 0.32384282384282387
F1 Score (Macro) = 0.16964943099906865
AUC Score (Micro) = 0.5991102896823747
AUC Score (Macro) = 0.56538937388548
Epoch: 5, Loss:  0.355991929769516


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.015257469802924348
Precision Score (Micro) = 0.6754494730316181
Precision Score (Macro) = 0.3444216358999928
Recall Score (Micro) = 0.23473015189055263
Recall Score (Macro) = 0.1721292225010971
F1 Score (Micro) = 0.34838915980494045
F1 Score (Macro) = 0.19962701867418894
AUC Score (Micro) = 0.6098182597358396
AUC Score (Macro) = 0.5768905570792079
Epoch: 6, Loss:  0.16813421249389648


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.013350286077558804
Precision Score (Micro) = 0.7039052496798975
Precision Score (Macro) = 0.4461319566884083
Recall Score (Micro) = 0.23688462781428418
Recall Score (Macro) = 0.1766217453673615
F1 Score (Micro) = 0.35447731119529297
F1 Score (Macro) = 0.21435188791831347
AUC Score (Micro) = 0.6117748783830457
AUC Score (Macro) = 0.5804936016327497
Epoch: 7, Loss:  0.1790991872549057


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.01652892561983471
Precision Score (Micro) = 0.6843298384811561
Precision Score (Macro) = 0.41122293308395863
Recall Score (Micro) = 0.26015296779058494
Recall Score (Macro) = 0.18881657197884652
F1 Score (Micro) = 0.37699032157352486
F1 Score (Macro) = 0.22786918449030671
AUC Score (Micro) = 0.622046729112759
AUC Score (Macro) = 0.5848241566580121
Epoch: 8, Loss:  0.3026924729347229


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.01652892561983471
Precision Score (Micro) = 0.6597554492291334
Precision Score (Macro) = 0.41962500321993157
Recall Score (Micro) = 0.26737046213508564
Recall Score (Macro) = 0.21257044897460958
F1 Score (Micro) = 0.38052893829053275
F1 Score (Macro) = 0.2553172636182628
AUC Score (Micro) = 0.6244589419098742
AUC Score (Macro) = 0.5956453784625352
Epoch: 9, Loss:  0.26199695467948914


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.01652892561983471
Precision Score (Micro) = 0.6824175824175824
Precision Score (Macro) = 0.4208316927813884
Recall Score (Micro) = 0.26758590972745877
Recall Score (Macro) = 0.2092608243792886
F1 Score (Micro) = 0.3844308597074982
F1 Score (Macro) = 0.2466608580155789
AUC Score (Micro) = 0.6254604624682099
AUC Score (Macro) = 0.594980900551743
Epoch: 10, Loss:  0.14187109470367432


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.017800381436745075
Precision Score (Micro) = 0.6848716548334244
Precision Score (Macro) = 0.4432141637649067
Recall Score (Micro) = 0.27017128083593667
Recall Score (Macro) = 0.20881441913219287
F1 Score (Micro) = 0.38748551564310546
F1 Score (Macro) = 0.25456263382787997
AUC Score (Micro) = 0.6267675640992577
AUC Score (Macro) = 0.5945992418151198
Epoch: 11, Loss:  0.2474285066127777


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.017800381436745075
Precision Score (Micro) = 0.6860876667574191
Precision Score (Macro) = 0.45155252205505275
Recall Score (Micro) = 0.2714639663901756
Recall Score (Macro) = 0.20010108595785056
F1 Score (Micro) = 0.3890089533806731
F1 Score (Macro) = 0.24224535309071102
AUC Score (Micro) = 0.6274211149147816
AUC Score (Macro) = 0.5901022750834977
Epoch: 12, Loss:  0.26166632771492004


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.019071837253655435
Precision Score (Micro) = 0.6549329501915708
Precision Score (Macro) = 0.44390278017766066
Recall Score (Micro) = 0.2946245825702898
Recall Score (Macro) = 0.22503368256472053
F1 Score (Micro) = 0.4064194962478639
F1 Score (Macro) = 0.27065170842605846
AUC Score (Micro) = 0.6369255079443633
AUC Score (Macro) = 0.6003575126410182
Epoch: 13, Loss:  0.17901968955993652


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.02161474888747616
Precision Score (Micro) = 0.6384315424610052
Precision Score (Macro) = 0.45450463815926073
Recall Score (Micro) = 0.31746202736184426
Recall Score (Macro) = 0.24182752318970732
F1 Score (Micro) = 0.42405928484063604
F1 Score (Macro) = 0.288208905516805
AUC Score (Micro) = 0.6467007975839307
AUC Score (Macro) = 0.606731047371374
Epoch: 14, Loss:  0.2009640485048294


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.017164653528289893
Precision Score (Micro) = 0.6292524377031419
Precision Score (Macro) = 0.46776068173794755
Recall Score (Micro) = 0.3128299041258214
Recall Score (Macro) = 0.25372999301295107
F1 Score (Micro) = 0.4179018563822133
F1 Score (Macro) = 0.29739393505325934
AUC Score (Micro) = 0.6440819983529333
AUC Score (Macro) = 0.6129496401349819
Epoch: 15, Loss:  0.19666309654712677


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.023521932612841703
Precision Score (Micro) = 0.622322899505766
Precision Score (Macro) = 0.4762070262580901
Recall Score (Micro) = 0.32554131207583753
Recall Score (Macro) = 0.2524526984409359
F1 Score (Micro) = 0.42747011811302066
F1 Score (Macro) = 0.2968732681438206
AUC Score (Micro) = 0.6495511136041967
AUC Score (Macro) = 0.6108979265564911
Epoch: 16, Loss:  0.12911078333854675


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.02288620470438652
Precision Score (Micro) = 0.621245275512234
Precision Score (Macro) = 0.47212265860313457
Recall Score (Micro) = 0.3364214154906819
Recall Score (Macro) = 0.26291092911152586
F1 Score (Micro) = 0.4364779874213836
F1 Score (Macro) = 0.30641186927821895
AUC Score (Micro) = 0.6544866026233089
AUC Score (Macro) = 0.6157009582394987
Epoch: 17, Loss:  0.1402273029088974


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.02097902097902098
Precision Score (Micro) = 0.6165699419651791
Precision Score (Macro) = 0.47873566962123315
Recall Score (Micro) = 0.33189701605084565
Recall Score (Macro) = 0.2584475195184397
F1 Score (Micro) = 0.43151260504201683
F1 Score (Macro) = 0.3088441550331962
AUC Score (Micro) = 0.6521379064425376
AUC Score (Macro) = 0.6133692475501517
Epoch: 18, Loss:  0.16909220814704895


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Score = 0.02288620470438652
Precision Score (Micro) = 0.5971599402092675
Precision Score (Macro) = 0.47691055769347146
Recall Score (Micro) = 0.3442852526123021
Recall Score (Macro) = 0.28021647539100997
F1 Score (Micro) = 0.4367611889306457
F1 Score (Macro) = 0.32148002394661807
AUC Score (Micro) = 0.656602095506203
AUC Score (Macro) = 0.6225022688986288
Epoch: 19, Loss:  0.16070808470249176


 10%|█         | 20/200 [3:05:45<27:51:32, 557.18s/it]

Accuracy Score = 0.028607755880483154
Precision Score (Micro) = 0.621215119730853
Precision Score (Macro) = 0.537302715364342
Recall Score (Micro) = 0.33814499622966715
Recall Score (Macro) = 0.26829977969599406
F1 Score (Micro) = 0.4379185267857143
F1 Score (Macro) = 0.3175875702993338
AUC Score (Micro) = 0.6552763126087573
AUC Score (Macro) = 0.6183510357728449




Epoch: 20, Loss:  0.1762336641550064


 10%|█         | 21/200 [3:15:02<27:42:06, 557.13s/it]

Accuracy Score = 0.02161474888747616
Precision Score (Micro) = 0.6186708860759493
Precision Score (Macro) = 0.5011394886779741
Recall Score (Micro) = 0.33696003447161477
Recall Score (Macro) = 0.2652661411210928
F1 Score (Micro) = 0.43629262849571093
F1 Score (Macro) = 0.3148571939778131
AUC Score (Micro) = 0.654582919192069
AUC Score (Macro) = 0.6164902639620172




Epoch: 21, Loss:  0.14684833586215973


### Model Evaluation

In [None]:
# Evaluate the model
def evaluation(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets


In [None]:
outputs, targets = validation(epoch)
outputs = np.array(outputs) >= 0.5
accuracy = metrics.accuracy_score(targets, outputs)
precision_score_micro = metrics.precision_score(targets, outputs, average='micro')
precision_score_macro = metrics.precision_score(targets, outputs, average='macro')
recall_score_micro = metrics.recall_score(targets, outputs, average='micro')
recall_score_macro = metrics.recall_score(targets, outputs, average='macro')
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
auc_score_micro = metrics.roc_auc_score(targets,outputs, average='micro')
auc_score_macro = metrics.roc_auc_score(targets,outputs, average='macro')
print(f"Accuracy Score = {accuracy}")
print(f"Precision Score (Micro) = {precision_score_micro}")
print(f"Precision Score (Macro) = {precision_score_macro}")
print(f"Recall Score (Micro) = {recall_score_micro}")
print(f"Recall Score (Macro) = {recall_score_macro}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")
print(f"AUC Score (Micro) = {auc_score_micro}")
print(f"AUC Score (Macro) = {auc_score_macro}")

In [None]:
torch.save(model.state_dict(), "bert_state_dict_model.pt")

In [None]:
torch.save(model, "bert_model.pt")