In [1]:
import torch
import numpy as np
import pandas as pd
import transformers
from sklearn import metrics
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig, BertForSequenceClassification
from transformers import DistilBertTokenizerFast, DistilBertModel, DistilBertConfig

from custom_datasets import CustomDatasetTextPairs

In [2]:
# Only surface ERROR-level messages from the transformers library's logger.
transformers.logging.set_verbosity_error()

In [3]:
from torch import cuda

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [4]:
# Load the term-pair dataset and preview its first rows.
# The path uses forward slashes so the cell runs on both Windows and POSIX;
# a backslash separator is only understood on Windows.
df = pd.read_csv('data/MedicalConcepts_augmented.csv')
df.head()

Unnamed: 0,Term1,Term2,label
0,avandia,hemoptysis,0
1,haemorrhoids,infertility,1
2,sinemet,sinemet,1
3,epilepsy,dizziness,0
4,albuterol,serevent,1


In [5]:
# (rows, columns) of the full dataset before it is split into train/test.
df.shape

(1132, 3)

In [6]:
# Training configuration

# Sequence length handed to the dataset wrapper together with the tokenizer.
MAX_LEN = 10

# Batch sizes for the training and test data loaders.
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4

# Optimisation schedule: number of passes over the training split and Adam step size.
EPOCHS = 7
LEARNING_RATE = 1e-5

# Tokenizer for the uncased BERT base vocabulary.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', max_length=MAX_LEN)

In [8]:
# Build the train/test datasets for the network.
# The split is a plain (non-stratified) random sample with a fixed seed:
# 80% of the rows go to training and the remaining rows form the test set.
train_size = 0.8
train_data = df.sample(frac=train_size, random_state=200)

# Everything that was not sampled becomes the test split; both frames get a fresh 0..n-1 index.
test_data = df.drop(index=train_data.index).reset_index(drop=True)
train_data = train_data.reset_index(drop=True)

# Wrap each split so it can be fed to a DataLoader.
train_set = CustomDatasetTextPairs(train_data, tokenizer, MAX_LEN)
test_set = CustomDatasetTextPairs(test_data, tokenizer, MAX_LEN)

In [9]:
# Training batches are reshuffled on every pass; test batches keep the dataset order.
# num_workers=0 loads the data in the main process.
train_params = dict(batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)
test_params = dict(batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0)

train_loader = DataLoader(train_set, **train_params)
test_loader = DataLoader(test_set, **test_params)

In [10]:
# Customized model: a pretrained BERT encoder with a dropout layer and a
# single-unit dense layer on top that produces the final output.
class BERTClass(transformers.PreTrainedModel):
    """BERT encoder followed by dropout, a one-unit linear head and a sigmoid.

    Args:
        config: Configuration object handed to ``PreTrainedModel.__init__``.
            NOTE(review): the encoder weights and their own config come from
            ``encoder_path``, not from this object.
        encoder_path: Directory (or model id) passed to
            ``BertModel.from_pretrained`` to load the encoder. The default uses
            forward slashes so it resolves on Windows and POSIX alike.
    """

    # Allows ``BERTClass.from_pretrained(...)`` to rebuild the configuration
    # that ``save_pretrained`` writes next to the weights.
    config_class = transformers.BertConfig

    def __init__(self, config, encoder_path='models/bert_mlm'):
        super().__init__(config)
        self.l1 = transformers.BertModel.from_pretrained(encoder_path)
        self.l2 = torch.nn.Dropout(0.2)
        # Size the head from the encoder that was actually loaded rather than
        # assuming a hidden width of 768.
        self.l3 = torch.nn.Linear(self.l1.config.hidden_size, 1)

    def forward(self, ids, mask, token_type_ids):
        """Return sigmoid-activated scores for a batch of tokenized inputs.

        Args:
            ids: Token ids, passed positionally to the encoder.
            mask: Attention mask for ``ids``.
            token_type_ids: Segment ids for ``ids``.

        Returns:
            Tensor of values in [0, 1] with a trailing dimension of size 1.
            The sigmoid is applied here, so callers must not apply it again.
        """
        encoded = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        pooled = self.l2(encoded['pooler_output'])
        logits = self.l3(pooled)
        return torch.sigmoid(logits)

# Instantiate the classifier with a default BERT configuration and move it to the selected device.
# The final expression is left bare so the notebook displays the module structure.
config = BertConfig()
model = BERTClass(config=config)
model.to(device)

BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [11]:
def loss_fn(outputs, target):
    """Mean binary cross-entropy between predicted probabilities and targets.

    Args:
        outputs: Tensor of probabilities in [0, 1] (not logits).
        target: Float tensor of the same shape as ``outputs``.

    Returns:
        Scalar tensor holding the loss averaged over all elements.
    """
    bce = torch.nn.functional.binary_cross_entropy
    return bce(outputs, target)

In [12]:
# Adam over every parameter returned by the model, stepping at the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [13]:
# Number of rows in the training split.
len(train_data)

906

In [14]:
def train(epoch_num=None):
    """Run one pass over ``train_loader``, updating ``model`` via ``optimizer``.

    Args:
        epoch_num: Value shown as the epoch in the progress line. When omitted,
            the notebook-level ``epoch`` variable (set by the surrounding
            ``for`` loop) is used if it exists, so bare ``train()`` calls keep
            printing the same text as before; if no such variable exists, "?"
            is shown instead of raising ``NameError``.

    The batch loss is printed every 50 steps. Nothing is returned.
    """
    if epoch_num is None:
        epoch_num = globals().get('epoch', '?')

    model.train()
    for step, batch in enumerate(train_loader):
        ids = batch['ids'].to(device, dtype=torch.long)
        mask = batch['mask'].to(device, dtype=torch.long)
        token_type_ids = batch['token_type_ids'].to(device, dtype=torch.long)

        # Add a trailing dimension so the target lines up with the model's
        # single-unit output, as the element-wise loss requires equal shapes.
        target = batch['target'].unsqueeze(1).to(device, dtype=torch.float)

        # Clear gradients once per step, before the backward pass.
        optimizer.zero_grad()

        outputs = model(ids, mask, token_type_ids)
        loss = loss_fn(outputs, target)
        if step % 50 == 0:
            print(f'Epoch: {epoch_num}, Loss:  {loss.item()}')

        loss.backward()
        optimizer.step()

In [15]:
def validation():
    """Score every batch of ``test_loader`` with ``model`` in evaluation mode.

    Returns:
        A ``(fin_outputs, fin_targets)`` pair of Python lists: the model's
        predicted probabilities and the corresponding targets, in loader order.
        Each list entry mirrors the per-row structure of the tensor it came
        from (the model output keeps its trailing size-1 dimension).
    """
    model.eval()
    fin_targets = []
    fin_outputs = []
    with torch.no_grad():
        for batch in test_loader:
            ids = batch['ids'].to(device, dtype=torch.long)
            mask = batch['mask'].to(device, dtype=torch.long)
            token_type_ids = batch['token_type_ids'].to(device, dtype=torch.long)
            targets = batch['target'].to(device, dtype=torch.float)

            # The model's forward() already ends in a sigmoid. Applying a second
            # sigmoid here would squeeze every score into [0.5, 0.731], so a 0.5
            # threshold would label every row positive.
            probabilities = model(ids, mask, token_type_ids)

            fin_targets.extend(targets.detach().cpu().tolist())
            fin_outputs.extend(probabilities.detach().cpu().tolist())
    return fin_outputs, fin_targets

In [16]:
# Fine-tune for EPOCHS passes over the training loader.
# train() reads the loop variable `epoch` from the notebook's global scope for
# its progress line, so the variable must keep this exact name.
for epoch in range(EPOCHS):
    train()
# Per-epoch evaluation on the test split, currently disabled:
#     outputs, targets = validation()
#     outputs = np.array(outputs) >= 0.5
#     accuracy = metrics.accuracy_score(targets, outputs)
#     print(f"Accuracy Score = {accuracy}")



Epoch: 0, Loss:  0.7424248456954956
Epoch: 0, Loss:  0.7464559078216553
Epoch: 0, Loss:  0.7739934921264648
Epoch: 1, Loss:  0.7192226648330688
Epoch: 1, Loss:  0.6600151062011719
Epoch: 1, Loss:  0.6476351022720337
Epoch: 2, Loss:  0.6514908671379089
Epoch: 2, Loss:  0.6853300333023071
Epoch: 2, Loss:  0.6335180401802063
Epoch: 3, Loss:  0.6790067553520203
Epoch: 3, Loss:  0.7154678106307983
Epoch: 3, Loss:  0.5829678177833557
Epoch: 4, Loss:  0.6999630928039551
Epoch: 4, Loss:  0.5286862850189209
Epoch: 4, Loss:  0.5015780925750732
Epoch: 5, Loss:  0.6326205134391785
Epoch: 5, Loss:  0.7118173241615295
Epoch: 5, Loss:  0.5272166728973389
Epoch: 6, Loss:  0.6581325531005859
Epoch: 6, Loss:  0.48955488204956055
Epoch: 6, Loss:  0.5218454599380493


In [17]:
# Persist the fine-tuned model. Forward slashes make the target the same
# directory on Windows and POSIX; on POSIX a backslash is an ordinary filename
# character, so the previous path would not have landed inside models/.
model.save_pretrained("models/bert_fine_tuned")