# Import dependencies

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchtext.legacy import data

from BiLSTM import BiLSTM

import random

from transformers import BertTokenizer, BertForSequenceClassification, BertConfig

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score


In [2]:
BERT_MODEL_PATH = './bert_fine_tuned'

In [3]:
if torch.cuda.is_available():
    device = torch.device('cuda')

    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device('cpu')

There are 1 GPU(s) available.
We will use the GPU: NVIDIA GeForce RTX 2060 SUPER


# BiLSTM with BERT as embedding

In [4]:
SEED = 42
N_EPOCHS = 15

train_data_path = './data/semeval_train_data.csv'
output_path_result = './results/' + 'bilstm_bert_train.csv'

torch.manual_seed(SEED)

torch.backends.cudnn.deterministic = True

In [5]:
TEXT = data.Field(
    tokenize = 'spacy',
    batch_first = True,
    include_lengths = True)

LABEL = data.LabelField(
    dtype = torch.long,
    batch_first = True,
    use_vocab = False)

fields = [('text',TEXT),('label', LABEL)]



In [6]:
training_data = data.TabularDataset(
    path = train_data_path,
    format = 'csv',
    fields = fields,
    skip_header = True)

print(vars(training_data.examples[0]))

{'text': ['Judging', 'from', 'previous', 'posts', 'this', 'used', 'to', 'be', 'a', 'good', 'place', ';', 'but', 'not', 'any', 'longer', '.'], 'label': '0'}


In [7]:
train_data, valid_data = training_data.split(
    split_ratio = 0.8, 
    random_state = random.seed(SEED)
)

In [8]:
TEXT.build_vocab(
    train_data, 
    vectors='glove.6B.300d'
)
LABEL.build_vocab(train_data)

#No. of unique tokens in text
print("Size of TEXT vocabulary:",len(TEXT.vocab))

#No. of unique tokens in label
print("Size of LABEL vocabulary:",len(LABEL.vocab))

#Commonly used words
print(TEXT.vocab.freqs.most_common(10))  

Size of TEXT vocabulary: 3468
Size of LABEL vocabulary: 3
[('.', 1309), ('the', 943), (';', 826), ('and', 678), ('I', 479), ('a', 458), ('is', 421), ('to', 416), ('was', 367), ('of', 286)]


In [9]:
BATCH_SIZE = 64

train_iterator, valid_iterator = data.BucketIterator.splits(
    (train_data, valid_data), 
    batch_size = BATCH_SIZE,
    sort_key = lambda x: len(x.text),
    sort_within_batch=True,
    device = device)

## Load Model

In [10]:
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH, do_lower_case=True)

bert_model = BertForSequenceClassification.from_pretrained(BERT_MODEL_PATH, num_labels=3)
bert_model.to(device)

config = BertConfig.from_pretrained(BERT_MODEL_PATH)

optimus = BertForSequenceClassification(config)

In [11]:
# Uncomment for printing the params
# dict(model.named_parameters())

word_emb_weight_param = 'bert.embeddings.word_embeddings.weight'

word_emb_w = dict(bert_model.named_parameters())[word_emb_weight_param]

print(word_emb_w.shape)

torch.Size([30522, 768])


In [12]:
size_of_vocab, embedding_dim = word_emb_w.shape

num_hidden_nodes = 32
num_output_nodes = 3
num_layers = 2
bidirection = True
dropout = 0.2

In [13]:
model = BiLSTM(
    size_of_vocab, 
    embedding_dim, 
    num_hidden_nodes, 
    num_output_nodes, 
    num_layers, 
    bidirectional = bidirection, 
    dropout = dropout
)

In [14]:
print(model)

BiLSTM(
  (embedding): Embedding(30522, 768)
  (lstm): LSTM(768, 32, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (fc): Linear(in_features=64, out_features=3, bias=True)
)


In [15]:
pretrained_embeddings = word_emb_w
model.embedding.weight.data.copy_(pretrained_embeddings)

print(pretrained_embeddings.shape)

torch.Size([30522, 768])


In [16]:
# define optimizer and loss
optimizer = optim.Adam(model.parameters())
criterion = nn.MultiMarginLoss()

#define metric
def binary_accuracy(preds, y):
    #round predictions to the closest integer
    rounded_preds = torch.round(preds)
    
    correct = (rounded_preds == y).float() 
    acc = correct.sum() / len(correct)
    return acc
    
#push to cuda if available
bilstm_bert = model.to(device)
criterion = criterion.to(device)

In [17]:
def train_bilstm_bert(model, iterator, optimizer, criterion):
    
    #initialize every epoch 
    epoch_loss = 0
    epoch_acc = 0
    epoch_f1 = 0
    epoch_precision = 0
    epoch_recall = 0
    
    #set the model in training phase
    model.train()  
    
    for batch in iterator:
        #resets the gradients after every batch
        optimizer.zero_grad()   
        
        #retrieve text and no. of words
        text, text_lengths = batch.text
        
        predictions = model(text, text_lengths.cpu()).cpu()
        predicted = torch.argmax(predictions, dim = 1)

        labels = batch.label.cpu()
        #compute the loss
        loss = criterion(predictions, labels)        
        
        #compute the binary accuracy
        # acc = binary_accuracy(predictions, batch.label)   
        acc = (predicted == labels).sum()

        #round predictions to the closest integer
        # predicted = torch.round(predictions).tolist()
        real = labels.tolist()

        #compute the f1_score
        f1score = f1_score(real, predicted, average="macro", zero_division=0)

        #compute the precision
        precision = precision_score(real, predicted, average="macro", zero_division=0)

        #compute the recall
        recall = recall_score(real, predicted, average="macro", zero_division=0)
        
        #backpropage the loss and compute the gradients
        loss.backward()       
        
        #update the weights
        optimizer.step()      
        
        #loss and accuracy
        epoch_loss += loss.item() / len(batch)
        epoch_acc += acc.item() / len(batch)
        epoch_f1 += f1score
        epoch_precision += precision
        epoch_recall += recall 
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator), epoch_f1 / len(iterator), epoch_precision / len(iterator), epoch_recall / len(iterator)

In [18]:
def evaluate_bilstm_bert(model, iterator, criterion):
    #initialize every epoch
    epoch_loss = 0
    epoch_acc = 0
    epoch_f1 = 0
    epoch_precision = 0
    epoch_recall = 0

    #deactivating dropout layers
    model.eval()
    predicted = []
    #deactivates autograd
    with torch.no_grad():
        for batch in iterator:
            #retrieve text and no. of words
            text, text_lengths = batch.text
            
            labels = batch.label.cpu()
            predictions = model(text, text_lengths.cpu()).cpu()
            predicted = torch.argmax(predictions, dim = 1)
            
            #compute loss and accuracy
            loss = criterion(predictions, labels)
            # acc = binary_accuracy(predictions, batch.label)
            acc = (predicted == labels).sum()

            #round predictions to the closest integer
            real = labels.tolist()

            #compute the f1_score
            f1score = f1_score(real, predicted, average="macro", zero_division=0)

            #compute the precision
            precision = precision_score(real, predicted, average="macro", zero_division=0)

            #compute the recall
            recall = recall_score(real, predicted, average="macro", zero_division=0)
            
            #keep track of loss and accuracy
            epoch_loss += loss.item() / len(batch)
            epoch_acc += acc.item() / len(batch)
            epoch_f1 += f1score
            epoch_precision += precision
            epoch_recall += recall
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator), epoch_f1 / len(iterator), epoch_precision / len(iterator), epoch_recall / len(iterator), predicted

In [19]:
def printProgress(epoch, total_epochs, train_loss, validation_loss, train_acc, validation_acc):
    print(f"Iteration {epoch} / {total_epochs}")
    print(f"\tTrain loss: {train_loss*100:.2f}")
    print(f"\tValidation loss: {validation_loss*100:.2f}")
    print(f"\tTrain accuracy: {train_acc*100:.2f}")
    print(f"\tValidation accuracy: {validation_acc*100:.2f}")

In [20]:
best_valid_loss = float('inf')

columns = 'Name,Epoch,Loss,Acc,F1,Precision,Recall,Predicted'
f = open(output_path_result, 'w')
f.write(columns)
f.write('\n')

for epoch in range(N_EPOCHS):
    #train the model
    train_loss, train_acc, train_f1, train_precision, train_recall = train_bilstm_bert(model, train_iterator, optimizer, criterion)
    
    #evaluate the model
    validation_loss, validation_acc, validation_f1, validation_precision, validation_recall, predicted = evaluate_bilstm_bert(model, valid_iterator, criterion)
    
    if (epoch + 1) % 5 == 0:
        printProgress(epoch + 1, N_EPOCHS, train_loss, validation_loss, train_acc, validation_acc)

    #save the best model
    if validation_loss < best_valid_loss:
        best_valid_loss = validation_loss
        torch.save(model.state_dict(), './bilstm_bert_model/saved_weights.pt')

    f.write(f'BERT_BILSTM_Train,{epoch},{train_loss*100:.2f},{train_acc*100:.2f},{train_f1*100:.2f},{train_precision*100:.2f},{train_recall*100:.2f},{predicted}')
    f.write('\n')
    f.write(f'BERT_BILSTM_Test,{epoch},{validation_loss*100:.2f},{validation_acc*100:.2f},{validation_f1*100:.2f},{validation_precision*100:.2f},{validation_recall*100:.2f},{predicted}')
    f.write('\n')

f.close()

Iteration 5 / 15
	Train loss: 0.18
	Validation loss: 0.97
	Train accuracy: 88.31
	Validation accuracy: 60.71
Iteration 10 / 15
	Train loss: 0.01
	Validation loss: 1.22
	Train accuracy: 99.75
	Validation accuracy: 62.50
Iteration 15 / 15
	Train loss: 0.00
	Validation loss: 1.25
	Train accuracy: 99.88
	Validation accuracy: 61.83


In [21]:
torch.save(model, './bilstm_bert_model/model')

# Test the model on the Gold dataset

In [22]:
test_data_path = './data/semeval_train_data.csv'

fields = [('text',TEXT),('label', LABEL)]

test_data = data.TabularDataset(
    path = test_data_path,
    format = 'csv',
    fields = fields,
    skip_header = True
)

print(vars(test_data.examples[0]))

{'text': ['Judging', 'from', 'previous', 'posts', 'this', 'used', 'to', 'be', 'a', 'good', 'place', ';', 'but', 'not', 'any', 'longer', '.'], 'label': '0'}


In [23]:
TEXT.build_vocab(
    test_data, 
    vectors='glove.6B.300d'
)
LABEL.build_vocab(test_data)

#No. of unique tokens in text
print("Size of TEXT vocabulary:",len(TEXT.vocab))

#No. of unique tokens in label
print("Size of LABEL vocabulary:",len(LABEL.vocab))

#Commonly used words
print(TEXT.vocab.freqs.most_common(10))  

Size of TEXT vocabulary: 3946
Size of LABEL vocabulary: 3
[('.', 1629), ('the', 1165), (';', 1032), ('and', 860), ('I', 599), ('a', 577), ('is', 524), ('to', 514), ('was', 464), ('of', 352)]


In [24]:
BATCH_SIZE = 64

test_iterator = data.BucketIterator(
    test_data, 
    batch_size = BATCH_SIZE,
    sort_key = lambda x: len(x.text),
    sort_within_batch=True,
    device = device)

In [25]:
loss, accuracy, f1, precision, recall, predicted = evaluate_bilstm_bert(model, test_iterator, criterion)

print(f"Test loss: {loss*100:.2f}")
print(f"Test accuracy: {accuracy*100:.2f}")
print(f"Test F1: {f1*100:.2f}")
print(f"Test precision: {precision*100:.2f}")
print(f"Test recall: {recall*100:.2f}")

Test loss: 1.50
Test accuracy: 40.53
Test F1: 33.52
Test precision: 34.68
Test recall: 34.93
