In [None]:
# Mount Google Drive at /content/Drive; the checkpoint and dataset paths used
# later in this notebook all live under that mount point (Colab only).
from google.colab import drive
drive.mount('/content/Drive')

Mounted at /content/Drive


In [None]:
!pip install torch torchvision torchaudio
!pip install transformers
!pip install tqdm



In [None]:
import json
import random
import warnings
import torch
import time
import argparse
import json
import os
import time
import copy
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional


from tqdm import tqdm
from sklearn.metrics import f1_score, precision_score, recall_score
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from transformers import BertForSequenceClassification, BertTokenizer, BertModel, AdamW

In [None]:
# Label inventory: 'no_relation' first, followed by the person- and
# organisation-centred relation types. List position defines the class index.
keys = [
    'no_relation',
    'per:title',
    'org:top_members/employees',
    'org:country_of_headquarters',
    'per:parents',
    'per:age',
    'per:countries_of_residence',
    'per:children',
    'org:alternate_names',
    'per:charges',
    'per:cities_of_residence',
    'per:origin',
    'org:founded_by',
    'per:employee_of',
    'per:siblings',
    'per:alternate_names',
    'org:website',
    'per:religion',
    'per:stateorprovince_of_death',
    'org:parents',
    'org:subsidiaries',
    'per:other_family',
    'per:stateorprovinces_of_residence',
    'org:members',
    'per:cause_of_death',
    'org:member_of',
    'org:number_of_employees/members',
    'per:country_of_birth',
    'org:shareholders',
    'org:stateorprovince_of_headquarters',
    'per:city_of_death',
    'per:date_of_birth',
    'per:spouse',
    'org:city_of_headquarters',
    'per:date_of_death',
    'per:schools_attended',
    'org:political/religious_affiliation',
    'per:country_of_death',
    'org:founded',
    'per:stateorprovince_of_birth',
    'per:city_of_birth',
    'org:dissolved',
]

# Forward lookup (relation name -> class index) and its inverse.
rel2id = dict(zip(keys, range(len(keys))))
id2rel = {index: name for name, index in rel2id.items()}

# Show both lookups
print(rel2id)
print(id2rel)

{'no_relation': 0, 'per:title': 1, 'org:top_members/employees': 2, 'org:country_of_headquarters': 3, 'per:parents': 4, 'per:age': 5, 'per:countries_of_residence': 6, 'per:children': 7, 'org:alternate_names': 8, 'per:charges': 9, 'per:cities_of_residence': 10, 'per:origin': 11, 'org:founded_by': 12, 'per:employee_of': 13, 'per:siblings': 14, 'per:alternate_names': 15, 'org:website': 16, 'per:religion': 17, 'per:stateorprovince_of_death': 18, 'org:parents': 19, 'org:subsidiaries': 20, 'per:other_family': 21, 'per:stateorprovinces_of_residence': 22, 'org:members': 23, 'per:cause_of_death': 24, 'org:member_of': 25, 'org:number_of_employees/members': 26, 'per:country_of_birth': 27, 'org:shareholders': 28, 'org:stateorprovince_of_headquarters': 29, 'per:city_of_death': 30, 'per:date_of_birth': 31, 'per:spouse': 32, 'org:city_of_headquarters': 33, 'per:date_of_death': 34, 'per:schools_attended': 35, 'org:political/religious_affiliation': 36, 'per:country_of_death': 37, 'org:founded': 38, 'per

In [None]:
# Decide once whether CUDA is usable; DEVICE and USE_CUDA both derive from that check.
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")

if not USE_CUDA:
    print('No GPU available, using the CPU instead.')
else:
    # Report how many GPUs are visible and the name of the first one.
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

print("using GPU" if USE_CUDA else "using CPU")

There are 1 GPU(s) available.
We will use the GPU: Tesla V100-SXM2-16GB
using GPU


In [None]:
# Load the BERT tokenizer from the local SpanBERT checkpoint directory on the mounted Drive.
# `tokenizer` is used as a module-level global by process_relation_extraction_data.
model_directory = '/content/Drive/MyDrive/COMP61332_text_mining/RE/Tacred/Bert_RE/span-bert/spanbert-base-cased'
tokenizer = BertTokenizer.from_pretrained(model_directory)
print('BERT tokenizer loaded')

BERT tokenizer loaded


In [None]:
def setup_seed(seed):
    """Seed every random number generator this notebook draws from.

    Args:
        seed (int): Value used to seed PyTorch (CPU and all CUDA devices),
            NumPy's global RNG and Python's built-in ``random`` module.
    """
    # PyTorch CPU generator. This call was missing although the original
    # comment promised it, leaving CPU-side randomness unseeded.
    torch.manual_seed(seed)

    # PyTorch generators on all GPUs.
    torch.cuda.manual_seed_all(seed)

    # NumPy global generator.
    np.random.seed(seed)

    # Python built-in random module.
    random.seed(seed)

setup_seed(44)

In [None]:
def load_tacred_dataset(file_path):
    """
    Load the TACRED dataset from a JSON file.

    Args:
    file_path (str): The path to the JSON file containing the dataset.

    Returns:
    The parsed JSON content. ``prepare_data`` indexes the result by position
    and reads per-example keys, i.e. it expects a sequence of example dicts.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

In [None]:
def prepare_data(data):
    """Turn raw examples into entity-masked records for relation extraction.

    Each example must provide ``token`` (list of str), ``subj_start``/``subj_end``
    and ``obj_start``/``obj_end`` (inclusive token indices) and ``relation``.

    Args:
        data: Sequence of example dicts as described above.

    Returns:
        list[dict]: One dict per example with keys
            ``rel``  - the relation label,
            ``ent1`` - subject tokens joined by spaces,
            ``ent2`` - object tokens joined by spaces,
            ``text`` - the sentence with each entity span replaced by a single
                       ``[MASK]`` token, joined by spaces.
    """
    info = []

    for example in data:
        tokens = example['token']

        # Inclusive span boundaries of the subject and object entities.
        subj_start, subj_end = example["subj_start"], example["subj_end"]
        obj_start, obj_end = example["obj_start"], example["obj_end"]

        # Replace the right-most span first. Collapsing a multi-token span into
        # one '[MASK]' shortens the list, which would shift the indices of any
        # span located after it; going right-to-left keeps the remaining
        # indices valid regardless of whether subject or object comes first.
        masked = list(tokens)
        spans = sorted([(subj_start, subj_end), (obj_start, obj_end)], reverse=True)
        for start, end in spans:
            masked[start: end + 1] = ['[MASK]']

        info.append({
            'rel': example["relation"],
            'ent1': ' '.join(tokens[subj_start: subj_end + 1]),
            'ent2': ' '.join(tokens[obj_start: obj_end + 1]),
            'text': " ".join(masked),
        })

    return info

In [None]:
def process_relation_extraction_data(info, max_length=64):
    """Encode prepared records as fixed-length token-id and attention-mask tensors.

    Uses the module-level ``tokenizer`` and ``rel2id``.

    Args:
        info: Iterable of dicts with keys ``rel``, ``ent1``, ``ent2`` and ``text``.
        max_length (int): Length every encoded sequence is padded or cut to.

    Returns:
        dict: ``{'label': [...], 'mask': [...], 'text': [...]}`` where ``label``
        holds class indices (0 for relations missing from ``rel2id``) and
        ``mask`` / ``text`` hold long tensors of shape ``(1, max_length)``.
    """
    labels, masks, texts = [], [], []

    for record in info:
        # Relations that are not in the label inventory fall back to class 0.
        labels.append(rel2id.get(record['rel'], 0))

        # Model input: subject, object and masked sentence separated by [SEP].
        sentence = '[SEP]'.join((record['ent1'], record['ent2'], record['text']))
        token_ids = tokenizer.encode(sentence, add_special_tokens=True)

        # Remember how many ids are real before padding is appended.
        real_len = len(token_ids)

        # Right-pad with id 0 up to max_length, then cut anything beyond it.
        token_ids = (token_ids + [0] * (max_length - real_len))[:max_length]

        ids_tensor = torch.tensor(token_ids).long().unsqueeze(0)  # (1, L)

        # 1 for real tokens, 0 for padding; the slice clamps when real_len > L.
        attention = torch.zeros(ids_tensor.size()).long()  # (1, L)
        attention[0, :real_len] = 1

        texts.append(ids_tensor)
        masks.append(attention)

    return {'label': labels, 'mask': masks, 'text': texts}

In [None]:
def convert_data_to_tensors(data):
    """Stack per-example tensors into batch tensors.

    Args:
        data (dict): ``{'text': [...], 'mask': [...], 'label': [...]}`` where
            ``text`` and ``mask`` are lists of equally shaped tensors and
            ``label`` is a list of ints.

    Returns:
        tuple: ``(text, mask, label)``. ``text`` and ``mask`` gain a new leading
        dimension (N tensors of shape ``(1, L)`` become ``(N, 1, L)``);
        ``label`` has shape ``(N,)``.
    """
    def _stack(tensors):
        # torch.stack rejects an empty list; keep returning the empty tensor
        # that torch.tensor([]) produced for that case.
        if len(tensors) == 0:
            return torch.tensor([])
        return torch.stack(list(tensors))

    # Stack directly instead of round-tripping through a list of numpy arrays,
    # which PyTorch warns is extremely slow.
    text = _stack(data['text'])
    mask = _stack(data['mask'])
    label = torch.tensor(data['label'])

    return text, mask, label

In [None]:
class BERT_Classifier(nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    The head reads the hidden state of the first token of every sequence.

    Args:
        label_num (int): Number of output classes.
        model_directory (str, optional): Checkpoint path or name passed to
            ``BertModel.from_pretrained``. Defaults to ``DEFAULT_MODEL_DIRECTORY``.
        dropout (float, optional): Dropout probability applied before the
            linear head. Defaults to 0.1.
    """

    # Checkpoint loaded when no model_directory is given (the path this
    # notebook has always used).
    DEFAULT_MODEL_DIRECTORY = '/content/Drive/MyDrive/COMP61332_text_mining/RE/Tacred/Bert_RE/span-bert/spanbert-base-cased'

    def __init__(self, label_num, model_directory=None, dropout=0.1):
        super().__init__()
        if model_directory is None:
            model_directory = self.DEFAULT_MODEL_DIRECTORY

        # Pre-trained encoder
        self.encoder = BertModel.from_pretrained(model_directory)
        # Dropout layer to prevent overfitting
        self.dropout = nn.Dropout(dropout, inplace=False)
        # Size the head from the loaded checkpoint instead of assuming 768,
        # so other encoder sizes work unchanged.
        self.fc = nn.Linear(self.encoder.config.hidden_size, label_num)
        # Cross-entropy loss criterion
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x, attention_mask, label=None):
        """Classify a batch of token-id sequences.

        Args:
            x: Token ids, indexed as (batch, sequence) by the code below.
            attention_mask: Mask forwarded to the encoder unchanged.
            label (optional): Target class indices; when given, the
                cross-entropy loss is computed as well.

        Returns:
            tuple: ``(loss, logits)``; ``loss`` is ``None`` when ``label`` is None.
        """
        # Element 0 of the encoder output is taken as the per-token hidden states.
        hidden_states = self.encoder(x, attention_mask=attention_mask)[0]
        # Keep only the first token's vector for each sequence.
        first_token = hidden_states[:, 0, :]
        logits = self.fc(self.dropout(first_token))

        if label is None:
            return None, logits
        return self.criterion(logits, label), logits

# One output unit per relation label.
labels_num = len(rel2id)
model = BERT_Classifier(labels_num)
# Move the model to the device chosen earlier (GPU when available, CPU otherwise)
# rather than calling .cuda() unconditionally, which fails on CPU-only runtimes.
model.to(DEVICE)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BERT_Classifier(
  (encoder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

In [None]:
def train(net, train_dataset, dev_dataset, num_epochs, learning_rate, batch_size):
    """Train ``net`` with SGD and run validation after every epoch.

    Args:
        net: Model called as ``net(text, mask, labels)`` that returns
            ``(loss, logits)``.
        train_dataset: Dataset yielding ``(text, mask, label)`` triples.
        dev_dataset: Dataset handed to ``eval`` at the end of each epoch.
        num_epochs (int): Number of passes over ``train_dataset``.
        learning_rate (float): SGD learning rate.
        batch_size (int): Batch size for both training and validation.

    Raises:
        ValueError: If ``train_dataset`` yields no batches.
    """
    print('Training...')

    # Define the optimizer
    optimizer = optim.SGD(net.parameters(), lr=learning_rate)

    # Create a data loader for training data
    train_iter = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True)

    # Feed batches to whatever device the model lives on.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        # Re-enter training mode every epoch: the validation pass at the end of
        # the previous epoch leaves the network in eval mode (dropout off).
        net.train()

        correct = 0
        total = 0
        loss_sum = 0.0
        all_pred = []
        all_true = []

        # Initialize tqdm to show progress bar
        progress_bar = tqdm(train_iter, desc=f'Epoch {epoch + 1}/{num_epochs}', unit='batch')

        for text, mask, y in progress_bar:
            # Use the actual batch size so the final, smaller batch is trained
            # on as well instead of being dropped.
            n = text.size(0)
            text = text.reshape(n, -1).to(device)
            mask = mask.reshape(n, -1).to(device)
            y = y.to(device)

            optimizer.zero_grad()

            # Forward pass
            loss, logits = net(text, mask, y)

            # Backpropagation
            loss.backward()
            optimizer.step()

            # Running accuracy and sample-weighted loss
            _, predicted = torch.max(logits.detach(), 1)
            batch_loss = loss.item()
            total += n
            correct += (predicted == y).sum().item()
            loss_sum += batch_loss * n

            # Collect predictions and true labels
            all_pred.extend(predicted.cpu().numpy())
            all_true.extend(y.cpu().numpy())

            # Update progress bar
            progress_bar.set_postfix({'loss': batch_loss, 'accuracy': correct / total})

        if total == 0:
            raise ValueError("train_dataset produced no batches; nothing to train on")

        # Epoch-level metrics; the loss is the mean over all training samples
        # of the epoch rather than the value of the last batch only.
        accuracy = correct / total
        epoch_loss = loss_sum / total

        # Compute F1 scores
        macro_f1 = f1_score(all_true, all_pred, average='macro')
        micro_f1 = f1_score(all_true, all_pred, average='micro')
        weighted_f1 = f1_score(all_true, all_pred, average='weighted')

        # Print metrics
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print(f"Loss: {epoch_loss}, Accuracy: {accuracy}")
        print(f"Macro F1: {macro_f1}, Micro F1: {micro_f1}, Weighted F1: {weighted_f1}")

        print()

        print("Validation...")

        # `eval` is this notebook's evaluation function (it shadows the builtin);
        # it prints its own metrics, so the returned lists are not needed here.
        eval(net, dev_dataset, batch_size)


In [None]:

def eval(net, dataset, batch_size):
    """Evaluate ``net`` on ``dataset``; print and return classification metrics.

    Note: this function shadows Python's built-in ``eval``. The name is kept
    because other cells call it. The network is left in eval mode on return.

    Args:
        net: Model called as ``net(text, mask)`` returning ``(loss, logits)``
            or bare logits.
        dataset: Dataset yielding ``(text, mask, label)`` triples.
        batch_size (int): Evaluation batch size.

    Returns:
        tuple: Six single-element lists, in this order: accuracy, macro F1,
        micro F1, weighted F1, ``(macro, micro, weighted)`` precision and
        ``(macro, micro, weighted)`` recall.

    Raises:
        ValueError: If ``dataset`` yields no batches.
    """
    # Set the network to evaluation mode
    net.eval()

    # Feed batches to whatever device the model lives on.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Create an iterator for the evaluation dataset
    eval_iter = DataLoader(dataset, batch_size, shuffle=False)

    # Predictions and true labels across the whole dataset
    all_pred = []
    all_true = []

    correct = 0  # Counter for correctly classified samples
    total = 0    # Counter for total samples

    with torch.no_grad():
        # Progress bar for visualization during evaluation
        progress_bar = tqdm(eval_iter, desc='Evaluation', unit='batch')

        for text, mask, y in progress_bar:
            # Use the actual batch size so the final, smaller batch is scored
            # too; skipping it would silently exclude samples from the metrics.
            n = text.size(0)
            text = text.reshape(n, -1).to(device)
            mask = mask.reshape(n, -1).to(device)
            y = y.to(device)

            outputs = net(text, mask)
            _, logits = outputs if isinstance(outputs, tuple) else (None, outputs)

            _, predicted = torch.max(logits, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()

            all_pred.extend(predicted.cpu().numpy())
            all_true.extend(y.cpu().numpy())

    if total == 0:
        raise ValueError("eval() received a dataset that produced no batches")

    # Calculate overall accuracy and F1 scores
    acc = correct / total
    macro_f1 = f1_score(all_true, all_pred, average='macro')
    micro_f1 = f1_score(all_true, all_pred, average='micro')
    weighted_f1 = f1_score(all_true, all_pred, average='weighted')

    # Calculate precision and recall
    precision_macro = precision_score(all_true, all_pred, average='macro')
    recall_macro = recall_score(all_true, all_pred, average='macro')
    precision_micro = precision_score(all_true, all_pred, average='micro')
    recall_micro = recall_score(all_true, all_pred, average='micro')
    precision_weighted = precision_score(all_true, all_pred, average='weighted')
    recall_weighted = recall_score(all_true, all_pred, average='weighted')

    # Print evaluation results
    print(f"Eval Result: Acc: {acc:.4f}, Macro F1: {macro_f1:.4f}, Micro F1: {micro_f1:.4f}, Weighted F1: {weighted_f1:.4f}")
    print(f"Precision (Macro, Micro, Weighted): {precision_macro:.4f}, {precision_micro:.4f}, {precision_weighted:.4f}")
    print(f"Recall (Macro, Micro, Weighted): {recall_macro:.4f}, {recall_micro:.4f}, {recall_weighted:.4f}")

    # Each metric is wrapped in a one-element list, the shape callers store and print.
    acc_list = [acc]
    macro_f1_list = [macro_f1]
    micro_f1_list = [micro_f1]
    weighted_f1_list = [weighted_f1]
    precision_list = [(precision_macro, precision_micro, precision_weighted)]
    recall_list = [(recall_macro, recall_micro, recall_weighted)]

    return acc_list, macro_f1_list, micro_f1_list, weighted_f1_list, precision_list, recall_list


In [None]:
# Read the raw train / dev / test splits from their JSON files on the mounted Drive.
train_data, dev_data, test_data = (
    load_tacred_dataset(json_path)
    for json_path in (
        '/content/Drive/MyDrive/COMP61332_text_mining/RE/Tacred/json/train.json',
        '/content/Drive/MyDrive/COMP61332_text_mining/RE/Tacred/json/dev.json',
        '/content/Drive/MyDrive/COMP61332_text_mining/RE/Tacred/json/test.json',
    )
)

In [None]:
# Build the entity-masked {'rel', 'ent1', 'ent2', 'text'} records for every split.
train_info, dev_info, test_info = (
    prepare_data(raw_split) for raw_split in (train_data, dev_data, test_data)
)

# Split sizes (one per line), then one sample record as a sanity check.
print(len(train_info), len(dev_info), len(test_info), sep='\n')
print(train_info[1])

68124
22631
15509
{'rel': 'no_relation', 'ent1': 'Forsberg', 'ent2': 'John D.', 'text': "In 1983 , a year after the rally , [MASK] received the so-called `` genius award '' from the [MASK] and Catherine T. MacArthur Foundation ."}


In [None]:
# Encode every split to fixed-length (128) token-id / attention-mask tensors.
# NOTE: this rebinds train_data / dev_data / test_data from the raw examples to
# the encoded dicts, so the prepare_data cell cannot be re-run afterwards
# without reloading the JSON files first.
train_data, dev_data, test_data = (
    process_relation_extraction_data(split_info, 128)
    for split_info in (train_info, dev_info, test_info)
)

# Number of encoded examples per split, one per line.
print(len(train_data['label']), len(dev_data['label']), len(test_data['label']), sep='\n')

68124
22631
15509


In [None]:
# Stack each split into batch tensors and report the resulting shapes
# (text, mask, label - one shape per line under the split banner).

# Train split
train_text, train_mask, train_label = convert_data_to_tensors(train_data)
print("--train data--", train_text.shape, train_mask.shape, train_label.shape, sep='\n')

# Dev split
dev_text, dev_mask, dev_label = convert_data_to_tensors(dev_data)
print("--eval data--", dev_text.shape, dev_mask.shape, dev_label.shape, sep='\n')

# Test split
test_text, test_mask, test_label = convert_data_to_tensors(test_data)
print("--test data--", test_text.shape, test_mask.shape, test_label.shape, sep='\n')

  text = torch.tensor(text)


--train data--
torch.Size([68124, 1, 128])
torch.Size([68124, 1, 128])
torch.Size([68124])
--eval data--
torch.Size([22631, 1, 128])
torch.Size([22631, 1, 128])
torch.Size([22631])
--test data--
torch.Size([15509, 1, 128])
torch.Size([15509, 1, 128])
torch.Size([15509])


In [None]:
# Wrap the (text, mask, label) tensors of the train and dev splits as datasets.
train_dataset, dev_dataset = (
    torch.utils.data.TensorDataset(*split_tensors)
    for split_tensors in (
        (train_text, train_mask, train_label),
        (dev_text, dev_mask, dev_label),
    )
)

In [None]:
# Baseline run: 20 epochs of SGD at lr 0.002 with batches of 32.
train(
    net=model,
    train_dataset=train_dataset,
    dev_dataset=dev_dataset,
    num_epochs=20,
    learning_rate=0.002,
    batch_size=32,
)

Training...


Epoch 1/20: 100%|█████████▉| 2128/2129 [06:53<00:00,  5.15batch/s, loss=1.59, accuracy=0.808]


Epoch 1/20
Loss: 1.5880838632583618, Accuracy: 0.8081825657894737
Macro F1: 0.021326175259683104, Micro F1: 0.8081825657894737, Weighted F1: 0.7233361765279224


Epoch 2/20: 100%|█████████▉| 2128/2129 [06:53<00:00,  5.15batch/s, loss=0.652, accuracy=0.813]


Epoch 2/20
Loss: 0.6519356369972229, Accuracy: 0.813234257518797
Macro F1: 0.03054672386393424, Micro F1: 0.813234257518797, Weighted F1: 0.7381366476788387


Epoch 3/20: 100%|█████████▉| 2128/2129 [06:53<00:00,  5.15batch/s, loss=0.486, accuracy=0.822]


Epoch 3/20
Loss: 0.48626071214675903, Accuracy: 0.8217075892857143
Macro F1: 0.04719642333743773, Micro F1: 0.8217075892857142, Weighted F1: 0.75883112211996


Epoch 4/20: 100%|█████████▉| 2128/2129 [06:53<00:00,  5.15batch/s, loss=0.988, accuracy=0.831]


Epoch 4/20
Loss: 0.9884495139122009, Accuracy: 0.8309151785714286
Macro F1: 0.07078476587192864, Micro F1: 0.8309151785714286, Weighted F1: 0.7750408966710549


Epoch 5/20: 100%|█████████▉| 2128/2129 [06:53<00:00,  5.14batch/s, loss=1.01, accuracy=0.838]


Epoch 5/20
Loss: 1.0083210468292236, Accuracy: 0.8376703477443609
Macro F1: 0.09639334222974634, Micro F1: 0.8376703477443609, Weighted F1: 0.7882306966107725


Epoch 6/20: 100%|█████████▉| 2128/2129 [06:54<00:00,  5.14batch/s, loss=0.356, accuracy=0.846]


Epoch 6/20
Loss: 0.35592734813690186, Accuracy: 0.8460261983082706
Macro F1: 0.13039423169420367, Micro F1: 0.8460261983082706, Weighted F1: 0.8029327233943531


Epoch 7/20: 100%|█████████▉| 2128/2129 [06:54<00:00,  5.14batch/s, loss=0.497, accuracy=0.852]


Epoch 7/20
Loss: 0.4967767000198364, Accuracy: 0.8523114426691729
Macro F1: 0.16211571725393803, Micro F1: 0.8523114426691729, Weighted F1: 0.814227465498961


Epoch 8/20: 100%|█████████▉| 2128/2129 [06:54<00:00,  5.14batch/s, loss=0.786, accuracy=0.856]


Epoch 8/20
Loss: 0.7858227491378784, Accuracy: 0.8562323778195489
Macro F1: 0.18889154434448224, Micro F1: 0.8562323778195489, Weighted F1: 0.8226162331907334


Epoch 9/20: 100%|█████████▉| 2128/2129 [06:54<00:00,  5.14batch/s, loss=1.09, accuracy=0.862]


Epoch 9/20
Loss: 1.092086672782898, Accuracy: 0.862297344924812
Macro F1: 0.23035536223637187, Micro F1: 0.862297344924812, Weighted F1: 0.8330767883430494


Epoch 10/20: 100%|█████████▉| 2128/2129 [06:53<00:00,  5.14batch/s, loss=0.521, accuracy=0.867]


Epoch 10/20
Loss: 0.5207123756408691, Accuracy: 0.8667910009398496
Macro F1: 0.25257298942988904, Micro F1: 0.8667910009398496, Weighted F1: 0.8401298898236669


Epoch 11/20: 100%|█████████▉| 2128/2129 [06:53<00:00,  5.14batch/s, loss=0.35, accuracy=0.87]


Epoch 11/20
Loss: 0.349540114402771, Accuracy: 0.8704769736842105
Macro F1: 0.28173055254308155, Micro F1: 0.8704769736842105, Weighted F1: 0.8464835639130407


Epoch 12/20: 100%|█████████▉| 2128/2129 [06:53<00:00,  5.14batch/s, loss=0.769, accuracy=0.875]


Epoch 12/20
Loss: 0.7690846920013428, Accuracy: 0.8754258693609023
Macro F1: 0.30570507033888344, Micro F1: 0.8754258693609023, Weighted F1: 0.8536656836186413


Epoch 13/20: 100%|█████████▉| 2128/2129 [06:53<00:00,  5.14batch/s, loss=0.288, accuracy=0.879]


Epoch 13/20
Loss: 0.2881782650947571, Accuracy: 0.8790677866541353
Macro F1: 0.3325770748167101, Micro F1: 0.8790677866541353, Weighted F1: 0.8593821712086258


Epoch 14/20: 100%|█████████▉| 2128/2129 [06:54<00:00,  5.14batch/s, loss=0.626, accuracy=0.883]


Epoch 14/20
Loss: 0.6259258389472961, Accuracy: 0.8829593515037594
Macro F1: 0.3521456624234536, Micro F1: 0.8829593515037595, Weighted F1: 0.8650230978777146


Epoch 15/20: 100%|█████████▉| 2128/2129 [06:55<00:00,  5.13batch/s, loss=0.357, accuracy=0.887]


Epoch 15/20
Loss: 0.3573543429374695, Accuracy: 0.8865572133458647
Macro F1: 0.3788103456200234, Micro F1: 0.8865572133458647, Weighted F1: 0.8702560490379867


Epoch 16/20: 100%|█████████▉| 2128/2129 [06:55<00:00,  5.12batch/s, loss=0.509, accuracy=0.892]


Epoch 16/20
Loss: 0.508627712726593, Accuracy: 0.8916676456766918
Macro F1: 0.40188285386568223, Micro F1: 0.8916676456766918, Weighted F1: 0.8767077354990487


Epoch 17/20: 100%|█████████▉| 2128/2129 [06:55<00:00,  5.13batch/s, loss=0.405, accuracy=0.895]


Epoch 17/20
Loss: 0.4049725830554962, Accuracy: 0.8950746005639098
Macro F1: 0.4234565202645503, Micro F1: 0.8950746005639099, Weighted F1: 0.8815383744497707


Epoch 18/20: 100%|█████████▉| 2128/2129 [06:54<00:00,  5.13batch/s, loss=0.267, accuracy=0.899]


Epoch 18/20
Loss: 0.26676639914512634, Accuracy: 0.8987165178571429
Macro F1: 0.4479076137337033, Micro F1: 0.8987165178571429, Weighted F1: 0.8861701979259721


Epoch 19/20: 100%|█████████▉| 2128/2129 [06:54<00:00,  5.13batch/s, loss=0.606, accuracy=0.902]


Epoch 19/20
Loss: 0.6059044599533081, Accuracy: 0.9015654370300752
Macro F1: 0.4679489214784599, Micro F1: 0.9015654370300752, Weighted F1: 0.8902703457481905


Epoch 20/20: 100%|█████████▉| 2128/2129 [06:55<00:00,  5.12batch/s, loss=0.405, accuracy=0.905]


Epoch 20/20
Loss: 0.4049081802368164, Accuracy: 0.904561207706767
Macro F1: 0.48793835432587235, Micro F1: 0.904561207706767, Weighted F1: 0.8938942004266996


In [None]:
# Serialise the whole model object (not just its state_dict) to a file in the
# current working directory.
# NOTE(review): loading this file later requires the BERT_Classifier class to be
# defined/importable in the loading session - confirm that is acceptable.
torch.save(model, 'span-bert_best_model_withmask.pth')
print("entire Model saved successfully.")

entire Model saved successfully.


In [None]:
import json

# Hyper-parameter grid
batch_sizes = [32, 64, 128]
learning_rates = [0.02, 0.002, 0.0002]
num_epochs = [15, 20, 25]

results = []

# The test split is identical for every configuration, so wrap it once
# instead of rebuilding the dataset inside the innermost loop.
test_dataset = torch.utils.data.TensorDataset(test_text, test_mask, test_label)

# NOTE(review): every configuration is scored on the test split, so picking the
# best row from these results selects hyper-parameters on test data. Consider
# scoring on dev_dataset here and keeping the test split for the final model.

# Grid search
for batch_size in batch_sizes:
    for lr in learning_rates:
        for epoch in num_epochs:
            print(f"Training with batch size = {batch_size}, lr = {lr}, epoch = {epoch}")
            # Fresh, untrained model per configuration. It is bound to its own
            # name so the already trained (and saved) `model` is not
            # overwritten and later cells still evaluate that model.
            grid_model = BERT_Classifier(labels_num)
            # Tell pytorch to run this model on the GPU.
            if torch.cuda.is_available():
                grid_model.cuda()
            # Run training
            train(grid_model, train_dataset, dev_dataset, epoch, lr, batch_size)

            # Evaluate the freshly trained model
            acc_list, macro_f1_list, micro_f1_list, weighted_f1_list, precision_list, recall_list = eval(grid_model, test_dataset, batch_size)

            # Collect the result
            result = {
                'batch_size': batch_size,
                'lr': lr,
                'epoch': epoch,
                'acc_list': acc_list,
                'macro_f1_list': macro_f1_list,
                'micro_f1_list': micro_f1_list,
                'weighted_f1_list': weighted_f1_list,
                'precision_list': precision_list,
                'recall_list': recall_list
            }
            results.append(result)

            # Append to the results file right away so finished configurations
            # survive an interrupted session.
            with open('grid_search_results.txt', 'a') as file:
                file.write(json.dumps(result) + '\n')


for result in results:
    print(f"Batch size: {result['batch_size']}, LR: {result['lr']}, Epoch: {result['epoch']}")
    print(f"Accuracy List: {result['acc_list']}")
    print(f"Macro F1 List: {result['macro_f1_list']}")
    print(f"Micro F1 List: {result['micro_f1_list']}")
    print(f"Weighted F1 List: {result['weighted_f1_list']}")
    print(f"Precision List: {result['precision_list']}")
    print(f"Recall List: {result['recall_list']}")
    print("--------------------------------------------------")

In [None]:
# Use the test dataset to evaluate the model.
# test_text / test_mask / test_label were already built when the splits were
# converted to tensors, so they are reused here instead of converting test_data
# a second time.
test_dataset = torch.utils.data.TensorDataset(test_text, test_mask, test_label)
acc_list, macro_f1_list, micro_f1_list, weighted_f1_list, precision_list, recall_list = eval(model, test_dataset, 32)

Evaluation: 100%|█████████▉| 484/485 [00:32<00:00, 15.11batch/s]


Eval Result: Acc: 0.8224, Macro F1: 0.2634, Micro F1: 0.8224, Weighted F1: 0.7937
Precision (Macro, Micro, Weighted): 0.3915, 0.8224, 0.7833
Recall (Macro, Micro, Weighted): 0.2383, 0.8224, 0.8224


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
