In [None]:
# from google.colab import drive
# drive.mount('/content/Drive')

In [None]:
!pip install torch
!pip install transformers
!pip install tqdm

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple


In [None]:
import json
import random
import torch
import json
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional

from tqdm import tqdm
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score, recall_score
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaModel

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# List of relation types
keys = ['no_relation', 'per:title', 'org:top_members/employees',
        'org:country_of_headquarters', 'per:parents', 'per:age',
        'per:countries_of_residence', 'per:children', 'org:alternate_names',
        'per:charges', 'per:cities_of_residence', 'per:origin', 'org:founded_by',
        'per:employee_of', 'per:siblings', 'per:alternate_names', 'org:website',
        'per:religion', 'per:stateorprovince_of_death', 'org:parents',
        'org:subsidiaries', 'per:other_family', 'per:stateorprovinces_of_residence',
        'org:members', 'per:cause_of_death', 'org:member_of',
        'org:number_of_employees/members', 'per:country_of_birth',
        'org:shareholders', 'org:stateorprovince_of_headquarters',
        'per:city_of_death', 'per:date_of_birth', 'per:spouse',
        'org:city_of_headquarters', 'per:date_of_death', 'per:schools_attended',
        'org:political/religious_affiliation', 'per:country_of_death',
        'org:founded', 'per:stateorprovince_of_birth', 'per:city_of_birth',
        'org:dissolved']

# Assigning indices to the list elements and storing them in a dictionary
rel2id = {key: idx for idx, key in enumerate(keys)}
id2rel = {idx: key for key, idx in rel2id.items()}

# # Printing the dictionaries
# print(rel2id)
# print(id2rel)

In [None]:
# tell pytorch to use the gpu if available
if torch.cuda.is_available():

    DEVICE = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not
else:
    print('No GPU available, using the CPU instead.')
    DEVICE = torch.device("cpu")

USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    print("using GPU")
else:
    print("using CPU")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

There are 1 GPU(s) available.
We will use the GPU: NVIDIA GeForce RTX 3090
using GPU


In [None]:
#RoBERTa tokenizer
tokenizer = RobertaTokenizer.from_pretrained("roberta-base", do_lower_case=True)
print('RoBERTa tokenizer loaded')

RoBERTa tokenizer loaded


In [None]:
"""
By setting the same seed, it ensures that the same sequence of random numbers is generated
each time the program is run, thus making the experimental results reproducible and aiding
in debugging and verifying the robustness of the model.
"""

def setup_seed(seed):
    # Sets the seed for generating random numbers for the CPU.

    # Sets the seed for generating random numbers for all GPUs.
    torch.cuda.manual_seed_all(seed)

    # Sets the seed for generating random numbers with NumPy.
    np.random.seed(seed)

    # Sets the seed for the built-in Python random module.
    random.seed(seed)

setup_seed(44)

In [None]:
def load_tacred_dataset(file_path):
    """
    Load the TACRED dataset from a JSON file.

    Args:
    file_path (str): The path to the JSON file containing the dataset.

    Returns:
    dict: The loaded dataset.
    """
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

Our data preprocessing function `prepare_data(data)` plays a key role in natural language processing tasks. This function takes raw data and converts it into an easy-to-process format. It goes through each data instance, extracts the start and end index of the entity, extracts the entity text from it, and then gets the relationship label. It then combines words from the original text into sentences and organizes this processed information into dictionary form. Ultimately, it adds the dictionary of each data instance to a list and returns the list as output. This function not only provides the function of data format conversion, but also facilitates subsequent model training and text analysis.

In [None]:
def prepare_data(data):
    # Get the number of instances in the data
    n = len(data)

    # Initialize an empty list to store processed data
    info = []

    # Iterate through each instance in the data
    for i in range(n):
        # Initialize an empty dictionary for storing processed information of a single data instance
        single_data = {}

        # Extract start and end indices of subject and object entities
        ss = data[i]["subj_start"]
        se = data[i]["subj_end"]
        os = data[i]["obj_start"]
        oe = data[i]["obj_end"]

        # Extract subject and object tokens based on their start and end indices
        subj = data[i]['token'][ss: se+1]
        obj = data[i]['token'][os: oe+1]

        # Convert subject and object tokens into strings
        ent1 = ' '.join(subj)
        ent2 = ' '.join(obj)

        # Extract relation label
        rel = data[i]["relation"]

        # Concatenate all tokens to form the original sentence
        text = " ".join(data[i]["token"])

        # Store processed information in the dictionary
        single_data['rel'] = rel
        single_data['ent1'] = ent1
        single_data['ent2'] = ent2
        single_data['text'] = text

        # Append processed information of a single data instance to the list
        info.append(single_data)

    # Return the processed data
    return info

The `process_relation_extraction_data(info, max_length=64)` function plays a vital role in the relationship extraction task. This function is used to process raw data and prepare it into a format suitable for model input. First, the function traverses the provided data information and integrates entities, text information, and relationship tags. For each data instance, the function will splice the entity and text information into a sentence, and perform word segmentation and filling operations. Next, the sentences are tokenized by adding special tags. The function truncates or pads sentences according to the given maximum length to ensure consistent sentence lengths. Finally, the function adds the processed sentences and their corresponding attention masks to the output data. In this way, we are able to transform the raw data into a form suitable for model training and prepare it for further relationship extraction tasks.

In [None]:
def process_relation_extraction_data(info, max_length=64):
    # Initialize a dictionary to store processed data
    data = {}
    data['label'] = []  # List to store relation labels
    data['mask'] = []   # List to store attention masks
    data['text'] = []   # List to store tokenized and padded texts

    # Iterate through each instance in the provided 'info' data
    for line in info:
        # Check if the relation label is present in the 'rel2id' dictionary
        if line['rel'] not in rel2id:
            # If the relation label is not found, assign label 0 (for unknown relation)
            data['label'].append(0)
        else:
            # If the relation label is found, assign its corresponding index
            data['label'].append(rel2id[line['rel']])

        # Concatenate subject, object, and text to form a single sentence
        sent = line['ent1'] + line['ent2'] + line['text']

        # Tokenize the concatenated sentence and add special tokens
        indexed_tokens = tokenizer.encode(sent, add_special_tokens=True)

        # Determine the available length of the tokenized sentence
        avai_len = len(indexed_tokens)

        # Pad the tokenized sentence with 0s to match the maximum length
        while len(indexed_tokens) < max_length:
            indexed_tokens.append(0)

        # Trim the tokenized sentence to the maximum length
        indexed_tokens = indexed_tokens[:max_length]

        # Convert the tokenized sentence to a PyTorch tensor
        indexed_tokens = torch.tensor(indexed_tokens).long().unsqueeze(0)  # (1, L)

        # Create an attention mask for the tokenized sentence
        att_mask = torch.zeros(indexed_tokens.size()).long()  # (1, L)
        att_mask[0, :avai_len] = 1

        # Append the tokenized and padded sentence, and its attention mask to the data dictionary
        data['text'].append(indexed_tokens)
        data['mask'].append(att_mask)

    return data

In [None]:
def convert_data_to_tensors(data):
    # Extract text, mask, and label from the data
    text = data['text']
    mask = data['mask']
    label = data['label']

    # Convert text and mask tensors to numpy arrays
    text = [t.numpy() for t in text]
    mask = [t.numpy() for t in mask]

    # Convert numpy arrays to PyTorch tensors
    text = torch.tensor(text)
    mask = torch.tensor(mask)
    label = torch.tensor(label)

    return text, mask, label

The `RoBERTa_Classifier` class implements a RoBERTa-based classifier for relation extraction tasks. The model utilizes pre-trained RoBERTa encoder to extract text features and perform classification through fully connected layers. During the initialization process, the pretrained RoBERTa model is loaded by calling the `from_pretrained` method, and a fully connected layer containing `label_num` output categories is defined. The model also includes a dropout layer to prevent overfitting and uses cross-entropy loss as the loss function for classification.

In the forward propagation process, the input data passes through the RoBERTa encoder to obtain text feature representation, and then is classified through the fully connected layer. The final output is the class probability distribution predicted by the model. If a label is provided, the model computes the cross-entropy loss and returns the loss value and the predicted probability distribution; otherwise, only the predicted probability distribution is returned.

When in use, we can initialize the model according to specific tasks and number of labels, and move it to a designated computing device (such as GPU). Use `labels_num=len(rel2id)` to get the number of relationship labels and create the corresponding model instance.

In [None]:
class RoBERTa_Classifier(nn.Module):
    def __init__(self, label_num):
        super().__init__()
        # Initialize the RoBERTa encoder from pre-trained weights
        self.encoder = RobertaModel.from_pretrained("roberta-base")
        # Dropout layer to prevent overfitting
        self.dropout = nn.Dropout(0.1, inplace=False)
        # Fully connected layer for classification
        self.fc = nn.Linear(768, label_num)  # 768 is the hidden size of RoBERTa
        # Cross-entropy loss criterion
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x, attention_mask, label=None):
        # Pass the input through the RoBERTa encoder
        x = self.encoder(x, attention_mask=attention_mask)[0]  # Output is tuple (last_hidden_state, pooler_output), we take the last_hidden_state
        # Take only the first token's output (CLS token)
        x = x[:, 0, :]
        # Apply dropout
        x = self.dropout(x)
        # Pass through the fully connected layer
        x = self.fc(x)
        # If label is not provided, return logits only
        if label is None:
            return None, x
        else:
            # Calculate the cross-entropy loss and return both loss and logits
            return self.criterion(x, label), x

labels_num=len(rel2id)
model = RoBERTa_Classifier(labels_num)
model.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RoBERTa_Classifier(
  (encoder): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): 

The `train` function is an important tool for model training. It receives a neural network model, a training data set, a validation data set, and some hyperparameters such as the number of training epochs, learning rate, and batch size. During training, this function sets the model to training mode and uses stochastic gradient descent (SGD) as the optimizer. It iterates over the training data set, performing forward propagation, computing loss, back propagation, and parameter updates in each batch. At the same time, it monitors the loss and accuracy of the model in real time, and uses the tqdm library to display the training progress. At the end of each cycle, this function calculates the accuracy, loss, F1 score and other evaluation indicators of the model on the training set and validation set, and prints out these indicators.

Additionally, at the end of each epoch, the function calls the `eval` function to evaluate the model on the validation set. After training is completed, the model status will be saved to a .pth file for subsequent model application or further training. Overall, the `train` function is a core tool for completing model training and monitoring performance.

In the `train` function, in addition to monitoring the loss and accuracy of the model, the F1 score is also used as an evaluation metric for model performance. At the end of each training cycle, the model's accuracy, loss, and F1 score under three different weighting methods on the training set and validation set were calculated: macro average (macro), micro average (micro), and weighted average (weighted ).

The F1 score is an indicator that combines the precision and recall of the model. It comprehensively evaluates the imbalanced class distribution and classifier effect. It is calculated as the harmonic mean of precision and recall, which can be used to evaluate the performance of the model in multi-classification tasks. During this training process, three different ways of calculating F1 scores were used to comprehensively evaluate the model's performance on different categories.

Specifically, when calculating these evaluation indicators, the `f1_score` function is used, in which the `average` parameter is set to `'macro'`, `'micro'` and `'weighted'` respectively to calculate the macro average, micro Average and weighted average F1 scores. These F1 scores provide an assessment of model performance at different levels, helping to better understand how the model performs in classification tasks.

In [None]:
def train(net, train_dataset, dev_dataset, num_epochs, learning_rate, batch_size):

    # Set the network to training mode
    net.train()

    # Define the optimizer
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0)

    # Create a data loader for training data
    train_iter = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True)

    # Lists to store metrics
    epoch_losses = []
    epoch_accuracies = []

    for epoch in range(num_epochs):

        print('Training...')

        correct = 0
        total = 0
        iter = 0
        all_pred = []
        all_true = []

        # Initialize tqdm to show progress bar
        progress_bar = tqdm(train_iter, desc=f'Epoch {epoch + 1}/{num_epochs}', unit='batch')

        for text, mask, y in progress_bar:
            iter += 1
            optimizer.zero_grad()

            # If the batch size is not equal to the specified batch size, break the loop
            if text.size(0) != batch_size:
                break

            # Reshape text and mask tensors
            text = text.reshape(batch_size, -1)
            mask = mask.reshape(batch_size, -1)

            # Move tensors to GPU if available
            if USE_CUDA:
                text = text.cuda()
                mask = mask.cuda()
                y = y.cuda()

            # Forward pass
            loss, logits = net(text, mask, y)

            # Backpropagation
            loss.backward()
            optimizer.step()

            # Compute accuracy
            _, predicted = torch.max(logits.data, 1)
            total += text.size(0)
            correct += predicted.data.eq(y.data).cpu().sum()

            # Collect predictions and true labels
            all_pred.extend(predicted.cpu().numpy())
            all_true.extend(y.cpu().numpy())

            # Update progress bar
            progress_bar.set_postfix({'loss': loss.item(), 'accuracy': correct.item() / total})

        # After the end of each epoch, compute metrics
        accuracy = correct.cpu().numpy().tolist()/total
        loss = loss.detach().cpu()

        # Compute F1 scores
        macro_f1 = f1_score(all_true, all_pred, average='macro')
        micro_f1 = f1_score(all_true, all_pred, average='micro')
        weighted_f1 = f1_score(all_true, all_pred, average='weighted')

        epoch_losses.append(loss)
        epoch_accuracies.append(accuracy)

        # Print metrics
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print(f"Loss: {loss.mean().numpy().tolist()}, Accuracy: {accuracy}")
        print(f"Macro F1: {macro_f1}, Micro F1: {micro_f1}, Weighted F1: {weighted_f1}")

        print("Validation...")

        dev_acc, dev_macro_f1, dev_micro_f1, dev_weighted_f1, dev_precision_list, dev_recall_list= eval(net, dev_dataset, batch_size)

        print()

    torch.save(model.state_dict(), 'RoBERTa_EX1_80_model.pth')

    return epoch_losses, epoch_accuracies


In [None]:
from sklearn.metrics import precision_score, recall_score

def eval(net, dataset, batch_size):
    # Set the network to evaluation mode
    net.eval()

    # Create an iterator for the evaluation dataset
    eval_iter = DataLoader(dataset, batch_size, shuffle=False)

    # Lists to store predictions and true labels
    all_pred = []
    all_true = []

    # Lists to store evaluation metrics
    acc_list = []
    macro_f1_list = []
    micro_f1_list = []
    weighted_f1_list = []
    precision_list = []  # List to store precision scores
    recall_list = []     # List to store recall scores

    with torch.no_grad():
        correct = 0  # Counter for correctly classified samples
        total = 0    # Counter for total samples
        # Progress bar for visualization during evaluation
        progress_bar = tqdm(eval_iter, desc='Evaluation', unit='batch')

        # Iterate through each batch in the evaluation dataset
        for text, mask, y in progress_bar:
            # If the batch size is not as expected, break the loop
            if text.size(0) != batch_size:
                break

            # Reshape text and mask tensors
            text = text.reshape(batch_size, -1)
            mask = mask.reshape(batch_size, -1)

            # Move tensors to GPU if available
            if USE_CUDA:
                text, mask, y = text.cuda(), mask.cuda(), y.cuda()

            # Forward pass through the network
            outputs = net(text, mask)
            # Unpack outputs if it's a tuple (contains both loss and logits)
            loss, logits = outputs if isinstance(outputs, tuple) else (None, outputs)

            # Calculate predicted labels
            _, predicted = torch.max(logits, 1)
            # Update total and correct counts
            total += y.size(0)
            correct += (predicted == y).sum().item()

            # Store predicted and true labels for computing F1 score later
            all_pred.extend(predicted.cpu().numpy())
            all_true.extend(y.cpu().numpy())

            # Calculate accuracy for the current batch
            accuracy = correct / total
            progress_bar.set_postfix({'accuracy': accuracy})

        # Calculate overall accuracy
        acc = correct / total
        # Calculate F1 scores
        macro_f1 = f1_score(all_true, all_pred, average='macro')
        micro_f1 = f1_score(all_true, all_pred, average='micro')
        weighted_f1 = f1_score(all_true, all_pred, average='weighted')
        # Calculate precision and recall
        precision = precision_score(all_true, all_pred, average='macro')
        recall = recall_score(all_true, all_pred, average='macro')

        # Append metrics to respective lists
        acc_list.append(acc)
        macro_f1_list.append(macro_f1)
        micro_f1_list.append(micro_f1)
        weighted_f1_list.append(weighted_f1)
        precision_list.append(precision)
        recall_list.append(recall)

        # Print evaluation results
        print(f"Eval Result: right {correct}, total {total}, Acc: {acc:.4f}")
        print(f"Macro F1: {macro_f1:.4f}, Micro F1: {micro_f1:.4f}, Weighted F1: {weighted_f1:.4f}")
        print(f"Precision: {precision:.4f}, Recall: {recall:.4f}")

        # Return evaluation metrics lists
        return acc_list, macro_f1_list, micro_f1_list, weighted_f1_list, precision_list, recall_list


In [None]:
# Load data from JSON file
train_data = load_tacred_dataset('/home/featurize/data/different_size_dataset/80/train_80%.json')
dev_data = load_tacred_dataset('/home/featurize/data/TACRED/dev.json')
test_data = load_tacred_dataset('/home/featurize/data/TACRED/test.json')

In [None]:
# ent1+ent2+text dataset
train_info = prepare_data(train_data)
dev_info = prepare_data(dev_data)
test_info = prepare_data(test_data)

print(len(train_info))
print(len(dev_info))
print(len(test_info))

54496
22631
15509


In [None]:
train_data = process_relation_extraction_data(train_info, 128)
dev_data = process_relation_extraction_data(dev_info, 128)
test_data = process_relation_extraction_data(test_info, 128)

print(len(train_data['label']))
print(len(dev_data['label']))
print(len(test_data['label']))

54496
22631
15509


In [None]:
# Preprocess train data
train_text, train_mask, train_label = convert_data_to_tensors(train_data)
print("--train data--")
print(train_text.shape)
print(train_mask.shape)
print(train_label.shape)

# Preprocess dev data
dev_text, dev_mask, dev_label = convert_data_to_tensors(dev_data)
print("--eval data--")
print(dev_text.shape)
print(dev_mask.shape)
print(dev_label.shape)

# Preprocess test data
test_text, test_mask, test_label = convert_data_to_tensors(test_data)
print("--test data--")
print(test_text.shape)
print(test_mask.shape)
print(test_label.shape)

  text = torch.tensor(text)


--train data--
torch.Size([54496, 1, 128])
torch.Size([54496, 1, 128])
torch.Size([54496])
--eval data--
torch.Size([22631, 1, 128])
torch.Size([22631, 1, 128])
torch.Size([22631])
--test data--
torch.Size([15509, 1, 128])
torch.Size([15509, 1, 128])
torch.Size([15509])


In [None]:
train_dataset = torch.utils.data.TensorDataset(train_text, train_mask, train_label)
dev_dataset = torch.utils.data.TensorDataset(dev_text, dev_mask, dev_label)

In [None]:
epoch_losses, epoch_accuracies = train(model, train_dataset, dev_dataset, 15, 0.002, 16)

Training...


Epoch 1/15: 100%|██████████| 3406/3406 [04:43<00:00, 12.02batch/s, loss=0.286, accuracy=0.828] 


Epoch 1/15
Loss: 0.2859255373477936, Accuracy: 0.8277121256605989
Macro F1: 0.08742568882672813, Micro F1: 0.827712125660599, Weighted F1: 0.7783901028334086
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:42<00:00, 32.98batch/s, accuracy=0.816]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 18465, total 22624, Acc: 0.8162
Macro F1: 0.1918, Micro F1: 0.8162, Weighted F1: 0.7706
Precision: 0.2595, Recall: 0.1813

Training...


Epoch 2/15: 100%|██████████| 3406/3406 [04:34<00:00, 12.42batch/s, loss=0.48, accuracy=0.874]  


Epoch 2/15
Loss: 0.479958176612854, Accuracy: 0.8735687022900763
Macro F1: 0.3564654790514785, Micro F1: 0.8735687022900763, Weighted F1: 0.855931833604945
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.70batch/s, accuracy=0.841]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19033, total 22624, Acc: 0.8413
Macro F1: 0.3368, Micro F1: 0.8413, Weighted F1: 0.8065
Precision: 0.4983, Recall: 0.2943

Training...


Epoch 3/15: 100%|██████████| 3406/3406 [04:34<00:00, 12.40batch/s, loss=0.248, accuracy=0.893] 


Epoch 3/15
Loss: 0.24797911942005157, Accuracy: 0.8927260716382853
Macro F1: 0.4951776167120667, Micro F1: 0.8927260716382853, Weighted F1: 0.8835423903579418
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.65batch/s, accuracy=0.848]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19193, total 22624, Acc: 0.8483
Macro F1: 0.4662, Micro F1: 0.8483, Weighted F1: 0.8346
Precision: 0.5734, Recall: 0.4342

Training...


Epoch 4/15: 100%|██████████| 3406/3406 [04:35<00:00, 12.36batch/s, loss=0.381, accuracy=0.909] 


Epoch 4/15
Loss: 0.38075315952301025, Accuracy: 0.9089290957134468
Macro F1: 0.5894230674885355, Micro F1: 0.9089290957134468, Weighted F1: 0.9031944804279178
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.64batch/s, accuracy=0.86] 
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19454, total 22624, Acc: 0.8599
Macro F1: 0.5054, Micro F1: 0.8599, Weighted F1: 0.8464
Precision: 0.5859, Recall: 0.4792

Training...


Epoch 5/15: 100%|██████████| 3406/3406 [04:34<00:00, 12.41batch/s, loss=0.43, accuracy=0.923]   


Epoch 5/15
Loss: 0.4300401508808136, Accuracy: 0.9229484732824428
Macro F1: 0.6424622332205538, Micro F1: 0.9229484732824428, Weighted F1: 0.9188972664454687
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.66batch/s, accuracy=0.861]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19480, total 22624, Acc: 0.8610
Macro F1: 0.5133, Micro F1: 0.8610, Weighted F1: 0.8470
Precision: 0.6416, Recall: 0.4615

Training...


Epoch 6/15: 100%|██████████| 3406/3406 [04:33<00:00, 12.45batch/s, loss=0.126, accuracy=0.937]  


Epoch 6/15
Loss: 0.1256888508796692, Accuracy: 0.9372064004697592
Macro F1: 0.7096487615362621, Micro F1: 0.9372064004697592, Weighted F1: 0.9345604565355032
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.81batch/s, accuracy=0.856]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19367, total 22624, Acc: 0.8560
Macro F1: 0.5500, Micro F1: 0.8560, Weighted F1: 0.8500
Precision: 0.6235, Recall: 0.5362

Training...


Epoch 7/15: 100%|██████████| 3406/3406 [04:34<00:00, 12.41batch/s, loss=0.288, accuracy=0.949]  


Epoch 7/15
Loss: 0.2884180545806885, Accuracy: 0.9494825308279506
Macro F1: 0.7714316547484983, Micro F1: 0.9494825308279506, Weighted F1: 0.9480376863815891
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.65batch/s, accuracy=0.853]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19298, total 22624, Acc: 0.8530
Macro F1: 0.5391, Micro F1: 0.8530, Weighted F1: 0.8481
Precision: 0.6111, Recall: 0.5180

Training...


Epoch 8/15: 100%|██████████| 3406/3406 [04:33<00:00, 12.47batch/s, loss=0.068, accuracy=0.961]   


Epoch 8/15
Loss: 0.06795675307512283, Accuracy: 0.9611164122137404
Macro F1: 0.8101521838700899, Micro F1: 0.9611164122137404, Weighted F1: 0.960185180335678
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:42<00:00, 33.14batch/s, accuracy=0.856]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19362, total 22624, Acc: 0.8558
Macro F1: 0.5239, Micro F1: 0.8558, Weighted F1: 0.8452
Precision: 0.6222, Recall: 0.4796

Training...


Epoch 9/15: 100%|██████████| 3406/3406 [04:33<00:00, 12.45batch/s, loss=0.0169, accuracy=0.971]  


Epoch 9/15
Loss: 0.016938338056206703, Accuracy: 0.9709886964180857
Macro F1: 0.8566383722038654, Micro F1: 0.9709886964180857, Weighted F1: 0.9704951708388769
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.58batch/s, accuracy=0.853]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19303, total 22624, Acc: 0.8532
Macro F1: 0.5345, Micro F1: 0.8532, Weighted F1: 0.8467
Precision: 0.6000, Recall: 0.5032

Training...


Epoch 10/15: 100%|██████████| 3406/3406 [04:34<00:00, 12.43batch/s, loss=0.158, accuracy=0.977]   


Epoch 10/15
Loss: 0.15820422768592834, Accuracy: 0.9767872871403406
Macro F1: 0.8769044516992505, Micro F1: 0.9767872871403406, Weighted F1: 0.9764256182216842
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.66batch/s, accuracy=0.838]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 18963, total 22624, Acc: 0.8382
Macro F1: 0.5492, Micro F1: 0.8382, Weighted F1: 0.8417
Precision: 0.5482, Recall: 0.5853

Training...


Epoch 11/15: 100%|██████████| 3406/3406 [04:34<00:00, 12.39batch/s, loss=0.0218, accuracy=0.982]  


Epoch 11/15
Loss: 0.02179173193871975, Accuracy: 0.9821454785672343
Macro F1: 0.8997243082292761, Micro F1: 0.9821454785672343, Weighted F1: 0.9819615891056651
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.78batch/s, accuracy=0.853]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19292, total 22624, Acc: 0.8527
Macro F1: 0.5622, Micro F1: 0.8527, Weighted F1: 0.8504
Precision: 0.5820, Recall: 0.5685

Training...


Epoch 12/15: 100%|██████████| 3406/3406 [04:35<00:00, 12.37batch/s, loss=0.0658, accuracy=0.985]  


Epoch 12/15
Loss: 0.0658058300614357, Accuracy: 0.9850447739283618
Macro F1: 0.9148579560233158, Micro F1: 0.9850447739283618, Weighted F1: 0.9849165134274046
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.70batch/s, accuracy=0.853]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19287, total 22624, Acc: 0.8525
Macro F1: 0.5448, Micro F1: 0.8525, Weighted F1: 0.8478
Precision: 0.6359, Recall: 0.5139

Training...


Epoch 13/15: 100%|██████████| 3406/3406 [04:33<00:00, 12.44batch/s, loss=0.0213, accuracy=0.987]  


Epoch 13/15
Loss: 0.02133133076131344, Accuracy: 0.9872834703464475
Macro F1: 0.9230674717125289, Micro F1: 0.9872834703464475, Weighted F1: 0.9871887051046606
Validation...


Evaluation: 100%|█████████▉| 1414/1415 [00:43<00:00, 32.72batch/s, accuracy=0.851]
  _warn_prf(average, modifier, msg_start, len(result))


Eval Result: right 19243, total 22624, Acc: 0.8506
Macro F1: 0.5567, Micro F1: 0.8506, Weighted F1: 0.8481
Precision: 0.5767, Recall: 0.5520

Training...


Epoch 14/15: 100%|█████████▉| 3398/3406 [04:34<00:00, 12.34batch/s, loss=0.00411, accuracy=0.99]  

In [None]:
import matplotlib.pyplot as plt

num_epochs = 15

# Plotting
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(range(1, num_epochs + 1), epoch_losses, label='Loss')
plt.title('Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(range(1, num_epochs + 1), epoch_accuracies, label='Accuracy', color='orange')
plt.title('Accuracy per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

In [None]:
test_dataset = torch.utils.data.TensorDataset(test_text, test_mask, test_label)
acc_list, macro_f1_list, micro_f1_list, weighted_f1_list, precision_list, recall_list = eval(model, test_dataset, 16)

In [None]:
print(acc_list)
print(precision_list)
print(recall_list)
print()
print(macro_f1_list)
print(micro_f1_list)
print(weighted_f1_list)