In [None]:
# Notebook configuration: output log name and the on-disk locations of the
# model checkpoint and the pre-tokenized datasets.
# Paths are assembled with os.path.join so the notebook also runs on
# non-Windows systems (on Windows the resulting strings are unchanged).
import os

accuracy_log_name = "bert_model_accuracy_log.txt"
bert_model_path = os.path.join("..", "custom_models", "bert_model")
tokenized_bookcorpus_dataset_path = os.path.join("..", "custom_datasets", "tokenized_bookcorpus_lines_dataset")
tokenized_wikipedia_dataset_path = os.path.join("..", "custom_datasets", "tokenized_wikipedia_lines_dataset")

In [3]:
import os
import sys
# Make the directory two levels above the working directory importable
# (presumably where TokenizedBERTDatasetModule lives -- TODO confirm).
# Guarded so that re-running this cell does not keep appending duplicates.
_repo_root = os.path.realpath("../../")
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

In [4]:
import evaluate
import gc
import torch
from datasets import load_from_disk
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, BertForPreTraining, DefaultDataCollator
from tqdm.auto import tqdm
from TokenizedBERTDatasetModule import TokenizedBERTDataset

In [3]:
# Load both pre-tokenized corpora from disk, in the same order as before
# (BookCorpus first, then Wikipedia).
tokenized_bookcorpus_dataset, tokenized_wikipedia_dataset = (
    load_from_disk(corpus_path)
    for corpus_path in (
        tokenized_bookcorpus_dataset_path,
        tokenized_wikipedia_dataset_path,
    )
)

Loading dataset from disk:   0%|          | 0/18 [00:00<?, ?it/s]

Loading dataset from disk:   0%|          | 0/42 [00:00<?, ?it/s]

In [4]:
batch_size = 8

# Pinning host memory only pays off when batches are later copied to a CUDA
# device, so enable it (and name the target device) only when CUDA is present.
# This keeps the loader usable on CPU-only machines.
use_pinned_memory = torch.cuda.is_available()

# Validation batches are drawn in random order from the concatenation of both
# validation splits; DefaultDataCollator turns the samples into tensor batches.
data_loader = DataLoader(
    TokenizedBERTDataset([tokenized_bookcorpus_dataset['validation'], tokenized_wikipedia_dataset['validation']]),
    batch_size=batch_size,
    collate_fn=DefaultDataCollator(),
    shuffle=True,
    pin_memory=use_pinned_memory,
    pin_memory_device="cuda:0" if use_pinned_memory else "",
)

In [5]:
# Load the BertForPreTraining checkpoint stored at `bert_model_path`; its outputs
# (prediction_logits / seq_relationship_logits) feed the accuracy helpers below.
bert_model = BertForPreTraining.from_pretrained(bert_model_path)

In [6]:
# Select the GPU when one is available, otherwise fall back to the CPU.
# Note: `torch.cuda.is_available` must be *called*; the bare function object is
# always truthy, which made the CPU fallback unreachable.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
bert_model.to(device)

BertForPreTraining(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

In [7]:
def save_batch_mlm_accuracy(batch, model_outputs, mlm_accuracy_metric):
    """Add one batch of token-level predictions to the MLM accuracy metric.

    For every sequence, only the positions before the first 0 in its
    attention mask are scored (the whole sequence when the mask has no 0).
    The reference token at a position is the label when it is not -100,
    otherwise the corresponding input id.

    NOTE(review): because positions labelled -100 fall back to the input id,
    unmasked tokens are scored too, not only the masked ones -- confirm that
    this is the intended definition of "MLM accuracy".

    Args:
        batch: mapping with 'attention_mask', 'labels' and 'input_ids'
            tensors, expected to be shaped (batch, sequence).
        model_outputs: object exposing `prediction_logits`, expected to be
            shaped (batch, sequence, vocabulary).
        mlm_accuracy_metric: object with an
            `add_batch(predictions=..., references=...)` method.
    """
    attention_mask = batch['attention_mask']
    labels = batch['labels']
    input_ids = batch['input_ids']

    # Most likely token id at every position.
    predicted_ids = torch.argmax(model_outputs.prediction_logits, dim=-1)

    # Label where one exists, original input token otherwise.
    reference_ids = torch.where(labels == -100, input_ids, labels)

    # The cumulative product stays 1 up to (not including) the first 0 in each
    # row and is 0 afterwards, i.e. it keeps exactly the prefix before the
    # first masked-out position.
    scored_positions = torch.cumprod((attention_mask != 0).to(torch.long), dim=-1).bool()

    # Boolean indexing flattens row by row, so entries stay in batch order.
    # A single .tolist() per tensor avoids one device sync per token and hands
    # the metric plain Python ints.
    batch_predictions = predicted_ids[scored_positions].tolist()
    batch_references = reference_ids[scored_positions].tolist()

    mlm_accuracy_metric.add_batch(predictions = batch_predictions, references = batch_references)

In [8]:
def save_batch_nsp_accuracy(batch, model_outputs, nsp_accuracy_metric):
    """Add one batch of next-sentence predictions to the NSP accuracy metric.

    The predicted class of each entry is the arg-max over the last dimension
    of `model_outputs.seq_relationship_logits`; the references are taken
    from `batch['next_sentence_label']`.
    """
    predicted_labels = model_outputs.seq_relationship_logits.argmax(dim=-1)
    nsp_accuracy_metric.add_batch(
        predictions=predicted_labels,
        references=batch['next_sentence_label'],
    )

In [9]:
def calculate_model_accuracies(model, early_stop = 0):
    """Evaluate `model` on the notebook-level `data_loader`.

    Args:
        model: model called as `model(**batch)`; its outputs must expose
            `prediction_logits` and `seq_relationship_logits`.
        early_stop: when greater than 0, stop after this many batches;
            otherwise iterate over the whole loader.

    Returns:
        A tuple `(mlm_accuracy, nsp_accuracy)` holding the results of the two
        "accuracy" metrics' `compute()` calls.
    """
    model.eval()
    mlm_accuracy_metric = evaluate.load("accuracy")
    nsp_accuracy_metric = evaluate.load("accuracy")

    # Never advertise more steps than the loader can actually provide.
    total_batches = len(data_loader)
    if early_stop > 0:
        total_batches = min(total_batches, early_stop)

    # no_grad: evaluation needs no autograd graph, which saves a large amount
    # of memory per forward pass. The context manager also guarantees that the
    # progress bar is closed, even if the loop is interrupted.
    with torch.no_grad(), tqdm(total=total_batches) as progress_bar:
        for batch_count, batch in enumerate(data_loader, start=1):
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            save_batch_mlm_accuracy(batch, outputs, mlm_accuracy_metric)
            save_batch_nsp_accuracy(batch, outputs, nsp_accuracy_metric)

            # Uncomment the lines below only if you run into memory problems
            # while executing the script, since they increase execution time.

            # gc.collect()
            # torch.cuda.empty_cache()
            progress_bar.update(1)
            if early_stop > 0 and batch_count >= early_stop:
                break
    return mlm_accuracy_metric.compute(), nsp_accuracy_metric.compute()

In [5]:
def write_accuracies_to_log(accuracy_log, mlm_accuracy, nsp_accuracy):
    """Write the MLM and NSP accuracies to `accuracy_log`, one line each.

    `accuracy_log` only needs a `write(str)` method; each value is rendered
    with `str()`.
    """
    labelled_results = (
        ("Masked Language Model Accuracy: ", mlm_accuracy),
        ("Next Sentence Prediction Accuracy: ", nsp_accuracy),
    )
    for label, result in labelled_results:
        accuracy_log.write(f"{label}{result!s}\n")

In [4]:
# Open the results log in append mode so earlier runs are preserved.
# The handle is closed by the evaluation cell that writes to it.
accuracy_log = open(accuracy_log_name, "a", encoding="utf-8")

In [12]:
# Number of independent evaluation rounds and batches scored per round.
num_evaluations = 10
batches_per_evaluation = 100

# try/finally guarantees the log is closed even when a round is interrupted
# (e.g. KeyboardInterrupt); flushing after every round keeps already-finished
# results on disk.
try:
    for evaluation_index in range(num_evaluations):
        mlm_accuracy, nsp_accuracy = calculate_model_accuracies(bert_model, early_stop = batches_per_evaluation)
        accuracy_log.write("EVALUATION " + str(evaluation_index + 1) + ":\n")
        write_accuracies_to_log(accuracy_log, mlm_accuracy, nsp_accuracy)
        accuracy_log.flush()
finally:
    accuracy_log.close()

  0%|          | 0/100 [00:00<?, ?it/s]

KeyboardInterrupt: 