In [1]:
import numpy as np
import torch
import torch.nn.functional as F
from pytorch_transformers import AdamW, WarmupLinearSchedule
from torch import nn
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)

from seqeval.metrics import classification_report
from model.xlmr_for_token_classification import XLMRForTokenClassification
from utils.train_utils import add_xlmr_args, evaluate_model
from utils.data_utils import NerProcessor, create_dataset, convert_examples_to_features

In [2]:
def evaluate_model(model, eval_dataset, label_list, batch_size, device):
     """
     Evaluates an NER model on the eval_dataset provided.
     Returns:
          F1_score: Macro-average f1_score on the evaluation dataset.
          Report: detailed classification report 
     """
#      print(eval_dataset.shape)
#      print(eval_dataset)
     # Run prediction for full data
     eval_sampler = SequentialSampler(eval_dataset)
     eval_dataloader = DataLoader(
          eval_dataset, sampler=eval_sampler, batch_size=batch_size)

     model.eval() # turn of dropout

     y_true = []
     y_pred = []

     label_map = {i: label for i, label in enumerate(label_list, 1)}
     print(label_map)
     for input_ids, label_ids, l_mask, valid_ids in eval_dataloader:

          input_ids = input_ids.to(device)
          label_ids = label_ids.to(device)

          valid_ids = valid_ids.to(device)
          l_mask = l_mask.to(device)

          with torch.no_grad():
               logits = model(input_ids, labels=None, labels_mask=None,
                              valid_mask=valid_ids)

          logits = torch.argmax(logits, dim=2)
          logits = logits.detach().cpu().numpy()
          label_ids = label_ids.cpu().numpy()

          for i, cur_label in enumerate(label_ids):
               temp_1 = []
               temp_2 = []

               for j, m in enumerate(cur_label):
                    if valid_ids[i][j]:  # if it's a valid label
                         temp_1.append(label_map[m])
                         temp_2.append(label_map[logits[i][j]])

               assert len(temp_1) == len(temp_2)
               y_true.append(temp_1)
               y_pred.append(temp_2)
#      print(y_true, y_pred)
     report = classification_report(y_true, y_pred, digits=4)
     f1 = f1_score(y_true, y_pred, average='macro')

     return f1, report, y_true, y_pred

In [3]:
torch.cuda.device_count()

4

In [7]:
torch.cuda.set_device(1)

In [8]:
num_labels = 6 # 5 + 1 
hidden_size = 768 #if 'base' in args.pretrained_path else 1024 # TODO: move this inside model.__init__

device = 'cuda' if (torch.cuda.is_available()) else 'cpu'

# creating model
model = XLMRForTokenClassification(pretrained_path="./pretrained_models/xlmr.base/",
                                   n_labels=num_labels, hidden_size=hidden_size,
                                   dropout_p=0.2, device=device)

2021-10-28 16:57:37 | INFO | fairseq.file_utils | loading archive file ./pretrained_models/xlmr.base/
2021-10-28 16:57:41 | INFO | fairseq.tasks.multilingual_masked_lm | dictionary: 250001 types


In [9]:
model.to(device)

XLMRForTokenClassification(
  (linear_1): Linear(in_features=768, out_features=768, bias=True)
  (classification_head): Linear(in_features=768, out_features=6, bias=True)
  (xlmr): RobertaHubInterface(
    (model): RobertaModel(
      (encoder): RobertaEncoder(
        (sentence_encoder): TransformerEncoder(
          (dropout_module): FairseqDropout()
          (embed_tokens): Embedding(250002, 768, padding_idx=1)
          (embed_positions): LearnedPositionalEmbedding(514, 768, padding_idx=1)
          (layernorm_embedding): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (layers): ModuleList(
            (0): TransformerEncoderLayerBase(
              (self_attn): MultiheadAttention(
                (dropout_module): FairseqDropout()
                (k_proj): Linear(in_features=768, out_features=768, bias=True)
                (v_proj): Linear(in_features=768, out_features=768, bias=True)
                (q_proj): Linear(in_features=768, out_features=768, bias=True)


In [10]:
state_dict = torch.load(open('./results/finetuned_models/model_eng_scientific_xb_v5/model.pt', ), 'rb')
model.load_state_dict(state_dict)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 64: invalid start byte