<a href="https://colab.research.google.com/github/Vaibhav-Tyro/keyphrase-extraction-using-BERT/blob/main/Evaluate_pynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
""" Evaluate the model"""
%tb
import argparse
import logging
import os
import random

import numpy as np
import torch
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from torch.utils.data import DataLoader
from transformers import BertConfig
from transformers import BertForTokenClassification

# Project-local helper module; this file uses utils.RunningAverage, utils.Params,
# utils.set_logger and utils.load_checkpoint.
import utils

# Command-line options for the evaluation script.
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', default='data/msra/',
                    help="Directory containing the dataset")
parser.add_argument('--bert_model_dir', default='bert-base-chinese-pytorch',
                    help="Directory containing the BERT model in PyTorch")
parser.add_argument('--model_dir', default='experiments/base_model',
                    help="Directory containing params.json")
parser.add_argument('--seed', type=int, default=23,
                    help="random seed for initialization")
parser.add_argument('--restore_file', default='best',
                    help="name of the file in 'model_dir' containing weights to load")
parser.add_argument('--multi_gpu', default=False, action='store_true',
                    help="whether to use multiple GPUs if available")
# The script body reads `args.fp16`; without this option that lookup raises
# AttributeError.
parser.add_argument('--fp16', default=False, action='store_true',
                    help="whether to use 16-bit float precision instead of 32-bit")

def evaluate(model, data_iterator, params, marks='Eval', verbose=False, mark=None):
  """Evaluate the model on `params.eval_steps` batches.

  Args:
    model: token-classification model. It is called with the input ids, an
      attention mask and the labels, and is expected to return the loss as
      its first output and the logits as its second (the `transformers`
      convention when `labels` is supplied).
    data_iterator: iterator yielding `(batch_data, batch_tags)` tensor pairs.
    params: object exposing `idx2tag`, `eval_steps`, `n_gpu` and `multi_gpu`.
    marks: label used in the logged metrics line.
    verbose: when true, also log a per-class classification report.
    mark: alias of `marks` (the script body historically passed `mark=`);
      takes precedence over `marks` when given.

  Returns:
    dict with the average loss under 'loss' and the F1 score under 'f1'.
  """
  # set model to evaluation mode
  model.eval()

  label = marks if mark is None else mark
  idx2tag = params.idx2tag

  true_tags = []
  pred_tags = []

  # a running average object for loss
  loss_avg = utils.RunningAverage()

  # No gradients are needed for evaluation; a single forward pass yields both
  # the loss and the logits.
  with torch.no_grad():
    for _ in range(params.eval_steps):
      # fetch the next evaluation batch
      batch_data, batch_tags = next(data_iterator)
      # positions whose token id is > 0 are attended to
      batch_masks = batch_data.gt(0)

      outputs = model(batch_data, token_type_ids=None,
                      attention_mask=batch_masks, labels=batch_tags)
      # Index access works for both tuple outputs and ModelOutput objects.
      loss, logits = outputs[0], outputs[1]
      if params.n_gpu > 1 and params.multi_gpu:
        # DataParallel returns one loss per replica
        loss = loss.mean()
      loss_avg.update(loss.item())

      # argmax over axis 2 below implies logits are 3-D, presumably
      # (batch_size, max_len, num_labels) — TODO confirm
      batch_output = logits.detach().cpu().numpy()
      batch_tags = batch_tags.to('cpu').numpy()

      # NOTE(review): every position, including padded ones, is counted here.
      pred_tags.extend(idx2tag.get(idx)
                       for indices in np.argmax(batch_output, axis=2)
                       for idx in indices)
      true_tags.extend(idx2tag.get(idx)
                       for indices in batch_tags
                       for idx in indices)
  assert len(pred_tags) == len(true_tags)

  # logging loss, f1 and report
  # sklearn's default average='binary' raises for multi-class / string labels,
  # so an explicit averaging mode is required; micro-averaging is used here.
  f1 = f1_score(true_tags, pred_tags, average='micro')
  metrics = {'loss': loss_avg(), 'f1': f1}
  metrics_str = "; ".join("{}: {:05.2f}".format(k, v) for k, v in metrics.items())
  logging.info("-{} metrics: ".format(label) + metrics_str)

  if verbose:
    report = classification_report(true_tags, pred_tags)
    logging.info(report)
  return metrics

# Script entry point: load the parameters and test data, rebuild the model from
# its checkpoint, and evaluate it on the test set.
if __name__ == '__main__':
  # parse_known_args ignores arguments the parser does not define, such as the
  # `-f <kernel.json>` argument injected by Jupyter/Colab kernels, which makes
  # parse_args() exit with "unrecognized arguments".
  args, _ = parser.parse_known_args()

  # Load the parameters from json file
  json_path = os.path.join(args.model_dir, 'params.json')
  assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
  params = utils.Params(json_path)

  # Use GPUs if available
  params.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  params.n_gpu = torch.cuda.device_count()
  params.multi_gpu = args.multi_gpu

  # set the random seed for reproducible experiments
  random.seed(args.seed)
  torch.manual_seed(args.seed)
  if params.n_gpu > 0:
    torch.cuda.manual_seed_all(args.seed)  # set random seed for all GPUs
  params.seed = args.seed

  # set the logger
  utils.set_logger(os.path.join(args.model_dir, 'evaluate.log'))

  # create the input data pipeline
  logging.info("loading the dataset...")

  # Initialize the DataLoader
  # NOTE(review): torch.utils.data.DataLoader has no load_data/data_iterator
  # methods; these calls presumably expect a project-specific DataLoader class
  # — confirm which DataLoader should be imported.
  data_loader = DataLoader(args.data_dir, args.bert_model_dir, params, token_pad_idx=0)

  # Load data
  test_data = data_loader.load_data('test')

  # specify the test set size
  params.test_size = test_data['size']
  params.eval_steps = params.test_size // params.batch_size
  test_data_iterator = data_loader.data_iterator(test_data, shuffle=False)

  logging.info("-done.")

  # define the model
  config_path = os.path.join(args.bert_model_dir, 'bert_config.json')
  config = BertConfig.from_json_file(config_path)
  # The transformers constructor takes only a config; the number of labels is
  # carried by the config rather than passed as a keyword argument.
  config.num_labels = len(params.tag2idx)
  model = BertForTokenClassification(config)

  model.to(params.device)
  # Reload weights from the saved file
  utils.load_checkpoint(os.path.join(args.model_dir, args.restore_file + '.pth.tar'), model)
  # Tolerate a parser that does not define --fp16.
  if getattr(args, 'fp16', False):
    model.half()
  if params.n_gpu > 1 and args.multi_gpu:
    model = torch.nn.DataParallel(model)

  # Evaluation runs regardless of the GPU configuration.
  logging.info("starting evaluation...")
  test_metrics = evaluate(model, test_data_iterator, params, marks='Test', verbose=True)



SystemExit: ignored

usage: ipykernel_launcher.py [-h] [--data_dir DATA_DIR]
                             [--bert_model_dir BERT_MODEL_DIR]
                             [--model_dir MODEL_DIR] [--seed SEED]
                             [--restore_file RESTORE_FILE] [--multi_gpu]
ipykernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-cf35deca-3b8c-42b2-a2a2-a244b916c1eb.json


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
!pip install transformers


Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/19/22/aff234f4a841f8999e68a7a94bdd4b60b4cebcfeca5d67d61cd08c9179de/transformers-3.3.1-py3-none-any.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 4.9MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 36.7MB/s 
Collecting sentencepiece!=0.1.92
[?25l  Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 48.9MB/s 
Collecting tokenizers==0.8.1.rc2
[?25l  Downloading https://files.pythonhosted.org/packages/80/83/8b9fccb9e48eeb575ee19179e2bdde0ee9a1904f97de5f02d19016b8804f/tokenizers-0.8.1rc2-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)
[K 