In [1]:
# !nvidia-smi
import torch
from utils.utils import predict, show

# Pick the compute device: the GPU when torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')

Using cuda device


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os
import sys
import json
import torch
import argparse
import numpy as np 

import model.model as module_arch
import utils.dataloader as module_data

from utils.metric import evaluate 
from utils.utils import decode_tags
from utils.metric import sequence_f1
from utils.parse_config import ConfigParser

# Checkpoint path; used below as the default value of -r/--resume.
resume = 'storage/best_model/model_best.pth'

# Command-line definition handed to ConfigParser.from_args further down.
args = argparse.ArgumentParser(description='PyTorch Template')
args.add_argument(
    '-c', '--config',
    default=None,
    type=str,
    help='config file path (default: None)',
)
# NOTE: the help text says "default: None", but the default is the `resume` path above.
args.add_argument(
    '-r', '--resume',
    default=resume,
    type=str,
    help='path to latest checkpoint (default: None)',
)
# NOTE: the help text says "default: all", but the default passed here is 'cpu'.
args.add_argument(
    '-d', '--device',
    default='cpu',
    type=str,
    help='indices of GPUs to enable (default: all)',
)
# Extra --f option; presumably absorbs the kernel's connection-file flag — TODO confirm.
args.add_argument('--f', default='save')

# When any argv entry mentions "ipykernel", keep only the first entry of sys.argv
# so the parser does not see the remaining arguments.
launched_by_ipykernel = any("ipykernel" in argument for argument in sys.argv)
if launched_by_ipykernel:
    sys.argv = sys.argv[:1]

config = ConfigParser.from_args(args)
logger = config.get_logger('test')

# Data loader instance built from the 'dataloader' config entry.
data_loader = config.init_obj('dataloader', module_data)

# Model architecture built from the 'arch' config entry; the tag count and
# language-model path are taken from the data loader.
model = config.init_obj(
    'arch',
    module_arch,
    num_tag=data_loader.num_tag,
    path_lm=data_loader.path_lm,
)

# Metric function handles, keyed by name.
metric_fns = {"sequence_f1": sequence_f1}

Train : 10409 sentences
Dev : 3486 sentences
Test : 3442 sentences
Max sents length: 512 tokens
num_vocab: 250002
num_tag: 105
num_span: 5
num_spantag: 417


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
# Restore the trained weights into `model` and prepare it for inference.
logger.info('Loading checkpoint: {} ...'.format(config.resume))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Deserialize onto the CPU (map_location='cpu') before copying into the model.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint = torch.load(config.resume, map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])

# Move the model to the target device once, after the weights are in place.
model = model.to(device)

# Switch to evaluation mode. eval() returns the module itself, so as the last
# expression of the cell it still displays the model architecture.
model.eval()

INFO:test:Loading checkpoint: storage/best_model/model_best.pth ...


NERModel(
  (lm): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (Layer

In [4]:
from utils.utils import predict, show

# Thai sample passage used as the input for the prediction demo.
text = """
เมื่อวันที่29ก.ค.55 สำนักข่าวบีบีซีรายงานว่า มหาวิทยาลัยออกซ์ฟอร์ดของอังกฤษได้แก้ไขกฎการแต่งกายภายในมหาวิทยาลัย หลังชมรมเพื่อความหลากหลายทางเพศยื่นคำร้องว่ากฎที่มีอยู่เดิมไม่เป็นธรรมกับกลุ่มนศข้ามเพศtransgenderใน
มหาวิทยาลัย
"""
# Inputs for predict(), all read from the data loader.
lm_path = data_loader.path_lm
ids2tag = data_loader.ids2spantag
max_sent_length = data_loader.sent_length

tokens, out = predict(model, text, lm_path, ids2tag, max_sent_length)

# Drop every token equal to the data loader's pad token before printing.
tokens = [token for token in tokens if token != data_loader.pad]

joined_tokens = "|".join(tokens)
print(joined_tokens, "\n")

# Call show() on each element of `out`, one at a time.
for item in out:
    show(item)

<s>|เมื่อ|วันที่|29|ก|.|ค|.|55||สํานัก|ข่าว||บี|บี|ซี||รายงาน|ว่า|มหาวิทยาลัย||ออก|ซ์||ฟอร์ด|ของ||อังกฤษ|ได้||แก้ไข||กฎ|การ||แต่ง|กาย||ภายใน|มหาวิทยาลัย|หลัง||ชม|รม|เพื่อ|ความ|หลากหลาย|ทาง|เพศ||ยื่น|คํา|ร้อง|ว่า||กฎ|ที่|มี|อยู่||เดิม|ไม่|เป็น|ธรรม|กับ|กลุ่ม|น|ศ||ข้าม||เพศ|trans|gende|r|ใน|มหาวิทยาลัย|</s> 

[2, 9]         date           วันที่29ก.ค.55
[12, 16]       media          บีบีซี
[26, 28]       country        อังกฤษ
