In [None]:
import random
import json
from tqdm import tqdm
import unicodedata
from collections import OrderedDict

import torch
import onnx

In [None]:
# local modules
from ner_tokenizer_bio import NER_tokenizer_BIO
from bert_for_token_classification_pl import BertForTokenClassification_pl

In [None]:
# --- Paths -------------------------------------------------------------------
BEST_MODEL_PATH = './model/epoch=4-step=660.ckpt'   # checkpoint to load
TOKENIZER_PATH  = './model/iot-nlu-tokenizer'       # saved tokenizer directory
ONNX_FILE_PATH  = './model/iot-nlu.onnx'            # destination of the ONNX export

# --- Label-space sizes -------------------------------------------------------
# Number of intent classes:
#   None=0, LED_ON=1, LED_OFF=2, READ_THERMO=3, OPEN=4, CLOSE=5, SET_TEMP=6
NUM_INTENT_LABELS = 7

# Number of slot (entity) types:
#   COL=1, COLLTDEV=2, LOC=3, ONOFFDEV=4, OPENABLE=5, TEMPDEV=6,
#   TEMPERTURE_NUM=7, THMDEV=8
# NOTE(review): "TEMPERTURE_NUM" is kept exactly as originally written; it may be
# the real label name in the dataset - confirm before "fixing" the spelling.
NUM_ENTITY_TYPE = 8

In [None]:
# Load the tokenizer from TOKENIZER_PATH.
# The number of named-entity categories (`num_entity_type`) has to be supplied
# explicitly when constructing the tokenizer.
tokenizer = NER_tokenizer_BIO.from_pretrained(TOKENIZER_PATH, num_entity_type=NUM_ENTITY_TYPE)

In [None]:
# Load the joint intent/slot BERT model from the Lightning checkpoint.
model = BertForTokenClassification_pl.load_from_checkpoint(
    BEST_MODEL_PATH
)

# Switch to inference mode. `bert_tc` is set explicitly as well, so it ends up
# in eval mode regardless of how the wrapper registers it.
model.eval()
model.bert_tc.eval()

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook does not crash on machines without CUDA.
# The result is assigned (rather than left as the cell's last expression) to
# avoid dumping the full module repr into the cell output.
bert = model.bert_tc.to('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Run a single utterance through the PyTorch model.
entities           = [] # gold (reference) named entities
entities_predicted = [] # named entities extracted by the model

# NFKC-normalise the input text before tokenization.
text = unicodedata.normalize('NFKC', '会議室にある黄色い電灯の火を点灯してくださいな')

encoding, spans = tokenizer.encode_plus_untagged(
    text, return_tensors='pt', max_length=128
)
# Move the inputs to whichever device the model currently lives on. The export
# cell calls `bert.cpu()`, which moves the model in place, so hard-coding
# `.cuda()` here would raise a device-mismatch error when this cell is re-run
# after exporting.
model_device = next(bert.parameters()).device
encoding = { k: v.to(model_device) for k, v in encoding.items() }

with torch.no_grad():
    print(encoding)
    total_loss, logits_intent, logits_slot = bert(**encoding)
    scores_intent = logits_intent.cpu().numpy()
    # Index 0 selects the first (only) item of the batch.
    scores_slots  = logits_slot[0].cpu().numpy().tolist()
    # `Tensor.size` is a method: call it so the size is printed instead of the
    # bound-method repr.
    print(f'total_loss\n\ttype={type(total_loss)}\n\tsize={total_loss.size()}, val={total_loss}')
    print(f'logits_intent\n\ttype={type(logits_intent)}\n\tshape={logits_intent.shape}\n\tval={logits_intent}')
    print(f'logits_slot\n\ttype={type(logits_slot)}\n\tshape={logits_slot.shape}\n\tval={logits_slot}')

# Convert the intent classification scores to an intent id.
intent = scores_intent.argmax(-1)[0]
# Convert the slot classification scores to named entities.
entities_predicted = tokenizer.convert_bert_output_to_entities(
    text, scores_slots, spans
)

print("入力",text)
print("予測 intent  :", intent)
print("予測 entities:", json.dumps(entities_predicted, indent=2, ensure_ascii=False))

In [None]:
# Pull the three model inputs out of the sample encoding; they are reused as
# the example inputs for the ONNX export.
input_ids, attention_mask, token_type_ids = (
    encoding["input_ids"],
    encoding["attention_mask"],
    encoding["token_type_ids"],
)
print(input_ids, attention_mask, token_type_ids, sep="\n")
# Displayed as the cell output.
input_ids.shape

In [None]:
# Export the model to ONNX.
# `eval()` and `cpu()` both act on the module in place and return it, so `bert`
# itself stays on the CPU after this cell has run.
export_model = bert.eval().cpu()

# Sample inputs for the exporter, in the same order as `input_names`.
# NOTE(review): no `dynamic_axes` argument is passed, so the exported graph is
# presumably specialised to the shapes of these sample tensors - confirm that
# fixed input shapes are intended.
sample_inputs = (input_ids.cpu(), attention_mask.cpu(), token_type_ids.cpu())

torch.onnx.export(
    export_model,
    sample_inputs,
    ONNX_FILE_PATH,
    export_params=True,
    opset_version=11,
    input_names=['input_ids', 'attention_mask', 'token_type_ids'],
    output_names=['total_loss', 'intent_logits', 'slot_logits'],
)

In [None]:
import onnxruntime as ort

# Re-tokenize the sample text and keep the tensors on the CPU.
encoding, spans = tokenizer.encode_plus_untagged(
    text, max_length=128, return_tensors='pt'
)
encoding = { k: v.cpu() for k, v in encoding.items() }

# Build the ONNX Runtime feed from the encoding produced in THIS cell, keyed by
# the input names used at export time. Previously the stale `input_ids` /
# `attention_mask` / `token_type_ids` variables from an earlier cell were fed,
# which left the fresh encoding unused and made this cell depend on hidden state.
inputs = {
    name: encoding[name].numpy()
    for name in ("input_ids", "attention_mask", "token_type_ids")
}

# Prefer CUDA, but list the CPU provider as a fallback so the session can still
# be created on machines without a usable GPU.
ort_session = ort.InferenceSession(
    ONNX_FILE_PATH,
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
)
# Passing None as the first argument requests every model output.
total_loss, logits_intent, logits_slot = ort_session.run( None, inputs)
scores_intent = logits_intent
# Index 0 selects the first (only) item of the batch; converted to a list so the
# tokenizer receives the same type as in the PyTorch inference cell.
scores_slots  = logits_slot[0].tolist()

print(f'total_loss\n\ttype={type(total_loss)}\n\tsize={total_loss.size}, val={total_loss}')
print(f'logits_intent\n\ttype={type(logits_intent)}\n\tshape={logits_intent.shape}\n\tval={logits_intent}')
print(f'logits_slot\n\ttype={type(logits_slot)}\n\tshape={logits_slot.shape}\n\tval={logits_slot}')

# Convert the intent classification scores to an intent id.
intent = scores_intent.argmax(-1)[0]
# Convert the slot classification scores to named entities.
entities_predicted = tokenizer.convert_bert_output_to_entities(
    text, scores_slots, spans
)

print("入力",text)
print("予測 intent  :", intent)
print("予測 entities:", json.dumps(entities_predicted, indent=2, ensure_ascii=False))

In [None]:
# simplify onnx
!onnxsim model/iot-nlu.onnx model/iot-nlu-sim.onnx