In [None]:
import os
from google.colab import drive
# Mount Google Drive at /content/drive so later cells can read files stored there.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Make the Drive project folder the working directory; later cells use relative
# paths (e.g. 'best_model.h5') and import the local `main` module, which
# presumably lives in this folder.
%cd /content/drive/My Drive/ner_example/

/content/drive/My Drive/ner_example


In [None]:
!pip install flask_ngrok
!pip install transformers
!pip install konlpy

In [None]:
import json
import numpy as np

from main import order, multiple, blank
from transformers import TFBertModel, BertTokenizer
from tensorflow.keras.models import load_model
from flask import Flask, request, Response, jsonify
from flask_ngrok import run_with_ngrok
from konlpy.tag import Okt

In [None]:
# KoNLPy Okt analyzer; ner_inference calls okt.morphs() on every sentence.
okt = Okt()
# Tokenizer of the pretrained 'bert-base-multilingual-cased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
# Saved Keras model, loaded from the current working directory. custom_objects
# lets load_model resolve the name 'TFBertModel' (presumably a layer inside the
# saved model).
model = load_model('best_model.h5', custom_objects={'TFBertModel': TFBertModel})



In [None]:
# Static values required for model prediction.
# max_len: fixed length every encoded sentence is truncated/padded to in ner_inference.
max_len = 88
# index_to_ner: maps a predicted class index to its NER tag string.
index_to_ner = {0: 'PER_B', 1: 'DAT_B', 2: '-', 3: 'ORG_B', 4: 'CVL_B', 5: 'NUM_B', 6: 'LOC_B', 7: 'EVT_B', 8: 'TRM_B', 9: 'TRM_I', 10: 'EVT_I', 11: 'PER_I', 12: 'CVL_I', 13: 'NUM_I', 14: 'TIM_B', 15: 'TIM_I', 16: 'ORG_I', 17: 'DAT_I', 18: 'ANM_B', 19: 'MAT_B', 20: 'MAT_I', 21: 'AFW_B', 22: 'FLD_B', 23: 'LOC_I', 24: 'AFW_I', 25: 'PLT_B', 26: 'FLD_I', 27: 'ANM_I', 28: 'PLT_I', 29: '[PAD]'}

In [None]:
def get_total_dict(result_list):
  """Group every word by its NER tag across all sentences.

  Args:
    result_list: iterable of per-sentence results, each an iterable of
      (word, tag) pairs.

  Returns:
    dict mapping each tag to the list of words that carry it, in the order
    they were encountered (duplicates are kept).
  """
  words_by_tag = {}
  for sentence_pairs in result_list:
    for word, tag in sentence_pairs:
      # setdefault creates the bucket the first time a tag is seen.
      words_by_tag.setdefault(tag, []).append(word)
  return words_by_tag

def ner_inference(test_sentence):
  """Run the NER model on one sentence and return its tokens with predicted tags.

  The sentence is split into morphemes with okt, re-joined with spaces,
  encoded to exactly max_len ids by the tokenizer, and fed to the model
  together with a mask. The arg-max class of every position is mapped to a
  tag via index_to_ner.

  Args:
    test_sentence: raw sentence string.

  Returns:
    [tokens, labels]: two parallel lists holding every token except
    '[CLS]', '[SEP]' and '[PAD]', and the tag predicted for each.
  """
  spaced_sentence = " ".join(okt.morphs(test_sentence))
  encoded_ids = tokenizer.encode(
      spaced_sentence, max_length=max_len, truncation=True, padding='max_length')
  tokenized_sentence = np.array([encoded_ids])

  # Mask is 0 where the id equals 1, else 1.
  # NOTE(review): this treats id 1 as padding; confirm against
  # tokenizer.pad_token_id and the mask used at training time before changing.
  mask_row = [int(token_id != 1) for token_id in tokenized_sentence[0].tolist()]
  tokenized_mask = np.array([mask_row])

  # Arg-max over the last axis gives one class index per position.
  predicted = np.argmax(model.predict([tokenized_sentence, tokenized_mask]), axis=2)

  pieces = tokenizer.convert_ids_to_tokens(tokenized_sentence[0])
  special_tokens = ('[CLS]', '[SEP]', '[PAD]')
  new_tokens, new_labels = [], []
  for piece, class_idx in zip(pieces, predicted[0]):
    if piece in special_tokens:
      continue
    new_tokens.append(piece)
    new_labels.append(index_to_ner[class_idx])
  return [new_tokens, new_labels]

def get_result(new_tokens, new_labels):
  """Merge word-piece tokens back into whole words with one tag per word.

  A token starting with '##' is a continuation: its text (without the '##')
  is appended to the word being built. Any other token starts a new word.
  Each word takes the label of its last piece.

  Unlike the earlier placeholder-and-pop approach, a leading '##' token is
  kept as the start of the first word instead of causing the first word to be
  dropped.

  Args:
    new_tokens: list of token strings.
    new_labels: list of tag strings, parallel to new_tokens.

  Returns:
    list of (word, label) tuples in input order; empty for empty input.
  """
  result = []
  word, label = None, ""  # word is None until the first token has been seen
  for token, tag in zip(new_tokens, new_labels):
    if token.startswith('##'):
      word = (word or "") + token[2:]
    else:
      # A new word begins: flush the one built so far, if any.
      if word is not None:
        result.append((word, label))
      word = token
    label = tag
  if word is not None:
    result.append((word, label))
  return result

In [None]:
import random
# Flask application object; the route handlers defined below register on it.
app = Flask(__name__)

@app.route('/')
def index():
  """Handle requests to '/' by returning a fixed greeting string."""
  return "hello flask"

@app.route('/workbook-with-text', methods=['POST'])
def make_question_response():
  """Build a shuffled list of questions from the text posted to this route.

  Expects a JSON object body with a string field 'text'. The text is split on
  '.', each piece is run through ner_inference and get_result, and the
  per-sentence predictions are handed to blank, multiple and order (imported
  from main) to produce the questions.

  Returns:
    (JSON list of the generated items in random order, 200) on success, or
    (JSON object with an 'error' message, 400) when the body is not a JSON
    object or has no string 'text' field.
  """
  # silent=True yields None instead of raising when the body is missing or is
  # not valid JSON, so malformed requests get an explicit 400 below.
  data = request.get_json(silent=True)
  if not isinstance(data, dict) or not isinstance(data.get('text'), str):
    return jsonify({'error': "request body must be a JSON object with a string 'text' field"}), 400

  # One prediction (list of (word, tag) pairs) per '.'-separated piece. Pieces
  # that yield no words produce an empty list, which is kept so that `order`
  # receives the same input as before.
  predict_list = []
  for sentence in data['text'].split('.'):
    token, label = ner_inference(sentence)
    predict_list.append(get_result(token, label))

  question_list = []
  total_dict = get_total_dict(predict_list)
  for predict in predict_list:
    if predict:  # skip pieces with no recognized words
      question_list.append(blank('PER_B', predict))
      question_list.append(multiple('PER_B', predict, total_dict))
  question_list.append(order(predict_list))
  random.shuffle(question_list)
  return jsonify(question_list), 200

In [None]:
# Hand the app to flask_ngrok (presumably so app.run() also opens a public
# ngrok tunnel; confirm in the flask_ngrok docs), then start the Flask server.
run_with_ngrok(app)
app.run()