<a href="https://colab.research.google.com/github/ideablast/NLPer_transformer_doc2vec_chatbot/blob/kdg/Load_model_complete.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install konlpy

Collecting konlpy
[?25l  Downloading https://files.pythonhosted.org/packages/85/0e/f385566fec837c0b83f216b2da65db9997b35dd675e107752005b7d392b1/konlpy-0.5.2-py2.py3-none-any.whl (19.4MB)
[K     |████████████████████████████████| 19.4MB 1.4MB/s 
Collecting tweepy>=3.7.0
  Downloading https://files.pythonhosted.org/packages/bb/7c/99d51f80f3b77b107ebae2634108717362c059a41384a1810d13e2429a81/tweepy-3.9.0-py2.py3-none-any.whl
Collecting JPype1>=0.7.0
[?25l  Downloading https://files.pythonhosted.org/packages/fd/96/1030895dea70855a2e1078e3fe0d6a63dcb7c212309e07dc9ee39d33af54/JPype1-1.1.2-cp36-cp36m-manylinux2010_x86_64.whl (450kB)
[K     |████████████████████████████████| 460kB 47.3MB/s 
[?25hCollecting colorama
  Downloading https://files.pythonhosted.org/packages/44/98/5b86278fbbf250d239ae0ecb724f8572af1c91f4a11edf4d36a206189440/colorama-0.4.4-py2.py3-none-any.whl
Collecting beautifulsoup4==4.6.0
[?25l  Downloading https://files.pythonhosted.org/packages/9e/d4/10f46e5cfac773e22707237

In [15]:
from keras import models
from keras import layers
from keras import optimizers, losses, metrics
from keras import preprocessing

import tensorflow as tf
import numpy as np
import os
import re

from konlpy.tag import Okt
# from hanspell import spell_checker
# from pykospacing import spacing

import pickle

In [4]:
# Special tokens: padding, sequence start, sequence end, out-of-vocabulary word
PAD, STA, END, OOV = "<PADDING>", "<START>", "<END>", "<OOV>"

# Indices reserved for the special tokens (same order as the tokens above)
PAD_INDEX, STA_INDEX, END_INDEX, OOV_INDEX = range(4)

# Data-type selectors understood by convert_text_to_index
ENCODER_INPUT, DECODER_INPUT, DECODER_TARGET = range(3)

# Hyper-parameters for the Transformer
NUM_LAYERS = 2   # number of encoder layers and of decoder layers (each)
D_MODEL = 256    # word embedding dimension
NUM_HEADS = 8    # number of attention heads; D_MODEL % NUM_HEADS must be 0
UNITS = 512      # number of feed-forward network units
DROPOUT = 0.1    # dropout rate
EPOCHS = 50      # epochs for the Transformer and the C, M classification models

# Data pipeline settings for the Transformer
BATCH_SIZE = 64
BUFFER_SIZE = 1000
# VOCAB_SIZE = 0 # number of words in the vocabulary; later replaced by len(words).

# Maximum number of words in one sentence
max_sequences = 30

# Regular-expression filter for characters removed before tokenizing
RE_FILTER = re.compile("[\"':;~()]")

## functions

In [6]:
# Morphological analysis helper
def pos_tag(sentences):
    """Strip filtered characters and split each sentence into morphemes.

    Args:
        sentences: iterable of raw sentence strings.

    Returns:
        A list with one string per input sentence: the morphemes returned by
        the Okt tagger, joined with single spaces.
    """
    # KoNLPy Okt analyzer (a new instance is created on every call)
    analyzer = Okt()

    def _tokenize(text):
        # Remove the characters matched by RE_FILTER ("':;~() and the double quote)
        cleaned = re.sub(RE_FILTER, "", text)
        # morphs() returns a list of morphemes; glue it back with spaces
        return " ".join(analyzer.morphs(cleaned))

    return [_tokenize(text) for text in sentences]

In [7]:
# Convert sentences into fixed-length index sequences
def convert_text_to_index(sentences, vocabulary, type):
    """Encode whitespace-separated sentences as rows of vocabulary indices.

    Args:
        sentences: iterable of strings whose words are separated by whitespace.
        vocabulary: dict mapping a word to its index; it is also looked up with
            the STA, END, OOV and PAD tokens.
        type: one of ENCODER_INPUT, DECODER_INPUT or DECODER_TARGET.
            DECODER_INPUT rows start with the START index, DECODER_TARGET rows
            end with the END index.

    Returns:
        np.ndarray built from one row per sentence; every row is truncated or
        padded with the PAD index to max_sequences entries.
    """
    encoded = []

    for sentence in sentences:
        # Decoder input gets the START tag in front
        ids = [vocabulary[STA]] if type == DECODER_INPUT else []

        for token in sentence.split():
            token_id = vocabulary.get(token)
            # Words missing from the vocabulary are replaced by the OOV index
            ids.append(vocabulary[OOV] if token_id is None else token_id)

        if type == DECODER_TARGET:
            # Decoder target always ends with END: make room for it first
            if len(ids) >= max_sequences:
                ids = ids[:max_sequences - 1]
            ids.append(vocabulary[END])
        else:
            # Everything else is simply cut at the maximum length
            ids = ids[:max_sequences]

        # Fill the remaining positions with the padding index
        ids.extend([vocabulary[PAD]] * (max_sequences - len(ids)))

        encoded.append(ids)

    return np.asarray(encoded)

## Custom functions for Transformer model loading

In [8]:
## Scaled dot-product attention
def scaled_dot_product_attention(query, key, value, mask):
  """Compute softmax(QK^T / sqrt(d_k)) V with an optional additive mask.

  Args:
    query, key, value: tensors accepted by tf.matmul; `key` is transposed on
      its last two axes before the product with `query`.
    mask: None, or a tensor broadcastable to the attention scores in which
      1 marks positions to suppress.

  Returns:
    The attention-weighted combination of `value`.
  """
  scores = tf.matmul(query, key, transpose_b=True)  # QK^T

  # d_k is the size of the last axis of `key`
  d_k = tf.cast(tf.shape(key)[-1], tf.float32)
  scores = scores / tf.math.sqrt(d_k)  # QK^T / sqrt(d_k)

  if mask is not None:
    # A large negative value drives the softmax weight of masked
    # (e.g. zero-padding) positions to ~0
    scores = scores + mask * -1e9

  weights = tf.nn.softmax(scores, axis=-1)  # softmax over the last axis

  return tf.matmul(weights, value)

In [9]:
def create_padding_mask(x):
  """Build a float32 mask that is 1.0 where `x` equals 0 and 0.0 elsewhere.

  Two singleton axes are inserted between the first and last axis of `x`,
  giving (batch_size, 1, 1, sequence length).
  """
  is_padding = tf.math.equal(x, 0)
  padding_mask = tf.cast(is_padding, tf.float32)
  return padding_mask[:, tf.newaxis, tf.newaxis, :]

In [10]:
# Masks future tokens of a sequence (used in the decoder) and also masks pad tokens
def create_look_ahead_mask(x):
  """Combine a look-ahead (future-position) mask with the padding mask of `x`.

  A value of 1 marks a position to be masked: either it lies above the main
  diagonal (a later token) or create_padding_mask flags it as padding.
  """
  length = tf.shape(x)[1]
  # band_part(..., -1, 0) keeps the lower triangle incl. the diagonal;
  # subtracting from 1 leaves ones strictly above the diagonal
  lower_triangle = tf.linalg.band_part(tf.ones((length, length)), -1, 0)
  future_mask = 1 - lower_triangle
  return tf.maximum(future_mask, create_padding_mask(x))

## Load models (Transformer, 2 BiLSTM) & dictionaries (6)

In [12]:
# Mount Google Drive at /content/drive (Colab only); the model and dictionary
# files loaded by the following cells are read from '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
# Load the three saved models from Google Drive.
# t_model: loaded from a directory path with compile=False (the model is not
#          compiled after loading); used by Transformer_prediction.
# m_model: loaded from an .h5 file; used by show_prob_m.
# c_model: loaded from an .h5 file; used by show_prob_c.
t_model = models.load_model('/content/drive/My Drive/Transformer_text_savedmodelform', compile=False)
m_model = models.load_model('/content/drive/My Drive/main_lstm_cl_test.h5')
c_model = models.load_model('/content/drive/My Drive/category_lstm_cl_test.h5')

In [16]:
# Read the pickled list of lookup dictionaries from Google Drive.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load this file from a trusted source.
with open('/content/drive/My Drive/dictionary_list.pickle', 'rb') as handle:
  dictionary_list = pickle.load(handle)

# Positions 0-5 hold, in order, the word, category and main lookup tables
# (each as a *_to_index / index_to_* pair).
(word_to_index, index_to_word,
 category_to_index, index_to_category,
 main_to_index, index_to_main) = (dictionary_list[i] for i in range(6))

In [17]:
# input : '이 옷 다른 사이즈도 볼 수 있을까요?'  ("Can I see this garment in another size?")
# output : ('의류', 1.0)  ('의류' = clothing)
def show_prob_c(stc):
  """Predict the category of a single sentence with c_model.

  Args:
    stc: raw sentence string.

  Returns:
    (label, score): the index_to_category entry for the arg-max of the model
    output, and the maximum value of that output.
  """
  pos_stc = pos_tag([stc])
  # Encode as encoder input; one sentence -> a batch of shape (1, max_sequences).
  # The named constants keep this in sync with the configuration cell instead
  # of repeating the literals 0 and 30.
  index_stc = convert_text_to_index(pos_stc, word_to_index, ENCODER_INPUT).reshape(1, max_sequences)
  scores = c_model.predict(index_stc)

  index = np.argmax(scores)
  probability = np.max(scores)

  return index_to_category[index], probability

In [18]:
# input : '이 옷 다른 사이즈도 볼 수 있을까요?'  ("Can I see this garment in another size?")
# output : ('치수문의', 0.7858745)  ('치수문의' = size inquiry)
def show_prob_m(stc):
  """Predict the main label (intent) of a single sentence with m_model.

  Args:
    stc: raw sentence string.

  Returns:
    (label, score): the index_to_main entry for the arg-max of the model
    output, and the maximum value of that output.
  """
  pos_stc = pos_tag([stc])
  # Encode as encoder input; one sentence -> a batch of shape (1, max_sequences).
  # The named constants keep this in sync with the configuration cell instead
  # of repeating the literals 0 and 30.
  index_stc = convert_text_to_index(pos_stc, word_to_index, ENCODER_INPUT).reshape(1, max_sequences)
  scores = m_model.predict(index_stc)

  index = np.argmax(scores)
  probability = np.max(scores)

  return index_to_main[index], probability

In [19]:
# input : '남성 바지는 어느 쪽에 있나요?'  ("Where are the men's trousers?")
# output : '저 뒤쪽 에 있어요'
def Transformer_prediction(stc):
  """Generate a reply to one sentence by greedy decoding with t_model.

  The sentence is tokenized and encoded as encoder input. Starting from the
  START index, the most likely next index is appended to the decoder input
  until END is predicted or max_sequences steps have been taken.

  Args:
    stc: raw sentence string.

  Returns:
    The decoded words, each followed by a single space (so a non-empty reply
    ends with a trailing space). Indices missing from index_to_word are
    rendered as the OOV token.
  """
  pos_stc = pos_tag([stc])
  index_stc = convert_text_to_index(pos_stc, word_to_index, ENCODER_INPUT)
  # Drop and re-add the batch axis so the encoder input is a tensor
  encoder_input = tf.expand_dims(index_stc.squeeze(), axis=0)
  # Decoder input starts as a batch holding only the START index
  output = tf.expand_dims([STA_INDEX], 0)

  for _ in range(max_sequences):
    predictions = t_model.predict([encoder_input, output])
    # select the last word from the seq_len dimension
    predictions = predictions[:, -1:, :]
    predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

    # Stop as soon as the model predicts the END index
    if tf.equal(predicted_id, END_INDEX):
      break

    # concatenate the predicted_id to the output which is given to the decoder
    # as its input.
    output = tf.concat([output, predicted_id], axis=-1)

  # Drop the leading START index
  output_indexes = tf.squeeze(output, axis=0)[1:].numpy()

  words = []
  for index in output_indexes:
    if index == END_INDEX:
      # Stop at the end index
      break
    word = index_to_word.get(index)
    # Fix: the original called str.extend here, which raises AttributeError;
    # unknown indices now fall back to the OOV word as intended.
    words.append(word if word is not None else index_to_word[OOV_INDEX])

  # Keep the original output format: every word is followed by one space
  return ''.join(word + ' ' for word in words)

## Prediction

In [20]:
# Sample customer sentence ("Where are the men's trousers?") used by the next two cells
stc = '남성 바지는 어느 쪽에 있나요?'

In [21]:
# Category and main-label predictions (label, score) for the sample sentence
show_prob_c(stc), show_prob_m(stc)

(('의류', 1.0), ('종류별의류제품문의요청', 0.98762506))

In [22]:
# Generate the Transformer's reply to the sample sentence, print it, then run
# both classifiers on the reply itself.
ans = Transformer_prediction(stc)
print(ans) 
show_prob_c(ans),show_prob_m(ans) 

저 뒤쪽 에 있어요 


(('의류', 0.9849696), ('종류별액세서리제품문의요청', 0.9806169))

In [23]:
# Interactive demo: read one sentence from the user, then print
#   1) the category and intent predicted for the entered sentence,
#   2) the Transformer's answer ("AI 답변" = AI answer),
#   3) the category and intent predicted for that answer.
stc = input()
print("입력하신 문장의 카테고리,의도 : ",show_prob_c(stc), show_prob_m(stc))
ans = Transformer_prediction(stc)
print("AI 답변 :", ans)
print("AI 답변의 카테고리,의도 : ", show_prob_c(ans), show_prob_m(ans) )

이거 얼마에요?
입력하신 문장의 카테고리,의도 :  ('가방', 0.41945946) ('제품가격문의', 0.99989533)
AI 답변 : 네 20000원 입니다 
AI 답변의 카테고리,의도 :  ('의류', 0.9999721) ('제품가격문의', 0.98077536)
