<a href="https://colab.research.google.com/github/comojin1994/Deep_Learning_Study/blob/master/3step_lecture/Vanila_Attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
%cd ./drive/My\ Drive/Deep_learning/Study/3Step

/content/drive/My Drive/Deep_learning/Study/3Step


### Korea natural language Processing

In [1]:
!pip install konlpy

Collecting konlpy
[?25l  Downloading https://files.pythonhosted.org/packages/85/0e/f385566fec837c0b83f216b2da65db9997b35dd675e107752005b7d392b1/konlpy-0.5.2-py2.py3-none-any.whl (19.4MB)
[K     |████████████████████████████████| 19.4MB 159kB/s 
Collecting colorama
  Downloading https://files.pythonhosted.org/packages/c9/dc/45cdef1b4d119eb96316b3117e6d5708a08029992b2fee2c143c7a0a5cc5/colorama-0.4.3-py2.py3-none-any.whl
Collecting beautifulsoup4==4.6.0
[?25l  Downloading https://files.pythonhosted.org/packages/9e/d4/10f46e5cfac773e22707237bfcd51bbffeaf0a576b0a847ec7ab15bd7ace/beautifulsoup4-4.6.0-py3-none-any.whl (86kB)
[K     |████████████████████████████████| 92kB 11.4MB/s 
[?25hCollecting tweepy>=3.7.0
  Downloading https://files.pythonhosted.org/packages/36/1b/2bd38043d22ade352fc3d3902cf30ce0e2f4bf285be3b304a2782a767aec/tweepy-3.8.0-py2.py3-none-any.whl
Collecting JPype1>=0.7.0
[?25l  Downloading https://files.pythonhosted.org/packages/d7/3c/1dbe5d6943b5c68e8df17c8b3a05db4725ea

In [2]:
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
from konlpy.tag import Okt

TensorFlow 2.x selected.


### Hyperparameter

In [0]:
EPOCHS = 200
NUM_WORDS = 2000

### Encoder

In [0]:
class Encoder(tf.keras.Model):
    def __init__(self):
        super(Encoder, self).__init__()
        self.emb = tf.keras.layers.Embedding(NUM_WORDS, 64)
        ### return_state : 중간 과정을 출력할것인지 True이면 output state, hidden state, cell state 출력
        self.lstm = tf.keras.layers.LSTM(512, return_sequences=True, return_state=True)

    def call(self, x, training=False, mask=None):
        x = self.emb(x)
        H, h, c = self.lstm(x)
        return H, h, c

### Decoder

In [0]:
class Decoder(tf.keras.Model):
    def __init__(self):
        super(Decoder, self).__init__()
        self.emb = tf.keras.layers.Embedding(NUM_WORDS, 64)
        ### return_sequences : False일 경우 마지막 output 1개만 출력됨
        ### return_sequences : True일 경우 모든 output 출력됨
        self.lstm = tf.keras.layers.LSTM(512, return_sequences=True, return_state=True)
        self.att = tf.keras.layers.Attention()
        self.dense = tf.keras.layers.Dense(NUM_WORDS, activation='softmax')

    def call(self, inputs, training=False, mask=None):
        x, s0, c0, H = inputs
        x = self.emb(x)
        S, h, c = self.lstm(x, initial_state=[s0, c0])
        
        S_ = tf.concat([s0[:, tf.newaxis, :], S[:, :-1, :]], axis=1)
        A = self.att([S_, H])
        y = tf.concat([S, A], axis=-1)
        
        return self.dense(y), h, c

### Seq2Seq

In [0]:
class Seq2seq(tf.keras.Model):
    def __init__(self, sos, eos):
        super(Seq2seq, self).__init__()
        self.enc = Encoder()
        self.dec = Decoder()
        self.sos = sos
        self.eos = eos

    def call(self, inputs, training=False, mask=None):
        if training is True:
            x, y = inputs
            H, h, c = self.enc(x)
            y, _, _ = self.dec((y, h, c, H))
            return y
        else:
            x = inputs
            H, h, c = self.enc(x)
            
            y = tf.convert_to_tensor(self.sos)
            y = tf.reshape(y, (1, 1))

            seq = tf.TensorArray(tf.int32, 64)

            for idx in tf.range(64):
                y, h, c = self.dec([y, h, c, H])
                y = tf.cast(tf.argmax(y, axis=-1), dtype=tf.int32)
                y = tf.reshape(y, (1, 1))
                seq = seq.write(idx, y)

                if y == self.eos:
                    break

            return tf.reshape(seq.stack(), (1, 64))

### Define Training, Test loop

In [0]:

@tf.function
def train_step(model, inputs, labels, loss_object, optimizer, train_loss, train_accuracy):
    output_labels = labels[:, 1:]
    shifted_labels = labels[:, :-1]
    with tf.GradientTape() as tape:
        predictions = model([inputs, shifted_labels], training=True)
        loss = loss_object(output_labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)

    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(output_labels, predictions)


@tf.function
def test_step(model, inputs):
    return model(inputs, training=False)

### Dataset

In [0]:
### http://www.aihub.or.kr
dataset_file = 'chatbot_data.csv'
okt = Okt()

with open(dataset_file, 'r') as file:
    lines = file.readlines()
    seq = [' '.join(okt.morphs(line)) for line in lines]

questions = seq[::2]
answers = ['\t' + lines for lines in seq[1::2]]

num_sample = len(questions)

perm = list(range(num_sample))
np.random.seed(0)
np.random.shuffle(perm)

train_q = list()
train_a = list()
test_q = list()
test_a = list()

for idx, qna in enumerate(zip(questions, answers)):
    q, a = qna
    if perm[idx] > num_sample//5:
        train_q.append(q)
        train_a.append(a)

    else:
        test_q.append(q)
        test_a.append(a)

tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=NUM_WORDS,
                                                  filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~')

tokenizer.fit_on_texts(train_q + train_a)

train_q_seq = tokenizer.texts_to_sequences(train_q)
train_a_seq = tokenizer.texts_to_sequences(train_a)

test_q_seq = tokenizer.texts_to_sequences(test_q)
test_a_seq = tokenizer.texts_to_sequences(test_a)

x_train = tf.keras.preprocessing.sequence.pad_sequences(train_q_seq,
                                                        maxlen=64,
                                                        padding='pre',
                                                        value=0)
y_train = tf.keras.preprocessing.sequence.pad_sequences(train_a_seq,
                                                        maxlen=65,
                                                        padding='post',
                                                        value=0)

x_test = tf.keras.preprocessing.sequence.pad_sequences(test_q_seq,
                                                       maxlen=64,
                                                       padding='pre',
                                                       value=0)
y_test = tf.keras.preprocessing.sequence.pad_sequences(test_a_seq,
                                                       maxlen=65,
                                                       padding='post',
                                                       value=0)

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32).prefetch(1024)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(1).prefetch(1024)

### Define Train env

In [0]:
# Create model
model = Seq2seq(sos=tokenizer.word_index['\t'],
                eos=tokenizer.word_index['\n'])

# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

### Training

In [50]:
for epoch in range(EPOCHS):
    for seqs, labels in train_ds:
        train_step(model, seqs, labels, loss_object, optimizer, train_loss, train_accuracy)

    template = 'Epoch {}, Loss: {}, Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100))

    train_loss.reset_states()
    train_accuracy.reset_states()

Epoch 1, Loss: 2.930478811264038, Accuracy: 84.375
Epoch 2, Loss: 0.6041969656944275, Accuracy: 91.7567367553711
Epoch 3, Loss: 0.5153856873512268, Accuracy: 91.87813568115234
Epoch 4, Loss: 0.4968894422054291, Accuracy: 91.90554809570312
Epoch 5, Loss: 0.48965027928352356, Accuracy: 91.93687438964844
Epoch 6, Loss: 0.4868849217891693, Accuracy: 91.92121124267578
Epoch 7, Loss: 0.47463688254356384, Accuracy: 92.01127624511719
Epoch 8, Loss: 0.45996472239494324, Accuracy: 92.0817642211914
Epoch 9, Loss: 0.45165082812309265, Accuracy: 92.20707702636719
Epoch 10, Loss: 0.4340301752090454, Accuracy: 92.33631134033203
Epoch 11, Loss: 0.415818989276886, Accuracy: 92.5399398803711
Epoch 12, Loss: 0.40865370631217957, Accuracy: 92.90805053710938
Epoch 13, Loss: 0.39594796299934387, Accuracy: 93.08427429199219
Epoch 14, Loss: 0.38932546973228455, Accuracy: 93.24091339111328
Epoch 15, Loss: 0.3833733797073364, Accuracy: 93.2996597290039
Epoch 16, Loss: 0.3766595721244812, Accuracy: 93.3975601196

### Test loop

In [52]:
for test_seq, test_labels in test_ds:
    predictions = test_step(model, test_seq)
    test_text = tokenizer.sequences_to_texts(test_seq.numpy())
    gt_text = tokenizer.sequences_to_texts(test_labels.numpy())
    texts = tokenizer.sequences_to_texts(predictions.numpy())
    print('_')
    print('q: ', test_text)
    print('a: ', gt_text)
    print('p: ', texts)

_
q:  ['아이스 아메리카노 하나요 \n']
a:  ['\t테이크아웃 하실 건가 요 \n']
p:  ['네 배달 비 3000원 입니다 \n']
_
q:  ['여기 기프티콘 되죠 \n']
a:  ['\t네 현금영수증 해드릴까 요 \n']
p:  ['네 배달 비 됩니다 \n']
_
q:  ['진동 을 따로 주시나요 \n']
a:  ['\t주 문 번호 로 드리겠습니다 \n']
p:  ['네 담아 찍어주세요 \n']
_
q:  ['커피 에 샷 추가 가능한가요 \n']
a:  ['\t 네 가능합니다 \n']
p:  ['아뇨 매장 에서는 머그컵 만 사용 가능합니다 \n']
_
q:  ['밀크 티 있나요 \n']
a:  ['\t네 있습니다 \n']
p:  ['네 가능합니다 \n']
_
q:  ['밀크 티 종류 는 뭐 가 있어요 \n']
a:  ['\t 루이보스 두 개 있습니다 \n']
p:  ['네 가능합니다 \n']
_
q:  ['카푸치노 는 로 주시 고 아메리카노 는 스몰 로 주시겠어요 \n']
a:  ['\t 네 더 없으세요 \n']
p:  ['네 티 때 가 판매 \n']
_
q:  ['조각 케이크 도 추가 해주시겠어요 \n']
a:  ['\t 네 어떤 거 로 드릴 까요 \n']
p:  ['네 카운터 로 오시 면 테이크 아웃 잔 에 담아 드려요 \n']
_
q:  ['아메리카노 한잔 주세요 \n']
a:  ['\t드시고 가시나요 \n']
p:  ['네 더 필요한 거 없으신 가요 \n']
_
q:  ['커피 주문 할게요 \n']
a:  ['\t네 어떤 걸 로 \n']
p:  ['네 주문 적립 입니다 \n']
_
q:  ['페퍼민트 티 하나 주세요 \n']
a:  ['\t따뜻한 것 으로 \n']
p:  ['네 주문 적립 입니다 \n']
_
q:  ['아이스 아메리카노 랑 따뜻한 라떼 로 주세요 \n']
a:  ['\t사이즈 는 어떻게 드릴 까요 \n']
p:  ['네 주문 가능합니다 \n']
_
q:  ['언제 음료 가 \n']
a:  ['\t10분 내 로 나갑니다 