# **LSTM**
시계열 데이터가 길어질수록 RNN의 기울기 감소는 크게 증가하여, 그 이전 state를 기억하지 못한다. 이 기울기 소실을 극복하기 위하여 LSTM을 사용한다.

## ** Tutorial 0 : What is the LSTM?**
#### ** LSTM을 이용하여, 문장에서 다음에 올 단어를 예측해보자. **

In [1]:
import tensorflow as tf
import numpy as np

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.pipeline import Pipeline

# import time
# import random

#### **Part 0: Create the dataset**

In [2]:
data = 'long ago , the mice had a general council to consider'\
+ ' what measures they could take to outwit their common enemy ,'\
+ ' the cat . some said this , and some said that but at last a'\
+ ' young mouse got up and said he had a proposal to make ,'\
+ ' which he thought would meet the case . you will all agree ,'\
+ ' said he , that our chief danger consists in the sly and'\
+ ' treacherous manner in which the enemy approaches us . now'\
+ ' , if we could receive some signal of her approach , we '\
+ 'could easily escape from her . i venture , therefore , to'\
+ ' propose that a small bell be procured , and attached by'\
+ ' a ribbon round the neck of the cat . by this means we should'\
+ ' always know when she was about , and could easily retire'\
+ ' while she was in the neighbourhood . this proposal met'\
+ ' with general applause , until an old mouse got up and said'\
+ '  that is all very well , but who is to bell the cat ?'\
+ ' the mice looked at one another and nobody spoke . then '\
+ 'the old mouse said it is easy to propose impossible remedies .'

#### **Part 1: Preprocessing**

In this part, we attempt to split the input data.

In [3]:
tokens = data.split(' ')

We use `LabelEncoder` and `OnehotEncoder` of sklearn. So we can find words and its label quickly.

In [4]:
# We change LabelEncoder's return shape.
class ReshapedLabelEncoder(LabelEncoder):
    def fit_transform(self, y, *args, **kwargs):
        return super().fit_transform(y).reshape(-1, 1)

In [5]:
label_encoder = ReshapedLabelEncoder()
onehot_encoder = OneHotEncoder()

pipeline = Pipeline([
    ('label_encoder', label_encoder),
    ('onehot_encoder', onehot_encoder)
])

# For input
label_data = label_encoder.fit_transform(tokens)
# For target
onehot_data = pipeline.fit_transform(tokens).toarray()

We define a function which creates batch data set.

#### **Part 2: Define the LSTM model**

In this part, we define a LSTM model and placeholders, variables, parameters, loss function used in the model.

In [6]:
# Parameters
EPOCHS = 2000
BATCH_SIZE = 10
SEQUENCE_LENGTH = 3
HIDDEN_SIZE = 20
DIM_INPUT = 1
DIM_OUTPUT = len(label_encoder.classes_)  # Output의 형태는 one hot vector가 될 것이다. 

In [7]:
# Placeholders
X = tf.placeholder(tf.float32, shape=[None, SEQUENCE_LENGTH, DIM_INPUT])
t = tf.placeholder(tf.float32, shape=[None, DIM_OUTPUT])
batch_size = tf.placeholder(tf.int32, [])

In [8]:
# Model
def inference(x, batch_size):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=HIDDEN_SIZE)
    initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
    cell_outputs, state = tf.nn.dynamic_rnn(lstm_cell, x, initial_state=initial_state, dtype=tf.float32)
    final_cell_output = cell_outputs[:, -1, :]
    # Weight matrix
    V = tf.Variable(tf.truncated_normal([HIDDEN_SIZE, DIM_OUTPUT]))
    # Biases
    c = tf.Variable(tf.zeros([DIM_OUTPUT]))
    print(V)
    return tf.matmul(final_cell_output, V) + c

In [9]:
# Loss function
def loss_func(y, t):
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y, labels=t))

In [10]:
# Define optimizer
def train(loss):
     return tf.train.AdamOptimizer().minimize(loss)

In [11]:
# Connect graph nodes
y = inference(X, batch_size)
loss = loss_func(y, t)
train_step = train(loss)

<tf.Variable 'Variable:0' shape=(20, 113) dtype=float32_ref>


In [12]:
# Evlaution
correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(t, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

#### **Part 4: Create batch data**

In [13]:
input_data = []
targets = []

for i in range(len(label_data) - SEQUENCE_LENGTH):
    input_data.append(label_data[i: i+SEQUENCE_LENGTH])
    targets.append(onehot_data[i+SEQUENCE_LENGTH])

In [14]:
# RNN cell, LSTM cell의 input 형태로 reshape 한다.
input_data = np.array(input_data).reshape(-1, SEQUENCE_LENGTH, DIM_INPUT)
targets = np.array(targets).reshape(-1, DIM_OUTPUT)

#### **Part 5: Train LSTM**

In [15]:
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(input_data, targets, test_size = 0.1)
num_batches = len(X_train) // BATCH_SIZE
num_validation=len(X_test)

for epoch in range(EPOCHS):
    X_samp, y_samp = shuffle(X_train, y_train)
    for i in range(num_batches):
        start = i * BATCH_SIZE
        end = start + BATCH_SIZE

        _, train_acc, train_loss = sess.run([train_step, accuracy, loss], feed_dict={
            X: X_samp[start:end],
            t: y_samp[start:end],
            batch_size:BATCH_SIZE
        })
        
    print("%d epoch's final : %f, %f" %(epoch, train_acc, train_loss))

0 epoch's final : 0.000000, 6.233857
1 epoch's final : 0.000000, 6.308107
2 epoch's final : 0.000000, 5.337230
3 epoch's final : 0.000000, 5.284284
4 epoch's final : 0.000000, 5.682048
5 epoch's final : 0.000000, 5.860044
6 epoch's final : 0.100000, 4.728438
7 epoch's final : 0.000000, 4.839405
8 epoch's final : 0.000000, 4.441713
9 epoch's final : 0.000000, 4.528519
10 epoch's final : 0.000000, 4.704720
11 epoch's final : 0.100000, 4.675909
12 epoch's final : 0.100000, 4.455920
13 epoch's final : 0.000000, 4.738754
14 epoch's final : 0.000000, 4.545429
15 epoch's final : 0.000000, 4.653287
16 epoch's final : 0.200000, 4.424229
17 epoch's final : 0.100000, 4.485203
18 epoch's final : 0.100000, 4.353419
19 epoch's final : 0.100000, 4.418495
20 epoch's final : 0.200000, 4.460942
21 epoch's final : 0.100000, 4.444122
22 epoch's final : 0.100000, 4.306799
23 epoch's final : 0.200000, 4.255879
24 epoch's final : 0.000000, 4.497341
25 epoch's final : 0.000000, 4.495961
26 epoch's final : 0.0

#### **6. Check the result**

In [17]:
# 전체 set에 대한 정확도
accuracy.eval(session = sess, feed_dict={
    X: input_data,
    t: targets,
    batch_size: len(input_data)
})

0.8613861

In [18]:
pred = y.eval(session=sess, feed_dict={
    X: input_data,
    t: targets,
    batch_size:len(input_data)
})

In [19]:
# 모델에 의해 예측된 값과 결과값을 비교한다.
[elem for elem in zip([np.argmax(e) for e in pred], [np.argmax(e) for e in targets])]

[(86, 86),
 (57, 57),
 (38, 38),
 (96, 4),
 (36, 36),
 (29, 29),
 (93, 93),
 (26, 26),
 (103, 103),
 (54, 54),
 (90, 90),
 (28, 28),
 (84, 84),
 (70, 93),
 (67, 67),
 (87, 87),
 (25, 25),
 (33, 33),
 (1, 1),
 (86, 86),
 (23, 23),
 (2, 2),
 (82, 82),
 (76, 76),
 (91, 91),
 (20, 1),
 (11, 11),
 (82, 82),
 (76, 76),
 (85, 85),
 (20, 20),
 (16, 16),
 (48, 48),
 (4, 4),
 (112, 112),
 (58, 58),
 (37, 37),
 (96, 96),
 (11, 11),
 (76, 76),
 (0, 39),
 (38, 38),
 (32, 4),
 (69, 69),
 (93, 93),
 (51, 51),
 (1, 1),
 (105, 105),
 (39, 39),
 (1, 92),
 (110, 110),
 (55, 55),
 (1, 86),
 (22, 22),
 (2, 2),
 (111, 111),
 (108, 108),
 (8, 8),
 (7, 7),
 (1, 1),
 (76, 76),
 (39, 39),
 (1, 1),
 (85, 85),
 (66, 66),
 (85, 24),
 (30, 30),
 (27, 27),
 (44, 44),
 (86, 86),
 (26, 80),
 (11, 11),
 (94, 94),
 (52, 52),
 (44, 44),
 (105, 105),
 (36, 86),
 (33, 33),
 (15, 15),
 (97, 97),
 (2, 2),
 (62, 62),
 (19, 1),
 (42, 42),
 (101, 101),
 (28, 28),
 (71, 71),
 (82, 82),
 (79, 79),
 (59, 63),
 (40, 40),
 (14, 14),