YBIGTA 10기 노혜미 박승리

# 1. RNN with long sequences

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn

#### 단어를 벡터화시키는 과정

In [10]:
sample = 'if you want you'
idx2char = list(set(sample)) # index -> char(고유값)
char2idx = {c : i for i, c in enumerate(idx2char)} # char -> index

#### hyper parameter 설정

In [12]:
dic_size = len(char2idx) # RNN input size(one hot vector size)
rnn_hidden_size = len(char2idx) # RNN output size(one hot vector size) 
num_classes = len(char2idx) # final output size
batch_size = 1 # one sample data(one batch)
sequence_length = len(sample) - 1 # X_data, Y_label에 사용되는 char의 length는 실제 길이 - 1

sample_idx = [char2idx[c] for c in sample] # sample의 character를 vector화 하여 list에 넣는다.
x_data = [sample_idx[:-1]] # X data sample (0 ~ n-1) ex) 'if you want you' : if you want yo
y_data = [sample_idx[1:]] # Y data sample (1 ~ n) ex) 'if you want you' : f you want you

X = tf.placeholder(tf.int32, [None, sequence_length]) # X data
Y = tf.placeholder(tf.int32, [None, sequence_length]) # Y label

#### 벡터화된 단어를 one hot vector로 변환

In [13]:
X_one_hot = tf.one_hot(X, num_classes) # 자동으로 벡터를 one_hot으로 변환시켜준다.

#### Cell 만들기

In [21]:
cell = tf.contrib.rnn.BasicLSTMCell(num_units=rnn_hidden_size, state_is_tuple=True)
initial_state = cell.zero_state(batch_size, tf.float32)
with tf.variable_scope('first'):
    outputs, _states = tf.nn.dynamic_rnn(cell, X_one_hot, initial_state=initial_state, dtype=tf.float32)

#### Weight, Loss, Train

In [22]:
weights = tf.ones([batch_size, sequence_length]) # [1, sequence_length]
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets = Y, weights = weights) # sequence data
loss = tf.reduce_mean(sequence_loss) # sequence loss -> 실수형
train = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)

prediction = tf.argmax(outputs, axis = 2)

#### Training and Results

In [23]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(3000):
        l, _ = sess.run([loss, train], feed_dict = {X : x_data, Y : y_data})
        result = sess.run(prediction, feed_dict = {X : x_data})
        
        if i%10 == 0:
            result_str = [idx2char[c] for c in np.squeeze(result)]
            print(i, 'loss :', l, 'Prediction :', ''.join(result_str))

0 loss : 2.33074 Prediction : f      t u    
10 loss : 1.56128 Prediction : f you wwn  you
20 loss : 1.23079 Prediction : f you want you
30 loss : 1.11762 Prediction : f you want you
40 loss : 1.07443 Prediction : f you want you
50 loss : 1.04878 Prediction : f you want you
60 loss : 1.03247 Prediction : f you want you
70 loss : 1.0262 Prediction : f you want you
80 loss : 1.02168 Prediction : f you want you
90 loss : 1.01807 Prediction : f you want you
100 loss : 1.01144 Prediction : f you want you
110 loss : 1.00835 Prediction : f you want you
120 loss : 1.0066 Prediction : f you want you
130 loss : 1.00514 Prediction : f you want you
140 loss : 1.0043 Prediction : f you want you
150 loss : 1.00357 Prediction : f you want you
160 loss : 1.00282 Prediction : f you want you
170 loss : 1.00208 Prediction : f you want you
180 loss : 1.00163 Prediction : f you want you
190 loss : 1.0013 Prediction : f you want you
200 loss : 1.00104 Prediction : f you want you
210 loss : 1.00083 Predictio

1750 loss : 0.997396 Prediction : f you want you
1760 loss : 0.997395 Prediction : f you want you
1770 loss : 0.997394 Prediction : f you want you
1780 loss : 0.997393 Prediction : f you want you
1790 loss : 0.997392 Prediction : f you want you
1800 loss : 0.997391 Prediction : f you want you
1810 loss : 0.99739 Prediction : f you want you
1820 loss : 0.997389 Prediction : f you want you
1830 loss : 0.997388 Prediction : f you want you
1840 loss : 0.997387 Prediction : f you want you
1850 loss : 0.997387 Prediction : f you want you
1860 loss : 0.997386 Prediction : f you want you
1870 loss : 0.997385 Prediction : f you want you
1880 loss : 0.997384 Prediction : f you want you
1890 loss : 0.997383 Prediction : f you want you
1900 loss : 0.997382 Prediction : f you want you
1910 loss : 0.997382 Prediction : f you want you
1920 loss : 0.997381 Prediction : f you want you
1930 loss : 0.99738 Prediction : f you want you
1940 loss : 0.997379 Prediction : f you want you
1950 loss : 0.997378 P

# 2. RNN with really long sequences

In [2]:
sentence = ("if you want to build a ship, don't drum up people together to "
           "collect wood and don't assign them tasks and work, but rather "
           "teach them to long for the endless immensity of the sea.")

In [3]:
sentence

"if you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea."

#### 단어를 벡터화시키는 과정

In [4]:
char_set = list(set(sentence))
char_dic = {w : i for i, w in enumerate(char_set)}

#### hyper parameter 설정

In [5]:
data_dim = len(char_set)
hidden_size = len(char_set)
num_classes = len(char_set)
sequence_length = 10 # 문장이 너무 길어서, 순차적으로 10개씩 골라서 batch size를 늘리려고 한다.

#### batch 만드는 과정

In [6]:
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length): # 문장의 처음부터, 마지막 10개 단어 전까지
    x_str = sentence[i : i+sequence_length]
    y_str = sentence[i+1 : i+sequence_length+1] # y_label은 x_data의 하나 뒤
    print(i, x_str, '->', y_str)
    
    x = [char_dic[c] for c in x_str]
    y = [char_dic[c] for c in y_str]
    
    dataX.append(x)
    dataY.append(y)
    
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])

X_one_hot = tf.one_hot(X, num_classes)

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [7]:
batch_size = len(dataX)
dataX # batch 형태

[[24, 15, 3, 18, 1, 22, 3, 8, 11, 13],
 [15, 3, 18, 1, 22, 3, 8, 11, 13, 9],
 [3, 18, 1, 22, 3, 8, 11, 13, 9, 3],
 [18, 1, 22, 3, 8, 11, 13, 9, 3, 9],
 [1, 22, 3, 8, 11, 13, 9, 3, 9, 1],
 [22, 3, 8, 11, 13, 9, 3, 9, 1, 3],
 [3, 8, 11, 13, 9, 3, 9, 1, 3, 21],
 [8, 11, 13, 9, 3, 9, 1, 3, 21, 22],
 [11, 13, 9, 3, 9, 1, 3, 21, 22, 24],
 [13, 9, 3, 9, 1, 3, 21, 22, 24, 6],
 [9, 3, 9, 1, 3, 21, 22, 24, 6, 16],
 [3, 9, 1, 3, 21, 22, 24, 6, 16, 3],
 [9, 1, 3, 21, 22, 24, 6, 16, 3, 11],
 [1, 3, 21, 22, 24, 6, 16, 3, 11, 3],
 [3, 21, 22, 24, 6, 16, 3, 11, 3, 7],
 [21, 22, 24, 6, 16, 3, 11, 3, 7, 5],
 [22, 24, 6, 16, 3, 11, 3, 7, 5, 24],
 [24, 6, 16, 3, 11, 3, 7, 5, 24, 17],
 [6, 16, 3, 11, 3, 7, 5, 24, 17, 20],
 [16, 3, 11, 3, 7, 5, 24, 17, 20, 3],
 [3, 11, 3, 7, 5, 24, 17, 20, 3, 16],
 [11, 3, 7, 5, 24, 17, 20, 3, 16, 1],
 [3, 7, 5, 24, 17, 20, 3, 16, 1, 13],
 [7, 5, 24, 17, 20, 3, 16, 1, 13, 14],
 [5, 24, 17, 20, 3, 16, 1, 13, 14, 9],
 [24, 17, 20, 3, 16, 1, 13, 14, 9, 3],
 [17, 20, 3, 16, 1, 

#### Cell 만들기

In [8]:
cell= tf.contrib.rnn.BasicLSTMCell(num_units = hidden_size)
initial_state = cell.zero_state(batch_size, tf.float32)
with tf.variable_scope('second'):
    outputs, _states = tf.nn.dynamic_rnn(cell, X_one_hot, initial_state = initial_state, dtype=tf.float32)

#### Weight, Loss, Train

In [9]:
weights = tf.ones([batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets = Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate = 0.1).minimize(loss)

In [10]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(3000):
        l, _, results = sess.run([loss, train, outputs], feed_dict = {X : dataX, Y : dataY})
        for j, result in enumerate(results):
            index = np.argmax(result, axis=1)
            if i%100 == 0:
                if j%10 == 0:
                    print(i, j, ''.join([char_set[t] for t in index]), l)
                    
    for j, result in enumerate(results):
        index = np.argmax(result, axis = 1)
        if j is 0:
            print(''.join([char_set[t] for t in index]), end='')
        else:
            print(char_set[index[-1]], end='')

0 0 k bhuohooo 3.2202
0 10 dddoo,hyaa 3.2202
0 20 .m.bbbbbbb 3.2202
0 30 uiaahmmar' 3.2202
0 40 hthooooolh 3.2202
0 50 dooccoammo 3.2202
0 60 um'i''aaad 3.2202
0 70  oomomobbb 3.2202
0 80 uiaahh,kkk 3.2202
0 90 ahdoar'do, 3.2202
0 100 ds'mbbbmmm 3.2202
0 110 dudfaahdmo 3.2202
0 120 'ammdmm.o. 3.2202
0 130 doal'doo'o 3.2202
0 140 ac.bmmmmmm 3.2202
0 150 .maaaayy'y 3.2202
0 160 rrlrrl,hhh 3.2202
100 0 t t u aa t 2.02257
100 10 ha  lu ld  2.02257
100 20 t t  t  d  2.02257
100 30  gt a    t 2.02257
100 40   p  oee t 2.02257
100 50 h       t  2.02257
100 60  lllle t t 2.02257
100 70 or  a t d  2.02257
100 80  gt a s t  2.02257
100 90 dt    t  s 2.02257
100 100 s a d do   2.02257
100 110 s but r    2.02257
100 120 e  t   t t 2.02257
100 130 h   t  lo  2.02257
100 140 d t   t    2.02257
100 150 ta  e s im 2.02257
100 160 e ns t  oo 2.02257
200 0 t y u tant 1.95872
200 10  a  luild  1.95872
200 20 t t it  d  1.95872
200 30 l't a am t 1.95872
200 40   peopee t 1.95872
200 50    t e  t  1.95872


1900 0 t eod dont 2.16259
1900 10 oao tutme  2.16259
1900 20 tsthin  do 2.16259
1900 30  ut aaum t 2.16259
1900 40 p pepe e t 2.16259
1900 50 o  t e  to 2.16259
1900 60  do eent t 2.16259
1900 70 ao  a d to 2.16259
1900 80  ut a sinn 2.16259
1900 90 'the etoss 2.16259
1900 100 s and tod  2.16259
1900 110 s sut aath 2.16259
1900 120 e  toaet t 2.16259
1900 130 oe eto te  2.16259
1900 140 ' to  the  2.16259
1900 150 tae e s in 2.16259
1900 160 emmssss sd 2.16259
2000 0 t eod eont 2.14575
2000 10 oao tutme  2.14575
2000 20 tsthim  do 2.14575
2000 30  ut arum t 2.14575
2000 40 p pepp e t 2.14575
2000 50 o  t e  to 2.14575
2000 60  do eent t 2.14575
2000 70 ao  and to 2.14575
2000 80  ut ansinn 2.14575
2000 90 'the etoss 2.14575
2000 100 s and to   2.14575
2000 110 s sut aath 2.14575
2000 120 e  toant t 2.14575
2000 130 oe eto se  2.14575
2000 140 ' to  the  2.14575
2000 150 tae e s im 2.14575
2000 160 emmssts sd 2.14575
2100 0 t eod dont 2.14054
2100 10 oao tutme  2.14054
2100 20 tsthine d

학습이 잘 되지 않는다. 왜 그런 것일까? ㅠㅠ

해답은 **deep**하게 Neural Network를 쌓지 않았기 때문이다!

# 3. Stacked RNN

In [11]:
sentence = ("if you want to build a ship, don't drum up people together to "
           "collect wood and don't assign them tasks and work, but rather "
           "teach them to long for the endless immensity of the sea.")
sentence

"if you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea."

#### 단어를 벡터화시키는 과정

In [12]:
char_set = list(set(sentence))
char_dic = {w : i for i, w in enumerate(char_set)}

#### hyper parameter 설정

In [13]:
data_dim = len(char_set)
hidden_size = len(char_set)
num_classes = len(char_set)
sequence_length = 10 # 문장이 너무 길어서, 순차적으로 10개씩 골라서 batch size를 늘리려고 한다.

#### batch 만드는 과정

In [17]:
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length): # 문장의 처음부터, 마지막 10개 단어 전까지
    x_str = sentence[i : i+sequence_length]
    y_str = sentence[i+1 : i+sequence_length+1] # y_label은 x_data의 하나 뒤
    print(i, x_str, '->', y_str)
    
    x = [char_dic[c] for c in x_str]
    y = [char_dic[c] for c in y_str]
    
    dataX.append(x)
    dataY.append(y)
    
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])

X_one_hot = tf.one_hot(X, num_classes)

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [19]:
print(X_one_hot) # batch : 주는대로 / sequence_length = 10, input_dimension = 25

Tensor("one_hot_3:0", shape=(?, 10, 25), dtype=float32)


In [15]:
batch_size = len(dataX)
dataX # batch 형태

[[24, 15, 3, 18, 1, 22, 3, 8, 11, 13],
 [15, 3, 18, 1, 22, 3, 8, 11, 13, 9],
 [3, 18, 1, 22, 3, 8, 11, 13, 9, 3],
 [18, 1, 22, 3, 8, 11, 13, 9, 3, 9],
 [1, 22, 3, 8, 11, 13, 9, 3, 9, 1],
 [22, 3, 8, 11, 13, 9, 3, 9, 1, 3],
 [3, 8, 11, 13, 9, 3, 9, 1, 3, 21],
 [8, 11, 13, 9, 3, 9, 1, 3, 21, 22],
 [11, 13, 9, 3, 9, 1, 3, 21, 22, 24],
 [13, 9, 3, 9, 1, 3, 21, 22, 24, 6],
 [9, 3, 9, 1, 3, 21, 22, 24, 6, 16],
 [3, 9, 1, 3, 21, 22, 24, 6, 16, 3],
 [9, 1, 3, 21, 22, 24, 6, 16, 3, 11],
 [1, 3, 21, 22, 24, 6, 16, 3, 11, 3],
 [3, 21, 22, 24, 6, 16, 3, 11, 3, 7],
 [21, 22, 24, 6, 16, 3, 11, 3, 7, 5],
 [22, 24, 6, 16, 3, 11, 3, 7, 5, 24],
 [24, 6, 16, 3, 11, 3, 7, 5, 24, 17],
 [6, 16, 3, 11, 3, 7, 5, 24, 17, 20],
 [16, 3, 11, 3, 7, 5, 24, 17, 20, 3],
 [3, 11, 3, 7, 5, 24, 17, 20, 3, 16],
 [11, 3, 7, 5, 24, 17, 20, 3, 16, 1],
 [3, 7, 5, 24, 17, 20, 3, 16, 1, 13],
 [7, 5, 24, 17, 20, 3, 16, 1, 13, 14],
 [5, 24, 17, 20, 3, 16, 1, 13, 14, 9],
 [24, 17, 20, 3, 16, 1, 13, 14, 9, 3],
 [17, 20, 3, 16, 1, 

여기까지는 1 layer RNN과 동일하다.

#### Cell 만들기

기본적인 1 layer LSTM cell을 만든 뒤에, 이를 MultiRNNCell로 쌓아야 한다.

(기본 MultiLSTMCell과 같은 모듈이 존재하지 않는다.)  

In [22]:
def lstm_cell():
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    return cell

multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(5)], state_is_tuple=True)

initial_state = multi_cells.zero_state(batch_size, tf.float32)
with tf.variable_scope('third'):
    outputs, _states = tf.nn.dynamic_rnn(multi_cells, X_one_hot, initial_state = initial_state, dtype=tf.float32)

#### Weight, Loss, Train

In [23]:
weights = tf.ones([batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets = Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate = 0.1).minimize(loss)

In [24]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(3000):
        l, _, results = sess.run([loss, train, outputs], feed_dict = {X : dataX, Y : dataY})
        for j, result in enumerate(results):
            index = np.argmax(result, axis=1)
            if i%100 == 0:
                if j%10 == 0:
                    print(i, j, ''.join([char_set[t] for t in index]), l)
                    
    for j, result in enumerate(results):
        index = np.argmax(result, axis=1)
        if j is 0:
            print(''.join([char_set[t] for t in index]), end='')
        else:
            print(char_set[index[-1]], end='')

0 0 cnnnnnnnnn 3.21899
0 10 ,,,iiiiiii 3.21899
0 20 ffyyyyyfff 3.21899
0 30 dymyllllyy 3.21899
0 40 .....yyyyy 3.21899
0 50 ,cwwnnn''' 3.21899
0 60 dyyyyyyyyy 3.21899
0 70 gg'mmmmmmm 3.21899
0 80 dymyllllll 3.21899
0 90    rrtuuuu 3.21899
0 100 '''''yyyyy 3.21899
0 110 '''''''''' 3.21899
0 120 fffffffyyy 3.21899
0 130 ,,gggggfff 3.21899
0 140     rrrttn 3.21899
0 150 ffffffyyyy 3.21899
0 160 ffffffffff 3.21899
100 0  t   t tt  2.89676
100 10  t   t tt  2.89676
100 20  t   t tt  2.89676
100 30  t   t tt  2.89676
100 40  t   t tt  2.89676
100 50  t   t tt  2.89676
100 60  t   t tt  2.89676
100 70  t   t tt  2.89676
100 80  t   t tt  2.89676
100 90  t t t tt  2.89676
100 100  t   t tt  2.89676
100 110  t   t tt  2.89676
100 120  t   t tt  2.89676
100 130  t   t tt  2.89676
100 140  t t t tt  2.89676
100 150  t   t tt  2.89676
100 160  t   t tt  2.89676
200 0  ttt t t   2.89659
200 10  tt   t    2.89659
200 20  tt   t    2.89659
200 30  tt   t    2.89659
200 40  tt  t t   2.89659
200 50  t

1900 0 tn oeo     2.61787
1900 10 t          2.61787
1900 20 tndn   o   2.61787
1900 30 t          2.61787
1900 40 ttt  e     2.61787
1900 50 t e  e   o 2.61787
1900 60 t    e     2.61787
1900 70 toe  n     2.61787
1900 80 t          2.61787
1900 90 tt         2.61787
1900 100 t  ndn on  2.61787
1900 110 t          2.61787
1900 120 t   o      2.61787
1900 130 t          2.61787
1900 140 t          2.61787
1900 150 tnd        2.61787
1900 160 tt         2.61787
2000 0 t  o       2.60468
2000 10 t o        2.60468
2000 20 tn n   o o 2.60468
2000 30 t  o o     2.60468
2000 40 t    e     2.60468
2000 50 t        o 2.60468
2000 60 t          2.60468
2000 70 tooe     o 2.60468
2000 80 t  o       2.60468
2000 90 tt         2.60468
2000 100 t  ndn o   2.60468
2000 110 t    o     2.60468
2000 120 tt  o      2.60468
2000 130 t          2.60468
2000 140 t t        2.60468
2000 150 tnd        2.60468
2000 160 tt         2.60468
2100 0 tn o       2.5857
2100 10 t o        2.5857
2100 20 tn n   o   

하지만 여전히 학습이 잘 되지 않는다. 왜 그런 것일까? ㅠㅠ

해답은 **fully connected layer**를 사용하지 않았기 때문이다!

# 4. Stacked RNN + Softmax layer

In [25]:
sentence = ("if you want to build a ship, don't drum up people together to "
           "collect wood and don't assign them tasks and work, but rather "
           "teach them to long for the endless immensity of the sea.")
sentence

"if you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea."

#### 단어를 벡터화시키는 과정

In [27]:
char_set = list(set(sentence))
char_dic = {w : i for i, w in enumerate(char_set)}

#### hyper parameter 설정

In [29]:
data_dim = len(char_set)
hidden_size = len(char_set)
num_classes = len(char_set)
sequence_length = 10 # 문장이 너무 길어서, 순차적으로 10개씩 골라서 batch size를 늘리려고 한다.

#### batch 만드는 과정

In [31]:
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length): # 문장의 처음부터, 마지막 10개 단어 전까지
    x_str = sentence[i : i+sequence_length]
    y_str = sentence[i+1 : i+sequence_length+1] # y_label은 x_data의 하나 뒤
    print(i, x_str, '->', y_str)
    
    x = [char_dic[c] for c in x_str]
    y = [char_dic[c] for c in y_str]
    
    dataX.append(x)
    dataY.append(y)
    
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])

X_one_hot = tf.one_hot(X, num_classes)

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [32]:
print(X_one_hot) # batch : 주는대로 / sequence_length = 10, input_dimension = 25

Tensor("one_hot_4:0", shape=(?, 10, 25), dtype=float32)


In [33]:
batch_size = len(dataX)
dataX # batch 형태

[[24, 15, 3, 18, 1, 22, 3, 8, 11, 13],
 [15, 3, 18, 1, 22, 3, 8, 11, 13, 9],
 [3, 18, 1, 22, 3, 8, 11, 13, 9, 3],
 [18, 1, 22, 3, 8, 11, 13, 9, 3, 9],
 [1, 22, 3, 8, 11, 13, 9, 3, 9, 1],
 [22, 3, 8, 11, 13, 9, 3, 9, 1, 3],
 [3, 8, 11, 13, 9, 3, 9, 1, 3, 21],
 [8, 11, 13, 9, 3, 9, 1, 3, 21, 22],
 [11, 13, 9, 3, 9, 1, 3, 21, 22, 24],
 [13, 9, 3, 9, 1, 3, 21, 22, 24, 6],
 [9, 3, 9, 1, 3, 21, 22, 24, 6, 16],
 [3, 9, 1, 3, 21, 22, 24, 6, 16, 3],
 [9, 1, 3, 21, 22, 24, 6, 16, 3, 11],
 [1, 3, 21, 22, 24, 6, 16, 3, 11, 3],
 [3, 21, 22, 24, 6, 16, 3, 11, 3, 7],
 [21, 22, 24, 6, 16, 3, 11, 3, 7, 5],
 [22, 24, 6, 16, 3, 11, 3, 7, 5, 24],
 [24, 6, 16, 3, 11, 3, 7, 5, 24, 17],
 [6, 16, 3, 11, 3, 7, 5, 24, 17, 20],
 [16, 3, 11, 3, 7, 5, 24, 17, 20, 3],
 [3, 11, 3, 7, 5, 24, 17, 20, 3, 16],
 [11, 3, 7, 5, 24, 17, 20, 3, 16, 1],
 [3, 7, 5, 24, 17, 20, 3, 16, 1, 13],
 [7, 5, 24, 17, 20, 3, 16, 1, 13, 14],
 [5, 24, 17, 20, 3, 16, 1, 13, 14, 9],
 [24, 17, 20, 3, 16, 1, 13, 14, 9, 3],
 [17, 20, 3, 16, 1, 

#### Cell 만들기

In [36]:
def lstm_cell():
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    return cell

multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(5)], state_is_tuple=True)

initial_state = multi_cells.zero_state(batch_size, tf.float32)
with tf.variable_scope('fourth'):
    outputs, _states = tf.nn.dynamic_rnn(multi_cells, X_one_hot, initial_state = initial_state, dtype=tf.float32)

In [37]:
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes]) # outputs의 형태를 변형

X_for_softmax = tf.reshape(outputs, [-1, hidden_size]) # outputs의 형태를 softmax의 input에 알맞은 형태로 변형

#### Softmax layer 만들기

In [38]:
softmax_W = tf.get_variable('softmax_W', [hidden_size, num_classes]) # input dim = hidden_size, output dim = num_classes(동일하다.) 
softmax_b = tf.get_variable('softmax_b', [num_classes]) # bias의 dim은 output dim과 같아야 한다.
outputs = tf.matmul(X_for_softmax, softmax_W) + softmax_b

outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes]) # softmax layer의 ouptut을 다시 sequence_loss 계산 형태로 변형

#### Weight, Loss, Train

In [41]:
weights = tf.ones([batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)

In [42]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(3000):
        l, _, results = sess.run([loss, train, outputs], feed_dict = {X : dataX, Y : dataY})
        for j, result in enumerate(results):
            index = np.argmax(result, axis=1)
            if i%100 == 0:
                if j%10 == 0:
                    print(i, j, ''.join([char_set[t] for t in index]), l)
                    
    for j, result in enumerate(results):
        index = np.argmax(result, axis=1)
        if j is 0:
            print(''.join([char_set[t] for t in index]), end='')
        else:
            print(char_set[index[-1]], end='')

0 0            3.24438
0 10            3.24438
0 20            3.24438
0 30            3.24438
0 40            3.24438
0 50            3.24438
0 60            3.24438
0 70            3.24438
0 80            3.24438
0 90            3.24438
0 100            3.24438
0 110            3.24438
0 120            3.24438
0 130            3.24438
0 140            3.24438
0 150            3.24438
0 160            3.24438
100 0    ou want 0.80807
100 10 hw  luild  0.80807
100 20 t ship, do 0.80807
100 30 n't dnum u 0.80807
100 40 t people t 0.80807
100 50 h ethen to 0.80807
100 60 nlolleht w 0.80807
100 70 ood and do 0.80807
100 80 n't dhsign 0.80807
100 90 dthe  to k 0.80807
100 100 s and woo' 0.80807
100 110 s lut r th 0.80807
100 120 e  teahh t 0.80807
100 130 hem to bon 0.80807
100 140 dtfor the  0.80807
100 150 tod ess im 0.80807
100 160  ensity of 0.80807
200 0 g you want 0.264466
200 10 haa build  0.264466
200 20 tnship, do 0.264466
200 30  't drum u 0.264466
200 40   people t 0.264466
200 

1800 0 t you want 0.228659
1800 10  ro build  0.228659
1800 20 tnship, do 0.228659
1800 30  't drum u 0.228659
1800 40 i people t 0.228659
1800 50   ether to 0.228659
1800 60  collect w 0.228659
1800 70 ord and do 0.228659
1800 80  't dssign 0.228659
1800 90 dthem task 0.228659
1800 100 s and work 0.228659
1800 110 s but rath 0.228659
1800 120 er toach t 0.228659
1800 130  er to lon 0.228659
1800 140 d for the  0.228659
1800 150 tndless im 0.228659
1800 160  ensity of 0.228659
1900 0 t you want 0.228652
1900 10 hao build  0.228652
1900 20 tnship, do 0.228652
1900 30  't arum u 0.228652
1900 40 t people t 0.228652
1900 50 h ether to 0.228652
1900 60  collect w 0.228652
1900 70 ord and do 0.228652
1900 80  't assign 0.228652
1900 90 dthem task 0.228652
1900 100 s and work 0.228652
1900 110 s but rath 0.228652
1900 120 er teach t 0.228652
1900 130 her ta lon 0.228652
1900 140 d for the  0.228652
1900 150 tndless im 0.228652
1900 160  ensity of 0.228652
2000 0 g you want 0.22879
2000 10 ht

2700번을 넘어서면서 다시 loss가 증가하는 문제가 발생했다. 

반복 횟수를 2000번으로 줄여보자.

In [44]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(2000):
        l, _, results = sess.run([loss, train, outputs], feed_dict = {X : dataX, Y : dataY})
        for j, result in enumerate(results):
            index = np.argmax(result, axis=1)
            if i%100 == 0:
                if j%10 == 0:
                    print(i, j, ''.join([char_set[t] for t in index]), l)
                    
    for j, result in enumerate(results):
        index = np.argmax(result, axis=1)
        if j is 0:
            print(''.join([char_set[t] for t in index]), end='')
        else:
            print(char_set[index[-1]], end='')

0 0 ssssssssss 3.243
0 10 ssssssssss 3.243
0 20 ssssssssss 3.243
0 30 ssssssssss 3.243
0 40 ssssssssss 3.243
0 50 ssssssssss 3.243
0 60 ssssssssss 3.243
0 70 ssssssssss 3.243
0 80 ssssssssss 3.243
0 90 ssssssssss 3.243
0 100 ssssssssss 3.243
0 110 ssssssssss 3.243
0 120 ssssssssss 3.243
0 130 ssssssssss 3.243
0 140 ssssssssss 3.243
0 150 ssssssssss 3.243
0 160 ssssssssss 3.243
100 0          t 2.69238
100 10   t     t  2.69238
100 20         t  2.69238
100 30          t 2.69238
100 40          t 2.69238
100 50       t t  2.69238
100 60   t t  t   2.69238
100 70     t t t  2.69238
100 80            2.69238
100 90   t t    t 2.69238
100 100       t    2.69238
100 110   t t      2.69238
100 120          t 2.69238
100 130   t        2.69238
100 140     t t  t 2.69238
100 150     t t    2.69238
100 160            2.69238
200 0 t ton aont 1.34123
200 10 hth but t  1.34123
200 20 tnmhebo to 1.34123
200 30 nst asut u 1.34123
200 40   asodla t 1.34123
200 50 hekthet to 1.34123
200 60 nto lect a

1800 0 g you want 0.25162
1800 10 hwo build  0.25162
1800 20 tnship, do 0.25162
1800 30 n't drum u 0.25162
1800 40 m people t 0.25162
1800 50 h ether to 0.25162
1800 60 ncollect w 0.25162
1800 70 ood and do 0.25162
1800 80 n't dssign 0.25162
1800 90 dthem task 0.25162
1800 100 , and work 0.25162
1800 110 , but rath 0.25162
1800 120 em toach t 0.25162
1800 130 hem ta lon 0.25162
1800 140 d for the  0.25162
1800 150 todless im 0.25162
1800 160  ensity of 0.25162
1900 0 f you want 0.244033
1900 10  wo build  0.244033
1900 20 tnship, do 0.244033
1900 30 n't drum u 0.244033
1900 40 m people t 0.244033
1900 50   ether to 0.244033
1900 60 ncollect w 0.244033
1900 70 ord and do 0.244033
1900 80 n't dssign 0.244033
1900 90 dthem task 0.244033
1900 100 , and work 0.244033
1900 110 , but rath 0.244033
1900 120 em toach t 0.244033
1900 130  em ta lon 0.244033
1900 140 d for the  0.244033
1900 150 todless im 0.244033
1900 160  ensity of 0.244033
f you want to build a ship, don't drum up people toge

훌륭한 결과가 나왔다!

하지만 계속 loss가 늘었다 줄었다 하는 문제는 어떻게 해결해야 할 것인가? 

**parameter 조정**이 답으로 보인다.