# **Tutorial 2 : Predicting next integer**

In [1]:
import numpy as np
import tensorflow as tf

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

#### **Problem : RNN 모델을 이용하여 다음 정수를 예측해보자.**

In [2]:
# Build the integer sequence 0, 1, ..., 100 that the model learns to continue.
int_seq = list(range(101))

### **Input data**

In [3]:
# --- Hyperparameters ---------------------------------------------------------
# Data / model dimensions
SEQUENCE_LENGTH = 15   # number of consecutive integers in each input window
DIM_INPUT = 1          # features per time step
DIM_OUTPUT = 1         # size of the predicted value
HIDDEN_SIZE = 20       # number of units in the RNN cell

# Optimisation schedule
BATCH_SIZE = 10
# Training on the integers 0..100 only converged properly with 2000 or more epochs.
EPOCHS = 2000

In [4]:
# Slide a window of SEQUENCE_LENGTH integers over int_seq: each window is one
# input sample and the integer immediately following it is the target.
input_data = [
    int_seq[begin: begin + SEQUENCE_LENGTH]
    for begin in range(len(int_seq) - SEQUENCE_LENGTH)
]
targets = [
    int_seq[begin + SEQUENCE_LENGTH]
    for begin in range(len(int_seq) - SEQUENCE_LENGTH)
]

In [5]:
# Mind the input shape: the RNN expects (num_samples, SEQUENCE_LENGTH, 1),
# and the targets are stored as a (num_samples, 1) column.
input_data = np.reshape(input_data, (len(input_data), SEQUENCE_LENGTH, 1))
targets = np.reshape(targets, (len(targets), 1))

### **Model**
> - Inference

In [6]:
# Model inference definition
def inference(x, batch_size):
    """Build the RNN graph that produces one prediction per input sequence.

    Args:
        x: Input tensor handed to ``tf.nn.dynamic_rnn``. The notebook feeds a
            float32 placeholder of shape (None, SEQUENCE_LENGTH, DIM_INPUT).
        batch_size: Batch size used to build the zero initial state. The
            notebook passes a scalar int32 placeholder so it can differ
            between training and evaluation runs.

    Returns:
        The RNN output at the last time step, linearly projected from
        HIDDEN_SIZE to DIM_OUTPUT values.
    """
    cell = tf.contrib.rnn.BasicRNNCell(HIDDEN_SIZE)
    # The zero initial state is sized by batch_size, which must therefore be
    # supplied at run time.
    zero_state = cell.zero_state(batch_size, dtype=tf.float32)
    # dynamic_rnn unrolls the cell over the time axis (instead of calling the
    # cell step by step); the final state it also returns is not needed here.
    outputs, _ = tf.nn.dynamic_rnn(cell, x, initial_state=zero_state, dtype=tf.float32)
    # Keep only the output of the last time step.
    last_step = outputs[:, -1, :]
    # Output-layer weight
    out_weights = tf.Variable(tf.truncated_normal([HIDDEN_SIZE, DIM_OUTPUT], stddev=0.01, dtype=tf.float32))
    # Output-layer bias
    out_bias = tf.Variable(tf.zeros([DIM_OUTPUT], dtype=tf.float32))
    # Linear projection to the prediction
    return tf.matmul(last_step, out_weights) + out_bias

> - Loss function

In [7]:
# Loss function definition
def loss_func(labels, predictions):
    """Return the mean squared error between ``labels`` and ``predictions``."""
    mse = tf.losses.mean_squared_error(labels=labels, predictions=predictions)
    return mse

> - Train

In [8]:
# Training op definition
def train(loss):
    """Return an op that minimizes ``loss`` with an Adam optimizer (default settings)."""
    optimizer = tf.train.AdamOptimizer()
    return optimizer.minimize(loss)

> - Placeholder

In [9]:
# Placeholders used by the model.
# X: batch of input sequences; t: batch of target values. The leading None
# leaves the batch dimension unspecified.
X = tf.placeholder(tf.float32, shape=[None, SEQUENCE_LENGTH, DIM_INPUT])
t = tf.placeholder(tf.float32, shape=[None, DIM_OUTPUT])
# The batch size is fed at run time; an empty shape makes it a scalar.
batch_size = tf.placeholder(dtype=tf.int32, shape=[])

### **Graph node 연결**

In [10]:
# Wire the graph together: prediction -> loss -> training op.
# Prediction (inference)
y = inference(x=X, batch_size=batch_size)
# Loss between the targets and the predictions
loss = loss_func(labels=t, predictions=y)
# Op that performs one optimisation step on that loss
train_step = train(loss=loss)

### **Session 생성 및 run**

In [11]:
# Open a session, then build and run the op that initializes every variable
# created while constructing the graph.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [12]:
# Hold out 10% of the windows for validation. No random_state is given, so the
# split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    input_data,
    targets,
    test_size=0.1,
)

In [13]:
# Number of training samples (first axis of the training array).
X_train.shape[0]

77

In [14]:
# Number of full mini-batches per epoch. The trailing partial batch
# (len(X_train) % BATCH_SIZE samples) is skipped, but because the data is
# reshuffled every epoch a different set of samples is left out each time.
num_batches = len(X_train) // BATCH_SIZE
num_validation = len(X_test)

for epoch in range(EPOCHS):
    # Reshuffle inputs and targets together so the batches differ per epoch.
    X_samp, y_samp = shuffle(X_train, y_train)
    for i in range(num_batches):
        start = i * BATCH_SIZE
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={
            X: X_samp[start:end],
            t: y_samp[start:end],
            batch_size: BATCH_SIZE
        })

    # Evaluate the validation loss once per epoch, after all of the epoch's
    # updates. Only this final value was ever printed, so evaluating it after
    # every mini-batch was wasted work; doing it here also keeps `loss_val`
    # defined when there are no full batches (num_batches == 0).
    loss_val = loss.eval(session=sess, feed_dict={
        X: X_test,
        t: y_test,
        batch_size: num_validation
    })
    print('%d epoch: %f' %(epoch, loss_val))

0 epoch: 4528.158203
1 epoch: 4509.878906
2 epoch: 4490.870605
3 epoch: 4471.804688
4 epoch: 4452.841309
5 epoch: 4433.900391
6 epoch: 4415.004883
7 epoch: 4396.171387
8 epoch: 4377.408691
9 epoch: 4358.882812
10 epoch: 4340.114746
11 epoch: 4321.684570
12 epoch: 4303.610352
13 epoch: 4285.490234
14 epoch: 4267.546875
15 epoch: 4249.431641
16 epoch: 4231.505859
17 epoch: 4213.898926
18 epoch: 4196.068359
19 epoch: 4178.578613
20 epoch: 4161.514160
21 epoch: 4143.934570
22 epoch: 4126.459961
23 epoch: 4108.950684
24 epoch: 4091.304199
25 epoch: 4074.170898
26 epoch: 4057.105469
27 epoch: 4039.734863
28 epoch: 4022.396240
29 epoch: 4005.145752
30 epoch: 3988.095947
31 epoch: 3971.322998
32 epoch: 3954.280029
33 epoch: 3937.247803
34 epoch: 3920.555664
35 epoch: 3903.817383
36 epoch: 3887.282959
37 epoch: 3870.997070
38 epoch: 3854.786865
39 epoch: 3838.280029
40 epoch: 3821.858887
41 epoch: 3805.545654
42 epoch: 3789.156250
43 epoch: 3772.991211
44 epoch: 3756.421875
45 epoch: 3740.17480

### **Prediction**

In [18]:
# Predict the next integer for every window in the data set. The batch size is
# derived from the data instead of being hard-coded (it was 86, i.e.
# len(int_seq) - SEQUENCE_LENGTH), so the cell keeps working when the sequence
# or the window length changes.
y.eval(session=sess, feed_dict={
    X: input_data,
    batch_size: len(input_data)
})

array([[15.00689 ],
       [16.001541],
       [16.997845],
       [17.99496 ],
       [18.992609],
       [19.990704],
       [20.989212],
       [21.98809 ],
       [22.9873  ],
       [23.986729],
       [24.98624 ],
       [25.985645],
       [26.98471 ],
       [27.98322 ],
       [28.981052],
       [29.978275],
       [30.975204],
       [31.972431],
       [32.97073 ],
       [33.97082 ],
       [34.973103],
       [35.97753 ],
       [36.983814],
       [37.991493],
       [39.000053],
       [40.008984],
       [41.0177  ],
       [42.025578],
       [43.032032],
       [44.036526],
       [45.038715],
       [46.03851 ],
       [47.03623 ],
       [48.03249 ],
       [49.028194],
       [50.02422 ],
       [51.021366],
       [52.020237],
       [53.02125 ],
       [54.024654],
       [55.0305  ],
       [56.03876 ],
       [57.049427],
       [58.06255 ],
       [59.077755],
       [60.093   ],
       [61.103794],
       [62.105885],
       [63.100254],
       [64.093506],
