## Multilayer Perceptron practice (with MNIST hand written dataset)

In [23]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
from IPython.display import Image

Instructions for updating:
non-resource variables are not supported in the long term


In [12]:
Image(url = "https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/simple_mlp_mnist.png", width=500, height=250)

In [2]:
(x_train, y_train), (x_test, y_test)= tf.keras.datasets.mnist.load_data()

In [3]:
print(x_train.shape)
print(x_test.shape)

(60000, 28, 28)
(10000, 28, 28)


#### train data has 60,000 samples
#### test data has 10,000 samples
#### every data is 28 * 28 pixels

### split train data into train and validation data

In [4]:
x_val = x_train[50000:60000]
x_train = x_train[0:50000]

y_val = y_train[50000:60000]
y_train = y_train[0:50000]

In [7]:
print("train data has " + str(x_train.shape[0]) + "samples")
print("every train data is " + str(x_train.shape[1])
     + " * " + str(x_train.shape[2]) + " images")

train data has 50000samples
every train data is 28 * 28 images


In [8]:
print("validation data has " + str(x_val.shape[0]) + "samples")
print("every validation data is " + str(x_val.shape[1])
     + " * " + str(x_train.shape[2]) + " images")

validation data has 10000samples
every validation data is 28 * 28 images


In [9]:
#sample to show gray scale values
print(x_train[0][8])

[  0   0   0   0   0   0   0  18 219 253 253 253 253 253 198 182 247 241
   0   0   0   0   0   0   0   0   0   0]


In [10]:
#sample to show labels for first train data to 10th train data
print(y_train[0:9])

[5 0 4 1 9 2 1 3 1]


### Reshape
#### In order to fully connect all pixels to hidden layer,
#### we will reshape (28 * 28) into (28 x 28,1) shape
#### It means we flatten row x column shape to an array having 28 x 28 (756) items

In [13]:
Image(url="https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/reshape_mnist.png", width=500, height=250)

In [14]:
x_train = x_train.reshape(50000,784)
x_val = x_val.reshape(10000, 784)
x_test = x_test.reshape(10000, 784)

print(x_train.shape)
print(x_test.shape)

(50000, 784)
(10000, 784)


In [15]:
x_train[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

### Normalize the data
#### normalization usually helps faster learning speed, better performance
#### by reducing variance and giving sample range to all input features
#### since MNIST dataset all input has 0 to 255, normalization only helps reducing variances.
#### It turned out normalization is better than standardization for MNIST data with this MLP architecture,
#### I believe this is because relu handles 0 differently on both feed forward and back propagation.

In [16]:
x_train = x_train.astype('float32')
x_val = x_val.astype('float32')
x_test = x_test.astype('float32')

gray_scale = 255
x_train /= gray_scale
x_val/= gray_scale
x_test /= gray_scale

### Label to one hot encoding value

In [17]:
num_classes = 10
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [18]:
y_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.]], dtype=float32)

### Tensorflow MLP Graph

In [20]:
Image(url = "https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/simple_mlp_mnist.png", width=500, height=250)

In [24]:
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

In [25]:
def mlp(x):
    #hidden layer1
    w1 = tf.Variable(tf.random_uniform([784,256]))
    b1 = tf.Variable(tf.zeros([256]))
    h1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    
    #hidden layer2
    w2 = tf.Variable(tf.random_uniform([256,128]))
    b2 = tf.Variable(tf.zeros([128]))
    h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
    
    #output layer
    w3 = tf.Variable(tf.random_uniform([128,10]))
    b3 = tf.Variable(tf.zeros([10]))
    logits = tf.matmul(h2, w3) + b3
    
    return logits

In [26]:
logits = mlp(x)

In [27]:
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y))

In [28]:
train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss_op)

In [29]:
#Initialize
init = tf.global_variables_initializer()

#train hyperparameters
epoch_cnt = 30
batch_size = 1000
iteration = len(x_train) // batch_size

#start training
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epoch_cnt):
        avg_loss = 0.
        start = 0; end = batch_size
        
        for i in range(iteration):
            _, loss = sess.run([train_op, loss_op],
                              feed_dict={x: x_train[start: end], y: y_train[start: end]})
            start += batch_size; end += batch_size
            #Compute average loss
            avg_loss += loss / iteration
        
        #Validation model
        preds = tf.nn.softmax(logits)  #Apply softmax to logits
        correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))
        #Calculation accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        cur_val_acc = accuracy.eval({x: x_val, y: y_val})
        print("epoch: " + str(epoch)+ ", validation accuracy: "
             + str(cur_val_acc) + ', loss: ' + str(avg_loss))
        
    #Test model
    preds = tf.nn.softmax(logits) #Apply softmax to logits
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y,1))
    
    #Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("[Test Accuracy]: ", accuracy.eval({x: x_test, y:y_test}))

epoch: 0, validation accuracy: 0.2411, loss: 8729.012952880857
epoch: 1, validation accuracy: 0.8024, loss: 316.9726511383056
epoch: 2, validation accuracy: 0.8839, loss: 13.898555307388305
epoch: 3, validation accuracy: 0.8905, loss: 8.915013351440432
epoch: 4, validation accuracy: 0.8918, loss: 7.073236160278321
epoch: 5, validation accuracy: 0.886, loss: 5.9495095634460435
epoch: 6, validation accuracy: 0.9012, loss: 5.210494146347043
epoch: 7, validation accuracy: 0.8986, loss: 4.741097044944763
epoch: 8, validation accuracy: 0.9019, loss: 4.8435790872573845
epoch: 9, validation accuracy: 0.9114, loss: 5.493424115180967
epoch: 10, validation accuracy: 0.8852, loss: 4.279828128814699
epoch: 11, validation accuracy: 0.882, loss: 4.192748355865479
epoch: 12, validation accuracy: 0.8986, loss: 5.780694804191589
epoch: 13, validation accuracy: 0.8772, loss: 5.873708848953247
epoch: 14, validation accuracy: 0.9161, loss: 4.3180318069458
epoch: 15, validation accuracy: 0.7365, loss: 26.30

#### reference: https://www.dacon.io/competitions/open/235596/talkboard/400664/