##### Further improving the simple net in TensorFlow with Dropout

In [19]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras.regularizers import l2

In [20]:
# --- Training schedule (all passed to model.fit) ---
# NOTE(review): the saved training log in this notebook reads "Epoch n/50",
# so the recorded outputs were produced with a different EPOCHS value;
# re-run the notebook to refresh them.
EPOCHS = 200
BATCH_SIZE = 128
VALIDATION_SPLIT = 0.2  # fraction of the training data used for validation
VERBOSE = 1

# --- Network shape ---
NB_CLASSES = 10  # number of outputs = number of digits
N_HIDDEN = 128   # units in each hidden Dense layer
DROPOUT = 0.3    # rate given to every Dropout layer

In [21]:
# Loading MNIST dataset
# load_data() is unpacked into (images, labels) pairs for the train and test
# splits. The labels are NOT one-hot yet at this point: they are converted
# with to_categorical in a later cell.
mnist = keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [22]:
RESHAPED = 784  # length of one flattened image vector (784 = 28 * 28)

# Flatten every image into a RESHAPED-long row and cast to float32.
# The sample count is taken from the array itself instead of being hard-coded
# (previously 60000 / 10000), so the cell keeps working on a subset or on a
# split of a different size, and still raises if the pixel count per image
# does not match RESHAPED.
X_train = X_train.reshape(X_train.shape[0], RESHAPED).astype('float32')
X_test = X_test.reshape(X_test.shape[0], RESHAPED).astype('float32')

In [23]:
# Rescale the inputs into [0, 1] by dividing by 255.
# NOTE: this cell is not idempotent - executing it a second time divides the
# already-scaled data by 255 again. Re-run from the load cell instead.
X_train = X_train / 255.0
X_test = X_test / 255.0

# Show the shape of each split, one per line.
print(X_train.shape, X_test.shape, sep='\n')

(60000, 784)
(10000, 784)


In [24]:
# Replace each label vector with its one-hot matrix (NB_CLASSES columns).
# NOTE: run once per load - the cell overwrites Y_train / Y_test with the
# encoded version, so executing it again would re-encode encoded labels.
Y_train = keras.utils.to_categorical(Y_train, num_classes=NB_CLASSES)
Y_test = keras.utils.to_categorical(Y_test, num_classes=NB_CLASSES)

In [25]:
# Shapes of the encoded label matrices, one per line.
print(Y_train.shape, Y_test.shape, sep='\n')

(60000, 10)
(10000, 10)


In [26]:
# Building the model
#
# The kernel_initializer argument chooses the initial values of a layer's
# weights. Some of the accepted values:
#   random_uniform : drawn from a uniform distribution over -0.05 to 0.05
#   random_normal  : drawn from a normal distribution with standard deviation 0.05
#   zero           : initialised to 0
# More information: https://www.tensorflow.org/api_docs/python/tf/keras/initializers
#
# Architecture: two ReLU hidden layers of N_HIDDEN units, each followed by
# Dropout(DROPOUT), then a softmax layer with NB_CLASSES outputs. Only the
# first hidden layer carries the L2 kernel/activity regularizers.
model = tf.keras.models.Sequential([
    keras.layers.Dense(
        N_HIDDEN,
        input_shape=(RESHAPED,),
        name='dense_layer',
        activation='relu',
        kernel_initializer='random_normal',
        kernel_regularizer=l2(0.01),
        activity_regularizer=l2(0.01),
    ),
    keras.layers.Dropout(DROPOUT),
    keras.layers.Dense(N_HIDDEN, name='dense_layer_2', activation='relu'),
    keras.layers.Dropout(DROPOUT),
    keras.layers.Dense(NB_CLASSES, name='dense_layer_3', activation='softmax'),
])

In [27]:
# Summary of the model: prints each layer with its output shape and
# parameter count, followed by the total / trainable parameter totals.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_layer (Dense)          (None, 128)               100480    
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_layer_2 (Dense)        (None, 128)               16512     
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_layer_3 (Dense)        (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Compiling the model
#
# Some of the available objective (loss) functions:
#   MSE                      : mean squared error between predictions and the true values
#   binary_crossentropy      : suited to predicting binary labels
#   categorical_crossentropy : multi-class log loss; it compares the predicted
#                              distribution with the true distribution. Suited to
#                              multi-class label prediction and the default choice
#                              together with a softmax activation.
# More information: https://www.tensorflow.org/api_docs/python/tf/keras/losses
#
# The metrics argument only selects how the model is evaluated; it is not
# used for training the model.
model.compile(
    loss='categorical_crossentropy',
    optimizer='SGD',
    metrics=['accuracy'],
)

In [29]:
# Training the model
# fit() returns a History object. Binding it to `history` keeps the per-epoch
# record available for later inspection, and stops the cell from ending in
# a bare "<...History at 0x...>" repr as its only output.
history = model.fit(
    X_train,
    Y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=VALIDATION_SPLIT,  # part of the training data is held out for validation
    verbose=VERBOSE,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x19022984a88>

In [30]:
# Evaluating the model on the test split.
# evaluate() is unpacked into the loss and the accuracy metric set in compile().
test_loss, test_acc = model.evaluate(X_test, Y_test)
print(test_loss, test_acc, sep='\n')

0.2080107888698578
0.9628


In [40]:
# Class probabilities for the first test image. Slicing with [:1] keeps the
# leading batch axis, so no reshape with a hard-coded feature count is needed.
model.predict(X_test[:1])

array([[8.6493987e-05, 4.7528636e-05, 4.0056676e-04, 7.3539105e-04,
        2.3515262e-05, 2.0036303e-05, 3.6781494e-06, 9.9791437e-01,
        1.8494517e-05, 7.4992259e-04]], dtype=float32)

In [41]:
# One-hot ground-truth label of the first test image, for comparison with
# the predicted probabilities.
Y_test[0]

array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.], dtype=float32)

In [42]:
# Predicted class index for the first test image.
# Sequential.predict_classes() is deprecated and was removed in TensorFlow 2.6;
# for a softmax output the equivalent is the argmax over the class axis.
np.argmax(model.predict(X_test[:1]), axis=-1)

array([7], dtype=int64)

In [43]:
# Class probabilities for the first test image.
# Sequential.predict_proba() is deprecated and was removed in TensorFlow 2.6;
# predict() on a softmax output layer already yields the probabilities.
model.predict(X_test[:1])

array([[8.6493987e-05, 4.7528636e-05, 4.0056676e-04, 7.3539105e-04,
        2.3515262e-05, 2.0036303e-05, 3.6781494e-06, 9.9791437e-01,
        1.8494517e-05, 7.4992259e-04]], dtype=float32)