## Further improving the simple net in TensorFlow with Dropout

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras.regularizers import l2

In [2]:
EPOCHS = 200
BATCH_SIZE = 128
VERBOSE = 1
NB_CLASSES = 10 # number of outputs = number of digits
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2
DROPOUT = 0.3

In [3]:
# Loading MNIST dataset
# Labels have one-hot representation
mnist = keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [4]:
RESHAPED = 784
X_train = X_train.reshape(60000, RESHAPED)
X_test = X_test.reshape(10000, RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [5]:
# Normalize inputs within [0, 1]
X_train, X_test = X_train / 255.0, X_test / 255.0
print(X_train.shape)
print(X_test.shape)

(60000, 784)
(10000, 784)


In [6]:
# one-hot representations for labels
Y_train = tf.keras.utils.to_categorical(Y_train, NB_CLASSES)
Y_test = tf.keras.utils.to_categorical(Y_test, NB_CLASSES)

In [7]:
print(Y_train.shape)
print(Y_test.shape)

(60000, 10)
(10000, 10)


In [8]:
print(type(Y_train.shape))

<class 'tuple'>


In [9]:
# Building the model
model = tf.keras.models.Sequential()
# kernel_initializer 파라미터는 가중치의 초기값을 지정할 때 사용한다. 사용가능한 값은 아래와 같다
# random_uniform : -0.05 to 0.05의 uniform dustribution 값으로 초기화
# random_normal : 표준편차 0.05의 정규분포 값으로 초기화
# zero : 0으로 초기화
# 더 많은 정보는 https://www.tensorflow.org/api_docs/python/tf/keras/initializers
model.add(keras.layers.Dense(N_HIDDEN, input_shape = (RESHAPED,), name='dense_layer', activation='relu', kernel_initializer='random_normal', kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01)))
model.add(keras.layers.Dropout(DROPOUT))
model.add(keras.layers.Dense(N_HIDDEN, name='dense_layer_2', activation='relu'))
model.add(keras.layers.Dropout(DROPOUT))
model.add(keras.layers.Dense(NB_CLASSES, name='dense_layer_3', activation='softmax'))

In [10]:
# Summary of the model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_layer (Dense)          (None, 128)               100480    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_layer_2 (Dense)        (None, 128)               16512     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_layer_3 (Dense)        (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Compiling the model
# 사용 가능한 objective function은 아래와 같다
# MSE : mean squared error between predictions and the true values
# binary_crossentropy : 바이너리 레이블 예측에 적합하다
# categorical_crossentropy : 멀티클래스 로그 손실함수이다. 예측값의 분포와 실제 분포를 비교한다. 멀티클래스 레이블 예측에 적합하다. softmax activation과 함께 사용하는 것이 기본선택이다.
# 더 많은 정보는 https://www.tensorflow.org/api_docs/python/tf/keras/losses
# metrics 파라미터는 오직 모델을 평가하는데에 사용할 방법을 설정한다. 모델 학습에는 사용되지 않는다
model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

In [13]:
# Training the model
#model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=VERBOSE, validation_split=VALIDATION_SPLIT)
model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=10, verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

Train on 48000 samples, validate on 12000 samples
Epoch 1/10
  128/48000 [..............................] - ETA: 5:02

InternalError:  Blas GEMM launch failed : a.shape=(128, 784), b.shape=(784, 128), m=128, n=128, k=784
	 [[node sequential/dense_layer/MatMul (defined at <ipython-input-13-07ec93085e4a>:3) ]] [Op:__inference_distributed_function_962]

Function call stack:
distributed_function


In [13]:
# Evaluating the model
test_loss, test_acc = model.evaluate(X_test, Y_test)
print(test_loss)
print(test_acc)

0.9439802231788635
0.9262


In [14]:
model.predict(X_test[0].reshape((1,784)))

array([[1.0022186e-03, 1.8917049e-04, 2.5988149e-03, 4.8188246e-03,
        1.9230421e-04, 3.1679703e-04, 5.5968205e-05, 9.8494798e-01,
        1.2582738e-04, 5.7521351e-03]], dtype=float32)

In [15]:
Y_test[0]

array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.], dtype=float32)

In [16]:
model.predict_classes(X_test[0].reshape((1,784)))

array([7], dtype=int64)

In [17]:
model.predict_proba(X_test[0].reshape((1,784)))

array([[1.0022186e-03, 1.8917049e-04, 2.5988149e-03, 4.8188246e-03,
        1.9230421e-04, 3.1679703e-04, 5.5968205e-05, 9.8494798e-01,
        1.2582738e-04, 5.7521351e-03]], dtype=float32)

#### get layer outputs

In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_layer (Dense)          (None, 128)               100480    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_layer_2 (Dense)        (None, 128)               16512     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_layer_3 (Dense)        (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


In [22]:
for i in model.layers:
    print(i.output)

Tensor("dense_layer/Identity:0", shape=(None, 128), dtype=float32)
Tensor("dropout/Identity:0", shape=(None, 128), dtype=float32)
Tensor("dense_layer_2/Identity:0", shape=(None, 128), dtype=float32)
Tensor("dropout_1/Identity:0", shape=(None, 128), dtype=float32)
Tensor("dense_layer_3/Identity:0", shape=(None, 10), dtype=float32)


In [26]:
layer_output = model.get_layer('dense_layer').output
print(layer_output)

Tensor("dense_layer/Identity:0", shape=(None, 128), dtype=float32)


In [28]:
intermediate_model = tf.keras.models.Model(inputs=model.input, outputs=layer_output)
print(intermediate_model)

<tensorflow.python.keras.engine.training.Model object at 0x000002015B944108>


In [34]:
intermediate_prediction = intermediate_model.predict(X_test[0].reshape((1,784)))
print(len(intermediate_prediction[0]))
print(intermediate_prediction)

128
[[0.         0.         0.         0.24375299 0.         0.
  0.00944293 0.10080329 0.         0.         0.5456904  0.
  0.44569224 0.         0.         0.05823062 0.         0.56302917
  0.18117218 0.         0.         0.         0.         0.6864022
  0.14672112 0.24486373 0.         0.         0.5718766  0.
  0.         0.         0.5674582  0.32475635 0.         0.
  0.25054878 0.27350983 0.         0.         0.39051893 0.
  0.07821328 0.4749813  0.         0.         0.7769445  0.
  0.         0.         0.8559241  0.01234642 0.1517911  0.39506087
  0.         0.         0.         0.         0.         0.
  0.         0.21880376 0.1171114  0.         0.         0.
  0.         0.         0.         0.         0.         0.6183039
  0.06013675 0.48903206 0.         0.         0.2492919  0.
  0.15973702 0.         0.         0.01269192 0.5001807  0.2905659
  0.         0.         0.         0.27305    0.         0.
  0.         0.00930531 0.25227898 0.         0.         0.

In [35]:
layer_output = model.get_layer('dense_layer_3').output
print(layer_output)
intermediate_model = tf.keras.models.Model(inputs=model.input, outputs=layer_output)
print(intermediate_model)
intermediate_prediction = intermediate_model.predict(X_test[0].reshape((1,784)))
print(len(intermediate_prediction[0]))
print(intermediate_prediction)

Tensor("dense_layer_3/Identity:0", shape=(None, 10), dtype=float32)
<tensorflow.python.keras.engine.training.Model object at 0x000002015BE89D48>
10
[[1.0022186e-03 1.8917049e-04 2.5988149e-03 4.8188246e-03 1.9230421e-04
  3.1679703e-04 5.5968205e-05 9.8494798e-01 1.2582738e-04 5.7521351e-03]]


In [36]:
model.predict_proba(X_test[0].reshape((1,784)))

array([[1.0022186e-03, 1.8917049e-04, 2.5988149e-03, 4.8188246e-03,
        1.9230421e-04, 3.1679703e-04, 5.5968205e-05, 9.8494798e-01,
        1.2582738e-04, 5.7521351e-03]], dtype=float32)

## Further improving the simple net in TensorFlow with Dropout (Using subclassing)

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras.regularizers import l2

In [2]:
EPOCHS = 200
BATCH_SIZE = 128
VERBOSE = 1
NB_CLASSES = 10 # number of outputs = number of digits
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2
DROPOUT = 0.3

In [3]:
# Loading MNIST dataset
# Labels have one-hot representation
mnist = keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [4]:
RESHAPED = 784
X_train = X_train.reshape(60000, RESHAPED)
X_test = X_test.reshape(10000, RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [5]:
# Normalize inputs within [0, 1]
X_train, X_test = X_train / 255.0, X_test / 255.0
print(X_train.shape)
print(X_test.shape)

(60000, 784)
(10000, 784)


In [6]:
# one-hot representations for labels
Y_train = tf.keras.utils.to_categorical(Y_train, NB_CLASSES)
Y_test = tf.keras.utils.to_categorical(Y_test, NB_CLASSES)

In [7]:
print(Y_train.shape)
print(Y_test.shape)

(60000, 10)
(10000, 10)


In [8]:
print(type(Y_train.shape))

<class 'tuple'>


In [83]:
class MNISTModel(tf.keras.Model):
    def __init__(self, **kwargs):
        super(MNISTModel, self).__init__(**kwargs)
        #self.input_layer = tf.keras.layers.InputLayer(input_shape=(RESHAPED,))
        self.dense_1 = tf.keras.layers.Dense(N_HIDDEN, input_shape = (RESHAPED,), name='dense_layer', activation='relu', kernel_initializer='random_normal', kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01))
        #self.dense_1 = tf.keras.layers.Dense(N_HIDDEN, name='dense_layer', activation='relu', kernel_initializer='random_normal', kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01))
        self.dropout_1 = keras.layers.Dropout(DROPOUT)
        self.dense_2 = keras.layers.Dense(N_HIDDEN, name='dense_layer_2', activation='relu')
        self.dropout_2 = keras.layers.Dropout(DROPOUT)
        self.dense_3 = keras.layers.Dense(NB_CLASSES, name='dense_layer_3', activation='softmax')

    def call(self, x):
        print("\n(from call)", x, "\n")
        #x = self.input_layer(x)
        x = self.dense_1(x)
        x = self.dropout_1(x)
        x = self.dense_2(x)
        x = self.dropout_2(x)
        x = self.dense_3(x)
        return x

In [84]:
model = MNISTModel()

In [85]:
model.build(input_shape=(BATCH_SIZE, RESHAPED))
#model.call()


(from call) Tensor("Placeholder:0", shape=(128, 784), dtype=float32) 



In [86]:
# Summary of the model
model.summary()

Model: "mnist_model_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_layer (Dense)          multiple                  100480    
_________________________________________________________________
dropout_18 (Dropout)         multiple                  0         
_________________________________________________________________
dense_layer_2 (Dense)        multiple                  16512     
_________________________________________________________________
dropout_19 (Dropout)         multiple                  0         
_________________________________________________________________
dense_layer_3 (Dense)        multiple                  1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


In [87]:
# Compiling the model
# 사용 가능한 objective function은 아래와 같다
# MSE : mean squared error between predictions and the true values
# binary_crossentropy : 바이너리 레이블 예측에 적합하다
# categorical_crossentropy : 멀티클래스 로그 손실함수이다. 예측값의 분포와 실제 분포를 비교한다. 멀티클래스 레이블 예측에 적합하다. softmax activation과 함께 사용하는 것이 기본선택이다.
# 더 많은 정보는 https://www.tensorflow.org/api_docs/python/tf/keras/losses
# metrics 파라미터는 오직 모델을 평가하는데에 사용할 방법을 설정한다. 모델 학습에는 사용되지 않는다
model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

In [88]:
# Training the model
#model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=VERBOSE, validation_split=VALIDATION_SPLIT)
model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=1, verbose=VERBOSE, validation_split=VALIDATION_SPLIT)


(from call) Tensor("input_1_8:0", shape=(None, 784), dtype=float32) 

Train on 48000 samples, validate on 12000 samples

(from call) Tensor("IteratorGetNext:0", shape=(128, 784), dtype=float32) 


(from call) Tensor("IteratorGetNext:0", shape=(128, 784), dtype=float32) 

(from call) Tensor("IteratorGetNext:0", shape=(None, 784), dtype=float32) 



<tensorflow.python.keras.callbacks.History at 0x134f4346588>

In [89]:
# Evaluating the model
test_loss, test_acc = model.evaluate(X_test, Y_test)
print(test_loss)
print(test_acc)

3.435132928466797
0.7904


In [90]:
model.predict(X_test[0].reshape((1,784)))


(from call) Tensor("IteratorGetNext:0", shape=(None, 784), dtype=float32) 



array([[0.04691237, 0.05356425, 0.0387825 , 0.03331324, 0.0776775 ,
        0.06659386, 0.02509583, 0.46809772, 0.05975781, 0.13020496]],
      dtype=float32)

In [76]:
Y_test[0]

array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.], dtype=float32)

In [77]:
#model.predict_classes(X_test[0].reshape((1,784)))
np.argmax(model.predict(X_test[0].reshape((1,784))), axis=1)

array([7], dtype=int64)

In [78]:
#model.predict_proba(X_test[0].reshape((1,784)))
model.predict(X_test[0].reshape((1,784)))

array([[0.01732846, 0.04090693, 0.02851238, 0.05173352, 0.05279667,
        0.05809215, 0.03244508, 0.5316876 , 0.03964854, 0.14684866]],
      dtype=float32)

#### get layer outputs

In [79]:
model.summary()

Model: "mnist_model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_layer (Dense)          multiple                  100480    
_________________________________________________________________
dropout_14 (Dropout)         multiple                  0         
_________________________________________________________________
dense_layer_2 (Dense)        multiple                  16512     
_________________________________________________________________
dropout_15 (Dropout)         multiple                  0         
_________________________________________________________________
dense_layer_3 (Dense)        multiple                  1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


In [80]:
output_model = MNISTModel()
inputs = tf.keras.Input(shape=(RESHAPED,))
output_model.call(inputs)
#output_model.build(input_shape=(BATCH_SIZE, RESHAPED))


(from call) Tensor("input_8:0", shape=(None, 784), dtype=float32) 



<tf.Tensor 'dense_layer_3/Identity:0' shape=(None, 10) dtype=float32>

In [81]:
output_model.input

AttributeError: Layer mnist_model_8 is not connected, no input to return.

In [82]:
for i in output_model.layers:
    print(i.output)

Tensor("dense_layer/Identity:0", shape=(None, 128), dtype=float32)
Tensor("dropout_16/Identity:0", shape=(None, 128), dtype=float32)
Tensor("dense_layer_2/Identity:0", shape=(None, 128), dtype=float32)
Tensor("dropout_17/Identity:0", shape=(None, 128), dtype=float32)
Tensor("dense_layer_3/Identity:0", shape=(None, 10), dtype=float32)


In [53]:
model.input

AttributeError: Layer mnist_model_5 is not connected, no input to return.

In [None]:
layer_output = model_output.get_layer('dense_layer').output
print(layer_output)

In [146]:
inputs = tf.keras.Input(shape=(RESHAPED,))
model.call(inputs)

<tf.Tensor 'dense_layer_3_6/Identity:0' shape=(None, 10) dtype=float32>

In [26]:
#intermediate_model = tf.keras.models.Model(inputs=model.input, outputs=model.dense_1)
intermediate_model = tf.keras.models.Model(inputs=tf.keras.Input(shape=(RESHAPED,)), outputs=model.dense_1)
print(intermediate_model)

AttributeError: 'Dense' object has no attribute 'op'

In [98]:
intermediate_model = tf.keras.models.Model(inputs=model.input, outputs=layer_output)
print(intermediate_model)

AttributeError: Layer mnist_model_10 is not connected, no input to return.

In [34]:
intermediate_prediction = intermediate_model.predict(X_test[0].reshape((1,784)))
print(len(intermediate_prediction[0]))
print(intermediate_prediction)

128
[[0.         0.         0.         0.24375299 0.         0.
  0.00944293 0.10080329 0.         0.         0.5456904  0.
  0.44569224 0.         0.         0.05823062 0.         0.56302917
  0.18117218 0.         0.         0.         0.         0.6864022
  0.14672112 0.24486373 0.         0.         0.5718766  0.
  0.         0.         0.5674582  0.32475635 0.         0.
  0.25054878 0.27350983 0.         0.         0.39051893 0.
  0.07821328 0.4749813  0.         0.         0.7769445  0.
  0.         0.         0.8559241  0.01234642 0.1517911  0.39506087
  0.         0.         0.         0.         0.         0.
  0.         0.21880376 0.1171114  0.         0.         0.
  0.         0.         0.         0.         0.         0.6183039
  0.06013675 0.48903206 0.         0.         0.2492919  0.
  0.15973702 0.         0.         0.01269192 0.5001807  0.2905659
  0.         0.         0.         0.27305    0.         0.
  0.         0.00930531 0.25227898 0.         0.         0.

In [35]:
layer_output = model.get_layer('dense_layer_3').output
print(layer_output)
intermediate_model = tf.keras.models.Model(inputs=model.input, outputs=layer_output)
print(intermediate_model)
intermediate_prediction = intermediate_model.predict(X_test[0].reshape((1,784)))
print(len(intermediate_prediction[0]))
print(intermediate_prediction)

Tensor("dense_layer_3/Identity:0", shape=(None, 10), dtype=float32)
<tensorflow.python.keras.engine.training.Model object at 0x000002015BE89D48>
10
[[1.0022186e-03 1.8917049e-04 2.5988149e-03 4.8188246e-03 1.9230421e-04
  3.1679703e-04 5.5968205e-05 9.8494798e-01 1.2582738e-04 5.7521351e-03]]


In [36]:
model.predict_proba(X_test[0].reshape((1,784)))

array([[1.0022186e-03, 1.8917049e-04, 2.5988149e-03, 4.8188246e-03,
        1.9230421e-04, 3.1679703e-04, 5.5968205e-05, 9.8494798e-01,
        1.2582738e-04, 5.7521351e-03]], dtype=float32)