In [38]:
# Keras import
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization
from keras.wrappers.scikit_learn import KerasClassifier

# Sklearn import
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

import time

In [2]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
num_classes = 10

print("X_train shape: {}".format(X_train.shape))
print("X_train type: {}".format(type(X_train)))
print("y_train shape: {}".format(y_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("y_test shape: {}".format(y_test.shape))

X_train shape: (60000, 28, 28)
X_train type: <class 'numpy.ndarray'>
y_train shape: (60000,)
X_test shape: (10000, 28, 28)
y_test shape: (10000,)


In [3]:
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))

X_train shape: (60000, 784)
X_test shape: (10000, 784)


In [4]:
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

print("y_train shape: {}".format(y_train.shape))
print("y_test shape: {}".format(y_test.shape))
print(y_train[0])

y_train shape: (60000, 10)
y_test shape: (10000, 10)
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


In [18]:
#Two-Layer Network
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(784,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

model.summary()
model.compile(
            loss=keras.losses.categorical_crossentropy,
            optimizer='adam',
            metrics=['accuracy']
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_19 (Dense)             (None, 64)                50240     
_________________________________________________________________
dense_20 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_21 (Dense)             (None, 10)                650       
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


In [19]:
start_time = time.time()
model.fit(
    X_train, 
    y_train,
    epochs=10,
    batch_size=100,
    verbose=1
)
print("End Time: ", time.time() - start_time)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
End Time:  13.654205083847046


## Without Batch Normalization

In [20]:
model = Sequential()

#input layer
model.add(Dense(64, input_shape=(784,)))
model.add(Activation('relu'))

#Hidden layer
model.add(Dense(64))
model.add(Activation('relu'))

#output layer
model.add(Dense(num_classes, activation='softmax'))

model.summary()
model.compile(
            loss=keras.losses.categorical_crossentropy,
            optimizer='adam',
            metrics=['accuracy']
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_22 (Dense)             (None, 64)                50240     
_________________________________________________________________
activation_9 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_23 (Dense)             (None, 64)                4160      
_________________________________________________________________
activation_10 (Activation)   (None, 64)                0         
_________________________________________________________________
dense_24 (Dense)             (None, 10)                650       
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


In [21]:
start_time = time.time()
model.fit(
    X_train, 
    y_train,
    epochs=10,
    batch_size=100,
    verbose=1
)
print("End Time: ", time.time() - start_time)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
End Time:  13.867414236068726


## Added Batch Normalization

In [22]:
model = Sequential()

#input layer
model.add(Dense(64, input_shape=(784,)))
model.add(BatchNormalization())
model.add(Activation('relu'))

#Hidden layer
model.add(Dense(64))
model.add(BatchNormalization())
model.add(Activation('relu'))

#output layer
model.add(Dense(num_classes, activation='softmax'))

model.summary()
model.compile(
            loss=keras.losses.categorical_crossentropy,
            optimizer='adam',
            metrics=['accuracy']
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_25 (Dense)             (None, 64)                50240     
_________________________________________________________________
batch_normalization_5 (Batch (None, 64)                256       
_________________________________________________________________
activation_11 (Activation)   (None, 64)                0         
_________________________________________________________________
dense_26 (Dense)             (None, 64)                4160      
_________________________________________________________________
batch_normalization_6 (Batch (None, 64)                256       
_________________________________________________________________
activation_12 (Activation)   (None, 64)                0         
_________________________________________________________________
dense_27 (Dense)             (None, 10)                650       
Total para

In [23]:
start_time = time.time()
model.fit(
    X_train, 
    y_train,
    epochs=10,
    batch_size=100,
    verbose=1
)
print("End Time: ", time.time() - start_time)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
End Time:  21.03902792930603


## initializer 

In [39]:
def create_model(init='zeros'):
    model = Sequential()

    #input layer
    model.add(Dense(64, kernel_initializer=init, input_shape=(784,)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    #Hidden layer
    model.add(Dense(64, kernel_initializer=init))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    #output layer
    model.add(Dense(num_classes, activation='softmax'))

    model.summary()
    model.compile(
                loss=keras.losses.categorical_crossentropy,
                optimizer='adam',
                metrics=['accuracy']
    )
    
    return model

In [40]:
param_grid = {
    'init': ['zeros', 'ones', 'glorot_uniform', 'normal', 'uniform'],
}

In [44]:
start_time = time.time()
model = KerasClassifier(build_fn=create_model, epochs=10, verbose=1)

grid = GridSearchCV(estimator=model, cv=2, param_grid=param_grid)
grid_result = grid.fit(X_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
print("End Time: ", time.time() - start_time)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_40 (Dense)             (None, 64)                50240     
_________________________________________________________________
batch_normalization_14 (Batc (None, 64)                256       
_________________________________________________________________
activation_20 (Activation)   (None, 64)                0         
_________________________________________________________________
dense_41 (Dense)             (None, 64)                4160      
_________________________________________________________________
batch_normalization_15 (Batc (None, 64)                256       
_________________________________________________________________
activation_21 (Activation)   (None, 64)                0         
_________________________________________________________________
dense_42 (Dense)             (None, 10)                650       
Total para