In [11]:
import tensorflow.keras as keras
import numpy as np
from functools import partial

In [47]:
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.mnist.load_data()

In [48]:
x_train_full = x_train_full / 255.
x_test = x_test/255.

In [49]:
x_train, x_valid = x_train_full[:-5000], x_train_full[-5000:]
y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]

In [50]:
x_train = x_train[..., np.newaxis]
x_valid = x_valid[..., np.newaxis]
x_test = x_test[..., np.newaxis]

In [12]:
DefaultConv2D = partial(keras.layers.Conv2D, kernel_size=3,
                        padding='same', activation='relu', kernel_initializer='he_normal')

In [16]:
model = keras.Sequential([
    DefaultConv2D(filters=64, input_shape=[28, 28, 1]),
    DefaultConv2D(filters=64, input_shape=[28, 28, 1]),
    keras.layers.MaxPool2D(),
    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
    keras.layers.MaxPool2D(),
    keras.layers.Flatten(),
    keras.layers.Dense(units=120, activation='relu', kernel_initializer='he_normal'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(units=64, activation='relu', kernel_initializer='he_normal'),
    keras.layers.Dropout(0.7),
    keras.layers.Dense(10, activation='softmax')
])

In [17]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

In [18]:
history = model.fit(x_train, y_train, epochs=5, validation_data=(x_valid, y_valid))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Things to be done to the model
The following model uses 2 convolutional layers, followed by 1 pooling layer, then dropout 25%, then a dense layer, another dropout layer but with 50% dropout, and finally the output layer. It reaches about 99.2% accuracy on the test set. This places this model roughly in the top 20% in the MNIST Kaggle competition (if we ignore the models with an accuracy greater than 99.79% which were most likely trained on the test set, as explained by Chris Deotte in this post). Can you do better? To reach 99.5 to 99.7% accuracy on the test set, you need to add image augmentation, batch norm, use a learning schedule such as 1-cycle, and possibly create an ensemble.

In [20]:
model_second_gen = keras.Sequential([
    DefaultConv2D(filters=32),
    DefaultConv2D(filters=64),
    keras.layers.MaxPool2D(),
    keras.layers.Flatten(),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(128, activation='relu', kernel_initializer='he_normal'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation='relu')
])

In [21]:
model_second_gen.compile(loss='sparse_categorical_crossentropy', optimizer='nadam', metrics='accuracy')

In [23]:
history = model_second_gen.fit(x_train, y_train, epochs = 10, validation_data=(x_valid, y_valid))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [25]:
model_second_gen.evaluate(x_test, y_test)



[2.30259108543396, 0.09799999743700027]

In [26]:
third_model = keras.Sequential([
    DefaultConv2D(filters=32, kernel_size=7),
    DefaultConv2D(filters=32, kernel_size=7),
    keras.layers.MaxPool2D(),
    DefaultConv2D(filters=64),
    DefaultConv2D(filters=64),
    keras.layers.MaxPool2D(),
    keras.layers.Flatten(),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(128, kernel_initializer='he_normal'),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dropout(0.5),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation='softmax')
    
])

In [27]:
third_model.compile(loss='sparse_categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])

Using the resnet model 

In [53]:
x_train = x_train.repeat(3, axis=-1)

In [55]:
x_valid = x_valid.repeat(3, axis=-1)

In [56]:
x_test = x_test.repeat(3, axis=-1)

In [57]:
x_train = keras.applications.resnet50.preprocess_input(x_train)
x_valid = keras.applications.resnet50.preprocess_input(x_valid)

In [59]:
x_test = keras.applications.resnet50.preprocess_input(x_test)

In [60]:
base_model = keras.applications.resnet50.ResNet50(weights='imagenet', include_top=False)
avg = keras.layers.GlobalAvgPool2D()(base_model.output)
output = keras.layers.Dense(10, activation='softmax')(avg)
fourth_model = keras.Model(inputs=base_model.input, outputs=output)

In [61]:
for layer in base_model.layers:
    layer.trainable = False

In [62]:
optimizer=keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
fourth_model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [63]:
history = fourth_model.fit(x_train, y_train, epochs=3, validation_data=(x_valid, y_valid))

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [64]:
history = fourth_model.fit(x_train, y_train, epochs=5, validation_data=(x_valid, y_valid))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [65]:
history = fourth_model.fit(x_train, y_train, epochs=20, validation_data=(x_valid, y_valid))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20

KeyboardInterrupt: 