In [None]:
import tensorflow as tf
from matplotlib import pyplot as plt

# Make grayscale the default colormap for image plots. The rcParam is set
# directly instead of calling plt.gray(): that helper looks up the "current
# image" through the current figure, which creates an empty figure that the
# notebook then displays under this cell.
plt.rcParams['image.cmap'] = 'gray'

In [None]:
# Load the MNIST train/test splits (images and labels) via the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data(path="mnist.npz")

In [None]:
# One-hot encode the labels over 10 classes as float32, the target format used
# with the 'categorical_crossentropy' loss the models are compiled with.
y_train_onehot = tf.one_hot(y_train, depth=10, dtype=tf.float32)
y_test_onehot = tf.one_hot(y_test, depth=10, dtype=tf.float32)

In [None]:
# Convert the images to float32 and divide by 255 (presumably mapping 8-bit
# pixel values into [0, 1] - confirm against the dataset), then append a
# trailing channel axis to match the models' (28, 28, 1) input shape.
# NOTE: x_train / x_test are overwritten, so re-running this cell would rescale
# and re-expand the already processed tensors.
x_train = tf.cast(x_train, tf.float32) / 255.0
x_train = x_train[..., tf.newaxis]

x_test = tf.cast(x_test, tf.float32) / 255.0
x_test = x_test[..., tf.newaxis]

In [None]:
# Wrap each (images, one-hot labels) pair in a tf.data.Dataset that yields one
# example per element.
train_dataset, test_dataset = (
    tf.data.Dataset.from_tensor_slices(pair)
    for pair in ((x_train, y_train_onehot), (x_test, y_test_onehot))
)

In [None]:
# Shuffle with a buffer as large as each split. The training data is reshuffled
# on every pass; the test data is shuffled once and keeps that order afterwards.
# NOTE: the datasets are reassigned, so re-running this cell stacks another
# shuffle on top of the previous one.
train_dataset = train_dataset.shuffle(
    buffer_size=x_train.shape[0],
    reshuffle_each_iteration=True,
)
test_dataset = test_dataset.shuffle(
    buffer_size=x_test.shape[0],
    reshuffle_each_iteration=False,
)

In [None]:
# Group both splits into mini-batches of 32 examples.
# NOTE: the datasets are reassigned, so re-running this cell batches the
# already batched datasets again.
train_dataset, test_dataset = (
    dataset.batch(32) for dataset in (train_dataset, test_dataset)
)

In [None]:
# Sanity check: pull a single training batch, print the image/label batch
# shapes and display the first image of the batch.
for images, labels in train_dataset.take(1):
  print(images.shape, labels.shape)

  first_image = images[0, :, :, 0]  # drop the channel axis for imshow
  plt.imshow(first_image)
  plt.show()

In [None]:
def plot_history(h):
  """Plot training vs. validation curves from a Keras training history.

  Draws two separate figures, first accuracy and then loss, each with the
  training curve and the validation curve plotted against the epoch index.

  Args:
    h: Object whose ``history`` mapping holds the per-epoch lists
      'accuracy', 'val_accuracy', 'loss' and 'val_loss' (e.g. the value
      returned by ``Model.fit`` when validation data and an accuracy metric
      are used).
  """
  # (training key, validation key, figure title) for each figure, in order.
  curves = (
      ('accuracy', 'val_accuracy', 'model accuracy'),
      ('loss', 'val_loss', 'model loss'),
  )
  for train_key, val_key, title in curves:
    plt.plot(h.history[train_key])
    plt.plot(h.history[val_key])
    plt.title(title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [None]:
# CNN 1: two stride-2 convolution stages (conv -> [dropout] -> batch norm ->
# ReLU), flattened into a 10-way softmax classifier.
input_im = tf.keras.Input(shape=(28, 28, 1))

# Options shared by both convolutions; the activation is applied separately,
# after batch normalization.
conv_options = dict(padding='same', activation=None, kernel_initializer='he_uniform')

cnn_1_layers = [
    tf.keras.layers.Conv2D(32, 5, strides=2, **conv_options),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(64, 3, strides=2, **conv_options),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    # Flatten feeds the classifier here (GlobalAveragePooling2D is the alternative).
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_uniform'),
]

# Chain the layers in order, starting from the input tensor.
x = input_im
for layer in cnn_1_layers:
  x = layer(x)

cnn_1_model_adam = tf.keras.Model(input_im, x)

cnn_1_model_adam.summary()

In [None]:
# Adam optimizer with its hyperparameters spelled out explicitly.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07
)

# `metrics` is passed as a list: Keras documents this argument as a list (or
# dict) of metrics, and newer Keras releases reject a bare string.
cnn_1_model_adam.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'])

# Train for 6 epochs and keep the history for plotting.
# NOTE(review): the test split doubles as validation data here, so the
# 'val_*' curves are measured on the test set.
h_cnn_1_model_adam = cnn_1_model_adam.fit(train_dataset, validation_data=test_dataset, epochs=6)

In [None]:
# Show the train/validation accuracy and loss curves recorded while fitting cnn_1_model_adam.
plot_history(h_cnn_1_model_adam)

In [None]:
# CNN 2: stride-1 convolution stages, each downsampled by an AveragePooling2D
# layer instead of a strided convolution, flattened into a 10-way softmax
# classifier.
input_im = tf.keras.Input(shape=(28, 28, 1))

# Options shared by both convolutions; the activation is applied separately,
# after batch normalization.
conv_options = dict(strides=1, padding='same', activation=None, kernel_initializer='he_uniform')

cnn_2_layers = [
    # Stage 1
    tf.keras.layers.Conv2D(32, 5, **conv_options),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.AveragePooling2D(),
    # Stage 2
    tf.keras.layers.Conv2D(64, 3, **conv_options),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.AveragePooling2D(),
    # Classifier head: Flatten (GlobalAveragePooling2D is the alternative).
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_uniform'),
]

# Chain the layers in order, starting from the input tensor.
x = input_im
for layer in cnn_2_layers:
  x = layer(x)

cnn_2_model_adam = tf.keras.Model(input_im, x)

cnn_2_model_adam.summary()

In [None]:
# Fresh Adam optimizer for this model, hyperparameters spelled out explicitly.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07
)

# `metrics` is passed as a list: Keras documents this argument as a list (or
# dict) of metrics, and newer Keras releases reject a bare string.
cnn_2_model_adam.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'])

# Train for 6 epochs and keep the history for plotting.
# NOTE(review): the test split doubles as validation data here, so the
# 'val_*' curves are measured on the test set.
h_cnn_2_model_adam = cnn_2_model_adam.fit(train_dataset, validation_data=test_dataset, epochs=6)

In [None]:
# Show the train/validation accuracy and loss curves recorded while fitting cnn_2_model_adam.
plot_history(h_cnn_2_model_adam)

Reference: state-of-the-art results for image classification on MNIST: https://paperswithcode.com/sota/image-classification-on-mnist

In [None]:
# CNN 3: same convolution / average-pooling stages as the Flatten-based
# variant, but the feature maps are reduced with GlobalAveragePooling2D before
# the 10-way softmax classifier.
input_im = tf.keras.Input(shape=(28, 28, 1))

# Options shared by both convolutions; the activation is applied separately,
# after batch normalization.
conv_options = dict(strides=1, padding='same', activation=None, kernel_initializer='he_uniform')

cnn_3_layers = [
    # Stage 1
    tf.keras.layers.Conv2D(32, 5, **conv_options),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.AveragePooling2D(),
    # Stage 2
    tf.keras.layers.Conv2D(64, 3, **conv_options),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.AveragePooling2D(),
    # Classifier head: global average pooling (Flatten is the alternative).
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_uniform'),
]

# Chain the layers in order, starting from the input tensor.
x = input_im
for layer in cnn_3_layers:
  x = layer(x)

cnn_3_model_adam = tf.keras.Model(input_im, x)

cnn_3_model_adam.summary()

In [None]:
# Fresh Adam optimizer for this model, hyperparameters spelled out explicitly.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07
)

# `metrics` is passed as a list: Keras documents this argument as a list (or
# dict) of metrics, and newer Keras releases reject a bare string.
cnn_3_model_adam.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'])

# Train for 6 epochs and keep the history for plotting.
# NOTE(review): the test split doubles as validation data here, so the
# 'val_*' curves are measured on the test set.
h_cnn_3_model_adam = cnn_3_model_adam.fit(train_dataset, validation_data=test_dataset, epochs=6)

In [None]:
# Show the train/validation accuracy and loss curves recorded while fitting cnn_3_model_adam.
plot_history(h_cnn_3_model_adam)