# Part A

In [3]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = keras.Sequential(
  [
    keras.Input(shape=input_shape),
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation="softmax"),
  ]
)
model.summary()

# TRAIN THE MODEL
batch_size = 128
epochs = 2
np.random.seed(0)
model.compile(loss="categorical_crossentropy", optimizer="adam", 
metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, 
validation_split=0.1)

# EVALUATE THE MODEL
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 13, 13, 32)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 5, 5, 64)         0         
 2D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 1600)              0         
                                               

# Part B

In [5]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = keras.Sequential(
  [
    keras.Input(shape=input_shape),
    # Create a convolution neural network (with 30 channels) with relu activation function and with kernel of size (5,5)
    layers.Conv2D(30, kernel_size=(5, 5), activation="relu"),
    # MaxPooling for 2D with a pool size of (2,2)
    layers.MaxPooling2D(pool_size=(2, 2)),
    # Create a convolution neural network (with 15 channels) with relu activation function and with kernel of size (3,3)
    layers.Conv2D(15, kernel_size=(3, 3), activation="relu"),
    # MaxPooling for 2D with a pool size of (2,2)
    layers.MaxPooling2D(pool_size=(2, 2)),
    # Dropout layer with a probability of 20%
    layers.Dropout(0.2),
    # Flatten layer
    layers.Flatten(),
    # Densely connected neural network with 128 output neurons and with relu activation function
    layers.Dense(128, activation="relu"),
    # Densely connected neural network with 50 output neurons and with relu activation function
    layers.Dense(50,activation='relu'),
    # Densely connected neural network with 10 output neurons and with softmax activation function
    layers.Dense(10, activation='softmax')
  ]
)
model.summary()

# TRAIN THE MODEL
batch_size = 128
epochs = 2
np.random.seed(0)
model.compile(loss="categorical_crossentropy", optimizer="adam", 
metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, 
validation_split=0.1)

# EVALUATE THE MODEL
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 24, 24, 30)        780       
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 12, 12, 30)       0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 10, 10, 15)        4065      
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 5, 5, 15)         0         
 2D)                                                             
                                                                 
 dropout_4 (Dropout)         (None, 5, 5, 15)          0         
                                               

Model A:
  Test accuracy: 0.9811000227928162

Model B:
  Test accuracy: 0.9837999939918518

Model B gives better accuracy. This is a combination of Model B having: 

*   an overall higher quantity of layer
*   a smaller rate in the dropout layer (0.2 vs 0.5)
*   multiple dense layers directly before the output
*   more trainable parameters
