In [None]:
import numpy as np
import gzip
import numpy as np
import random
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

def load_mnist(path, kind, subset=None):
    """Load a gzip-compressed MNIST-format (IDX) image/label pair from disk.

    Reads ``<kind>-labels-idx1-ubyte.gz`` and ``<kind>-images-idx3-ubyte.gz``
    from ``path``. The 8-byte label header and 16-byte image header are
    skipped, and every image is returned as a flat row of 784 bytes.

    Args:
        path: Directory that contains the two ``.gz`` files.
        kind: File-name prefix of the split, e.g. ``'train'`` or ``'t10k'``.
        subset: Optional number of samples to draw, without replacement, for
            each class label 0-9. The class-balanced selection is shuffled
            before it is returned. ``None`` (default) returns every sample in
            file order.

    Returns:
        A tuple ``(images, labels)`` of ``uint8`` arrays with shapes
        ``(n, 784)`` and ``(n,)``.

    Raises:
        ValueError: If ``subset`` is negative or exceeds the number of
            samples available for some class.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)
    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8, offset=16).reshape(len(labels), 784)

    if subset is not None:
        if subset < 0:
            raise ValueError('subset must be non-negative, got %r' % (subset,))

        # Draw the same number of samples from every class (labels 0-9).
        selected = []
        for label in range(10):
            indices = np.where(labels == label)[0]
            if len(indices) < subset:
                raise ValueError(
                    'class %d has only %d samples, cannot draw subset=%d'
                    % (label, len(indices), subset)
                )
            selected.append(np.random.choice(indices, subset, replace=False))
        selected = np.concatenate(selected, axis=0)
        images = images[selected]
        labels = labels[selected]

        # Shuffle a list of positions rather than a list of (image, label)
        # tuples: random.shuffle yields the same permutation for a given seed,
        # but no per-sample Python objects are created and an empty selection
        # (subset=0) no longer fails while unzipping.
        order = list(range(len(labels)))
        random.shuffle(order)
        order = np.asarray(order, dtype=np.intp)
        images = images[order]
        labels = labels[order]

    # np.array copies, so callers get writable arrays even on the
    # np.frombuffer-backed (read-only) full-dataset path.
    return np.array(images), np.array(labels)

# Data preprocessing: load both splits, scale pixels to [0, 1] and one-hot
# encode the labels.
(X_train, y_train), (X_test, y_test) = (
    load_mnist('./data/', kind=split) for split in ('train', 't10k')
)

# Flat 784-byte rows -> (n, 28, 28, 1) float32 images with values in [0, 1].
X_train = X_train.reshape(-1, 28, 28, 1).astype(np.float32) / 255
X_test = X_test.reshape(-1, 28, 28, 1).astype(np.float32) / 255

# Integer class ids -> one-hot vectors over the 10 classes.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

#### **Task1:**

To begin, we build a very simple CNN model with the following structure:
1. A 2D convolutional layer with 16 filters, each of size 3×3 (ReLU activation function)
2. A 2D max-pooling layer with a 2×2 pooling window
3. A flatten layer to convert the 2D feature maps into a 1D vector
4. A fully connected layer using Softmax activation

Remember that this is an image classification task, so we use categorical cross-entropy as the loss function.

In [7]:
# Task 1 baseline: a single conv/pool stage feeding a softmax classifier.
model = models.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(units=10, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for two epochs, holding out 10% of the training data for validation.
model.fit(x=X_train, y=y_train, batch_size=32, epochs=2, validation_split=0.1)
# Measure loss and accuracy on the test split.
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)

Epoch 1/2
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7644 - loss: 0.6751 - val_accuracy: 0.8577 - val_loss: 0.4005
Epoch 2/2
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8697 - loss: 0.3749 - val_accuracy: 0.8762 - val_loss: 0.3440
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8784 - loss: 0.3532


#### **Task2:**

Next, we take on the challenge of implementing a more complex CNN model that achieves better classification performance. A reference structure is given below, but you can also design your own CNN model. The only requirement is that it performs better than the simple CNN in Task 1 (i.e., achieves a higher accuracy on the test set).

The reference structure is divided into three parts:
1. Primary Feature Extraction Part
    1. A 2D convolutional layer with 32 filters, each of size 3×3 (ReLU activation function)
    2. A normalization layer
    3. A 2D max-pooling layer with a 2×2 pooling window
    4. A dropout layer that randomly drops 25% of units (to prevent overfitting)
2. Advanced Feature Extraction Part

    This part is mostly the same as the previous one. The only difference is that the convolutional layer uses more filters (e.g., 64) to capture higher-level features.
3. Classification Part
    1. A flatten layer to convert the 2D feature maps into a 1D vector
    2. A fully connected layer with 512 units using ReLU to summarize the high-dimensional features
    3. Another fully connected layer with Softmax activation for classification

Remember that this is an image classification task, so we use categorical cross-entropy as the loss function.

In [8]:
# Task 2 model: two conv/batch-norm/pool/dropout stages followed by a dense
# classification head.
model = models.Sequential([
    layers.Input(shape=(28, 28, 1)),

    # Primary feature extraction.
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(rate=0.25),

    # Advanced feature extraction: same layout, twice as many filters.
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(rate=0.25),

    # Classification head.
    layers.Flatten(),
    layers.Dense(units=512, activation='relu'),
    layers.Dense(units=10, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for two epochs, holding out 10% of the training data for validation.
model.fit(x=X_train, y=y_train, batch_size=32, epochs=2, validation_split=0.1)
# Measure loss and accuracy on the test split.
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)

Epoch 1/2
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.7948 - loss: 0.6297 - val_accuracy: 0.8527 - val_loss: 0.4147
Epoch 2/2
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - accuracy: 0.8806 - loss: 0.3201 - val_accuracy: 0.8910 - val_loss: 0.2978
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8869 - loss: 0.3113
