# Solving MNIST & CIFAR-10 Using Convolution Layers

### Goal:
Design a simple ANN for handwritten digit classification where the model will be trained, validated, and tested on: MNIST and CIFAR-10

### Import Data

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

### Keras Sequential API

In [12]:
# Hyperparameters
epochs = 15
batch_size = 128

# Build and Train the Model
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
)

model.summary()
"""model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1, verbose=0)

# Model Evaluation
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])"""

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_12 (MaxPoolin  (None, 13, 13, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_13 (Conv2D)          (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_13 (MaxPoolin  (None, 5, 5, 64)         0         
 g2D)                                                            
                                                                 
 flatten_6 (Flatten)         (None, 1600)              0         
                                                                 
 dropout_6 (Dropout)         (None, 1600)             

'model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])\nmodel.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1, verbose=0)\n\n# Model Evaluation\nscore = model.evaluate(x_test, y_test, verbose=0)\nprint("Test loss:", score[0])\nprint("Test accuracy:", score[1])'

### Keras Functional API

In [11]:
# Hyperparameters
epochs = 15
batch_size = 128


inputs = keras.Input(shape=input_shape)
x = layers.Conv2D(32, kernel_size=(3, 3), activation="relu")(inputs)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Conv2D(64, kernel_size=(3, 3), activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
output = layers.Dense(num_classes, activation="softmax")(x)

# Build and Train the Model
model = keras.Model(inputs=inputs, outputs=output, name="mnist_model")

model.summary()
"""model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1, verbose=0)

# Model Evaluation
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])"""

Model: "mnist_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_10 (Conv2D)          (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 13, 13, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 5, 5, 64)         0         
 g2D)                                                            
                                                                 
 flatten_5 (Flatten)         (None, 1600)              

'model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])\nmodel.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1, verbose=0)\n\n# Model Evaluation\nscore = model.evaluate(x_test, y_test, verbose=0)\nprint("Test loss:", score[0])\nprint("Test accuracy:", score[1])'

# Convolution Layer (From Scratch)

In [None]:


class Convolution2D(layers.Layer):
    """2D convolution layer (e.g. spatial convolution over images).

    This layer creates a convolution kernel that is convolved with the layer input to produce a tensor of outputs. If `use_bias` 
    is True, a bias vector is created and added to the outputs. Finally, if `activation` is not `None`, it is applied to the 
    outputs as well.
    """
    def __init__(self, 
                 num_filters, 
                 kernel_size,
                 strides=(1, 1),
                 padding="valid",
                 data_format=None,
                 dilation_rate=(1, 1),
                 groups=1,
                 activation=None,
                 use_bias=True,
                 kernel_initializer="glorot_uniform",
                 bias_initializer="zeros",
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None):
        """
        Parameters
        ----------
        name : str
            The name of the animal
        sound : str
            The sound the animal makes
        num_legs : int, optional
            The number of legs the animal (default is 4)
            
        num_filters : int 
            The dimensionality of the output space (i.e. the number of output filters in the convolution).
        kernel_size : int | tuple/list of 2 integers
            Specifying the height and width of the 2D convolution window. Can be a single integer to specify the same value for 
            all spatial dimensions.
        strides : int | or tuple/list of 2 integers
            Specifying the strides of the convolution along the height and width. Can be a single integer to specify the same 
            value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any dilation_rate 
            value != 1.
        padding : one of "valid" or "same" (case-insensitive). 
            "valid" means no padding. "same" results in padding with zeros evenly to the left/right or up/down of the input. 
            When padding="same" and strides=1, the output has the same size as the input.
        data_format : str
            one of channels_last (default) or channels_first. The ordering of the dimensions in the inputs. channels_last 
            corresponds to inputs with shape (batch_size, height, width, channels) while channels_first corresponds to inputs 
            with shape (batch_size, channels, height, width). It defaults to the image_data_format value found in your Keras 
            config file at ~/.keras/keras.json. If you never set it, then it will be channels_last.
        dilation_rate : int | tuple/list of 2 ints
            Specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all 
            spatial dimensions. Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride 
            value != 1.
        groups : A positive int 
            Specifying the number of groups in which the input is split along the channel axis. Each group is convolved separately 
            with filters / groups filters. The output is the concatenation of all the groups results along the channel axis. Input 
            channels and filters must both be divisible by groups.
        activation : callable
            Activation function to use. If you don't specify anything, no activation is applied.
        use_bias : bool 
            Whether the layer uses a bias vector.
        kernel_initializer : tf.Variable
            Initializer for the kernel weights matrix. Defaults to 'uniform'.
        bias_initializer : tf.Variable
            Initializer for the bias vector. Defaults to 'zeros'.
        kernel_regularizer : callable
            Regularizer function applied to the kernel weights matrix.
        bias_regularizer : callable
            Regularizer function applied to the bias vector.
        activity_regularizer : callable
            Regularizer function applied to the output of the layer (its "activation").
        """
        super(Convolution2D, self).__init__()
        # TODO make kernel and bias initializable
        self.num_filters = num_filters
        kernel_size,
        strides=(1, 1),
        padding="valid",
        data_format=None,
        dilation_rate=(1, 1),
        groups=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None
        
        
        
        def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )
        