# Convolutional Neural Network (CNN)

In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

np.random.seed(42)

#### Helper functions

In [11]:
# function for loading data from disk
def load_mnist_digits():

    train_df = pd.read_csv('datasets/mnist-digits.csv')

    y = np.array(pd.get_dummies(train_df['label']))

    X = train_df.drop(['label'], axis=1)
    X = np.array(X)/255.0
    X = X - 0.5

    X = X.reshape(X.shape[0],28,28)
    y = np.expand_dims(y, axis=1)
    del train_df

    return X, y

In [12]:
# activation function
def softmax(z):
    exp = np.exp(z)
    return exp / np.sum(exp, axis=1)

In [13]:
# loss function
def cross_entropy_loss(y, pred, epsilon=1e-15, num_classes=10):
    pred = np.clip(pred, epsilon, 1-epsilon)
    loss = -np.sum(y * np.log(pred))/num_classes
    return loss

In [14]:
# function for calculating model accuracy on given features
def calculate_accuracy_score(features, labels, layers):
    truths, preds = [], []
    for i in range(0, features.shape[0]):
        truths.append(np.argmax(labels[i]))
        preds.append(np.argmax(predict(layers, features[i])))
    
    return accuracy_score(truths, preds)

### Dataset: MNIST Digits
The MNIST dataset is a collection of 28x28 pixel grayscale images of handwritten digits (0-9), widely used for training and evaluating machine learning models. It consists of 60,000 training samples and 10,000 test samples, making it a popular benchmark for digit recognition tasks.

For simplicity, Here we are using only 12000 samples for faster training process.
<br>This dataset is collected from [Kaggle - Digit Recognizer Dataset](https://www.kaggle.com/competitions/digit-recognizer/data), full dataset credits goes to them.

In [15]:
X, y = load_mnist_digits()

X_train, X_test, y_train, y_test = train_test_split(X[:12000], y[:12000], test_size=2000, random_state=42)
X_train[0].shape, y_train[0].shape

((28, 28), (1, 10))

### Model: Convolutional Neural Networks (CNNs)
Introduced by Yann LeCun in the 1990s, CNNs are designed for processing grid-like data, such as images. They use convolutional layers to automatically and adaptively learn spatial hierarchies of features from input images.

The architecture of a Convolutional Neural Network (CNN) includes:

1. Convolutional Layers: These layers apply filters to extract spatial features from input data, making them well-suited for image and spatial data analysis.

2. Pooling Layers: These layers downsample the features to reduce the computational load and improve translation invariance.

3. Fully Connected Layers: These layers process the extracted features to make predictions or classifications.

4. Training: CNNs are trained using backpropagation and optimization techniques to learn patterns in data.

CNNs are commonly used in computer vision tasks, image recognition, and other spatial data analysis applications due to their ability to capture local patterns efficiently.

In [16]:
class DenseLayer:
    
    last_input = np.array([])
    pool_out_shape = []
    
    def __init__(self,nodes_in=0, nodes_out=10):
        self.nodes_in, self.nodes_out = nodes_in, nodes_out
        self.weights = np.random.randn(self.nodes_out, self.nodes_in)/np.sqrt(self.nodes_out)
        self.biases = np.random.randn(1, self.nodes_out)
    
    def forward(self, x):
        
        if len(self.pool_out_shape) <=0: self.pool_out_shape = x.shape
        x = x.flatten()
        x = x.reshape(x.shape + (1,)).T
        self.last_input = x
        self.a = softmax(np.dot(x, self.weights.T) + self.biases)
        return self.a

    def backward(self, y, lr):
        error = y - self.a
        pool_error = error.dot(self.weights).reshape(self.pool_out_shape)
        self.weights += lr * error.T.dot(self.last_input)
        self.biases  += lr * error
        return pool_error

class MaxPoolLayer:

    image_shape = [0, 0]
    num_filters = 0
    
    def forward(self, input_image):
        
        self.last_input = input_image
        self.image_shape[0], self.image_shape[1], self.num_filters = input_image.shape
        output = np.zeros(((self.image_shape[0]//2), (self.image_shape[1]//2), self.num_filters))
        for i in range((self.image_shape[0] // 2)):
            for j in range((self.image_shape[1] // 2)):
                selected_region = input_image[(i*2):(i*2+2),(j*2):(j*2+2)]
                output[i, j] = np.amax(selected_region, axis=(0, 1))
        
        return output

    def backprop(self, error):
        
        conv_error = np.zeros(self.last_input.shape)
        for i in range(self.last_input.shape[0]//2):
            for j in range(self.last_input.shape[1]//2):   
                selected_region = self.last_input[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
                h, w, f = selected_region.shape
                amax = np.amax(selected_region, axis=(0, 1))
        
        for i2 in range(h):
            for j2 in range(w):
                for f2 in range(f):
                    # If this pixel was the max value, copy the gradient to it.
                    if selected_region[i2, j2, f2] == amax[f2]:
                        conv_error[i * 2 + i2, j * 2 + j2, f2] = error[i, j, f2]
        
        return conv_error

class ConvLayer:
    
    def __init__(self, num_filters, filter_shape = (2,2)):
        self.num_filters = num_filters
        self.filter_shape = filter_shape
        self.filters = np.random.randn(num_filters, filter_shape[0], filter_shape[1]) / (filter_shape[0] * filter_shape[1])
    
    def forward(self, input_image):
        self.last_input = input_image
        output = np.zeros((input_image.shape[0] - 2, input_image.shape[1] - 2, self.num_filters))
        for i in range(input_image.shape[0] - 2):
            for j in range(input_image.shape[1] - 2):
                selected_region = input_image[i:(i+self.filter_shape[0]), j:(j+self.filter_shape[1])]
                output[i, j] = np.sum(selected_region * self.filters, axis=(1, 2))
        
        return output
    
    def backprop(self, conv_error, lr):
        new_filters_weights = np.zeros(self.filters.shape)
        for i in range(self.last_input.shape[0] - 2):
            for j in range(self.last_input.shape[1] - 2):
                selected_region = self.last_input[i:(i+self.filter_shape[0]), j:(j+self.filter_shape[1])]
                for k in range(self.num_filters):
                    new_filters_weights[k] += conv_error[i, j, k] * selected_region
        
        self.filters += lr * new_filters_weights

### Training

In [17]:
def train(layers, X_train, y_train, epochs, learning_rate):
    
    conv_layer, pool_layer, dense_layer = layers
    batch_size = X_train.shape[0]
    epoch_losses = []
    for e in range(epochs):
        
        batch_loss = []
        for i in range(0, batch_size):
            
            # feedforward
            conv_layer_output = conv_layer.forward(X_train[i])
            pool_layer_output = pool_layer.forward(conv_layer_output)
            dense_layer_output = dense_layer.forward(pool_layer_output)
            
            loss = cross_entropy_loss(y_train[i], dense_layer_output)
            
            # backpropagation
            pool_error = dense_layer.backward(y_train[i], lr=learning_rate)
            conv_error = pool_layer.backprop(pool_error)
            conv_layer.backprop(conv_error, lr=learning_rate)
            batch_loss.append(loss)

        epoch_losses.append(np.sum(batch_loss)/batch_size)
        print("epoch ",(e+1),"\t...\tloss:",np.sum(epoch_losses)/(e+1))

def predict(layers, x):
    pred = x
    for layer in layers:
        pred = layer.forward(pred)
    return pred

In [18]:
conv_layer = ConvLayer(20, (3,3))
pool_layer = MaxPoolLayer()
dense_layer = DenseLayer(3380, 10)

layers = [conv_layer, pool_layer, dense_layer]

epochs = 3
learning_rate = 0.001

predict(layers, X_train[0])
train(layers, X_train, y_train, epochs, learning_rate)

epoch  1 	...	loss: 0.15261140851375674
epoch  2 	...	loss: 0.1123394032620471
epoch  3 	...	loss: 0.09371198967027322


In [19]:
print("Train accuracy: ", calculate_accuracy_score(X_train, y_train, layers), "%")
print("Test  accuracy: ", calculate_accuracy_score(X_test, y_test, layers), "%")

Train accuracy:  0.839 %
Test  accuracy:  0.821 %


#### Looks like our model learned something!