In [176]:
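# pickle ships with the standard library (not pip-installable); cv2 is provided by opencv-python.
# %pip install numpy pandas opencv-python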
# %pip install matplotlib

In [177]:
from matplotlib import pyplot as plt

import numpy as np
import pandas as pd
import pickle
import cv2

In [178]:
import numpy as np





class Conv2D:
    """
    Parameter container for a convolutional layer: ``out_channels`` filters,
    each of which spans every channel of the input.
    """
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0):
        """
        :param in_channels: number of channels in the input data
        :param out_channels: number of output channels, i.e. how many filters the layer applies
        :param kernel_size: height and width of each (square) kernel
        :param stride: step of the convolution
        :param padding: number of zeros padded around the input
        """
        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size = kernel_size
        self.stride, self.padding = stride, padding

        # Nothing is cached until a pass over the data has been made.
        self.cache = None

        self._init_weights()

    def _init_weights(self):
        """Zero the biases and draw the filter weights from a Gaussian scaled by 1e-3."""
        filter_shape = (self.out_channels, self.in_channels, self.kernel_size, self.kernel_size)
        self.bias = np.zeros(self.out_channels)
        self.weight = np.random.randn(*filter_shape) * 1e-3

    

In [179]:
class ConvolutionLayer:
    """
    2-D convolution layer whose forward and backward passes are vectorised with
    strided window views and ``np.einsum``.

    Filters and biases are created lazily on the first ``forward`` call, once
    the number of input channels is known. ``backward`` applies a plain
    gradient-descent update to them.
    """

    def __str__(self) -> str:
        return "ConvolutionLayer"

    def __init__(self, n_filter, kernel_size, stride, padding):
        """
        :param n_filter: number of filters (output channels)
        :param kernel_size: height and width of each square filter
        :param stride: step between neighbouring windows
        :param padding: number of zeros added on every spatial border of the input
        """
        self.kernel_size = kernel_size
        self.n_filter = n_filter
        self.stride = stride
        self.padding = padding

        self.weights = None
        self.biases = None

        # (input, windows) of the most recent forward pass, needed by backward.
        self.cache = None

    def _get_windows(self, input, output_size, kernel_size, padding=0, stride=1, dilate=0):
        """
        Build a read-only strided view containing every kernel-sized window of ``input``.

        :param input: array of shape (N, C, H, W)
        :param output_size: 4-tuple; only its last two entries (number of windows
                            along height and width) are used
        :param kernel_size: height and width of each window
        :param padding: zeros added on every spatial border before windowing
        :param stride: step between neighbouring windows
        :param dilate: number of zeros inserted between neighbouring input
                       elements (0 disables dilation)
        :return: view of shape (N, C, out_h, out_w, kernel_size, kernel_size)
        """
        working_input = input

        # Dilate: place the original values `dilate + 1` apart with zeros in between.
        if dilate != 0:
            step = dilate + 1
            n, c, h, w = input.shape
            dilated = np.zeros((n, c, (h - 1) * step + 1, (w - 1) * step + 1), dtype=input.dtype)
            dilated[:, :, ::step, ::step] = input
            working_input = dilated

        _, _, out_h, out_w = output_size
        out_b, out_c = input.shape[:2]

        # as_strided does no bounds checking, so make sure the buffer really
        # covers the last requested window; any shortfall is zero-padded on the
        # bottom/right edge.
        short_h = (out_h - 1) * stride + kernel_size - (working_input.shape[2] + 2 * padding)
        short_w = (out_w - 1) * stride + kernel_size - (working_input.shape[3] + 2 * padding)
        extra_h = short_h if short_h > 0 else 0
        extra_w = short_w if short_w > 0 else 0

        if padding != 0 or extra_h != 0 or extra_w != 0:
            working_input = np.pad(
                working_input,
                pad_width=((0, 0), (0, 0), (padding, padding + extra_h), (padding, padding + extra_w)),
                mode='constant',
                constant_values=0.,
            )

        batch_str, channel_str, kern_h_str, kern_w_str = working_input.strides

        return np.lib.stride_tricks.as_strided(
            working_input,
            (out_b, out_c, out_h, out_w, kernel_size, kernel_size),
            (batch_str, channel_str, stride * kern_h_str, stride * kern_w_str, kern_h_str, kern_w_str),
            writeable=False,
        )

    def forward(self, input):
        """
        The forward pass of convolution
        :param input: input data of shape (N, C, H, W)
        :return: output data of shape (N, n_filter, H', W') where
                 H' = (H - kernel_size + 2 * padding) // stride + 1 (and likewise W')
        """
        n, c, h, w = input.shape

        if self.weights is None:
            # He initialisation: standard deviation sqrt(2 / fan_in).
            fan_in = self.kernel_size * self.kernel_size * c
            self.weights = np.random.randn(self.n_filter, c, self.kernel_size, self.kernel_size) * np.sqrt(2 / fan_in)
        if self.biases is None:
            self.biases = np.random.randn(self.n_filter)

        out_h = (h - self.kernel_size + 2 * self.padding) // self.stride + 1
        out_w = (w - self.kernel_size + 2 * self.padding) // self.stride + 1

        windows = self._get_windows(input, (n, c, out_h, out_w), self.kernel_size, self.padding, self.stride)

        out = np.einsum('bihwkl,oikl->bohw', windows, self.weights)

        # add bias to kernels
        out += self.biases[None, :, None, None]

        self.cache = input, windows
        return out

    def backward(self, dout, learning_rate):
        """
        The backward pass of convolution. Updates the filters and biases in place.
        :param dout: upstream gradients, same shape as the output of ``forward``
        :param learning_rate: step size of the gradient-descent update
        :return: dx, the gradient with respect to the input of ``forward``
        :raises RuntimeError: if ``forward`` has not been called yet
        """
        if self.cache is None:
            raise RuntimeError("forward() must be called before backward()")
        x, windows = self.cache
        n, c, h, w = x.shape
        k, p = self.kernel_size, self.padding

        db = np.sum(dout, axis=(0, 2, 3))
        dw = np.einsum('bihwkl,bohw->oikl', windows, dout)

        # Gradient w.r.t. the zero-padded input is the "full" correlation of the
        # stride-dilated upstream gradient (padded by k - 1) with the 180-degree
        # rotated kernels; the forward padding is then cropped away.
        padded_shape = (n, c, h + 2 * p, w + 2 * p)
        dout_windows = self._get_windows(dout, padded_shape, k, padding=k - 1, stride=1, dilate=self.stride - 1)
        rot_kern = np.rot90(self.weights, 2, axes=(2, 3))
        dx_padded = np.einsum('bohwkl,oikl->bihw', dout_windows, rot_kern)
        dx = dx_padded[:, :, p:p + h, p:p + w]

        # dx was computed above, so updating the parameters now cannot affect it.
        self.weights -= learning_rate * dw
        self.biases -= learning_rate * db

        return dx




    # def __forward(self, input):        
    #     batch_size, n_channel, height, width = input.shape
    #     output_shape = (batch_size, self.n_filter, int((height - self.kernel_size + 2*self.padding)/self.stride + 1), int((width - self.kernel_size + 2*self.padding)/self.stride + 1))
    #     output = np.zeros(output_shape)

    #     if self.weights is None:
    #         self.weights = np.random.randn(self.n_filter, n_channel, self.kernel_size, self.kernel_size) / np.sqrt(2 / (self.kernel_size * self.kernel_size * n_channel))
    #     if self.biases is None:
    #         self.biases = np.random.randn(self.n_filter)

    #     if self.padding > 0:
    #         input = np.pad(input, ((0,0), (0,0), (self.padding, self.padding), (self.padding, self.padding)), 'constant')
        
    #     for b in range(batch_size):
    #         for c in range(self.n_filter):
    #             for h in range(height):
    #                 for w in range(width):
    #                     output[b, c, h, w] = np.sum(input[b, :, h*self.stride :h*self.stride + self.kernel_size, w*self.stride : w*self.stride + self.kernel_size] * self.filters[c, :, :, :]) + self.biases[c]

    #     return output


    # def __backward(self, output, learning_rate):
    #     # perform back propagation for convolution

    #     batch_size, n_channel, height, width = output.shape
    #     input_shape = (batch_size, n_channel, height, width)
    #     input = np.zeros(input_shape)

    #     if self.padding > 0:
    #         output = np.pad(output, ((0,0), (0,0), (self.padding, self.padding), (self.padding, self.padding)), 'constant')

    #     for b in range(batch_size):
    #         for c in range(self.n_filter):
    #             for h in range(height):
    #                 for w in range(width):
    #                     print("left.shape")
    #                     print(input[b, :, h*self.stride :h*self.stride + self.kernel_size, w*self.stride : w*self.stride + self.kernel_size].shape)                        

    #                     print("self.filters.shape")
    #                     print(self.filters.shape)

    #                     print("right 2.shape")
    #                     print(self.filters[c, :, :, :].shape)

    #                     input[b, :, h*self.stride :h*self.stride + self.kernel_size, w*self.stride : w*self.stride + self.kernel_size] += output[b, c, h, w] * self.filters[:, c, :, :]
    #                     self.filters[:, c, :, :] += learning_rate * output[b, c, h, w] * input[b, :, h*self.stride :h*self.stride + self.kernel_size, w*self.stride : w*self.stride + self.kernel_size]
    #                     self.biases[c] += learning_rate * output[b, c, h, w]

    #     return input


In [180]:
class ReLUActivationLayer:
    """Element-wise rectified linear unit: negative values are replaced by zero."""

    def __init__(self) -> None:
        # Input of the most recent forward pass; backward needs its sign.
        self.input = None

    def __str__(self) -> str:
        return "ReLUActivationLayer"

    def forward(self, input):
        """
        :param input: array of any shape
        :return: array of the same shape with negative entries set to 0
        """
        self.input = input
        return np.maximum(input, 0)

    def backward(self, output, learning_rate):
        """
        :param output: upstream gradient, same shape as the forward input
        :param learning_rate: unused (the layer has no parameters)
        :return: upstream gradient, zeroed wherever the forward input was not positive
        :raises RuntimeError: if ``forward`` has not been called yet
        """
        if self.input is None:
            raise RuntimeError("forward() must be called before backward()")
        # The derivative of ReLU is 1 where the forward input was positive and 0
        # elsewhere, so the upstream gradient is gated by that mask.
        return output * (self.input > 0)


class MaxPoolingLayer:
    """Max pooling over square windows, applied independently to every sample and channel."""

    def __str__(self) -> str:
        return "MaxPoolingLayer"

    def __init__(self, pool_size, stride):
        """
        :param pool_size: height and width of the pooling window
        :param stride: step between neighbouring windows
        """
        self.pool_size = pool_size
        self.stride = stride
        # Input of the most recent forward pass; backward uses it to locate the maxima.
        self.input = None

    def _output_length(self, length):
        """Number of windows that fit along an axis of the given length."""
        if length < self.pool_size:
            return 0
        return (length - self.pool_size) // self.stride + 1

    def forward(self, input):
        """
        :param input: array of shape (N, C, H, W)
        :return: array of shape (N, C, H', W') holding the maximum of each window
        """
        self.input = input
        batch_size, n_channel, height, width = input.shape

        output_h = self._output_length(height)
        output_w = self._output_length(width)
        output = np.zeros((batch_size, n_channel, output_h, output_w))

        for h in range(output_h):
            rows = slice(h * self.stride, h * self.stride + self.pool_size)
            for w in range(output_w):
                cols = slice(w * self.stride, w * self.stride + self.pool_size)
                # Reduce only over the spatial axes so channels stay separate.
                output[:, :, h, w] = input[:, :, rows, cols].max(axis=(2, 3))

        return output

    def backward(self, output, learning_rate):
        """
        :param output: upstream gradient of shape (N, C, H', W')
        :param learning_rate: unused (the layer has no parameters)
        :return: gradient of shape ``self.input.shape``; each upstream value is
                 routed to the position(s) holding its window's maximum (every
                 tied position receives it)
        :raises RuntimeError: if ``forward`` has not been called yet
        """
        if self.input is None:
            raise RuntimeError("forward() must be called before backward()")

        _, _, height, width = output.shape
        grad_input = np.zeros(self.input.shape)

        for h in range(height):
            rows = slice(h * self.stride, h * self.stride + self.pool_size)
            for w in range(width):
                cols = slice(w * self.stride, w * self.stride + self.pool_size)
                window = self.input[:, :, rows, cols]
                is_max = window == window.max(axis=(2, 3), keepdims=True)
                # Accumulate: with stride < pool_size windows overlap and one
                # input element may be the maximum of several of them.
                grad_input[:, :, rows, cols] += is_max * output[:, :, h, w][:, :, None, None]

        return grad_input

class FlatteningLayer:
    """Collapses every non-batch axis into a single one, and restores the shape on the way back."""

    def __init__(self) -> None:
        # Remembered so that backward can recover the pre-flattening shape.
        self.input = None

    def __str__(self) -> str:
        return "FlatteningLayer"

    def forward(self, input):
        """Return ``input`` reshaped to (batch, everything else)."""
        self.input = input
        n_samples = input.shape[0]
        return input.reshape((n_samples, -1))

    def backward(self, output, learning_rate):
        """Reshape the upstream gradient to the shape of the last forward input."""
        original_shape = self.input.shape
        return output.reshape(original_shape)


class DenseLayer:
    """
    Fully connected layer computing ``input @ weights + biases``.

    Weights and biases are created lazily on the first ``forward`` call, once
    the number of input features is known. ``backward`` applies a plain
    gradient-descent update to them.
    """

    def __init__(self, n_output):
        """
        :param n_output: number of output features
        """
        self.n_output = n_output
        self.weights = None
        self.biases = None
        # Input of the most recent forward pass, needed for the weight gradient.
        self.input = None

    def __str__(self) -> str:
        return "DenseLayer"

    def forward(self, input):
        """
        :param input: array of shape (batch_size, n_input)
        :return: array of shape (batch_size, n_output)
        """
        self.input = input
        _, n_input = input.shape

        if self.weights is None:
            self.weights = np.random.randn(n_input, self.n_output) / np.sqrt(n_input)
        if self.biases is None:
            self.biases = np.random.randn(self.n_output)

        return np.dot(input, self.weights) + self.biases

    def backward(self, output, learning_rate):
        """
        :param output: upstream gradient of shape (batch_size, n_output)
        :param learning_rate: step size of the gradient-descent update
        :return: gradient with respect to the forward input, shape (batch_size, n_input)
        :raises RuntimeError: if ``forward`` has not been called yet
        """
        if self.input is None or self.weights is None:
            raise RuntimeError("forward() must be called before backward()")

        batch_size = output.shape[0]

        # The input gradient is propagated through the weights themselves and
        # must be computed before they are updated below.
        grad_input = np.dot(output, self.weights.T)

        # Parameter gradients are averaged over the batch.
        grad_weights = np.dot(self.input.T, output) / batch_size
        grad_biases = np.mean(output, axis=0)

        self.weights -= learning_rate * grad_weights
        self.biases -= learning_rate * grad_biases

        return grad_input



class SoftMaxLayer:
    """Row-wise softmax over axis 1."""

    def __str__(self) -> str:
        return "SoftMaxLayer"

    def forward(self, input):
        """Return the softmax of each row of ``input``."""
        # Subtracting the row maximum keeps the exponentials from overflowing.
        shifted = input - np.max(input, axis=1, keepdims=True)
        exponentials = np.exp(shifted)
        row_totals = exponentials.sum(axis=1, keepdims=True)
        return exponentials / row_totals

    def backward(self, output, learning_rate):
        """Hand the upstream gradient back unchanged."""
        return output


In [181]:
def loss(y_true, y_pred):
    """Mean squared error between the targets and the predictions."""
    residual = y_true - y_pred
    return np.mean(np.square(residual))

def loss_prime(y_true, y_pred):
    """
    Gradient of the mean squared error ``mean((y_true - y_pred)**2)`` with respect to ``y_pred``.

    :param y_true: target values
    :param y_pred: predicted values, same shape as ``y_true``
    :return: array shaped like ``y_pred``
    """
    # The mean averages over every element, so each partial derivative carries a 1/N factor.
    return 2 * (y_pred - y_true) / np.size(y_pred)

# Root directory of the dataset; the CSV and image paths used by the loaders in this cell are built from it.
BASEDIR = "../../../numta"

def load_dataset():
    """Read training-a.csv from the base directory, keeping only the filename and digit columns."""
    csv_path = f"{BASEDIR}/training-a.csv"
    return pd.read_csv(csv_path)[["filename", "digit"]]


def load_image(image_name):
    """
    Load one training image, resize it to 64x64, convert it to RGB and invert its colours.

    The result is also displayed with matplotlib and printed.

    :param image_name: file name inside ``{BASEDIR}/training-a``
    :return: the processed image as returned by OpenCV
    :raises FileNotFoundError: if OpenCV could not read the file
    """
    image_path = f"{BASEDIR}/training-a/{image_name}"

    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    img = cv2.resize(img, (64, 64))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # cv2.invert computes a matrix inverse (and returns a tuple); flipping
    # pixel intensities is done with bitwise_not.
    img = cv2.bitwise_not(img)

    plt.imshow(img)
    plt.show()

    print(img)
    return img

# data = load_dataset()
# data.head

# X = data["filename"].values
# y = data["digit"].values

# load_image(X[0])

## Debug Cell

In [182]:
# Smoke test: push one random batch forward and then backward through every
# layer, printing the array shape after each step.
# NOTE: `input` and `max` shadow Python builtins; the names are kept as they
# are in case other cells refer to them.

# batch_size, n_channel, height, width
input_shape = (10, 4, 32, 32)
input = np.random.randn(*input_shape)

print("input shape: ", input.shape)

# n_filter, filter_size, stride, padding
con = ConvolutionLayer(n_filter=5, kernel_size=3, stride=1, padding=1)
relu = ReLUActivationLayer()
max = MaxPoolingLayer(pool_size=2, stride=1)
flat = FlatteningLayer()
dens = DenseLayer(n_output=10)
smax = SoftMaxLayer()

# Forward pass, first layer to last.
forward_steps = [
    (con, "Convolution done"),
    (relu, "ReLU done"),
    (max, "MaxPooling done"),
    (flat, "Flattening done"),
    (dens, "Dense done"),
    (smax, "Softmax done"),
]

output = input
for layer, message in forward_steps:
    output = layer.forward(output)
    print(message)
    print("output shape: ", output.shape)

print("*" * 30)
print("*" * 30)

learning_rate = 0.1

# Backward pass, last layer to first, starting from the forward output.
backward_steps = [
    (smax, "Softmax backward done"),
    (dens, "Dense backward done"),
    (flat, "Flattening backward done"),
    (max, "MaxPooling backward done"),
    (relu, "ReLU backward done"),
    (con, "Convolution backward done"),
]

for layer, message in backward_steps:
    output = layer.backward(output, learning_rate)
    print(message)
    print("output shape: ", output.shape)


input shape:  (10, 4, 32, 32)
Convolution done
output shape:  (10, 5, 32, 32)
ReLU done
output shape:  (10, 5, 32, 32)
MaxPooling done
output shape:  (10, 5, 31, 31)
Flattening done
output shape:  (10, 4805)
Dense done
output shape:  (10, 10)
Softmax done
output shape:  (10, 10)
******************************
******************************
Softmax backward done
output shape:  (10, 10)
Dense backward done
output shape:  (10, 4805)
Flattening backward done
output shape:  (10, 5, 31, 31)
MaxPooling backward done
output shape:  (10, 5, 32, 32)
ReLU backward done
output shape:  (10, 5, 32, 32)
Convolution backward done
output shape:  (10, 4, 32, 32)
