In [68]:
# %pip install numpy pandas pickle
# %pip install matplotlib
# %pip install opencv-python

In [69]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import log_loss, accuracy_score
from matplotlib import pyplot as plt

import numpy as np
import pandas as pd
import pickle
import cv2

np.random.seed(0)

In [70]:
class ConvolutionLayer:

    def __str__(self) -> str:
        return "ConvolutionLayer"

    def __init__(self, n_filter, kernel_size, stride, padding):
        self.kernel_size = kernel_size
        self.n_filter = n_filter
        self.stride = stride
        self.padding = padding

        self.weights = None
        self.biases = None

        self.cache = None
    
    def _get_windows(self, input, output_size, kernel_size, padding=0, stride=1, dilate=0):
        working_input = input
        working_pad = padding
        # dilate the input if necessary
        if dilate != 0:
            working_input = np.insert(working_input, range(1, input.shape[2]), 0, axis=2)
            working_input = np.insert(working_input, range(1, input.shape[3]), 0, axis=3)

        # pad the input if necessary
        if working_pad != 0:
            working_input = np.pad(working_input, pad_width=((0,), (0,), (working_pad,), (working_pad,)), mode='constant', constant_values=(0.,))

        in_b, in_c, out_h, out_w = output_size
        out_b, out_c, _, _ = input.shape
        batch_str, channel_str, kern_h_str, kern_w_str = working_input.strides

        return np.lib.stride_tricks.as_strided(
            working_input,
            (out_b, out_c, out_h, out_w, kernel_size, kernel_size),
            (batch_str, channel_str, stride * kern_h_str, stride * kern_w_str, kern_h_str, kern_w_str)
        )

    def og_forward(self, input):        
        batch_size, n_channel, height, width = input.shape
        output_shape = (batch_size, self.n_filter, int((height - self.kernel_size + 2*self.padding)/self.stride + 1), int((width - self.kernel_size + 2*self.padding)/self.stride + 1))
        output = np.zeros(output_shape)

        if self.weights is None:
            self.weights = np.random.randn(self.n_filter, n_channel, self.kernel_size, self.kernel_size) / np.sqrt(2 / (self.kernel_size * self.kernel_size * n_channel))
        if self.biases is None:
            self.biases = np.random.randn(self.n_filter)

        if self.padding > 0:
            input = np.pad(input, ((0,0), (0,0), (self.padding, self.padding), (self.padding, self.padding)), 'constant')
        
        for b in range(batch_size):
            for c in range(self.n_filter):
                for h in range(height):
                    for w in range(width):
                        print("mul left shape")
                        print(input[b, :, h*self.stride :h*self.stride + self.kernel_size, w*self.stride : w*self.stride + self.kernel_size].shape)
                        print("b, :, h*self.stride :h*self.stride + self.kernel_size, w*self.stride : w*self.stride + self.kernel_size")
                        print(f"{b}, , {h*self.stride}-{h*self.stride + self.kernel_size}, {w*self.stride}-{w*self.stride + self.kernel_size}")

                        print("mul right shape")
                        print(self.weights[c, :, :, :].shape)
                        print(self.weights[c, :, :, :])
                        print()
                        
                        print("self.biases[c]")
                        print(self.biases[c])

                        # output[b, c, h, w] = np.sum(input[b, :, h*self.stride : h*self.stride+self.kernel_size, w*self.stride : w*self.stride+self.kernel_size] * self.weights[c, :, :, :]) + self.biases[c]

        return output


    def forward(self, input):
        n, c, h, w = input.shape

        if self.weights is None:
            print("self.weights is None")
            self.weights = np.random.randn(self.n_filter, c, self.kernel_size, self.kernel_size) / np.sqrt(2 / (self.kernel_size * self.kernel_size * c))
        if self.biases is None:
            print("self.biases is None")
            self.biases = np.random.randn(self.n_filter)

        out_h = (h - self.kernel_size + 2 * self.padding) // self.stride + 1
        out_w = (w - self.kernel_size + 2 * self.padding) // self.stride + 1

        windows = self._get_windows(input, (n, c, out_h, out_w), self.kernel_size, self.padding, self.stride)
        out = np.einsum('bihwkl,oikl->bohw', windows, self.weights)
        out += self.biases[None, :, None, None]

        self.cache = input, windows
        return out

    def backward(self, dout, learning_rate):
        x, windows = self.cache

        padding = self.kernel_size - 1 if self.padding == 0 else self.padding
        dout_windows = self._get_windows(dout, x.shape, self.kernel_size, padding=padding, stride=1, dilate=self.stride - 1)
        rot_kern = np.rot90(self.weights, 2, axes=(2, 3))

        db = np.sum(dout, axis=(0, 2, 3))
        dw = np.einsum('bihwkl,bohw->oikl', windows, dout)
        dx = np.einsum('bohwkl,oikl->bihw', dout_windows, rot_kern)

        self.weights -= learning_rate * dw
        self.biases -= learning_rate * db
        return dx


class ReLUActivationLayer:

    def __str__(self) -> str:
        return "ReLUActivationLayer"

    def forward(self, input):
        return np.maximum(input, 0)

    def backward(self, output, learning_rate):
        return np.where(output > 0, 1, 0)


class MaxPoolingLayer:

    def __str__(self) -> str:
        return "MaxPoolingLayer"

    def __init__(self, pool_size, stride):
        self.pool_size = pool_size
        self.stride = stride
        self.input = None
        self.cache = None


    def tt_forward(self, x, is_training=True):
        
        n_batch, ch_x, h_x, w_x = x.shape
        h_poolwindow, w_poolwindow = self.pool_size, self.pool_size

        out_h = int((h_x - h_poolwindow) / self.stride) + 1
        out_w = int((w_x - w_poolwindow) / self.stride) + 1
        
        windows = np.lib.stride_tricks.as_strided(x,
                     shape=(n_batch, ch_x, out_h, out_w, self.pool_size, self.pool_size),
                     strides=(x.strides[0], x.strides[1],
                              self.stride * x.strides[2],
                              self.stride * x.strides[3],
                              x.strides[2], x.strides[3])
                     )
        out = np.max(windows, axis=(4, 5))

        maxs = out.repeat(2, axis=2).repeat(2, axis=3)
        x_window = x[:, :, :out_h * self.stride, :out_w * self.stride]

        print("x_window")
        print(x_window.shape)

        print("maxs")
        print(maxs.shape)

        mask = np.equal(x_window, maxs).astype(int)

        if is_training:
            self.cache['X'] = x
            self.cache['mask'] = mask

        return out

    def tt_backward(self, dA_prev):
    
        x = self.cache['X']
        n_batch, ch_x, h_x, w_x = x.shape
        h_poolwindow, w_poolwindow = self.pool_size, self.pool_size
        dA = np.zeros(shape=x.shape)  # dC/dA --> gradient of the input
        
        mask = self.cache['mask']
        dA = dA_prev.repeat(h_poolwindow, axis=2).repeat(w_poolwindow, axis=3)
        dA = np.multiply(dA, mask)
        pad = np.zeros(x.shape)
        pad[:, :, :dA.shape[2], :dA.shape[3]] = dA

        return pad


    def forward(self, input):
        self.input = input
        batch_size, n_channel, height, width = input.shape

        output_h = int((height - self.pool_size)/self.stride + 1)
        output_w = int((width  - self.pool_size)/self.stride + 1)

        output_shape = (batch_size, n_channel, output_h, output_w)
        output = np.zeros(output_shape)

        for b in range(batch_size):
            for c in range(n_channel):
                for h in range(output_h):
                    for w in range(output_w):
                        output[b, c, h, w] = np.max(input[b, :, h*self.stride :h*self.stride + self.pool_size, w*self.stride : w*self.stride + self.pool_size])

        return output

    def backward(self, output, learning_rate):
        batch_size, n_channel, height, width = output.shape
        input = np.zeros(self.input.shape)

        for b in range(batch_size):
            for c in range(n_channel):
                for h in range(height):
                    for w in range(width):
                        input[b, c, h*self.stride :h*self.stride + self.pool_size, w*self.stride : w*self.stride + self.pool_size] = np.where(input[b, c, h*self.stride :h*self.stride + self.pool_size, w*self.stride : w*self.stride + self.pool_size] == np.max(input[b, c, h*self.stride :h*self.stride + self.pool_size, w*self.stride : w*self.stride + self.pool_size]), output[b, c, h, w], 0)

        return input    


class FlatteningLayer:

    def __init__(self) -> None:
        self.input = None

    def __str__(self) -> str:
        return "FlatteningLayer"

    def forward(self, input):
        self.input = input
        return input.reshape(input.shape[0], -1)

    def backward(self, output, learning_rate):
        return output.reshape(self.input.shape)


class DenseLayer:

    def __init__(self, n_output):
        self.n_output = n_output
        self.weights = None
        self.biases = None
        self.input = None

    def __str__(self) -> str:
        return "DenseLayer"

    def forward(self, input):
        
        self.input = input
        batch_size, n_input = input.shape

        if self.weights is None:
            self.weights = np.random.randn(n_input, self.n_output) / np.sqrt(n_input)
        if self.biases is None:
            self.biases = np.random.randn(self.n_output)

        output = np.dot(input, self.weights) + self.biases
        return output


    def backward(self, output, learning_rate):
        
        batch_size, n_input = output.shape    
        grad_weights = np.dot(self.input.T, output)/n_input
        
        grad_biases = np.mean(output, axis=0)
        grad_input = np.dot(output, grad_weights.T)

        self.weights -= learning_rate * grad_weights
        self.biases -= learning_rate * grad_biases

        return grad_input


class SoftMaxLayer:

    def __str__(self) -> str:
        return "SoftMaxLayer"

    def forward(self, input):
        val = input - np.max(input, axis=1, keepdims=True)
        val = np.exp(val) / np.exp(val).sum(axis=1, keepdims=True)
        return val

    def backward(self, output, learning_rate):
        return output


In [71]:
DATASET_DIR = "../../../numta"
DATASET_NAME = "training-b"
IMAGE_SHAPE = (32, 32)

class CNN:

    def __init__(self, learning_rate=0.01):
        self.layers = []
        self.learning_rate = learning_rate
    
    def add(self, layer):
        self.layers.append(layer)
    
    def forward(self, input):
        for layer in self.layers:
            input = layer.forward(input)
        return np.array(input)
         
    def backward(self, output) -> None:
        for layer in reversed(self.layers):
            output = layer.backward(output, self.learning_rate)
    
    def fit(self, X_train, y_train, X_valid, y_valid, epochs=10, batch_size=32):

        X_valid = np.array([ 255 - load_image_as_grayscale(f"{DATASET_DIR}/{DATASET_NAME}/{img_name}") for img_name in X_valid ])
        y_valid = np.array([ np.eye(10)[digit] for digit in y_valid ], dtype=int)

        print("Training started...")
        # print("y_valid")
        # print(y_valid)

        for epoch in range(epochs):
            n_batch = int(np.ceil(len(X_train)/batch_size))

            for batch in range(n_batch):
                #   reading batch of image_name from CSV and loading them as grayscale
                batch_X = X_train[batch*batch_size : (batch+1)*batch_size]
                batch_X = np.array([ 255 - load_image_as_grayscale(f"{DATASET_DIR}/{DATASET_NAME}/{img_name}") for img_name in batch_X ])

                #   reading batch of labels from CSV and converting them to one-hot encoding
                y_true = y_train[batch*batch_size : (batch+1)*batch_size]
                y_true = np.array([ np.eye(10)[digit] for digit in y_true ], dtype=int)

                y_pred_batch = self.forward(batch_X)
                # y_hat = y_pred_batch - y_true
                # self.backward(y_hat)

                print("y_pred_batch")
                print(y_pred_batch)

                print(f"Epoch: {epoch+1}/{epochs} Batch: {batch+1}/{n_batch}", end="\n")

            # y_pred = self.predict(X_valid)
            # # y_pred = np.array(y_pred, dtype=int)

            # y_pred = np.argmax(y_pred, axis=1)
            # y_pred = np.array([ np.eye(10)[digit] for digit in y_pred ], dtype=int)

            # print("y_pred")
            # print(y_pred)

            # accuracy = accuracy_score(y_valid, y_pred)
            # error = mean_squared_error(y_valid, y_pred)
            # ce_loss = log_loss(y_valid, y_pred)

            # print(f"Epoch: {epoch+1}/{epochs} Batch: {batch+1}/{n_batch} Accuracy: {accuracy} Error: {error} CE Loss: {ce_loss}")


    def predict(self, X):
        return self.forward(X)


# def cross_entropy_loss(y_true, y_pred):
#     return np.sum(-1 * np.sum(y_true * np.log(y_pred), axis=0))


def load_dataset(shuffle=False):
    dataset = f"{DATASET_DIR}/{DATASET_NAME}.csv"
    df = pd.read_csv(dataset)
    X = df["filename"]
    y = df["digit"]
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=shuffle)
    return X_train, X_valid, y_train, y_valid


def load_image_as_grayscale(image_path):    
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, IMAGE_SHAPE)
    return np.array([ img ])


def test():

    cnn = CNN(learning_rate=0.1)
    cnn.add(ConvolutionLayer(n_filter=3, kernel_size=3, stride=1, padding=0))

    X_train, X_valid, y_train, y_valid = load_dataset(shuffle=False)
    print("Dataset Loaded")

    cnn.fit(
        X_train=X_train,
        y_train=y_train,
        X_valid=X_valid,
        y_valid=y_valid,
        batch_size=5
    )
    


def main():

    cnn = CNN(learning_rate=0.1)
    cnn.add(ConvolutionLayer(n_filter=3, kernel_size=3, stride=1, padding=0))
    cnn.add(ReLUActivationLayer())
    cnn.add(MaxPoolingLayer(pool_size=2, stride=1))

    cnn.add(ConvolutionLayer(n_filter=3, kernel_size=3, stride=2, padding=2))
    cnn.add(ReLUActivationLayer())
    cnn.add(MaxPoolingLayer(pool_size=2, stride=1))

    cnn.add(FlatteningLayer())
    cnn.add(DenseLayer(n_output=10))

    cnn.add(SoftMaxLayer())
    
    X_train, X_valid, y_train, y_valid = load_dataset(shuffle=False)
    print("Dataset Loaded")

    cnn.fit(
        X_train=X_train,
        y_train=y_train,
        X_valid=X_valid,
        y_valid=y_valid,

        batch_size=16
    )

    pickle.dump(cnn, open("cnn.pkl", "wb"))
    print("Pickle File Dumped")

    cnn_x = pickle.load(open("cnn.pkl", "rb"))
    print("Pickle File Loaded")

    if np.allclose(cnn.layers[0].biases, cnn_x.layers[0].biases):
        print("Biases are same")

if __name__ == "__main__":
    main()
    # test()

Dataset Loaded
Training started...
self.weights is None
self.biases is None
self.weights is None
self.biases is None
y_pred_batch
[[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]
Epoch: 1/10 Batch: 1/18
y_pred_batch
[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 1.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 1.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.0000