## Creating a 2-D convolutional layer

In [None]:
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import OneHotEncoder
import numpy as np

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)
X_train /= 255
X_test /= 255

enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train[:, np.newaxis])
y_test_one_hot = enc.transform(y_test[:, np.newaxis])


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if __name__ == '__main__':
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  # Remove the CWD from sys.path while we load stuff.


In [None]:
from sklearn.model_selection import train_test_split
X_train_new, X_val, y_train_new, y_val = train_test_split(X_train, y_train_one_hot, test_size=0.2)

In [1]:
class SimpleInitializer:
    """
    Simple initialization with Gaussian distribution
    Parameters
    ----------
    sigma : float
      Standard deviation of Gaussian distribution
    """
    def __init__(self, sigma):
        self.sigma = sigma
    def W(self,  n,c, s):
        """
        Weight initialization
        Parameters
        ----------
        n_nodes1 : int
          Number of nodes in the previous layer
        n_nodes2 : int
          Number of nodes in the later layer

        Returns
        ----------
        W :
        """
        W = self.sigma *np.random.randn( n,c, s,s)
        return W
    def B(self, n):
        """
        Bias initialization
        Parameters
        ----------
        n_nodes2 : int
          Number of nodes in the later layer

        Returns
        ----------
        B :
        """
        B = self.sigma *np.random.randn(n)
        return B
    
class XavierInitializer:
    def __init__(self,sigma):
        self.sigma =sigma
        
    def W(self,  n_nodes1, n_nodes2):
      
        self.sigma = 1/np.sqrt(n_nodes1)
        W = self.sigma *np.random.randn(n_nodes1, n_nodes2)
        return W
    
    def B(self,n_nodes2):
        B = self.sigma *np.random.randn(n_nodes2)
        return B

class HeInitializer:
    def __init__(self,sigma):
        self.sigma =sigma
    def W(self,  n_nodes1, n_nodes2):
      
        self.sigma = np.sqrt(2/n_nodes1)
        W = self.sigma *np.random.randn( n_nodes1, n_nodes2)
        return W
    
    def B(self,n_nodes2):
        B = self.sigma *np.random.randn(n_nodes2)
        return B

In [2]:
class SGD:
    """
    Stochastic gradient descent
    Parameters
    ----------
    lr : Learning rate
    """
    def __init__(self, lr):
        self.lr = lr
    def update(self, layer):
        """
        Update weights and biases for a layer
        Parameters
        ----------
        layer : Instance of the layer before update
        """
        layer.W = layer.W - self.lr*layer.dW
        layer.B = layer.B - self.lr*layer.dB
        return layer

In [3]:
class Softmax:
    def forward(self, A): 
        Z = np.exp(A) / np.sum(np.exp(A), axis=1).reshape(-1, 1)
        return Z
        
    def backward(self, Z, y):
        dA = Z - y

        return dA
    def loss(self,Z,y):
        L = - np.sum(y * np.log(Z)) / len(y)
        return L
    
class ReLU:
    def forward(self, A): 
        self.A = A
        A[A <= 0] = 0
        return A
    
    def backward(self, dZ):
        dA = dZ*np.array(self.A > 0, np.int)
        return dA
    
class Sigmoid:

    def forward(self, A):
        self.A = A
        Z = 1 / (1 + np.exp(-self.A))
        return Z
    
    def backward(self, dZ):
        dA = dZ * ((1 / (1 + np.exp(-self.A))) - (1 / (1 + np.exp(-self.A)))**2)
        return dA
    
class Tanh:

    def forward(self, A):
        self.A = A
        Z = np.tanh(self.A)
        return Z
    
    def backward(self, dZ):
        dA = dZ * (1 - np.tanh(self.A)**2)
        return dA

In [4]:
class FC:
    """
    Number of nodes Fully connected layer from n_nodes1 to n_nodes2
    Parameters
    ----------
    n_nodes1 : int
      Number of nodes in the previous layer
    n_nodes2 : int
      Number of nodes in the later layer
    initializer: instance of initialization method
    optimizer: instance of optimization method
    """
    def __init__(self, n_nodes1, n_nodes2, initializer, optimizer):
        self.optimizer = optimizer
        # Initialize
        # Initialize self.W and self.B using the initializer method
        self.n_nodes1 = n_nodes1
        self.n_nodes2 = n_nodes2
        self.W = initializer.W(n_nodes1,n_nodes2)
        self.B = initializer.B(n_nodes2)
        self.Hw = 0
        self.Hb = 0
    def forward(self, X):
        """
        forward
        Parameters
        ----------
        X : The following forms of ndarray, shape (batch_size, n_nodes1)
            入力
        Returns
        ----------
        A : The following forms of ndarray, shape (batch_size, n_nodes2)
            output
        """        
#         print(X.shape)
        A =  X @self.W + self.B
        self.Z = X
        return A
    def backward(self, dA):
        """
        Backward
        Parameters
        ----------
        dA : The following forms of ndarray, shape (batch_size, n_nodes2)
            Gradient flowing from behind
        Returns
        ----------
        dZ : The following forms of ndarray, shape (batch_size, n_nodes1)
            Gradient to flow forward
        """
        self.dB = np.sum(dA,axis = 0)
        self.dW = self.Z.T@dA
        self.dZ = dA @ self.W.T
        
        # update
        self = self.optimizer.update(self)
        return self.dZ

In [5]:

class Conv2d:
 
    def __init__(self, initializer, optimizer, filter_num, C, filter_size, stride=1, pad=0):
        
        self.optimizer = optimizer
        self.stride = stride
        self.pad = pad
        
        self.initializer = initializer
        self.W = self.initializer.W(n=filter_num, c=C, s=filter_size)
        self.B = self.initializer.B(filter_num)
        
        
        self.X = None 
        self.col = None 
        self.col_W = None 
        
        
    def forward(self, X):
 
        FN, C, FH, FW = self.W.shape 
        N, C, H, W = X.shape 
        
        
        out_h, out_w = self._out_shape(H, FH, W, FW)
        
        
        self.col = self._im2col(X, FH, FW, self.stride, self.pad)
        
        
        self.col_W = self.W.reshape(FN, -1).T

        
#         A = np.dot(self.col, self.col_W) + self.B
        A = self.col@ self.col_W + self.B
        A = A.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2) 
        
        
        self.X = X
        
        return A


    def backward(self, dA):

        FN, C, FH, FW = self.W.shape
        
        
        dA = dA.transpose(0,2,3,1).reshape(-1, FN) 
        
        
        self.dB = np.sum(dA, axis=0)
        self.dW = np.dot(self.col.T, dA)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
        dcol = np.dot(dA, self.col_W.T)
        
        
        dX = self._col2im(dcol, self.X.shape, FH, FW, self.stride, self.pad)
        
        
        self = self.optimizer.update(self)        

        return dX
    
    
    def _im2col(self, X, FH, FW, stride=1, pad=0):

        N, C, H, W = X.shape
        
        
        out_h, out_w = self._out_shape(H, FH, W, FW)

        
        img = np.pad(X, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
        
        
        col = np.zeros((N, C, FH, FW, out_h, out_w))
        for y in range(FH):
            y_max = y + stride * out_h
            for x in range(FW):
                x_max = x + stride * out_w
                col[:, :, y, x, :, :] = img[:, :, y: y_max: stride, x: x_max: stride]
        
        col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N * out_h * out_w, -1)

        return col
    
    
    def _col2im(self, dcol, X_shape, FH, FW, stride=1, pad=0):
 
        N, C, H, W = X_shape

       
        out_h, out_w = self._out_shape(H, FH, W, FW)
        
        
        dcol = dcol.reshape(N, out_h, out_w, C, FH, FW).transpose(0, 3, 4, 5, 1, 2)
        
       
        img = np.zeros((N, C, H + 2 * pad + stride - 1, W + 2 * pad + stride - 1))
        for y in range(FH):
            y_max = y + stride * out_h
            for x in range(FW):
                x_max = x + stride * out_w
                img[:, :, y: y_max: stride, x: x_max: stride] += dcol[:, :, y, x, :, :]

        return img[:, :, pad: H + pad, pad: W + pad]
    
    
    def _out_shape(self, H, FH, W, FW):

        out_h = 1 + int((H + 2 * self.pad - FH) / self.stride)
        out_w = 1 + int((W + 2 * self.pad - FW) / self.stride)
        
        return out_h, out_w

## Experiments with 2D convolutional layers on small arrays

In [None]:
import numpy as np
# CNN2 のフォワードを流す時の入力データ
# (1,1,4,4)
x = np.array([[[[ 1,  2,  3,  4],
                [ 5,  6,  7,  8],
                [ 9, 10, 11, 12],
                [13, 14, 15, 16]]]])

# (2,3,3)
w = np.array([[[ 0.,  0.,  0.],
               [ 0.,  1.,  0.],
               [ 0., -1.,  0.]],

              [[ 0.,  0.,  0.],
               [ 0., -1.,  1.],
               [ 0.,  0.,  0.]]])

In [None]:
w = np.expand_dims(w,axis = 1)

In [None]:
test = Conv2d(filter_num=2, filter_size=3,C=1, stride=1, pad=0, initializer=SimpleInitializer(0.2),optimizer=SGD(0.1))
# testing = test.forward(mini_X_train)

In [None]:
test.W = w
test.B = 0

In [None]:
result = test.forward(x)

In [None]:
result

array([[[[-4., -4.],
         [-4., -4.]],

        [[ 1.,  1.],
         [ 1.,  1.]]]])

In [None]:
delta = np.array([[[ -4,  -4],
                   [ 10,  11]],

                  [[  1,  -7],
                   [  1, -11]]])

In [None]:
delta = np.expand_dims(delta,axis = 1)

In [None]:
test.backward(delta)

array([[[[  0.,   0.,   0.,   0.],
         [  0.,   0.,  -5.,  11.],
         [  0.,  12.,  -5., -11.],
         [  0.,  -1.,  -1.,   0.]]]])

## Creation of maximum pooling layer

In [6]:
class MaxPool2D:

    def __init__(self, pool_h=3, pool_w=3, stride=1, pad=0):
       
        self.pool_h = pool_h 
        self.pool_w = pool_w 
        self.stride = stride 
        self.pad = pad 
        
       
        self.X = None 
        self.arg_max = None 

    def forward(self, X):
 
        N, C, H, W = X.shape
        
        
        out_h, out_w = self._out_shape(H, self.pool_h, W, self.pool_w)
        
        
        col = self._im2col(X, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)
        
        
        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
#         print(col)
#         print(np.max(col, axis=1))
#         print(np.argmax(col, axis=1).size)
        
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2) #(N, out_h, out_w, C)→(N, C, out_h, out_w)

        
        self.X = X
        self.arg_max = arg_max

        return out

    def backward(self, dout):

        dout = dout.transpose(0, 2, 3, 1)
        
        
        pool_size = self.pool_h * self.pool_w
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,)) 
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dX = self._col2im(dcol, self.X.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        
        return dX

    def _im2col(self, input_data, FH, FW, stride=1, pad=0):

        N, C, H, W = input_data.shape
        
       
        out_h, out_w = self._out_shape(H, FH, W, FW)

        
        img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
        
       
        col = np.zeros((N, C, FH, FW, out_h, out_w))
        for y in range(FH):
            y_max = y + stride * out_h
            for x in range(FW):
                x_max = x + stride * out_w
                col[:, :, y, x, :, :] = img[:, :, y: y_max: stride, x: x_max: stride]

        
        col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N * out_h * out_w, -1) 

        return col
    
    
    def _col2im(self, col, input_shape, FH, FW, stride=1, pad=0):

        N, C, H, W = input_shape

        
        out_h, out_w = self._out_shape(H, FH, W, FW)
        
        
        col = col.reshape(N, out_h, out_w, C, FH, FW).transpose(0, 3, 4, 5, 1, 2) 

       
        img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
        for y in range(FH):
            y_max = y + stride*out_h
            for x in range(FW):
                x_max = x + stride*out_w
                img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

        return img[:, :, pad:H + pad, pad:W + pad]
    
    
    def _out_shape(self, H, FH, W, FW):

        out_h = 1 + int((H - FH) / self.stride)
        out_w = 1 + int((W- FW) / self.stride)
        
        return out_h, out_w

## Creating average pooling

In [7]:
class AveragePool2D:

    def __init__(self, pool_h=3, pool_w=3, stride=1, pad=0):
       
        self.pool_h = pool_h 
        self.pool_w = pool_w 
        self.stride = stride 
        self.pad = pad 
        
       
        self.X = None 
#         self.arg_max = None 

    def forward(self, X):
 
        N, C, H, W = X.shape
        
        
        out_h, out_w = self._out_shape(H, self.pool_h, W, self.pool_w)
        
        
        col = self._im2col(X, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)
        
        

        out = np.mean(col, axis=1)
        self.length = col.shape[0]

        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2) #(N, out_h, out_w, C)→(N, C, out_h, out_w)

        
        self.X = X
        # self.arg_max = arg_max

        return out

    def backward(self, dout):

        dout = dout.transpose(0, 2, 3, 1)
        
        
        pool_size = self.pool_h * self.pool_w
        davg = np.zeros((dout.size, pool_size))
        flatten = dout.flatten()
        for i in range(self.length):
            davg[i,:] = flatten[i]/len(davg[i,:])

        davg = davg.reshape(dout.shape + (pool_size,)) 
        dcol = davg.reshape(davg.shape[0] * davg.shape[1] * davg.shape[2], -1)
        dX = self._col2im(dcol, self.X.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        
        return dX

    def _im2col(self, input_data, FH, FW, stride=1, pad=0):

        N, C, H, W = input_data.shape
        
       
        out_h, out_w = self._out_shape(H, FH, W, FW)

        
        img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
        
       
        col = np.zeros((N, C, FH, FW, out_h, out_w))
        for y in range(FH):
            y_max = y + stride * out_h
            for x in range(FW):
                x_max = x + stride * out_w
                col[:, :, y, x, :, :] = img[:, :, y: y_max: stride, x: x_max: stride]

        
        col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N * out_h * out_w, -1) 

        return col
    
    
    def _col2im(self, col, input_shape, FH, FW, stride=1, pad=0):

        N, C, H, W = input_shape

        
        out_h, out_w = self._out_shape(H, FH, W, FW)
        
        
        col = col.reshape(N, out_h, out_w, C, FH, FW).transpose(0, 3, 4, 5, 1, 2) 

       
        img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
        for y in range(FH):
            y_max = y + stride*out_h
            for x in range(FW):
                x_max = x + stride*out_w
                img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

        return img[:, :, pad:H + pad, pad:W + pad]
    
    
    def _out_shape(self, H, FH, W, FW):

        out_h = 1 + int((H - FH) / self.stride)
        out_w = 1 + int((W- FW) / self.stride)
        
        return out_h, out_w

## Smoothing

In [8]:
class Flatten():

    def __init__(self):
        self.X_shape = None
    
    def forward(self, X):
        
        X_1d = X.reshape(X.shape[0], -1)
        
      
        self.X_shape = X.shape
        
        return X_1d
    

    def backward(self, X):
        
        X = X.reshape(self.X_shape)
        
        return X

## Learning and estimation

In [9]:
class ScratchConv2Classifier():
    """
    Simple three-layer neural network classifier
    Parameters
    ----------
    Attributes
    ----------
    """
    def __init__(self,filter_num, C, filter_size,n_output,stride=1, pad=0,batch_size = 20 ,epochs=10,sigma=0.02,optimizer=SGD,activation_function = "sigmoid",lr = 0.01,bias = False,verbose = True):
        self.verbose = verbose
        self.filter_size= filter_size
        self.filter_num = filter_num
        self.C = C 
        self.batch_size = batch_size
        self.n_output = n_output 
        self.lr = lr
        self.sigma = sigma
        self.stride = stride
        self.pad = pad

        self.check_bias = bias
        self.activation_function = activation_function
        self.epochs = epochs
        self.optimizer = optimizer(self.lr)
    def fit(self, X, y, X_val=None, y_val=None):
        self.initializer = self.get_initializer()

        self.conv= Conv2d(filter_num =self.filter_num , filter_size = self.filter_size,C =X.shape[1], stride= self.stride, pad =self.pad, initializer=SimpleInitializer(sigma=0.02),optimizer=self.optimizer)
        self.pool = MaxPool2D(pool_h= 2,pool_w =2)
        self.flatten = Flatten()
        out_h, out_w = self._out_shape(X.shape[-2], self.filter_size, X.shape[-1], self.filter_size,self.pad,self.stride)
        out_h, out_w = self._out_shape(out_h, 2, out_w, 2, self.pool.pad, self.pool.stride)
        nodes = self.filter_num * out_h * out_w 
        self.FC = FC(nodes,self.n_output,self.initializer,self.optimizer)
        
#         self.conv1d = Conv1d(kernel_size=7, initializer=SimpleInitializer(self.sigma), optimizer=self.optimizer, input_channels=1, output_channels=1, padding=3, stride=2)
#         nodes_1 = output_size_calculation(X.shape[-1], self.conv1d.padding, self.conv1d.kernel_size, self.conv1d.stride)
#         self.FC2 = FC(nodes_1,self.n_nodes2,self.initializer,self.optimizer)
#         self.FC3 = FC(self.n_nodes2, self.n_output,self.initializer,self.optimizer)
        
        self.activation1 = self.get_activation()
#         self.activation2 = self.get_activation()
        self.activation2 = Softmax()
            
        get_mini_batch = GetMiniBatch(X,y,self.batch_size)
        self.loss_train = []
        self.loss_val = []
        for epoch in range(self.epochs):  
            for mini_X_train, mini_y_train in get_mini_batch:

                self.forward(mini_X_train)
#                
                self.backward(mini_X_train,mini_y_train)
#             break
            self.forward(X)
            self.loss_train.append(self.activation3.loss(self.result,y))
            if X_val is not None:
                self.forward(X_val)
                self.loss_val.append(self.activation3.loss(self.result,y_val))
            

    
        
        """
        Learn a neural network classifier.
        Parameters
        ----------
        X : The following forms of ndarray, shape (n_samples, n_features)
            Features of training data
        y : The following form of ndarray, shape (n_samples,)
            Correct answer value of training data
        X_val : The following forms of ndarray, shape (n_samples, n_features)
            Features of verification data
        y_val : The following form of ndarray, shape (n_samples,)
            Correct value of verification data
        """
        
        if self.verbose:
             #verbose is set to True, the learning process etc. is output.
            print(self.loss_train)
    def get_initializer(self):
        if self.activation_function == "sigmoid" or self.activation_function == "tanh":
            return XavierInitializer(self.sigma)
        elif self.activation_function == "relu":
            return HeInitializer(self.sigma)
    
    def get_activation(self):
        if self.activation_function == "sigmoid" :
            return Sigmoid()
        elif self.activation_function == "tanh":
            return Tanh()
        elif self.activation_function == "relu":
            return ReLU()

    def _out_shape(self, H, FH, W, FW,pad,stride):

        out_h = 1 + int((H + 2 * pad - FH) / stride)
        out_w = 1 + int((W + 2 * pad - FW) / stride)
        
        return out_h, out_w
    
        
       
    def forward(self,X):
        A1 = self.conv.forward(X)
        Z1 = self.activation1.forward(A1)
        A2 = self.pool.forward(Z1)
        Z2 = self.flatten.forward(A2)

        A3 = self.FC.forward(Z2)

        Z3 = self.activation2.forward(A3)
        self.result = Z3

        

    def backward(self,X,y):
#         print(self.result.shape)
#         print(y.shape)
        dA3 = self.activation2.backward(self.result,y)
        dZ2 = self.FC.backward(dA3)
        dA2 = self.flatten.backward(dZ2)
        dZ1 = self.pool.backward(dA2)
        dA1 = self.activation1.backward(dZ1)
        dZ0 = self.conv.backward(dA1)
       
        

    def derivative(self,A,d):
        if self.activation_function == "sigmoid":
            dA = d*(1 / (1 + np.exp(-A)))*(1-(1 / (1 + np.exp(-A))))
            return dA
        elif self.activation_function == " tanh":
            dA = d*(1- np.tanh(A)**2)
            return dA
        

        

        
    def predict(self, X):
        """
        Estimate using a neural network classifier.
        Parameters
        ----------
        X : The following forms of ndarray, shape (n_samples, n_features)
            sample
        Returns
        -------
            The following form of ndarray, shape (n_samples, 1)
            Estimated result
        """
        self.forward(X)
        return np.argmax(self.result,axis = 1)
        
      
    
class GetMiniBatch:
    """
Iterator to get a mini-batch
    Parameters
    ----------
    X : The following forms of ndarray, shape (n_samples, n_features)
      Training data
    y : The following form of ndarray, shape (n_samples, 1)
      Correct answer value
    batch_size : int
      Batch size
    seed : int
      NumPy random number seed
    """
    def __init__(self, X, y, batch_size = 20, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self._X = X[shuffle_index]
        self._y = y[shuffle_index]
        self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)
    def __len__(self):
        return self._stop
    def __getitem__(self,item):
        p0 = item*self.batch_size
        p1 = item*self.batch_size + self.batch_size
        return self._X[p0:p1], self._y[p0:p1]        
    def __iter__(self):
        self._counter = 0
        return self
    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        p0 = self._counter*self.batch_size
        p1 = self._counter*self.batch_size + self.batch_size
        self._counter += 1
        return self._X[p0:p1], self._y[p0:p1]

In [10]:
from tensorflow.keras.datasets import mnist
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

(X_train2, y_train2), (X_test2, y_test2) = mnist.load_data()

X_train2 = X_train2.astype(np.float)
X_test2 = X_test2.astype(np.float)
X_train2 /= 255 
X_test2 /= 255

X_train2 = X_train2[:, np.newaxis, :, :] 
X_test2 = X_test2[:, np.newaxis, :, :]

X_train2, X_val2, y_train2, y_val2 = train_test_split(X_train2, y_train2, test_size=0.2)

enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot2 = enc.fit_transform(y_train2[:, np.newaxis])
y_test_one_hot2 = enc.transform(y_val2[:, np.newaxis])

In [11]:
model = ScratchConv2Classifier(5,3, 7,10, stride=1, pad=3, epochs =5, verbose=False)
model.fit(X_train2, y_train_one_hot2, X_val2, y_test_one_hot2)

MemoryError: Unable to allocate 13.7 GiB for an array with shape (48000, 1, 7, 7, 28, 28) and data type float64

In [None]:
result = model.predict(X_test2[0:100])

In [None]:
acc = (result == y_test[0:100]).sum()*100/100

In [None]:
acc

## LeNet

In [None]:
class LeNet():
    """
    Simple three-layer neural network classifier
    Parameters
    ----------
    Attributes
    ----------
    """
    def __init__(self,filter_num=6, filter_size =5,n_output=10,stride=1, pad=2,batch_size = 20 ,epochs=10,sigma=0.02,optimizer=SGD,activation_function = "sigmoid",lr = 0.01,bias = False,verbose = False):
        self.verbose = verbose
        self.filter_size= filter_size
        self.filter_num = filter_num
        # self.C = C 
        self.batch_size = batch_size
        self.n_output = n_output 
        self.lr = lr
        self.sigma = sigma
        self.stride = stride
        self.pad = pad

        self.check_bias = bias
        self.activation_function = activation_function
        self.epochs = epochs
        self.optimizer = optimizer(self.lr)
    def fit(self, X, y, X_val=None, y_val=None):
        self.initializer = self.get_initializer()

        self.conv1= Conv2d(filter_num =self.filter_num , filter_size = self.filter_size,C =X.shape[1], stride= self.stride, pad =self.pad, initializer=SimpleInitializer(sigma=0.02),optimizer=self.optimizer)
        self.pool1 = AveragePool2D(pool_h= 2,pool_w =2,stride =2)
        self.conv2 = Conv2d(filter_num =16 , filter_size = self.filter_size,C =6, stride= 1, pad =0, initializer=SimpleInitializer(sigma=0.02),optimizer=self.optimizer)
        self.pool2 = AveragePool2D(pool_h= 2,pool_w =2,stride =2)
        self.flatten = Flatten()
        # out_h, out_w = self._out_shape(X.shape[-2], self.filter_size, X.shape[-1], self.filter_size,self.pad,self.stride)
        # out_h, out_w = self._out_shape(out_h, 2, out_w, 2, self.pool.pad, self.pool.stride)
        self.FC1 = FC(400,120,self.initializer,self.optimizer)
        self.FC2 = FC(120,84,self.initializer,self.optimizer)
        self.FC3 = FC(84,self.n_output,self.initializer,self.optimizer)
        
#         self.conv1d = Conv1d(kernel_size=7, initializer=SimpleInitializer(self.sigma), optimizer=self.optimizer, input_channels=1, output_channels=1, padding=3, stride=2)
#         nodes_1 = output_size_calculation(X.shape[-1], self.conv1d.padding, self.conv1d.kernel_size, self.conv1d.stride)
#         self.FC2 = FC(nodes_1,self.n_nodes2,self.initializer,self.optimizer)
#         self.FC3 = FC(self.n_nodes2, self.n_output,self.initializer,self.optimizer)
        
        self.activation1 = self.get_activation()
        self.activation2 = self.get_activation()
        self.activation3 = self.get_activation()
        self.activation4 = self.get_activation()
        self.activation5 = Softmax()
            
        get_mini_batch = GetMiniBatch(X,y,self.batch_size)
        self.loss_train = []
        self.loss_val = []
        for epoch in range(self.epochs):  
            for mini_X_train, mini_y_train in get_mini_batch:

                self.forward(mini_X_train)
#                
                self.backward(mini_X_train,mini_y_train)
#             break
            self.forward(X)
            self.loss_train.append(self.activation3.loss(self.result,y))
            if X_val is not None:
                self.forward(X_val)
                self.loss_val.append(self.activation3.loss(self.result,y_val))
            

    
        
        """
        Learn a neural network classifier.
        Parameters
        ----------
        X : The following forms of ndarray, shape (n_samples, n_features)
            Features of training data
        y : The following form of ndarray, shape (n_samples,)
            Correct answer value of training data
        X_val : The following forms of ndarray, shape (n_samples, n_features)
            Features of verification data
        y_val : The following form of ndarray, shape (n_samples,)
            Correct value of verification data
        """
        
        if self.verbose:
             #verbose is set to True, the learning process etc. is output.
            print(self.loss_train)
    def get_initializer(self):
        if self.activation_function == "sigmoid" or self.activation_function == "tanh":
            return XavierInitializer(self.sigma)
        elif self.activation_function == "relu":
            return HeInitializer(self.sigma)
    
    def get_activation(self):
        if self.activation_function == "sigmoid" :
            return Sigmoid()
        elif self.activation_function == "tanh":
            return Tanh()
        elif self.activation_function == "relu":
            return ReLU()

    def _out_shape(self, H, FH, W, FW,pad,stride):

        out_h = 1 + int((H + 2 * pad - FH) / stride)
        out_w = 1 + int((W + 2 * pad - FW) / stride)
        
        return out_h, out_w
    
        
       
    def forward(self,X):
        A1 = self.conv1.forward(X)
        Z1 = self.activation1.forward(A1)
        A2 = self.pool1.forward(Z1)
        Z2 = self.conv2.forward(A2)
        A3 = self.activation2.forward(Z2)
        Z3 = self.pool2.forward(A3)
        A4 = self.flatten.forward(Z3)
        Z4 = self.FC1.forward(A4)
        A5 = self.activation3.forward(Z4)
        Z5 = self.FC2.forward(A5)
        A6 = self.activation4.forward(Z5)
        Z6 = self.FC3.forward(A6)
        A7 = self.activation5.forward(Z6)

        self.result = A7

        

    def backward(self,X,y):

        dZ6 = self.activation5.backward(self.result,y)
        dA6 = self.FC3.backward(dZ6)
        dZ5 = self.activation4.backward(dA6)
        dA5 = self.FC2.backward(dZ5)
        dZ4 = self.activation3.backward(dA5)
        dA4 = self.FC1.backward(dZ4)
        dZ3 = self.flatten.backward(dA4)
        dA3 = self.pool2.backward(dZ3)
        dZ2 = self.activation2.backward(dA3)
        dA2 = self.conv2.backward(dZ2)
        dZ1 = self.pool1.backward(dA2)
        dA1 = self.activation1.backward(dZ1)
        dZ0 = self.conv1.backward(dA1)
    
       
        

    def derivative(self,A,d):
        if self.activation_function == "sigmoid":
            dA = d*(1 / (1 + np.exp(-A)))*(1-(1 / (1 + np.exp(-A))))
            return dA
        elif self.activation_function == " tanh":
            dA = d*(1- np.tanh(A)**2)
            return dA
        

        

        
    def predict(self, X):
        """
        Estimate using a neural network classifier.
        Parameters
        ----------
        X : The following forms of ndarray, shape (n_samples, n_features)
            sample
        Returns
        -------
            The following form of ndarray, shape (n_samples, 1)
            Estimated result
        """
        self.forward(X)
        return np.argmax(self.result,axis = 1)

In [None]:
model = LeNet(verbose=False)

In [None]:
model.fit(X_train2, y_train_one_hot2, X_val2, y_test_one_hot2)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


## Survey of famous image recognition models

- AlexNet: AlexNet is the name of a convolutional neural network (CNN) architecture, designed by Alex Krizhevsky in collaboration with Ilya Sutskever and Geoffrey Hinton, who was Krizhevsky's Ph.D. advisor.AlexNet competed in the ImageNet Large Scale Visual Recognition Challenge on September 30, 2012. The network achieved a top-5 error of 15.3%, more than 10.8 percentage points lower than that of the runner up. The original paper's primary result was that the depth of the model was essential for its high performance, which was computationally expensive, but made feasible due to the utilization of graphics processing units (GPUs) during training
- VGG16: VGG16 is a simple and widely used Convolutional Neural Network (CNN) Architecture used for ImageNet, a large visual database project used in visual object recognition software research. The VGG16 Architecture was developed and introduced by Karen Simonyan and Andrew Zisserman from the University of Oxford, in the year 2014, through their article “Very Deep Convolutional Networks for Large-Scale Image Recognition.” ‘VGG’ is the abbreviation for Visual Geometry Group, which is a group of researchers at the University of Oxford who developed this architecture, and ‘16’ implies that this architecture has 16 layers

## Calculation of output size and number of parameters

In [5]:
def calc_outshape_parameter(H, FH, W, FW, FC, P, S, IC):
    FN = int(FC/IC)
    out_h = int((H + 2 * P - FH) / S) + 1
    out_w = int((W + 2 * P - FW) / S) + 1
    parameter = FH * FW * IC * FN + FN
        
    return out_h, out_w, parameter

In [6]:
calc_outshape_parameter(144,3,144,3,6,0,1,3)

(142, 142, 56)

In [7]:
calc_outshape_parameter(60,3,60,3,48,0,1,24)

(58, 58, 434)

In [8]:
calc_outshape_parameter(20,3,20,3,20,0,2,10)

(9, 9, 182)

## Survey on filter size

### Why 3×3 filters are commonly used instead of larger ones such as 7×7

- Less filter less computation, big filter more computation.

- It learns large complex features easily, where as large filters learns simple features.

- Output Layers will be less when we use 3x3 filters as compared to 5x5 or bigger filters.

- Also since there will be more output layers when using 3x3 filters more memory will be required to store them as compared to 5x5 or bigger filters.

### The effect of a 1 x 1 filter with no height or width direction

- 1 x 1 filter is used when we want to keep the height and width of input through layers but change the number of channel