## Creating a one-dimensional convolutional layer class that limits the number of channels to one

In [45]:
class SimpleConv1d():
    """
    One-dimensional convolution for a single channel (no padding, stride 1).

    The layer keeps no state: the input, filter and bias are passed to
    every call.
    """

    def forward(self, x, w, b):
        """
        Slide the filter over the input and add the bias.

        Parameters
        ----------
        x : ndarray, shape (n_features,)
          Input signal
        w : ndarray, shape (filter_size,)
          Filter weights
        b : ndarray, shape (1,)
          Bias; only the first element is used

        Returns
        ----------
        ndarray, shape (n_features - filter_size + 1,)
          Empty when the filter is longer than the input
        """
        width = len(w)
        # One output per position where the whole filter fits inside x.
        n_out = len(x) - width + 1
        return np.array([(x[i:i + width] * w).sum() + b[0] for i in range(n_out)])

    def backward(self, x, w, da):
        """
        Compute the gradients of the bias, the filter and the input.

        Parameters
        ----------
        x : ndarray, shape (n_features,)
          Input that was given to forward
        w : ndarray, shape (filter_size,)
          Filter weights that were given to forward
        da : ndarray, shape (n_out,)
          Gradient of the loss with respect to the forward output

        Returns
        ----------
        db : scalar
          Sum of da
        dw : ndarray, shape (filter_size,)
        dx : ndarray, shape (n_out + filter_size - 1,)
          This equals the length of x when da has the length of the
          forward output
        """
        width = len(w)
        n_out = len(da)
        # Filter tap k touched x[k], x[k+1], ..., one per output position.
        dw = np.array([da @ x[k:k + n_out] for k in range(width)])
        db = np.sum(da)
        # Output s was computed from x[s:s+width], so its gradient flows
        # back to exactly that window, weighted by the filter.
        dx = np.zeros(n_out + width - 1, dtype=np.result_type(da, w))
        for s, grad in enumerate(da):
            dx[s:s + width] += grad * w
        return db, dw, dx
        
            
        
    


## Output size calculation after one-dimensional convolution

In [7]:
def output_size_calculation(n_in, P, F, S):
    """
    Return the number of output features of a one-dimensional convolution.

    Parameters
    ----------
    n_in : int
      Number of input features
    P : int
      Padding applied on each side
    F : int
      Filter size
    S : int
      Stride

    Returns
    ----------
    int
      (n_in + 2*P - F) / S + 1, truncated toward zero
    """
    padded_span = n_in + 2*P - F
    steps = padded_span / S
    return int(steps + 1)

## Experiment of one-dimensional convolutional layer with small array

In [5]:
import numpy as np
# Input signal with four features.
x = np.array([1,2,3,4])
# Filter with three weights.
w = np.array([3, 5, 7])
# Single bias value.
b = np.array([1])
# Upstream gradient, one entry per forward output.
delta_a = np.array([10, 20])


In [50]:

# Run the single-channel layer on the small example arrays.
conv = SimpleConv1d()
# Forward pass: filter slid over x, plus the bias.
print(conv.forward(x, w, b))
# Backward pass: gradients of the bias, the filter and the input for delta_a.
db, dw, dx = conv.backward(x, w, delta_a)
print(db)
print(dw)
print(dx)

[35 50]
30
[ 50  80 110]
[ 30 110 170 140]


## Creating a one-dimensional convolutional layer class that does not limit the number of channels

In [8]:
class SimpleInitializer:
    """
    Gaussian initializer with a fixed standard deviation.

    Parameters
    ----------
    sigma : float
      Standard deviation of the Gaussian distribution
    """
    def __init__(self, sigma):
        self.sigma = sigma

    def W(self, n_nodes1, n_nodes2,kernel_size):
        """
        Draw a three-dimensional weight array.

        Parameters
        ----------
        n_nodes1 : int
          Size of the first axis
        n_nodes2 : int
          Size of the second axis
        kernel_size : int
          Size of the last axis (filter width)

        Returns
        ----------
        W : ndarray, shape (n_nodes1, n_nodes2, kernel_size)
        """
        shape = (n_nodes1, n_nodes2, kernel_size)
        return self.sigma * np.random.randn(*shape)

    def B(self, n_nodes2):
        """
        Draw a bias vector.

        Parameters
        ----------
        n_nodes2 : int
          Number of bias values

        Returns
        ----------
        B : ndarray, shape (n_nodes2,)
        """
        noise = np.random.randn(n_nodes2)
        return self.sigma * noise

class Conv1d():
    """
    One-dimensional convolution layer for one sample with several channels
    (no padding, stride 1).

    Parameters
    ----------
    input_channels : int
      Number of input channels
    output_channels : int
      Number of output channels
    features : int
      Number of input features; stored but not used by the computation
    initializer : instance of initialization method
      Must provide W(output_channels, input_channels, kernel_size) and
      B(output_channels)
    kernel_size : int
      Filter width

    Attributes
    ----------
    W : ndarray, shape (output_channels, input_channels, kernel_size)
    B : ndarray, shape (output_channels,)
    dW, dB : gradients of W and B, set by backward
    """
    def __init__(self, input_channels,output_channels, features, initializer,kernel_size):
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.features = features
        self.W = initializer.W(output_channels,self.input_channels,kernel_size)
        self.B = initializer.B(output_channels)

    def forward(self,X):
        """
        Parameters
        ----------
        X : ndarray, shape (input_channels, n_features)

        Returns
        ----------
        ndarray, shape (output_channels, n_features - kernel_size + 1)
        """
        self.X = X  # kept for backward
        width = self.W.shape[-1]
        n_out = X.shape[-1] - width + 1
        # Broadcasting pairs each window (C_in, K) with every output filter.
        cols = [(X[:, i:i + width] * self.W).sum(axis=(1, 2)) + self.B
                for i in range(n_out)]
        return np.array(cols).T

    def backward(self,dA):
        """
        Compute dW and dB (stored on the layer) and return the input gradient.

        Parameters
        ----------
        dA : ndarray, shape (output_channels, n_out)
          Gradient with respect to the forward output

        Returns
        ----------
        ndarray, shape (input_channels, n_features)
          Gradient with respect to the input of the last forward call
        """
        X = self.X
        width = self.W.shape[-1]
        n_out = dA.shape[-1]
        # dW[o, c, k] = sum_s dA[o, s] * X[c, s + k]
        self.dW = np.array([dA @ X[:, k:k + n_out].T
                            for k in range(width)]).transpose(1, 2, 0)
        self.dB = np.sum(dA, axis=1)
        # Filter tap k of output s read X[:, s + k], so the gradient of
        # every output is scattered back to the window it was computed from.
        dX = np.zeros(X.shape)
        for k in range(width):
            dX[:, k:k + n_out] += self.W[:, :, k].T @ dA
        return dX
            
        
        
            
        
        
        
        

In [6]:
x = np.array([[1, 2, 3, 4], [2, 3, 4, 5]]) # shape (2, 4): (number of input channels, number of features).
w = np.ones((3, 2, 3)) # All ones to keep the example simple. Shape: (number of output channels, number of input channels, filter size).
b = np.array([1, 2, 3]) # One bias per output channel.

In [166]:
# 2 input channels, 3 output channels, 4 features, kernel size 3.
model = Conv1d(2,3,4,SimpleInitializer(0.2),3)

In [167]:
# Replace the randomly initialized parameters with the fixed example values.
model.W = w
model.B = b

In [168]:
# Forward pass on the example input; the bare name displays the result.
testing = model.forward(x)
testing

array([[16., 22.],
       [17., 23.],
       [18., 24.]])

In [169]:
# The forward output is reused as a stand-in for the upstream gradient.
model.backward(testing)

array([[ 69., 120., 120.,  51.],
       [ 69., 120., 120.,  51.]])

## Implementing padding

In [434]:
class Conv1d():
    """
    One-dimensional convolution layer for one sample with several channels,
    with zero padding on both sides (stride 1).

    Parameters
    ----------
    input_channels : int
      Number of input channels
    output_channels : int
      Number of output channels
    features : int
      Number of input features; stored but not used by the computation
    initializer : instance of initialization method
      Must provide W(output_channels, input_channels, kernel_size) and
      B(output_channels)
    kernel_size : int
      Filter width
    padding : int
      Number of zeros added on each side of the input

    Attributes
    ----------
    W : ndarray, shape (output_channels, input_channels, kernel_size)
    B : ndarray, shape (output_channels,)
    X : zero-padded input of the last forward call
    dW, dB : gradients of W and B, set by backward
    """
    def __init__(self, input_channels,output_channels, features, initializer,kernel_size,padding):
        self.kernel_size = kernel_size
        self.padding = padding
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.features = features
        self.W = initializer.W(output_channels,self.input_channels,kernel_size)
        self.B = initializer.B(output_channels)

    def forward(self,X):
        """
        Parameters
        ----------
        X : ndarray, shape (input_channels, n_features)

        Returns
        ----------
        ndarray, shape (output_channels, n_features + 2*padding - kernel_size + 1)
        """
        self.X = np.pad(X, ((0,0), (self.padding, self.padding)))
        width = self.W.shape[-1]
        n_out = self.X.shape[-1] - width + 1
        # Broadcasting pairs each window (C_in, K) with every output filter.
        cols = [(self.X[:, i:i + width] * self.W).sum(axis=(1, 2)) + self.B
                for i in range(n_out)]
        return np.array(cols).T

    def backward(self,dA):
        """
        Compute dW and dB (stored on the layer) and return the input gradient.

        Parameters
        ----------
        dA : ndarray, shape (output_channels, n_out)
          Gradient with respect to the forward output

        Returns
        ----------
        ndarray, shape (input_channels, n_features + 2*padding)
          Gradient with respect to the zero-padded input. Slice off
          `padding` columns on each side to get the gradient with respect
          to the original input.
        """
        padded = self.X
        width = self.W.shape[-1]
        n_out = dA.shape[-1]
        # dW[o, c, k] = sum_s dA[o, s] * padded[c, s + k]
        self.dW = np.array([dA @ padded[:, k:k + n_out].T
                            for k in range(width)]).transpose(1, 2, 0)
        self.dB = np.sum(dA,axis = 1)
        # Filter tap k of output s read padded[:, s + k], so the gradient of
        # every output is scattered back to the window it was computed from.
        dX = np.zeros(padded.shape)
        for k in range(width):
            dX[:, k:k + n_out] += self.W[:, :, k].T @ dA
        return dX

In [435]:
# Same layer as before, now with 3 zeros of padding on each side.
model = Conv1d(2,3,4,SimpleInitializer(0.2),3,padding = 3)

In [436]:
# Replace the randomly initialized parameters with the fixed example values.
model.W = w
model.B = b

In [437]:
# Forward pass on the example input with padding applied.
testing = model.forward(x)


In [439]:
# Display the forward output.
testing

array([[ 1.,  4.,  9., 16., 22., 17., 10.,  1.],
       [ 2.,  5., 10., 17., 23., 18., 11.,  2.],
       [ 3.,  6., 11., 18., 24., 19., 12.,  3.]])

In [438]:
# The forward output is reused as a stand-in for the upstream gradient.
model.backward(testing)

array([[  6.,  21.,  51.,  96., 150., 174., 156.,  93.,  39.,   6.],
       [  6.,  21.,  51.,  96., 150., 174., 156.,  93.,  39.,   6.]])

## Supporting mini-batches

In [1]:
class GetMiniBatch:
    """
    Iterator that yields shuffled mini-batches.

    Parameters
    ----------
    X : ndarray, first axis is the sample axis
      Training data
    y : ndarray, first axis is the sample axis
      Correct answer values
    batch_size : int
      Batch size; the last batch may be smaller
    seed : int
      NumPy random number seed. The global NumPy generator is re-seeded
      with it on construction.
    """

    def __init__(self, X, y, batch_size = 20, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self._X = X[shuffle_index]
        self._y = y[shuffle_index]
        # np.int was removed in NumPy 1.24; a plain int also suits __len__.
        self._stop = int(np.ceil(X.shape[0]/self.batch_size))

    def __len__(self):
        return self._stop

    def _batch(self, index):
        # Slice out batch number `index` from the shuffled data.
        start = index*self.batch_size
        stop = start + self.batch_size
        return self._X[start:stop], self._y[start:stop]

    def __getitem__(self,item):
        return self._batch(item)

    def __iter__(self):
        # Iterating restarts from the first batch; the shuffle order is kept.
        self._counter = 0
        return self

    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        batch = self._batch(self._counter)
        self._counter += 1
        return batch

In [718]:
class Conv1d():
    """
    One-dimensional convolution layer for a mini-batch, with zero padding on
    both sides (stride 1).

    Parameters
    ----------
    input_channels : int
      Number of input channels
    output_channels : int
      Number of output channels
    features : int
      Number of input features; stored but not used by the computation
    initializer : instance of initialization method
      Must provide W(output_channels, input_channels, kernel_size) and
      B(output_channels)
    kernel_size : int
      Filter width
    padding : int
      Number of zeros added on each side of the input

    Attributes
    ----------
    W : ndarray, shape (output_channels, input_channels, kernel_size)
    B : ndarray, shape (output_channels,)
    X : zero-padded input of the last forward call
    dW, dB : gradients of W and B, set by backward
    """
    def __init__(self,input_channels,output_channels, features, initializer,kernel_size,padding):
        self.kernel_size = kernel_size
        self.padding = padding
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.features = features
        self.W = initializer.W(output_channels,self.input_channels,kernel_size)
        self.B = initializer.B(output_channels)

    def forward(self,X):
        """
        Parameters
        ----------
        X : ndarray, shape (batch_size, input_channels, n_features)

        Returns
        ----------
        ndarray, shape (batch_size, output_channels, n_features + 2*padding - kernel_size + 1)

        Raises
        ----------
        ValueError
          If the filter is wider than the padded input
        """
        self.X = np.pad(X, ((0,0),(0,0), (self.padding, self.padding)))
        width = self.W.shape[-1]
        n_out = self.X.shape[-1] - width + 1
        if n_out < 1:
            raise ValueError("kernel is wider than the padded input")
        # (N, 1, C_in, K) * (C_out, C_in, K) -> (N, C_out, C_in, K)
        cols = [(self.X[:, None, :, i:i + width] * self.W).sum(axis=(2, 3)) + self.B
                for i in range(n_out)]
        return np.array(cols).transpose(1, 2, 0)

    def backward(self,dA):
        """
        Compute dW and dB (stored on the layer) and return the input gradient.

        Parameters
        ----------
        dA : ndarray, shape (batch_size, output_channels, n_out)
          Gradient with respect to the forward output

        Returns
        ----------
        ndarray, shape (batch_size, input_channels, n_features + 2*padding)
          Gradient with respect to the zero-padded input. Slice off
          `padding` columns on each side to get the gradient with respect
          to the original input.
        """
        padded = self.X
        width = self.W.shape[-1]
        n_out = dA.shape[-1]
        # dW[o, c, k] = sum over samples n and positions s of
        # dA[n, o, s] * padded[n, c, s + k]
        self.dW = np.array([
            (dA @ padded[:, :, k:k + n_out].transpose(0, 2, 1)).sum(axis=0)
            for k in range(width)
        ]).transpose(1, 2, 0)
        self.dB = np.sum(dA,axis = (0,-1))
        # Filter tap k of output s read padded[:, :, s + k], so the gradient
        # of every output is scattered back to the window it came from.
        dX = np.zeros(padded.shape)
        for k in range(width):
            dX[:, :, k:k + n_out] += self.W[:, :, k].T @ dA
        return dX

In [726]:
# Mini-batch layer: 2 input channels, 3 output channels, kernel size 3, padding 2.
model = Conv1d(2,3,4,SimpleInitializer(0.2),3,padding = 2)

In [727]:
# Replace the randomly initialized parameters with the fixed example values.
model.W = w
model.B = b

In [728]:
# Reproducible random batch of shape (3, 2, 4); the layer above expects 2 input channels.
np.random.seed(0)
test = np.random.rand(3,2,4)

In [729]:
# Forward pass on the whole batch.
testing = model.forward(test)


In [731]:
# Display the shape of the forward output.
testing.shape

(3, 3, 6)

In [730]:
# The forward output is reused as a stand-in for the upstream gradient.
model.backward(testing)

array([[[ 8.91740491, 21.91806026, 38.03976737, 46.63663321,
         47.06699818, 41.25525962, 23.74098886, 10.30996855],
        [ 8.91740491, 21.91806026, 38.03976737, 46.63663321,
         47.06699818, 41.25525962, 23.74098886, 10.30996855]],

       [[10.59512196, 25.1173584 , 42.22787813, 45.99622658,
         41.91034609, 32.64789902, 18.28442861,  7.84807266],
        [10.59512196, 25.1173584 , 42.22787813, 45.99622658,
         41.91034609, 32.64789902, 18.28442861,  7.84807266]],

       [[ 8.99651022, 22.88835567, 40.49910946, 51.06846678,
         51.84715364, 45.18802383, 25.62215629, 10.95162397],
        [ 8.99651022, 22.88835567, 40.49910946, 51.06846678,
         51.84715364, 45.18802383, 25.62215629, 10.95162397]]])

## Supporting an arbitrary stride

In [22]:
class Conv1d():
    """
    One-dimensional convolution layer for a mini-batch with configurable
    zero padding and stride.

    Parameters
    ----------
    input_channels : int
      Number of input channels
    output_channels : int
      Number of output channels
    initializer : instance of initialization method
      Must provide W(output_channels, input_channels, kernel_size) and
      B(output_channels)
    kernel_size : int
      Filter width
    optimizer : instance of optimization method
      Its update(layer) is called at the end of every backward pass
    padding : int
      Number of zeros added on each side of the input
    stride : int
      Step between consecutive filter positions

    Attributes
    ----------
    W : ndarray, shape (output_channels, input_channels, kernel_size)
    B : ndarray, shape (output_channels,)
    X : zero-padded input of the last forward call
    dW, dB : gradients of W and B, set by backward
    """
    def __init__(self,input_channels,output_channels, initializer,kernel_size,optimizer,padding = 0,stride = 1):
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.optimizer = optimizer
        self.input_channels = input_channels
        self.output_channels = output_channels

        self.W = initializer.W(output_channels,input_channels,kernel_size)
        self.B = initializer.B(output_channels)

    def forward(self,X):
        """
        Parameters
        ----------
        X : ndarray, shape (batch_size, n_features) or
            (batch_size, input_channels, n_features)
          The array is reshaped to (batch_size, input_channels, n_features).

        Returns
        ----------
        ndarray, shape (batch_size, output_channels, n_out)
          n_out = (n_features + 2*padding - kernel_size) // stride + 1,
          or 0 when the filter does not fit.
        """
        self.n_samples = X.shape[0]
        self.n_in = X.shape[-1]
        pad, step, width = self.padding, self.stride, self.kernel_size

        X = X.reshape(self.n_samples, self.input_channels, self.n_in)
        self.X = np.pad(X, ((0,0), (0,0), (pad, pad)))
        n_out = max(0, (self.n_in + 2*pad - width) // step + 1)
        # Distance covered by the first tap over all output positions.
        span = step*(n_out - 1) + 1
        A = np.zeros((self.n_samples, self.output_channels, n_out))
        for k in range(width):
            # Tap k sees the padded input at k, k + stride, k + 2*stride, ...
            A = A + self.W[:, :, k] @ self.X[:, :, k:k + span:step]
        return A + self.B.reshape(-1,1)

    def backward(self,dA):
        """
        Compute dW and dB, let the optimizer update the layer, and return
        the input gradient.

        Parameters
        ----------
        dA : ndarray, shape (batch_size, output_channels, n_out)
          Gradient with respect to the forward output

        Returns
        ----------
        ndarray, shape (batch_size, input_channels, n_features)
          Gradient with respect to the (unpadded) input of the last forward
          call, computed with the weights from before the optimizer update.
        """
        pad, step, width = self.padding, self.stride, self.kernel_size
        n_out = dA.shape[-1]
        span = step*(n_out - 1) + 1
        dW = np.zeros(self.W.shape)
        dX_padded = np.zeros(self.X.shape)
        for k in range(width):
            window = self.X[:, :, k:k + span:step]
            # dW[o, c, k] = sum over samples and positions of dA * window
            dW[:, :, k] = (dA @ window.transpose(0, 2, 1)).sum(axis=0)
            # Scatter each output gradient back to the input it was read from.
            dX_padded[:, :, k:k + span:step] += self.W[:, :, k].T @ dA
        self.dW = dW
        self.dB = np.sum(dA, axis=(0, -1))
        # Drop the padding columns so the gradient matches the input shape.
        dX = dX_padded[:, :, pad:pad + self.n_in]
        self.optimizer.update(self)
        return dX

## Learning and estimation

In [17]:
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import OneHotEncoder
import numpy as np

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every sample into a row of 784 features.
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
# np.float (an alias of the builtin float) was removed in NumPy 1.24;
# np.float64 is the same dtype.
X_train = X_train.astype(np.float64)
X_test = X_test.astype(np.float64)
# Scale the features by 1/255.
X_train /= 255
X_test /= 255

# One-hot encode the labels; the encoder is fitted on the training labels only.
# NOTE(review): scikit-learn 1.2 renamed `sparse` to `sparse_output` and later
# releases drop the old name; confirm against the installed version.
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train[:, np.newaxis])
y_test_one_hot = enc.transform(y_test[:, np.newaxis])


In [18]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the training data for validation.
# NOTE(review): no random_state is passed, so the split presumably changes between runs.
X_train_new, X_val, y_train_new, y_val = train_test_split(X_train, y_train_one_hot, test_size=0.2)

In [331]:
# get_mini_batch = GetMiniBatch(X_train_new, y_train_new, batch_size=3)

In [44]:
class SimpleInitializer:
    """
    Initializer that samples from a Gaussian with a fixed standard deviation.

    Parameters
    ----------
    sigma : float
      Standard deviation of the Gaussian distribution
    """
    def __init__(self, sigma):
        self.sigma = sigma

    def W(self,  n_nodes1, n_nodes2,kernel_size):
        """
        Sample a three-dimensional weight array.

        Parameters
        ----------
        n_nodes1 : int
          Size of the first axis
        n_nodes2 : int
          Size of the second axis
        kernel_size : int
          Size of the last axis (filter width)

        Returns
        ----------
        W : ndarray, shape (n_nodes1, n_nodes2, kernel_size)
        """
        samples = np.random.randn(n_nodes1, n_nodes2, kernel_size)
        return self.sigma * samples

    def B(self, n_nodes2):
        """
        Sample a bias vector.

        Parameters
        ----------
        n_nodes2 : int
          Number of bias values

        Returns
        ----------
        B : ndarray, shape (n_nodes2,)
        """
        samples = np.random.randn(n_nodes2)
        return self.sigma * samples
    
class XavierInitializer:
    """
    Xavier initializer: weights are drawn with standard deviation
    1 / sqrt(n_nodes1).

    Parameters
    ----------
    sigma : float
      Standard deviation used by B until W has been called; W overwrites it.
    """
    def __init__(self,sigma):
        self.sigma =sigma

    def W(self,  n_nodes1, n_nodes2):
        """
        Return a (n_nodes1, n_nodes2) weight matrix and remember the scale
        that was used in self.sigma.
        """
        scale = 1/np.sqrt(n_nodes1)
        self.sigma = scale
        samples = np.random.randn(n_nodes1, n_nodes2)
        return scale * samples

    def B(self,n_nodes2):
        """Return n_nodes2 bias values scaled by the current self.sigma."""
        samples = np.random.randn(n_nodes2)
        return self.sigma * samples

class HeInitializer:
    """
    He initializer: weights are drawn with standard deviation
    sqrt(2 / n_nodes1).

    Parameters
    ----------
    sigma : float
      Standard deviation used by B until W has been called; W overwrites it.
    """
    def __init__(self,sigma):
        self.sigma =sigma

    def W(self,  n_nodes1, n_nodes2):
        """
        Return a (n_nodes1, n_nodes2) weight matrix and remember the scale
        that was used in self.sigma.
        """
        scale = np.sqrt(2/n_nodes1)
        self.sigma = scale
        samples = np.random.randn( n_nodes1, n_nodes2)
        return scale * samples

    def B(self,n_nodes2):
        """Return n_nodes2 bias values scaled by the current self.sigma."""
        samples = np.random.randn(n_nodes2)
        return self.sigma * samples

In [5]:
class SGD:
    """
    Plain stochastic gradient descent.

    Parameters
    ----------
    lr : Learning rate
    """
    def __init__(self, lr):
        self.lr = lr

    def update(self, layer):
        """
        Take one gradient step on the layer's weights and biases.

        Parameters
        ----------
        layer : Instance of the layer before update; it must expose
            W, B, dW and dB

        Returns
        ----------
        layer : The same instance, with W and B rebound to the updated arrays
        """
        # W first, then B; each parameter is replaced, not modified in place.
        for param, grad in (("W", "dW"), ("B", "dB")):
            current = getattr(layer, param)
            setattr(layer, param, current - self.lr*getattr(layer, grad))
        return layer

In [48]:
class Softmax:
    """Softmax activation combined with the cross-entropy loss."""

    def forward(self, A):
        """
        Parameters
        ----------
        A : ndarray, shape (batch_size, n_classes)

        Returns
        ----------
        Z : ndarray, shape (batch_size, n_classes)
            Each row sums to one
        """
        # Subtracting the row maximum leaves the result unchanged but keeps
        # exp from overflowing on large inputs.
        shifted = A - np.max(A, axis=1, keepdims=True)
        exp_A = np.exp(shifted)
        Z = exp_A / np.sum(exp_A, axis=1, keepdims=True)
        return Z

    def backward(self, Z, y):
        """
        Gradient of the cross-entropy loss with respect to the softmax input.

        Parameters
        ----------
        Z : ndarray, shape (batch_size, n_classes)
            Output of forward
        y : ndarray, shape (batch_size, n_classes)
            One-hot correct labels
        """
        dA = Z - y
        return dA

    def loss(self,Z,y):
        """
        Cross-entropy summed over all entries and divided by len(y).
        """
        # Clipping at the smallest positive float avoids log(0), which would
        # turn 0 * log(0) into NaN.
        safe_Z = np.clip(Z, np.finfo(np.float64).tiny, None)
        L = - np.sum(y * np.log(safe_Z)) / len(y)
        return L
    
class ReLU:
    """Rectified linear unit activation."""

    def forward(self, A):
        """
        Return A with every non-positive entry replaced by zero.

        The input array is left untouched; the output is kept in self.A
        for backward.
        """
        Z = np.where(A <= 0, 0, A)
        self.A = Z
        return Z

    def backward(self, dZ):
        """Pass the gradient through where the forward output was positive."""
        # A boolean mask is enough; np.int was removed in NumPy 1.24.
        dA = dZ*(self.A > 0)
        return dA
    
class Sigmoid:
    """Logistic sigmoid activation."""

    def forward(self, A):
        """Store the input and return 1 / (1 + exp(-A))."""
        self.A = A
        return 1 / (1 + np.exp(-A))

    def backward(self, dZ):
        """Multiply dZ by the sigmoid derivative s - s**2 at the stored input."""
        s = 1 / (1 + np.exp(-self.A))
        return dZ * (s - s**2)
    
class Tanh:
    """Hyperbolic tangent activation."""

    def forward(self, A):
        """Store the input and return tanh(A)."""
        self.A = A
        return np.tanh(A)

    def backward(self, dZ):
        """Multiply dZ by the tanh derivative 1 - tanh(A)**2 at the stored input."""
        t = np.tanh(self.A)
        return dZ * (1 - t**2)

In [74]:
class FC:
    """
    Fully connected layer from n_nodes1 nodes to n_nodes2 nodes
    Parameters
    ----------
    n_nodes1 : int
      Number of nodes in the previous layer
    n_nodes2 : int
      Number of nodes in the later layer
    initializer: instance of initialization method
      Must provide W(n_nodes1, n_nodes2) and B(n_nodes2)
    optimizer: instance of optimization method
      Its update(layer) is called at the end of every backward pass
    """
    def __init__(self, n_nodes1, n_nodes2, initializer, optimizer):
        self.optimizer = optimizer
        # Initialize self.W and self.B using the initializer method
        self.n_nodes1 = n_nodes1
        self.n_nodes2 = n_nodes2
        self.W = initializer.W(n_nodes1,n_nodes2)
        self.B = initializer.B(n_nodes2)
        # Not used by this class; left in place in case an optimizer
        # elsewhere reads them.
        self.Hw = 0
        self.Hb = 0
    def forward(self, X):
        """
        forward
        Parameters
        ----------
        X : The following forms of ndarray, shape (batch_size, n_nodes1)
            input
        Returns
        ----------
        A : The following forms of ndarray, shape (batch_size, n_nodes2)
            output
        """
        A =  X @self.W + self.B
        # Keep the input; backward needs it for the weight gradient.
        self.Z = X
        return A
    def backward(self, dA):
        """
        Backward
        Parameters
        ----------
        dA : The following forms of ndarray, shape (batch_size, n_nodes2)
            Gradient flowing from behind
        Returns
        ----------
        dZ : The following forms of ndarray, shape (batch_size, n_nodes1)
            Gradient to flow forward
        """
        self.dB = np.sum(dA,axis = 0)
        self.dW = self.Z.T@dA
        # Computed before the update so it uses the weights of the forward pass.
        self.dZ = dA @ self.W.T

        # The optimizer changes the layer in place; its return value is not
        # needed, so optimizers that return nothing work as well.
        self.optimizer.update(self)
        return self.dZ

In [None]:
def output_size_calculation(n_in, P, F, S):
    """
    Number of output features of a one-dimensional convolution.

    Parameters
    ----------
    n_in : int
      Number of input features
    P : int
      Padding applied on each side
    F : int
      Filter size
    S : int
      Stride

    Returns
    ----------
    int
      (n_in + 2*P - F) / S + 1, truncated toward zero
    """
    n_out = (n_in + 2*P - F) / S
    n_out += 1
    return int(n_out)

In [89]:
class ScratchConv1Classifier():
    """
    Classifier built from one Conv1d layer followed by two fully connected layers.

    The network is Conv1d -> activation -> FC -> activation -> FC -> Softmax.

    Parameters
    ----------
    n_nodes2 : int
        Number of nodes in the hidden fully connected layer
    n_output : int
        Number of output classes
    batch_size : int
        Mini-batch size used by fit
    epochs : int
        Number of passes over the training data
    sigma : float
        Standard deviation handed to the initializers
    optimizer : class
        Optimizer class; it is instantiated with the learning rate
    activation_function : str
        "sigmoid", "tanh" or "relu"
    lr : float
        Learning rate
    bias : bool
        Stored as check_bias; not used by this class
    verbose : bool
        If True, fit prints the list of per-epoch training losses

    Attributes
    ----------
    loss_train : list
        Training loss after every epoch
    loss_val : list
        Validation loss after every epoch (empty without validation data)
    result : ndarray
        Softmax output of the most recent forward call
    """
    def __init__(self,n_nodes2, n_output ,batch_size ,epochs,sigma=0.02,optimizer=SGD,activation_function = "sigmoid",lr = 0.01,bias = False,verbose = True):
        self.verbose = verbose
        self.n_nodes2 = n_nodes2
        self.n_output = n_output
        self.batch_size = batch_size
        self.lr = lr
        self.sigma = sigma

        self.check_bias = bias
        self.activation_function = activation_function
        self.epochs = epochs
        self.optimizer = optimizer(self.lr)

    def fit(self, X, y, X_val=None, y_val=None):
        """
        Learn a neural network classifier.
        Parameters
        ----------
        X : The following forms of ndarray, shape (n_samples, n_features)
            Features of training data
        y : The following form of ndarray, shape (n_samples, n_output)
            One-hot correct answer values of training data
        X_val : The following forms of ndarray, shape (n_samples, n_features)
            Features of verification data
        y_val : The following form of ndarray, shape (n_samples, n_output)
            One-hot correct values of verification data
        """
        self.initializer = self.get_initializer()
        # The convolution layer is fixed: one channel in and out, kernel 7,
        # padding 3, stride 2.
        self.conv1d = Conv1d(kernel_size=7, initializer=SimpleInitializer(self.sigma), optimizer=self.optimizer, input_channels=1, output_channels=1, padding=3, stride=2)
        # The first fully connected layer takes the flattened convolution output.
        nodes_1 = output_size_calculation(X.shape[-1], self.conv1d.padding, self.conv1d.kernel_size, self.conv1d.stride)
        self.FC2 = FC(nodes_1,self.n_nodes2,self.initializer,self.optimizer)
        self.FC3 = FC(self.n_nodes2, self.n_output,self.initializer,self.optimizer)

        self.activation1 = self.get_activation()
        self.activation2 = self.get_activation()
        self.activation3 = Softmax()

        get_mini_batch = GetMiniBatch(X,y,self.batch_size)
        self.loss_train = []
        self.loss_val = []
        for _ in range(self.epochs):
            for mini_X_train, mini_y_train in get_mini_batch:
                self.forward(mini_X_train)
                self.backward(mini_X_train,mini_y_train)
            # Loss over the full data sets at the end of the epoch.
            self.forward(X)
            self.loss_train.append(self.activation3.loss(self.result,y))
            if X_val is not None:
                self.forward(X_val)
                self.loss_val.append(self.activation3.loss(self.result,y_val))

        if self.verbose:
            # When verbose is True, the training losses are output.
            print(self.loss_train)

    def get_initializer(self):
        """
        Return the initializer that matches the activation function:
        Xavier for sigmoid/tanh, He for relu.

        Raises
        ----------
        ValueError
            If activation_function is not one of the supported names
        """
        if self.activation_function == "sigmoid" or self.activation_function == "tanh":
            return XavierInitializer(self.sigma)
        elif self.activation_function == "relu":
            return HeInitializer(self.sigma)
        raise ValueError("unsupported activation_function: {!r}".format(self.activation_function))

    def get_activation(self):
        """
        Return a new activation layer for activation_function.

        Raises
        ----------
        ValueError
            If activation_function is not one of the supported names
        """
        if self.activation_function == "sigmoid" :
            return Sigmoid()
        elif self.activation_function == "tanh":
            return Tanh()
        elif self.activation_function == "relu":
            return ReLU()
        raise ValueError("unsupported activation_function: {!r}".format(self.activation_function))

    def forward(self,X):
        """
        Run the network on X and store the class probabilities in self.result.
        """
        A1 = self.conv1d.forward(X)
        # Drop the channel axis; this relies on the single output channel.
        A1 = A1.reshape(A1.shape[0], A1.shape[-1])
        Z1 = self.activation1.forward(A1)
        A2 = self.FC2.forward(Z1)
        Z2 = self.activation2.forward(A2)
        A3 = self.FC3.forward(Z2)
        Z3 = self.activation3.forward(A3)
        self.result = Z3

    def backward(self,X,y):
        """
        Back-propagate from self.result; every layer updates itself on the way.

        X is accepted for symmetry with forward but is not used.
        """
        dA3 = self.activation3.backward(self.result,y)
        dZ2 = self.FC3.backward(dA3)
        dA2 = self.activation2.backward(dZ2)
        dZ1 = self.FC2.backward(dA2)
        dA1 = self.activation1.backward(dZ1)
        # Restore the channel axis that forward removed.
        dA1 = dA1.reshape(dA1.shape[0],1, dA1.shape[-1])
        self.conv1d.backward(dA1)

    def derivative(self,A,d):
        """
        Return d multiplied by the derivative of the activation function at A.

        Raises
        ----------
        ValueError
            If activation_function is not one of the supported names
        """
        if self.activation_function == "sigmoid":
            dA = d*(1 / (1 + np.exp(-A)))*(1-(1 / (1 + np.exp(-A))))
            return dA
        elif self.activation_function == "tanh":
            dA = d*(1- np.tanh(A)**2)
            return dA
        elif self.activation_function == "relu":
            dA = d*(A > 0)
            return dA
        raise ValueError("unsupported activation_function: {!r}".format(self.activation_function))

    def predict(self, X):
        """
        Estimate using a neural network classifier.
        Parameters
        ----------
        X : The following forms of ndarray, shape (n_samples, n_features)
            sample
        Returns
        -------
            The following form of ndarray, shape (n_samples,)
            Index of the most probable class for every sample
        """
        self.forward(X)
        return np.argmax(self.result,axis = 1)
        
      
    
class GetMiniBatch:
    """
    Iterator to get a mini-batch
    Parameters
    ----------
    X : The following forms of ndarray, shape (n_samples, n_features)
      Training data
    y : The following form of ndarray, first axis of length n_samples
      Correct answer value
    batch_size : int
      Batch size; the last batch may be smaller
    seed : int
      NumPy random number seed. The global NumPy generator is re-seeded
      with it on construction.
    """
    def __init__(self, X, y, batch_size = 20, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self._X = X[shuffle_index]
        self._y = y[shuffle_index]
        # np.int was removed in NumPy 1.24; a plain int also suits __len__.
        self._stop = int(np.ceil(X.shape[0]/self.batch_size))

    def __len__(self):
        return self._stop

    def _batch(self, index):
        # Slice out batch number `index` from the shuffled data.
        start = index*self.batch_size
        stop = start + self.batch_size
        return self._X[start:stop], self._y[start:stop]

    def __getitem__(self,item):
        return self._batch(item)

    def __iter__(self):
        # Iterating restarts from the first batch; the shuffle order is kept.
        self._counter = 0
        return self

    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        batch = self._batch(self._counter)
        self._counter += 1
        return batch

In [90]:
# 200 hidden nodes, 10 output classes, batch size 20, 10 epochs.
model = ScratchConv1Classifier(200, 10 ,20 ,10,activation_function = "sigmoid",lr = 0.01,bias = False,verbose = True)

In [91]:
# Train on the training split; no validation data is passed.
model.fit(X_train_new, y_train_new)

[0.2897961954026031, 0.22288846466443443, 0.1837572432762507, 0.15673705187205925, 0.13620251092862787, 0.12035999688190413, 0.10811009247019333, 0.09845448952728304, 0.09057453369556255, 0.08390234733925006]


In [96]:
# Predicted class index for every test sample.
result = model.predict(X_test)

In [97]:
# Display the predictions.
result

array([7, 2, 1, ..., 4, 5, 6], dtype=int64)

In [104]:
# Accuracy in percent: share of predictions equal to the integer test labels.
acc = (result == y_test).sum()*100/len(y_test)

In [106]:
# Display the accuracy.
acc

96.86