# Implementation from Scratch

<br />

I am going to create a one dimentional convolution layer.

In [1]:
from keras.datasets import mnist
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import copy

Using TensorFlow backend.


In [11]:
class Scratch1dCNNClassifier:
    """
    Implement a 1 dimentional convolution neural network classifier.
    
    Parameters
    ----------
    num_epoch : int
        Number of epochs
    
    batch_size : int
        Size of batch
    
    verbose : bool
        True if outputting learning process
    
    Attributes
    ----------
    loss : list
        List of arrays of records of loss on train dataset
    
    val_loss : list
        List of arrays of records of loss on validation dataset
    
    layers : list
        List of layers
    """
    
    def __init__(self, num_epoch=10, batch_size=10, verbose=True):
        # Record hyperparameters as attribute
        self.epoch = num_epoch
        self.batch_size = batch_size
        self.verbose = verbose
        
        # Prepare lists for arrays to record losses
        self.loss = []
        self.val_loss = []
        # Prepare lists for arrays to record losses
        self.layers = []
    
    
    def add(self, layer):
        self.layers += [layer]
    
    
    def forward_layer(self, X):
        for layer in self.layers:
            X = layer.forward(X)
        return X
    
    
    def backward_layer(self, y):
        for layer in reversed(self.layers):
            y = layer.backward(y)
        return y
    
    
    def fit(self, X, y, X_val=None, y_val=None):
        """
        Fit neural network classifier.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Features of train dataset
        
        y : ndarray, shape (n_samples, )
            Correct values of train dataset
        
        X_val : ndarray, shape (n_samples, n_features)
            Features of validation dataset
        
        y_val : ndarray, shape (n_samples, )
            Correct values of validation dataset
        """
        
        # Fit
        if self.verbose:
            count = 0
        
        for i in range(self.epoch):
            # Initialize
            get_mini_batch = GetMiniBatch(X, y, batch_size=self.batch_size)
            
            if (X_val is not None) and (y_val is not None):
                get_mini_batch_val = GetMiniBatch(X_val, y_val, batch_size=self.batch_size)
                
                for ((mini_X_train, mini_y_train), (mini_X_val_train, mini_y_val_train)) in zip(get_mini_batch, 
                                                                                                get_mini_batch_val):
                    # Forwardpropagation per iteration
                    Z3 = self.forward_layer(mini_X_train)
                    Z3_val = self.forward_layer(mini_X_val_train)
                    
                    # Loss
                    if self.verbose:
                        # Initialize
                        loss = Loss()
                        # Compute losses
                        L = loss.cross_entropy_loss(mini_y_train, Z3)
                        L_val = loss.cross_entropy_loss(mini_y_val_train, Z3_val)
                    
                    # Backforwardpropagation per iteration
                    dX = self.backward_layer(mini_y_train)
                    dX_val = self.backward_layer(mini_y_val_train)
            
            else:
                for mini_X_train, mini_y_train in get_mini_batch:
                    # Forwardpropagation per iteration
                    Z3 = self.forward_layer(mini_X_train)
                    
                    # Loss
                    if self.verbose:
                        # Initialize
                        loss = Loss()
                        # Compute losses
                        L = loss.cross_entropy_loss(mini_y_train, Z3)
                    
                    # Backforwardpropagation per iteration
                    dX = self.backward_layer(mini_y_train)
            
            # Output learning process if verbose is True
            if self.verbose:
                self.loss += [sum(L) / self.batch_size]
                if (X_val is not None) and (y_val is not None):
                    self.val_loss += [sum(L_val) / self.batch_size]
                    print("{0}batch loss: {1}, val_loss: {2}".format(count+1, self.loss[count], self.val_loss[count]))
                else:
                    print(self.loss[count])
                count += 1
    
    
    def predict(self, X):
        """
        Predict by neural network classifier.
        
        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Samples
        
        Returns
        -------
        ndarray, shape (n_samples, 1)
            Results of prediction
        """
        
        Z3 = self.forward_layer(X)
        
        return np.argmax(Z3, axis=1)
    
    
    def plot_learning_record(self):
        """
        Plot learning records.
        """
        
        plt.figure(facecolor="azure", edgecolor="coral")
        
        plt.plot(self.loss, label="loss")
        plt.plot(self.val_loss, label="val_loss")
        
        plt.title("Learning Records")
        plt.xlabel("Number of Epochs")
        plt.ylabel("Loss")
        plt.grid(True)
        
        plt.legend()
        plt.show()
    
    
    def compute_index_values(self, y, y_pred):
        """
        Compute Index values.
        
        Parameters
        ----------
        X: ndarray, shape(n_samples,n_features)
            Features of train dataset
        
        y: ndarray, shape(n_samples,)
            Correct values of train dataset
        """
        
        print("accuracy score:", accuracy_score(y, y_pred))
    
    
    def plot_misclassification(self, X_val, y_val, y_pred):
        """
        Plot results of misclassification. Show "Results of prediction/Corrects" above images.

        Parameters
        ----------
        y_pred : ndarray, shape (n_samples,)
            Results of prediction
        
        y_val : ndarray, shape (n_samples,)
            Correct labels of validation data
        
        X_val : ndarray, shape (n_samples, n_features)
            Features of validation data
        """
        
        # Number of results I want to plot
        num = 36

        true_false = y_pred==y_val
        false_list = np.where(true_false==False)[0].astype(np.int)

        if false_list.shape[0] < num:
            num = false_list.shape[0]
        
        fig = plt.figure(figsize=(6, 6))
        fig.subplots_adjust(left=0, right=0.8,  bottom=0, top=0.8, hspace=1, wspace=0.5)
        
        for i in range(num):
            ax = fig.add_subplot(6, 6, i + 1, xticks=[], yticks=[])
            ax.set_title("{} / {}".format(y_pred[false_list[i]],y_val[false_list[i]]))
            ax.imshow(X_val.reshape(-1,28,28)[false_list[i]], cmap='gray')

#### Fully Connected Layer

In [12]:
class FC:
    """
    Fully connected layer from a layer of n_nodes1 to a layer of n_nodes2
    
    Parameters
    ----------
    n_nodes1 : int
        Number of nodes of the previous layer
    
    n_nodes2 : int
        Number of nodes of the following layer
    
    initializer : Instance
        Instance of initialization method
    
    optimizer : Instance
        Instance of optimisation method
    
    Attributes
    ----------
    W : ndarray, shape (n_nodes1, n_nodes2)
        Weight
    
    B : ndarray, shape (n_nodes2,)
        Bias
    
    Z : ndarray, shape (batch_size, n_nodes1)
        Deepcopy of input
    
    dW : float
        Gradient of weight
    
    dB : float
        Gradient of bias
    """
    
    def __init__(self, n_nodes1, n_nodes2, initializer, optimizer):
        self.n_nodes1 = n_nodes1
        self.n_nodes2 = n_nodes2
        self.initializer = initializer
        self.optimizer = optimizer
        
        # Initialize self.W and self.B by using initializer method
        self.W = self.initializer.W(self.n_nodes1, self.n_nodes2)
        self.B = self.initializer.B(self.n_nodes2)
        
        self.Z = 0
        self.dW = 0
        self.dB = 0
    
    
    def forward(self, X):
        """
        Forwardpropagation
        
        Parameters
        ----------
        X : ndarray, shape (batch_size, n_nodes1)
            Input
        
        Returns
        ----------
        ndarray, shape (batch_size, n_nodes2)
            Output
        """        
        
        self.Z = copy.deepcopy(X)
        
        return np.dot(X, self.W) + self.B
    
    
    def backward(self, dA):
        """
        Backwardpropagation
        
        Parameters
        ----------
        dA : ndarray, shape (batch_size, n_nodes2)
            Gradient given from the following layer
        
        Returns
        ----------
        dZ : ndarray, shape (batch_size, n_nodes1)
            Gradient given to the next layer
        """
        
        self.dB = np.average(dA)
        self.dW = np.dot(self.Z.T, dA) / dA.shape[0]
        
        dZ = np.dot(dA, self.W.T)
        
        # Update
        self = self.optimizer.update(self)
        
        return dZ

#### Initialization Methods

In [13]:
class SimpleInitializer:
    """
    Simple initialization by Gaussian distribution
    
    Parameters
    ----------
    sigma : float
        Standard deviation of Gaussian distribution
    """
    
    def __init__(self, sigma):
        self.sigma = sigma
    
    
    def W(self, n_nodes1, n_nodes2):
        """
        Initialize a weight.

        Parameters
        ----------
        n_nodes1 : int
            Number of nodes of the previous layer

        n_nodes2 : int
            Number of nodes of the following layer
        
        Returns
        ----------
        W : ndarray, shape (n_nodes1, n_nodes2)
            Weight
        """

        W = self.sigma * np.random.randn(n_nodes1, n_nodes2)
        
        return W.astype("f")
    
    
    def B(self, n_nodes2):
        """
        Initialize a bias.

        Parameters
        ----------
        n_nodes2 : int
            Number of nodes of the following layer
        
        Returns
        ----------
        B : ndarray, shape (n_nodes2,)
            Bias
        """

        B = self.sigma * np.random.randn(1, n_nodes2)
        
        return B.astype("f")

In [14]:
class XavierInitializer:
    """
    Initialize a weight by Xavier's method, and initialize a bias.
    
    Parameters
    ----------
    sigma : float
        Standard deviation of Gaussian distribution
    """
    
    def __init__(self, sigma):
        self.sigma = sigma
    
    
    def W(self, n_nodes1, n_nodes2):
        """
        Initialize a weight by Xavier's method.

        Parameters
        ----------
        n_nodes1 : int
            Number of nodes of the previous layer

        n_nodes2 : int
            Number of nodes of the following layer
        
        Returns
        ----------
        W : ndarray, shape (n_nodes1, n_nodes2)
            Weight
        """

        W = self.sigma * np.random.randn(n_nodes1, n_nodes2) / np.sqrt(n_nodes1)
        
        return W.astype("f")
    
    
    def B(self, n_nodes2):
        """
        Initialize a bias.

        Parameters
        ----------
        n_nodes2 : int
            Number of nodes of the following layer
        
        Returns
        ----------
        B : ndarray, shape (n_nodes2,)
            Bias
        """

        B = self.sigma * np.random.randn(1, n_nodes2)
        
        return B.astype("f")

In [15]:
class HeInitializer:
    """
    Initialize a weight by He's method, and initialize a bias.
    
    Parameters
    ----------
    sigma : float
        Standard deviation of Gaussian distribution
    """
    
    def __init__(self, sigma):
        self.sigma = sigma
    
    
    def W(self, n_nodes1, n_nodes2):
        """
        Initialize a weight by Xavier's method.

        Parameters
        ----------
        n_nodes1 : int
            Number of nodes of the previous layer

        n_nodes2 : int
            Number of nodes of the following layer
        
        Returns
        ----------
        W : ndarray, shape (n_nodes1, n_nodes2)
            Weight
        """

        W = self.sigma * np.random.randn(n_nodes1, n_nodes2) / np.sqrt(2/n_nodes1)
        
        return W.astype("f")
    
    
    def B(self, n_nodes2):
        """
        Initialize a bias.

        Parameters
        ----------
        n_nodes2 : int
            Number of nodes of the following layer
        
        Returns
        ----------
        B : ndarray, shape (n_nodes2,)
            Bias
        """
        
        B = self.sigma * np.random.randn(1, n_nodes2)
        
        return B.astype("f")

#### Optimization Method

In [16]:
class SGD:
    """
    Stochastic Gradient Descent
    
    Parameters
    ----------
    lr : float
        Learning rate
    """
    
    def __init__(self, lr):
        self.lr = lr
    
    
    def update(self, layer):
        """
        Update a weight and a bias of a layer.
        
        Parameters
        ----------
        layer : Instance
            Instance of preupdated layer
        
        Returns
        ----------
        layer : Instance
            Instance of updated layer
        """
        
        layer.W -= self.lr * layer.dW
        layer.B -= self.lr * layer.dB
        
        return layer

In [17]:
class AdaGrad:
    """
    AdaGrad
    
    Parameters
    ----------
    lr : float
        Learning rate
    
    Attributes
    ----------
    h : float
        Sum of squares of all gradients up to the previous iterations about ith layer
    """
    
    def __init__(self, lr):
        self.lr = lr
        
        self.h = 0
    
    
    def update(self, layer):
        """
        Update a weight and a bias of a layer.
        
        Parameters
        ----------
        layer : Instance
            Instance of preupdated layer
        
        Returns
        ----------
        layer : Instance
            Instance of updated layer
        """
        
        self.h += layer.dW * layer.dW
        
        layer.W -= self.lr * layer.dW / np.sqrt(self.h+1e-7)
        layer.B -= self.lr * layer.dB
        
        return layer

In [18]:
class Momentum:
    """
    Momentum SGD
    
    Parameters
    ----------
    lr : float
        Learning rate
    
    momentum : float
        Learning coefficient
    
    Attributes
    ----------
    vW : 
        Momentum term of weight
    
    vB : 
        Momentum term of bias
    """
    
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        
        self.vW = 0
        self.vB = 0
    
    
    def update(self, layer):
        """
        Update a weight and a bias of a layer.
        
        Parameters
        ----------
        layer : Instance of preupdated layer
        
        Returns
        -------
        layer : Instance of updated layer
        """

        dW = np.dot(layer.Z.T, layer.dA) / len(layer.dA)
        dB = np.mean(layer.dA, axis=0)
        
        self.vW = self.momentum*self.vW - self.lr*dW
        self.vB = self.momentum*self.vB - self.lr*dB
        
        layer.W[...] = layer.W + self.vW
        layer.B[...] = layer.B + self.vB
        
        return layer

In [22]:
class Adam:
    """
    Adam
    
    Parameters
    ----------
    lr : float
        Learning rate
    
    beta1 : 
    
    beta2 : 
    
    Attributes
    ----------
    iter : int
        Number of iterations
    
    mW : 
    
    mB : 
    
    vW : 
    
    vB : 
    """

    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        
        self.iter = 0
        self.mW = 0
        self.mB = 0
        self.vW = 0
        self.vB = 0
    
    
    def update(self, layer):
        """
        Update a weight and a bias of a layer.
        
        Parameters
        ----------
        layer : Instance of preupdated layer
        
        Returns
        -------
        layer : Instance of updated layer
        """
        
        self.iter += 1
        
        dW = np.dot(layer.Z.T, layer.dA) / len(layer.dA)
        dB = np.mean(layer.dA, axis=0)
        
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter) 
        
        self.mW += (1 - self.beta1) * (dW - self.mW)
        self.vW += (1 - self.beta2) * (dW**2 - self.vW)
        self.mB += (1 - self.beta1) * (dB - self.mB)
        self.vB += (1 - self.beta2) * (dB**2 - self.vB)
        
        layer.W -= lr_t * self.mW / (np.sqrt(self.vW)+1e-7)
        layer.B -= lr_t * self.mB / (np.sqrt(self.vB)+1e-7)
        
        return layer

#### Activation Functions

In [40]:
class Sigmoid:
    """
    Sigmoid function
    """
    
    def forward(self, A):
        """
        Forwardpropagation
        
        Parameters
        ----------
        A : ndarray, shape (batch_size,)
            Vector from the previous layer of kth class
        
        Returns
        -------
        Z : ndarray, shape (batch_size, ith n_nodes)
            Output
        """
        
        self.A = A
        
        Z = 1 / (1+np.exp(-self.A))
        
        return Z
    
    
    def backward(self, dA):
        """
        Backpropagation
        
        Paramaters
        ----------
        dA : ndarray, shape (batch_size, n_nodes2)
            Gradient given from the following layer
        
        Returns
        -------
        ndarray, shape (batch_size, ith n_nodes)
            Output
        """
        
        Z = self.forward(self.A)
        
        d_sig = Z * (1-Z) * dA
        
        return d_sig

In [38]:
class Tanh:
    """
    tanh function
    """
    
    def forward(self, A):
        """
        Forwardpropagation
        
        Parameters
        ----------
        A : ndarray, shape (batch_size,)
            Vector from the previous layer of kth class
        
        Returns
        -------
        ndarray, shape (batch_size, ith n_nodes)
            Output
        """
        
        self.A = A
        
        Z = np.tanh(self.A)
        
        return Z
    
    
    def backward(self, dA):
        """
        Backpropagation
        
        Parameters
        ----------
        dA : ndarray, shape (batch_size, n_nodes2)
            Gradient given from the following layer
        
        Returns
        -------
        ndarray, shape (batch_size, ith n_nodes)
            Output
        """
        
        Z = self.forward(self.A)
        
        d_tanh = (1 - Z**2)*dA
        
        return d_tanh

In [25]:
class Softmax:
    """
    Softmax function
    
    Attributes
    ----------
    Z : ndarray, shape (batch_size, ith n_nodes)
        Output
    """
    
    def __init__(self):
        self.Z = None
    
    
    def forward(self, A):
        """
        Forwardpropagation
        
        Parameters
        ----------
        A : ndarray, shape (batch_size,)
            Vector from the previous layer of kth class
        
        Returns
        -------
        ndarray, shape (batch_size, ith n_nodes)
            Output
        """
        
        A -= np.max(A)
        
        Z = np.exp(A) / np.sum(np.exp(A), axis=1, keepdims=True)
        
        self.Z = Z
        
        return Z
    
    
    def backward(self, y):
        """
        Backwardpropagation
        
        Parameters
        ----------
        y : ndarray, shape (n_samples, 1)
            Correct values
        
        Returns
        -------
        ndarray, shape (batch_size,)
            Probability vector of kth class
        """
        
        d_soft = self.Z - y
        
        return d_soft

In [26]:
class ReLU:
    """
    ReLU function
    """
    
    def forward(self, A):
        """
        Forwardpropagation
        
        Parameters
        ----------
        A : ndarray, shape (batch_size,)
            Vector from the previous layer of kth class
        
        Returns
        -------
        ndarray, shape (batch_size, ith n_nodes)
            Output
        """
        
        self.A = A
        
        Z = np.where(self.A<=0, 0, self.A)
        
        return Z
    
    
    def backward(self, dA):
        """
        Backpropagation
        
        Parameters
        ----------
        dA : ndarray, shape (batch_size, n_nodes2)
            Gradient given from the following layer
        
        Returns
        -------
        ndarray, shape (batch_size, ith n_nodes)
            Output
        """
        
        d_relu = np.where(self.A<=0, 0, 1) * dA
        
        return d_relu

#### Loss

In [37]:
class Loss:
    """
    Compute loss.
    """
    
    def cross_entropy_loss(self, y, y_pred):
            """
            Cross entropy error

            Parameters
            ----------
            y : ndarray, shape (n_samples, 1)
                Correct values

            y_pred : ndarray, shape (n_samples, 1)
                Predicted values
            
            Returns
            -------
            ndarray, shape (n_samples, 1)
                Cross entropy error
            """
            
            L = np.sum(-1 * y * np.log(y_pred+1e-10), axis=1)
            
            return L

#### Mini-batch

In [28]:
class GetMiniBatch():
    """
    Iterator to get a mini-batch

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
      Train dataset
    
    y : ndarray, shape (n_samples, 1)
      Correct values
    
    batch_size : int
      Size of batch
    
    seed : int
      Seed of random numbers of Numpy
    """
    
    def __init__(self, X, y, batch_size=10, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self.X = X[shuffle_index]
        self.y = y[shuffle_index]
        self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)
    
    
    def __len__(self):
        return self._stop
    
    
    def __getitem__(self, item):
        p0 = item*self.batch_size
        p1 = item*self.batch_size + self.batch_size
        return self.X[p0:p1], self.y[p0:p1]        
    
    
    def __iter__(self):
        self._counter = 0
        return self
    
    
    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        
        p0 = self._counter*self.batch_size
        p1 = self._counter*self.batch_size + self.batch_size
        
        self._counter += 1
        
        return self.X[p0:p1], self.y[p0:p1]

#### Dropout

In [29]:
class Dropout:
    """
    Dropout
    
    Parameters
    ----------
    dropout_ratio : float
        Ratio of dropout
    
    Attributes
    ----------
    mask : float
        Mask
    """
    
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        
        self.mask = None
    
    
    def forward(self, X, train_flag=True):
        if train_flag:
            self.mask = np.random.rand(*X.shape) > self.dropout_ratio
            return X * self.mask
        else:
            return X * (1-self.dropout_ratio)
    
    
    def backward(self, dA):
        return dA * self.mask

## [Task 1] Create a Class of One Deimentional Convolution Layer the Number of Channels is 1

<br />

I do not think of any paddings, and I let the number of strides 1. Also, this class does not handle some data at the same time and apply to only sigle batch size, 1.

#### Forwardpropagation

$$
a_i = \sum_{s=0}^{F-1}x_{(i+s)}w_s+b
$$

$a_i$ : $i$th value of outputted array

$F$ : Size of filter

$x_{(i+s)}$ : $(i+s)$th value of inputted array

$w_s$ : $s$th value of array of weight

$b$ : Bias term

<br />

All of them are scalers.

#### Update Equation

<br />

$$
w_s^{\prime} = w_s - \alpha \frac{\partial L}{\partial w_s} \\
b^{\prime} = b - \alpha \frac{\partial L}{\partial b}
$$

$\alpha$ : Learning rate

$\frac{\partial L}{\partial w_s}$ : Gradient of loss $L$ for $w_s$

$\frac{\partial L}{\partial b}$ : Gradient of loss $L$ for $b$

#### Backpropagation

<br />

$$
\frac{\partial L}{\partial w_s} = \sum_{i=0}^{N_{out}-1} \frac{\partial L}{\partial a_i}x_{(i+s)}\\
\frac{\partial L}{\partial b} = \sum_{i=0}^{N_{out}-1} \frac{\partial L}{\partial a_i}
$$

$\frac{\partial L}{\partial a_i}$ : $i$th value of array of gradient

$N_{out}$ : Size of output

#### Error Given to Forward Layer

<br />

$$
\frac{\partial L}{\partial x_j} = \sum_{s=0}^{F-1} \frac{\partial L}{\partial a_{(j-s)}}w_s
$$

$\frac{\partial L}{\partial x_j}$ : $j$th value of array of error transferred to the forward layer

<br />

When $j - s < 0$ or $j - s > N_{out} - 1$, $\frac{\partial L}{\partial a_{(j-s)}}$ = 0.

## [Task 2] Compute Size of Outputs After One Dimentional Convolution

<br />

The following equation shows how the number of features change by convolution. It includes padding and stride.

$$
N_{out} =  \frac{N_{in}+2P-F}{S} + 1
$$

$N_{out}$ : Size of outputs (Number of features)

$N_{in}$ : Size of inputs (Number of features)

$P$ : Number of paddings to a certain direction

$F$ : Size of filter

$S$ : Size of stride

In [31]:
class SimpleConv1d:
    """
    One dimentional convolution layer where the number of channels is 1
    
    Parameters
    ----------
    initializer : Instance
        Instance of initialization method
    
    optimizer : Instance
        Instance of optimisation method
    
    filter_size : int
        Size of a filter
    
    straid : int
        Size of a straid
    
    pad : int
        Size of a padding
    
    Attributes
    ----------
    W : ndarray, shape (n_nodes1, n_nodes2)
        Weight
    
    B : ndarray, shape (n_nodes2,)
        Bias
    
    out_size : int
        Size of outputs
    
    dW : float
        Gradient of weight
    
    dB : float
        Gradient of bias
    """
    
    def __init__(self, initializer, optimizer, filter_size, straid=1, padding=0):
        self.initializer = initializer
        self.optimizer = optimizer
        self.filter_size = filter_size
        self.straid = straid
        self.pad = padding
        
        # Initialize self.W and self.B by using initializer method
        self.W = self.initializer.W(self.filter_size, 1)
        self.B = self.initializer.B(self.filter_size, 1)
        
        self.out_size = None
        self.dW = 0
        self.dB = 0
    
    
    def forward(self, X):
        """
        Forwardpropagation
        
        Parameters
        ----------
        X : ndarray, shape (batch_size, n_nodes1)
            Input
        
        Returns
        ----------
        A : ndarray, shape (batch_size, n_nodes2)
            Output
        """
        
        self.out_size = int((len(X) - self.filter_size)/self.straid + 1)
        
        A = np.zeros(self.out_size)
        for i in range(self.out_size):
            A[i] = np.dot(X[i:i+self.filter_size], self.W) + self.B
        
        return A
    
    
    def backward(self, dA):
        """
        Backpropagation
        
        Parameters
        ----------
        dA : ndarray, shape (batch_size, n_nodes2)
            Gradient given from the following layer
        
        Returns
        ----------
        dZ : ndarray, shape (batch_size, n_nodes1)
            Gradient given to the next layer
        """
        
        self.dB = np.sum(dA, axis=1)
        
        for i in range(self.out_size):
            self.dW[i] = np.dot(dA[i], X[i:i+self.filter_size])
        
        dZ = np.dot(dA, self.W.T)
        
        # Update
        self = self.optimizer.update(self)
        
        return dZ
    
    
    def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
        """
        Image to Column

        Parameters
        ----------
        input_data : ndarray, shape (Number of data, Number of channels, Height, Width)
            4 dimentional input data

        filter_h : int
            Height of a filter

        filter_w : int
            Width of a filter

        stride : int
            Size of a stride

        pad : int
            Size of a padding

        Returns
        -------
        col : ndarray, shape (Height, Width)
            2 dimentional output as column
        """

        N, C, H, W = input_data.shape
        out_h = (H + 2*pad - filter_h)//stride + 1
        out_w = (W + 2*pad - filter_w)//stride + 1

        img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
        col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))

        for y in range(filter_h):
            y_max = y + stride*out_h
            for x in range(filter_w):
                x_max = x + stride*out_w
                col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]

        col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)

        return col
    
    
    def compute_size_of_output(self, n_in):
        n_out = (n_in + 2*self.pad - self.filter_size)/self.straid + 1
        return n_out

## [Task 3] Validate a One Dimentional Convolution Layer by Using a Simple Array

In [32]:
# Example

# Input
x = np.array([1,2,3,4])

# Weight
w = np.array([3, 5, 7])

# Bias
b = np.array([1])

# Size of a filter
filter_size = 3

# Size of a straid
straid = 1

# Loss
delta_a = np.array([10, 20])

In [33]:
# Forwardpropagation

out_size = int((len(x) - filter_size)/straid + 1)

a = np.zeros(out_size)
for i in range(out_size):
    a[i] = np.dot(x[i:i+filter_size], w) + b

In [34]:
a

array([35., 50.])

In [35]:
# Backpropagation

delta_b = np.sum(delta_a, axis=0)

delta_w = np.zeros(out_size)
for i in range(out_size):
    print(delta_w.shape)
    print(x[i:i+filter_size].shape)
    print(delta_a.shape)
    delta_w[i] = np.dot(x[i:i+filter_size].T, delta_a[i])

deltaz = np.dot(delta_a, w.T)

# Update
#self = self.optimizer.update(self)

(2,)
(3,)
(2,)


ValueError: setting an array element with a sequence.

In [36]:
print(delta_b)
print(delta_w)
print(delta_x)

30
[0. 0.]


NameError: name 'delta_x' is not defined