In [2]:
import numpy as np

class Layer:
    """Abstract base class shared by every layer.

    Each instance carries two attributes, ``input`` and ``output``, both
    initialised to ``None``. Subclasses must override ``forward`` and
    ``backward``.
    """

    def __init__(self):
        self.input = self.output = None

    def forward(self, input):
        """Run the forward pass; subclasses must provide the implementation.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def backward(self, output_gradient, learning_rate):
        """Run the backward pass; subclasses must provide the implementation.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()


## ReLUレイヤー

In [3]:
class ReLU(Layer):
    """Rectified linear unit: element-wise ``max(0, x)``."""

    def forward(self, input_data):
        """Store the input for the backward pass and return ``max(0, input)``."""
        self.input = input_data
        rectified = np.maximum(0, input_data)
        return rectified

    def backward(self, output_gradient, learning_rate):
        """Pass the gradient through only where the forward input was > 0.

        ``learning_rate`` is accepted for interface compatibility and is not
        used here.
        """
        was_positive = self.input > 0
        return output_gradient * was_positive


## （Max）プーリング層
- 入力を小さな領域（プール）に区切り、各領域の最大値だけを取り出して出力を小さくする
- `backward`: 順伝播の時に最大値だった場所（選択された場所）にだけ勾配を流す。選ばれなかった場所の勾配は０とする

In [None]:
class MaxPooling(Layer):
    """Non-overlapping 2-D max pooling over a ``(depth, height, width)`` input.

    The input is tiled into ``pool_size`` x ``pool_size`` windows and each
    window is reduced to its maximum value. Trailing rows/columns that do not
    fill a complete window are ignored by ``forward`` and receive a zero
    gradient from ``backward``.
    """

    def __init__(self, pool_size):
        """Create the layer.

        Args:
            pool_size: side length of the square pooling window (>= 1).

        Raises:
            ValueError: if ``pool_size`` is smaller than 1.
        """
        super().__init__()
        if pool_size < 1:
            raise ValueError(f"pool_size must be >= 1, got {pool_size}")
        self.pool_size = pool_size

    def _tile(self, data):
        """Crop ``data`` to whole windows and reshape it to
        ``(depth, out_h, pool, out_w, pool)`` so axes 2 and 4 index inside a window."""
        depth, height, width = data.shape
        size = self.pool_size
        out_h, out_w = height // size, width // size
        cropped = data[:, :out_h * size, :out_w * size]
        return cropped.reshape(depth, out_h, size, out_w, size)

    def forward(self, input_data):
        """Return the per-window maxima.

        Args:
            input_data: array of shape ``(depth, height, width)``.

        Returns:
            Float array of shape
            ``(depth, height // pool_size, width // pool_size)``.
        """
        # Remember the input so backward() can locate the maxima again.
        self.input = input_data

        windows = self._tile(self.input)
        # Cast to float so the result type does not depend on the input dtype.
        return windows.max(axis=(2, 4)).astype(float)

    def backward(self, output_gradient, learning_rate):
        """Route each output gradient to the position(s) that held the window max.

        Positions that were not the maximum of their window get a gradient of
        zero. If several positions in a window tie for the maximum, each of
        them receives the full gradient of that window.

        Args:
            output_gradient: gradient w.r.t. the output of ``forward``; must
                have the same shape as that output.
            learning_rate: unused (this layer has no trainable parameters).

        Returns:
            Float array with the same shape as the stored forward input.

        Raises:
            ValueError: if ``output_gradient`` does not match the output shape.
        """
        depth, height, width = self.input.shape
        size = self.pool_size
        out_h, out_w = height // size, width // size

        windows = self._tile(self.input)
        window_max = windows.max(axis=(2, 4), keepdims=True)
        is_max = windows == window_max

        # Insert singleton axes so every window broadcasts against its own
        # scalar gradient; reshape also rejects a wrongly-shaped gradient.
        upstream = np.asarray(output_gradient).reshape(depth, out_h, 1, out_w, 1)
        routed = is_max * upstream

        input_gradient = np.zeros(self.input.shape, dtype=float)
        input_gradient[:, :out_h * size, :out_w * size] = routed.reshape(
            depth, out_h * size, out_w * size
        )
        return input_gradient

        
    


In [None]:

class ConvolutionalLayer(Layer):
    """2-D convolution (cross-correlation) of a single-channel image with a
    bank of square kernels, without padding.

    ``forward`` takes a 2-D array ``(height, width)`` and returns an array of
    shape ``(out_h, out_w, kernel_num)``.
    """

    def __init__(self, kernel_num, kernel_size, stride=1, bias=1):
        """Create the layer with randomly initialised kernels.

        Args:
            kernel_num: number of kernels (output channels).
            kernel_size: side length of each square kernel.
            stride: step between neighbouring patches.
            bias: stored on the instance; not used by ``forward``/``backward``
                in this class.
        """
        super().__init__()

        self.no_of_kernels = kernel_num
        self.kernel_size = kernel_size
        self.stride = stride
        self.bias = bias
        # Standard-normal values scaled down by the kernel area.
        scaling_factor = self.kernel_size**2
        self.kernels = np.random.randn(kernel_num, kernel_size, kernel_size) / scaling_factor

    def _out_size(self, length):
        """Number of patch positions along an axis of the given length."""
        return max((length - self.kernel_size) // self.stride + 1, 0)

    def partition_generator(self, input):
        """Yield ``(patch, h, w)`` for every kernel-sized patch of ``input``.

        ``h`` and ``w`` are the top-left coordinates of the patch in the
        input. Also stores ``input`` on ``self.input``.
        """
        input_h, input_w = input.shape
        self.input = input
        size = self.kernel_size
        for h in range(0, input_h - size + 1, self.stride):
            for w in range(0, input_w - size + 1, self.stride):
                patch = self.input[h:h + size, w:w + size]
                yield patch, h, w

    def forward(self, input):
        """Apply every kernel to every patch of a 2-D ``input``.

        Returns:
            Array of shape ``(out_h, out_w, kernel_num)`` where
            ``out = (in - kernel_size) // stride + 1`` along each axis.
        """
        self.input = input
        input_h, input_w = input.shape
        conv_output = np.zeros(
            (self._out_size(input_h), self._out_size(input_w), self.no_of_kernels)
        )
        for patch, h, w in self.partition_generator(self.input):
            # The generator reports input coordinates; divide by the stride
            # to get the matching output cell.
            conv_output[h // self.stride, w // self.stride] = np.sum(
                patch * self.kernels, axis=(1, 2)
            )
        return conv_output

    def backward(self, output_gradient, learning_rate):
        """Compute the kernel gradient and apply one gradient-descent step.

        Args:
            output_gradient: gradient w.r.t. the output of ``forward``, shape
                ``(out_h, out_w, kernel_num)``.
            learning_rate: step size for the kernel update.

        Returns:
            The gradient w.r.t. the kernels, shape
            ``(kernel_num, kernel_size, kernel_size)``. Note that this is the
            kernel gradient, not a gradient w.r.t. the layer input.
        """
        output_gradient = np.asarray(output_gradient)
        dL_dk = np.zeros(self.kernels.shape)
        for patch, h, w in self.partition_generator(self.input):
            per_kernel = output_gradient[h // self.stride, w // self.stride]
            # Each kernel accumulates only its own gradient for this patch.
            dL_dk += per_kernel[:, np.newaxis, np.newaxis] * patch

        # Update once, after the gradient over all patches is complete.
        self.kernels -= learning_rate * dL_dk

        return dL_dk


In [None]:
class FullyCon_SoftmaxLayer:
    def __init__(self, input_units, output_units):
        self.weight = np.random.randn(input_units, output_units)/input_units
        self.bias = np.zeros(output_units)
        self.output = None

    def _dense_layer(self, image):
        self.original_shape = image.shape
        image_flattened = image.flatten()
        self.flattened_input = image_flattened
        dense_output = np.dot(image_flattened, self.weight) + self.bias
        return dense_output
        
