In [4]:
import numpy as np
from collections import OrderedDict

In [None]:
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold sliding windows of a 4-D batch into the rows of a 2-D matrix.

    Args:
        input_data: Array of shape (batch_size, channels, height, width).
        filter_h: Window height.
        filter_w: Window width.
        stride: Step between neighbouring windows, applied to both axes.
        pad: Number of zero rows/columns added on every side of the two
            spatial axes before the windows are extracted.

    Returns:
        Array of shape (batch_size * out_h * out_w,
        channels * filter_h * filter_w) where
        out_h = (height + 2 * pad - filter_h) // stride + 1 and
        out_w = (width + 2 * pad - filter_w) // stride + 1.
        Rows are ordered by (batch, out_y, out_x); within a row the values
        are ordered by (channel, filter_y, filter_x).
    """
    batch_size, channels, height, width = input_data.shape

    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    if pad > 0:
        # Zero-pad the spatial axes only; batch and channel axes are untouched.
        img = np.pad(input_data,
                     [(0, 0), (0, 0), (pad, pad), (pad, pad)],
                     mode='constant')
    else:
        img = input_data

    col = np.zeros((batch_size, channels, out_h, out_w, filter_h, filter_w))

    for h in range(out_h):
        top = h * stride
        for w in range(out_w):
            left = w * stride
            col[:, :, h, w, :, :] = img[:, :, top:top + filter_h, left:left + filter_w]

    # Move channels behind the output position so that every row holds one
    # complete receptive field (all channels of one window).
    col = col.transpose(0, 2, 3, 1, 4, 5).reshape(
        batch_size * out_h * out_w, channels * filter_h * filter_w)

    return col

In [9]:
class Relu:
    """Rectified linear unit layer: passes positive values, zeroes the rest."""

    def __init__(self) -> None:
        # Boolean array marking the non-positive entries of the last input.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every entry ``<= 0`` replaced by 0.

        The mask of replaced positions is stored for ``backward``. The
        caller's array is left untouched.
        """
        self.mask = x <= 0
        # Work on a copy: assigning through the mask would otherwise
        # overwrite the caller's array in place.
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return a copy of ``dout`` zeroed where the forward input was ``<= 0``.

        The caller's gradient array is left untouched.
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

class Affine:
    """Fully connected layer computing ``x @ W + b``.

    Inputs with more than two dimensions (for example the 4-D output of a
    pooling layer) are flattened to ``(x.shape[0], -1)`` in ``forward`` and
    the gradient returned by ``backward`` is reshaped back to the original
    input shape. 1-D and 2-D inputs are used as given.
    """

    def __init__(self, W, b) -> None:
        self.W = W
        self.b = b

        # Input of the last forward pass, as the matrix actually multiplied.
        self.x = np.array([])
        # Shape of the last forward input before any flattening.
        self.original_x_shape = None

        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``x @ W + b``; ``x`` is flattened first if it has ndim > 2."""
        self.original_x_shape = x.shape
        if x.ndim > 2:
            # Collapse every non-batch axis into one feature axis.
            x = x.reshape(x.shape[0], int(np.prod(x.shape[1:])))
        self.x = x

        out = np.dot(x, self.W) + self.b

        return out

    def backward(self, dout):
        """Store ``dW`` and ``db`` and return the gradient w.r.t. the input.

        The returned array has the shape of the array given to ``forward``.
        """
        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.x.T, dout)

        dx = np.dot(dout, self.W.T)
        if self.original_x_shape is not None and len(self.original_x_shape) > 2:
            dx = dx.reshape(self.original_x_shape)

        return dx

In [12]:
def softmax(x):
    """Row-wise softmax.

    A 1-D input is treated as a single row, so the result always has at
    least two dimensions. The row maximum is subtracted before
    exponentiating to keep ``np.exp`` from overflowing.
    """
    logits = x.reshape(1, -1) if x.ndim == 1 else x

    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exps = np.exp(shifted)

    return exps / np.sum(exps, axis=1, keepdims=True)

def cross_entropy_error(y, t):
    """Mean cross-entropy of the entries of ``y`` selected by ``t``.

    ``y`` is indexed as ``y[row, t]``, i.e. ``t`` is used as a column index
    for each row. A 1-D ``y`` is treated as a single row. A small constant
    (1e-6) is added before the logarithm so that a zero entry does not
    produce ``-inf``.
    """
    probs = y if y.ndim != 1 else y.reshape(1, -1)

    rows = np.arange(probs.shape[0])
    picked = probs[rows, t]

    return -np.mean(np.log(picked + 1e-6))

class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss.

    ``t`` is used as an integer column index per row (both in the loss and
    when the one-hot matrix is built in ``backward``).
    """

    def __init__(self) -> None:
        # Softmax output of the last forward pass.
        self.y = np.array([])
        # Targets of the last forward pass.
        self.t = None

    def forward(self, x, t):
        """Return the cross-entropy loss of ``softmax(x)`` against ``t``."""
        self.y = softmax(x)
        self.t = t
        loss = cross_entropy_error(self.y, self.t)

        return loss

    def backward(self, dout=1):
        """Return the gradient of the loss w.r.t. the forward input ``x``.

        Args:
            dout: Upstream gradient of the scalar loss. The result is scaled
                by it; the default of 1 gives the gradient of the loss itself.
        """
        t_onehot = np.zeros_like(self.y)
        t_onehot[np.arange(t_onehot.shape[0]), self.t] = 1

        # The loss is a mean over the batch, hence the division.
        batch_size = self.y.shape[0]
        dx = dout * (self.y - t_onehot) / batch_size

        return dx

In [None]:
class Convolution:  # Currently only the default stride=1, pad=0 is supported
    """2-D convolution layer (forward pass only), computed via ``im2col``.

    ``stride`` and ``pad`` are stored, but ``forward`` always unfolds the
    input with stride=1 and pad=0. No ``backward`` method is implemented.
    """

    def __init__(self, W, b, stride=1, pad=0) -> None:
        self.W = W  # unpacked in forward as (FN, C, FH, FW)
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Convolve ``x`` of shape (N, C, H, W) with the filters in ``self.W``.

        Returns:
            Array of shape (N, FN, H - FH + 1, W - FW + 1).
        """
        N, C, H, W = x.shape  # N = batch size
        # FN is the number of filters, i.e. the number of output channels.
        # C is reassigned here; the filter channel count is expected to
        # equal the input channel count.
        FN, C, FH, FW = self.W.shape

        col = im2col(x, FH, FW, stride=1, pad=0)
        # One column per filter; .T is equivalent to transpose(1, 0).
        col_W = self.W.reshape(FN, C * FH * FW).T
        # Result has N * out_h * out_w rows and FN columns.
        out = np.dot(col, col_W) + self.b

        out_h = H - FH + 1  # valid only for stride=1, pad=0
        out_w = W - FW + 1
        # The last axis holds one value per filter, so it has length FN
        # (not the filter width FW); then move channels to axis 1.
        out = out.reshape(N, out_h, out_w, FN).transpose(0, 3, 1, 2)

        return out

class Pooling:
    """Max-pooling layer (forward pass only), computed via ``im2col``.

    The layer has no learnable parameters, so only the window size is
    passed in. ``stride`` and ``pad`` are stored, but ``forward`` always
    unfolds the input with stride=1 and pad=0.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0) -> None:
        self.pool_h, self.pool_w = pool_h, pool_w
        self.stride, self.pad = stride, pad

    def forward(self, x):
        """Return the window-wise maximum of ``x`` with shape (N, C, H, W).

        The result has shape (N, C, H - pool_h + 1, W - pool_w + 1).
        """
        batch, channels, height, width = x.shape
        window = self.pool_h * self.pool_w

        # Regroup the unfolded matrix so that every row contains exactly
        # pool_h * pool_w values.
        patches = im2col(x, self.pool_h, self.pool_w, stride=1, pad=0).reshape(-1, window)
        pooled = patches.max(axis=1)

        rows = height - self.pool_h + 1  # valid only for stride=1, pad=0
        cols = width - self.pool_w + 1

        return pooled.reshape(batch, rows, cols, channels).transpose(0, 3, 1, 2)

In [None]:
class SimpleConvNet:  # backward of the convolution and pooling layers is not implemented yet
    """Small CNN: Conv -> ReLU -> Pool -> Affine -> ReLU -> Affine -> Softmax.

    Args:
        input_dim: (channels, height, width) of one input sample. Only
            ``input_dim[1]`` is used as the spatial size, so the weight
            shapes are sized for square inputs.
        conv_param: Dict with keys 'filter_num', 'filter_size', 'pad' and
            'stride' describing the convolution layer.
        hidden_size: Number of units in the hidden affine layer.
        output_size: Number of output classes.
        weight_init_std: Standard deviation scale of the random weights.
    """

    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01) -> None:

        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        # Both sizes below use the stride=1, pad=0 formula.
        conv_output_size = input_size - filter_size + 1
        pool_output_size = conv_output_size - 2 + 1  # pooling window is 2 x 2 with stride 1

        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(filter_num * pool_output_size * pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()
        # Stride and pad come from conv_param; self.params only holds the
        # weights and biases.
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], filter_stride, filter_pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        # Return only after all layers have been applied.
        return x

    def loss(self, x, t):
        """Return the loss of the network output for ``x`` against targets ``t``."""
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        """Back-propagate the loss and return the gradients keyed like ``params``.

        NOTE: this calls ``backward`` on every layer and reads ``dW``/``db``
        from 'Conv1', so it requires the convolution and pooling layers to
        provide ``backward`` (and the convolution layer to set ``dW``/``db``).
        """
        self.loss(x, t)

        dout = 1
        dout = self.last_layer.backward(dout)

        # Walk the layers from last to first.
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db

        return grads