[卷积神经网络CNN实现](https://zhuanlan.zhihu.com/p/102119808)

1. 动机
   1. 通过普通的神经网络（NN）也可以实现，但是图片尺寸越来越大，如果使用 NN 实现则需要训练的参数太多
   2. 特征位置在不同的图片中会有不同的变化
2. 数据集
   1. [数据集来源](http://yann.lecun.com/exdb/mnist/)
3. 卷积

   ![avator](../resource/v2-69b4c1dd078ee363317bb8fa323eaace_b.gif)

   1. 通过卷积可以提取图片中特定线条，垂直线条或者水平线条
   ![avator](../resource/v2-ca0d5ccbaf1a30eff0f9289987486e96_720w.jpg)
   2. 填充：可以通过在图像周围补 0，使卷积前后的图像大小保持一致
   3. 卷积层：卷积层通过一组 filter 将输入的图片转为输出的图片。卷积层的主要参数是 filter 的个数
   ![avator](../resource/v2-cad35827f01a669417b548f52dfc3c2a_720w.jpg)

In [1]:
# 3.4 卷积层代码实现
import numpy as np

class Conv3x3:
    """Convolution layer made of 3x3 filters, applied without padding.

    A 2-D input of shape (h, w) produces an output of shape
    (h - 2, w - 2, num_filters).
    """

    def __init__(self, num_filters):
        """Create `num_filters` randomly initialised 3x3 filters."""
        self.num_filters = num_filters
        # Dividing by 9 keeps the initial values from being too large or
        # too small.
        self.filters = np.random.randn(num_filters, 3, 3) / 9

    def iterate_regions(self, image):
        """Yield every 3x3 window of a 2-D image.

        Each item is a tuple `(im_region, i, j)` where `im_region` is the
        3x3 slice whose top-left corner is at row `i`, column `j`.
        """
        h, w = image.shape

        for i in range(h - 2):
            for j in range(w - 2):
                im_region = image[i:(i + 3), j:(j + 3)]
                yield im_region, i, j

    def forward(self, input):
        """Run the forward pass.

        - input is a 2-D numpy array of shape (h, w).

        Returns a 3-D numpy array of shape (h - 2, w - 2, num_filters).
        """
        # Cache the input: backprop() iterates over the same regions.
        self.last_input = input

        h, w = input.shape
        # One output cell per window yielded by iterate_regions, i.e.
        # (h - 2) rows and (w - 2) columns.
        output = np.zeros((h - 2, w - 2, self.num_filters))

        for im_region, i, j in self.iterate_regions(input):
            # (3, 3) * (num_filters, 3, 3) broadcasts over the filters;
            # summing axes 1 and 2 leaves one value per filter.
            output[i, j] = np.sum(im_region * self.filters, axis=(1, 2))

        return output

    def backprop(self, d_L_d_out, learn_rate):
        """Run the backward pass and update the filters in place.

        - d_L_d_out is the loss gradient for this layer's outputs, with the
          same shape as the array returned by forward().
        - learn_rate is a float.

        forward() must have been called first so that `last_input` is set.
        Returns None: this layer does not compute the loss gradient for its
        inputs.
        """
        # Gradient accumulator with the same shape as the filters.
        d_L_d_filters = np.zeros(self.filters.shape)

        for im_region, i, j in self.iterate_regions(self.last_input):
            # d_L_d_out[i, j] holds one scalar per filter. Reshaping it to
            # (num_filters, 1, 1) scales the 3x3 region once per filter.
            d_L_d_filters += (
                d_L_d_out[i, j][:, np.newaxis, np.newaxis] * im_region
            )

        # Gradient-descent step on the filters.
        self.filters -= learn_rate * d_L_d_filters

        return None


池化（Pooling）

1. 图片的相邻元素具有相似的值，因此卷积层中很多信息是冗余的，通过池化来减少这个影响。
    ![avator](../resource/v2-ac441205fd06dc037b3db2dbf05660f7_b.gif)
2. 与卷积运算类似，只是更简单：只需在每个区域内取最大值并赋值。池化层会把 26x26x8 的输入变为 13x13x8 的输出

In [2]:
import numpy as np

class MaxPool2:
    """Max-pooling layer with a pool size of 2.

    A 3-D input of shape (h, w, f) produces an output of shape
    (h // 2, w // 2, f). When h or w is odd, the last row/column is ignored.
    """

    def iterate_regions(self, image):
        """Yield every non-overlapping 2x2 window of a 3-D image.

        Each item is a tuple `(im_region, i, j)` where `im_region` is the
        2x2xf slice for output position (i, j).
        """
        h, w, _ = image.shape
        new_h = h // 2
        new_w = w // 2

        for i in range(new_h):
            for j in range(new_w):
                # Window (i, j) covers rows 2i..2i+1 and columns 2j..2j+1.
                im_region = image[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
                yield im_region, i, j

    def forward(self, input):
        """Run the forward pass.

        - input is a 3-D numpy array of shape (h, w, num_filters).

        Returns a 3-D numpy array of shape (h // 2, w // 2, num_filters).
        """
        # Cache the input: backprop() needs to find where each max was.
        self.last_input = input

        h, w, num_filters = input.shape
        output = np.zeros((h // 2, w // 2, num_filters))

        for im_region, i, j in self.iterate_regions(input):
            # Max over the two spatial axes, separately for each filter.
            output[i, j] = np.amax(im_region, axis=(0, 1))
        return output

    def backprop(self, d_L_d_out):
        """Run the backward pass.

        - d_L_d_out is the loss gradient for this layer's outputs.

        forward() must have been called first so that `last_input` is set.
        Returns the loss gradient for this layer's inputs, with the same
        shape as the cached input.
        """
        # Positions that were not a window maximum keep a gradient of 0.
        d_L_d_input = np.zeros(self.last_input.shape)

        for im_region, i, j in self.iterate_regions(self.last_input):
            # Per-filter maximum of this 2x2 window.
            amax = np.amax(im_region, axis=(0, 1))

            # Copy the output gradient to every pixel that equals its
            # filter's maximum (ties all receive it); others stay at 0.
            d_L_d_input[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)] = np.where(
                im_region == amax, d_L_d_out[i, j], 0
            )

        return d_L_d_input

Softmax将一组数字转换为一组概率

1. 我们将要使用一个含有 10 个节点（分别代表相应数字）的 softmax 层，作为我们 CNN 的最后一层。最后一层为一个全连接层，只是激活函数为 softmax。经过 softmax 的变换，数字就是具有最高概率的节点。
    ![avator](../resource/v2-e73a42ebce8cbf6af10aeaf309d4b116_720w.jpg)
2. 交叉熵损失函数：$H(p,q)=-\sum_xp(x)\ln(q(x))$

In [3]:
import numpy as np
 
class Softmax:
    """A standard fully-connected layer with softmax activation."""

    def __init__(self, input_len, nodes):
        """Create the weight matrix and bias vector.

        - input_len is the number of input values after flattening.
        - nodes is the number of output nodes.
        """
        # We divide by input_len to reduce the variance of our initial values.
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)

    def forward(self, input):
        """Run the forward pass of the softmax layer.

        - input can be any array with any dimensions; it is flattened, and
          its flattened length must match the first dimension of the weights.

        Returns a 1d numpy array containing the respective probability values.
        """
        # Cache what backprop() needs: the original shape (to reshape the
        # input gradient), the flattened input and the pre-activation totals.
        self.last_input_shape = input.shape

        # Flatten to 1d so the layer can act as a fully-connected layer.
        input = input.flatten()
        self.last_input = input

        # totals has one entry per output node.
        totals = np.dot(input, self.weights) + self.biases
        self.last_totals = totals

        # Softmax is unchanged by subtracting a constant from every total;
        # subtracting the max keeps np.exp from overflowing on large values.
        exp = np.exp(totals - np.max(totals))
        return exp / np.sum(exp, axis=0)

    def backprop(self, d_L_d_out, learn_rate):
        """Run the backward pass and update weights and biases in place.

        - d_L_d_out is the loss gradient for this layer's outputs.
        - learn_rate is a float.

        Only the first nonzero entry of d_L_d_out is used: the method returns
        as soon as it has been processed. forward() must have been called
        first.

        Returns the loss gradient for this layer's inputs, reshaped to the
        shape of the array passed to forward(). If every entry of d_L_d_out
        is zero, no update is made and a zero gradient is returned.
        """
        for i, gradient in enumerate(d_L_d_out):
            if gradient == 0:
                continue

            # e^totals, shifted by the max for numerical stability. The
            # shift cancels out in every ratio below.
            t_exp = np.exp(self.last_totals - np.max(self.last_totals))

            # Sum of all e^totals
            S = np.sum(t_exp)

            # Gradients of out[i] against totals
            d_out_d_t = -t_exp[i] * t_exp / (S ** 2)
            d_out_d_t[i] = t_exp[i] * (S - t_exp[i]) / (S ** 2)

            # Gradients of totals against weights/biases/input
            d_t_d_w = self.last_input
            d_t_d_b = 1
            d_t_d_inputs = self.weights

            # Gradients of loss against totals
            d_L_d_t = gradient * d_out_d_t

            # Gradients of loss against weights/biases/input.
            # The input gradient is computed before the weights are updated.
            d_L_d_w = d_t_d_w[np.newaxis].T @ d_L_d_t[np.newaxis]
            d_L_d_b = d_L_d_t * d_t_d_b
            d_L_d_inputs = d_t_d_inputs @ d_L_d_t

            # Update weights / biases
            self.weights -= learn_rate * d_L_d_w
            self.biases -= learn_rate * d_L_d_b

            # Reshape the 1d gradient back to the original input shape.
            return d_L_d_inputs.reshape(self.last_input_shape)

        # Every output gradient was zero, so the input gradient is zero too.
        return np.zeros(self.last_input_shape)

In [1]:
import mnist
import numpy as np
 
# Evaluation data and network layers used by the cells below.
# We only use the first 1k testing examples (out of 10k total)
# in the interest of time. Feel free to change this if you want.
test_images = mnist.test_images()[:1000]
test_labels = mnist.test_labels()[:1000]

# The three layers of the network; the trailing comments give the
# input -> output shape of each layer for a 28x28 input image.
conv = Conv3x3(8)                                    # 28x28x1 -> 26x26x8
pool = MaxPool2()                                    # 26x26x8 -> 13x13x8
softmax = Softmax(13 * 13 * 8, 10) # 13x13x8 -> 10
 
def forward(image, label):
    """Run one image through conv -> pool -> softmax and score the result.

    - image is a 2d numpy array with pixel values in [0, 255]
    - label is a digit

    Returns a tuple `(out, loss, acc)`: the softmax probabilities, the
    cross-entropy loss for `label`, and 1 if the prediction is correct,
    otherwise 0.
    """
    # Rescale pixels from [0, 255] to [-0.5, 0.5] before the first layer.
    normalized = (image / 255) - 0.5

    conv_out = conv.forward(normalized)   # convolution layer output
    pooled = pool.forward(conv_out)       # pooling layer output
    probs = softmax.forward(pooled)       # one probability per class

    # Cross-entropy only involves the probability assigned to the true
    # label, which is used here as an index. np.log() is the natural log.
    loss = -np.log(probs[label])

    # The prediction is correct when the most probable class is the label.
    if np.argmax(probs) == label:
        acc = 1
    else:
        acc = 0

    return probs, loss, acc
 
print('MNIST CNN initialized!')

# Running totals for the current window of 100 examples.
loss = 0
num_correct = 0

# enumerate() supplies the step index alongside each (image, label) pair.
for i, (im, label) in enumerate(zip(test_images, test_labels)):
    # Forward pass only; the probabilities themselves are not needed here.
    _, l, acc = forward(im, label)
    loss = loss + l
    num_correct = num_correct + acc

    # Report only after every 100th example.
    if (i + 1) % 100 != 0:
        continue

    # With a window of exactly 100 examples, the number of correct
    # predictions is also the accuracy in percent.
    report = (
        '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%'
        % (i + 1, loss / 100, num_correct)
    )
    print(report)

    # Start a fresh window.
    loss = 0
    num_correct = 0

KeyboardInterrupt: 