# CNN的基本原理

* 卷积层
* 池化层
* 全连接层

* 通道
* 卷积核

手动实现卷积层（NumPy 版）

In [5]:
# x: input
# w: kernel
# b: bias
# pad: padding
def conv_numpy(x, w, b, pad, strides):
    """Naive forward pass of a 2-D convolution layer (cross-correlation).

    Args:
        x: Input array of shape (N, H, W, C): samples, height, width,
            input channels.
        w: Kernel array of shape (F, HH, WW, C): output channels, kernel
            height, kernel width, input channels.
        b: Bias, indexed by output channel as ``b[m]``.
        pad: Number of zeros added on each side of the height and width axes.
        strides: Pair ``(stride_h, stride_w)``.

    Returns:
        Array of shape (N, Hn, Wn, F) where
        ``Hn = 1 + (H + 2 * pad - HH) // stride_h`` and
        ``Wn = 1 + (W + 2 * pad - WW) // stride_w``.
    """
    N, H, W, _ = x.shape
    F, HH, WW, _ = w.shape
    stride_h, stride_w = strides[0], strides[1]

    # Pad only the two spatial axes; batch and channel axes stay untouched.
    X = np.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')

    # Floor division keeps the sizes as exact integers.
    Hn = 1 + (H + 2 * pad - HH) // stride_h
    Wn = 1 + (W + 2 * pad - WW) // stride_w

    out = np.zeros((N, Hn, Wn, F))

    for n in range(N):
        for m in range(F):
            kernel = w[m]
            for i in range(Hn):
                top = i * stride_h
                for j in range(Wn):
                    left = j * stride_w
                    window = X[n, top:top + HH, left:left + WW, :]
                    # Reduce to a scalar so the element assignment never
                    # relies on implicit array-to-scalar conversion.
                    out[n, i, j, m] = np.sum(window * kernel) + b[m]
    return out

In [28]:
def corr2d(x, w, b, pad, stride):
    """Naive 2-D cross-correlation (convolution layer forward) in TensorFlow.

    Args:
        x: Input of shape (N, H, W, C).
        w: Kernels of shape (F, HH, WW, C).
        b: Bias, indexed by output channel as ``b[m]``.
        pad: Number of zeros added on each side of the height and width axes.
        stride: Pair ``(stride_h, stride_w)``.

    Returns:
        A float32 ``tf.Variable`` of shape (N, Hn, Wn, F) where
        ``Hn = 1 + (H + 2 * pad - HH) // stride_h`` and
        ``Wn = 1 + (W + 2 * pad - WW) // stride_w``.
    """
    # Convert the shape entries to plain ints so they can drive Python
    # ranges and integer arithmetic (relies on eager execution).
    N, H, W, _ = (int(d) for d in tf.shape(x))
    F, HH, WW, _ = (int(d) for d in tf.shape(w))
    stride_h, stride_w = stride[0], stride[1]

    x = tf.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')
    Hn = 1 + (H + 2 * pad - HH) // stride_h
    Wn = 1 + (W + 2 * pad - WW) // stride_w
    Y = tf.Variable(tf.zeros((N, Hn, Wn, F), dtype=tf.float32))

    for m in range(F):
        filt = w[m, :, :, :]
        for i in range(Hn):
            top = i * stride_h
            for j in range(Wn):
                left = j * stride_w
                # The window end must move with the stride as well; using
                # a fixed step of 1 here yields wrong-sized windows.
                data = x[:, top:top + HH, left:left + WW, :]
                Y[:, i, j, m].assign(
                    tf.reduce_sum(tf.multiply(data, filt), axis=(1, 2, 3)) + b[m]
                )
    return Y

手动实现池化层

In [35]:
def max_pool_forward_naive(x, pool_size=(2,2), strides=(1,1)):
    """Naive max-pooling forward pass over the two middle axes of ``x``.

    Args:
        x: Array of shape (N, H, W, C).
        pool_size: Pair ``(window_height, window_width)``.
        strides: Pair ``(stride_height, stride_width)``.

    Returns:
        Array of shape (N, Hn, Wn, C) holding the maximum of each window,
        with ``Hn = 1 + int((H - window_height) / stride_height)`` and
        ``Wn`` computed the same way along the width.
    """
    batch, height, width, channels = x.shape
    win_h, win_w = pool_size
    step_h, step_w = strides

    out_h = int((height - win_h) / step_h) + 1
    out_w = int((width - win_w) / step_w) + 1

    pooled = np.zeros((batch, out_h, out_w, channels))
    for row in range(out_h):
        top = row * step_h
        rows = slice(top, top + win_h)
        for col in range(out_w):
            left = col * step_w
            window = x[:, rows, left:left + win_w, :]
            # Collapse the spatial axes of the window, keeping batch/channel.
            pooled[:, row, col, :] = np.max(window, axis=(1, 2))
    return pooled

array([[9]])

In [36]:
def pool2d(X, pool_size=(2,2), strides=(1,1)):
    """Naive max-pooling forward pass in TensorFlow.

    Args:
        X: Input of shape (N, H, W, C).
        pool_size: Pair ``(window_height, window_width)``.
        strides: Pair ``(stride_height, stride_width)``.

    Returns:
        A ``tf.Variable`` of shape (N, Hn, Wn, C) holding the maximum of
        each window, with ``Hn = (H - window_height) // stride_height + 1``
        and ``Wn`` computed the same way along the width.
    """
    N, H, W, C = X.shape
    p_h, p_w = pool_size
    s_h, s_w = strides

    # Number of window positions that fit along each spatial axis.
    Hn = (H - p_h) // s_h + 1
    Wn = (W - p_w) // s_w + 1
    Y = tf.Variable(tf.zeros((N, Hn, Wn, C)))

    for i in range(Hn):
        top = i * s_h
        for j in range(Wn):
            left = j * s_w
            window = X[:, top:top + p_h, left:left + p_w, :]
            Y[:, i, j, :].assign(
                tf.math.reduce_max(window, axis=(1, 2), keepdims=False)
            )
    return Y

# 实战 Quick, Draw! Google涂鸦识别比赛

评估指标 Mean Average Precision

In [37]:
def apk(actual, predicted, k=10):
    """Compute the average precision at k for one prediction list.

    Args:
        actual: Collection of relevant items.
        predicted: Ordered sequence of predicted items.
        k: Maximum number of predictions taken into account.

    Returns:
        The sum of precision values at each rank where a new relevant item
        appears, divided by ``min(len(actual), k)``; ``0.0`` when ``actual``
        is empty.
    """
    top_k = predicted[:k] if len(predicted) > k else predicted

    hits = 0.0
    precision_sum = 0.0
    for rank, guess in enumerate(top_k, start=1):
        if guess not in actual:
            continue
        # A prediction repeated at a later rank is not counted again.
        if guess in top_k[:rank - 1]:
            continue
        hits += 1.0
        precision_sum += hits / rank

    return precision_sum / min(len(actual), k) if actual else 0.0