# CNN的基本原理

* 卷积层
* 池化层
* 全连接层

* 通道
* 卷积核

使用 NumPy 手动实现卷积层

In [5]:
def conv_numpy(x, w, b, pad, strides):
    """Naive forward pass of a 2-D convolution (cross-correlation) in NumPy.

    Args:
        x: input batch, shape (N, H, W, C) -- samples, height, width, channels.
        w: kernels, shape (F, HH, WW, C) -- output channels, kernel height,
            kernel width, input channels.
        b: bias, indexable per output channel (``b[m]`` for m in range(F)).
        pad: number of zero pixels added on every side of the H and W axes.
        strides: ``(stride_h, stride_w)``.

    Returns:
        Array of shape (N, Hn, Wn, F) with
        ``Hn = 1 + (H + 2 * pad - HH) // stride_h`` and
        ``Wn = 1 + (W + 2 * pad - WW) // stride_w``.
    """
    N, H, W, _ = x.shape
    F, HH, WW, _ = w.shape
    stride_h, stride_w = strides[0], strides[1]

    # Pad only the two spatial axes; batch and channel axes are left untouched.
    padded = np.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')

    Hn = 1 + (H + 2 * pad - HH) // stride_h
    Wn = 1 + (W + 2 * pad - WW) // stride_w

    out = np.zeros((N, Hn, Wn, F))

    for n in range(N):
        for m in range(F):
            kernel = w[m]
            for i in range(Hn):
                top = i * stride_h
                for j in range(Wn):
                    left = j * stride_w
                    window = padded[n, top:top + HH, left:left + WW, :]
                    # Element-wise product summed to a scalar, so a plain
                    # number (not a 1x1 array) is stored in the output cell.
                    out[n, i, j, m] = np.sum(window * kernel) + b[m]
    return out

In [28]:
def corr2d(x, w, b, pad, stride):
    """Naive 2-D cross-correlation over an NHWC batch using TensorFlow ops.

    Relies on eager execution: the shape values are converted to Python ints
    so they can drive ordinary ``range`` loops.

    Args:
        x: input, shape (N, H, W, C).
        w: kernels, shape (F, HH, WW, C).
        b: bias, indexable per output channel (``b[m]``).
        pad: number of zero pixels added on every side of the H and W axes.
        stride: ``(stride_h, stride_w)``.

    Returns:
        A float32 ``tf.Variable`` of shape (N, Hn, Wn, F).
    """
    N, H, W, _ = (int(d) for d in tf.shape(x))
    F, HH, WW, _ = (int(d) for d in tf.shape(w))
    stride_h, stride_w = stride[0], stride[1]

    x = tf.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')
    Hn = 1 + (H + 2 * pad - HH) // stride_h
    Wn = 1 + (W + 2 * pad - WW) // stride_w
    Y = tf.Variable(tf.zeros((N, Hn, Wn, F), dtype=tf.float32))

    for m in range(F):
        filt = w[m, :, :, :]
        for i in range(Hn):
            top = i * stride_h
            for j in range(Wn):
                left = j * stride_w
                # The window must start AND end relative to the strided
                # offset; otherwise it shrinks or grows whenever stride != 1.
                data = x[:, top:top + HH, left:left + WW, :]
                Y[:, i, j, m].assign(
                    tf.reduce_sum(tf.multiply(data, filt), axis=(1, 2, 3)) + b[m])
    return Y

手动实现池化层

In [35]:
def max_pool_forward_naive(x, pool_size=(2,2), strides=(1,1)):
    """Max-pool an NHWC array with explicit Python loops over output cells.

    Args:
        x: array of shape (N, H, W, C).
        pool_size: ``(window_height, window_width)``.
        strides: ``(stride_height, stride_width)``.

    Returns:
        Float array of shape (N, Hn, Wn, C) where each cell holds the maximum
        of the corresponding window, taken independently per sample and
        channel.
    """
    batch, in_h, in_w, channels = x.shape
    win_h, win_w = pool_size
    step_h, step_w = strides

    out_h = int((in_h - win_h) / step_h) + 1
    out_w = int((in_w - win_w) / step_w) + 1
    pooled = np.zeros((batch, out_h, out_w, channels))

    for row in range(out_h):
        top = row * step_h
        for col in range(out_w):
            left = col * step_w
            window = x[:, top:top + win_h, left:left + win_w, :]
            # Reduce over the two spatial axes only.
            pooled[:, row, col, :] = np.max(window, axis=(1, 2))
    return pooled

array([[9]])

In [36]:
def pool2d(X, pool_size=(2,2), strides=(1,1)):
    """Max-pool an NHWC input with TensorFlow ops and explicit Python loops.

    Args:
        X: input of shape (N, H, W, C); must expose ``.shape`` and ``.dtype``.
        pool_size: ``(window_height, window_width)``.
        strides: ``(stride_height, stride_width)``.

    Returns:
        A ``tf.Variable`` of shape (N, Hn, Wn, C) with
        ``Hn = (H - p_h) // s_h + 1`` and ``Wn = (W - p_w) // s_w + 1``.
    """
    N, H, W, C = X.shape
    p_h, p_w = pool_size
    s_h, s_w = strides

    # Number of window positions that fit entirely inside the input.
    out_h = (H - p_h) // s_h + 1
    out_w = (W - p_w) // s_w + 1

    # Match the input dtype so the per-cell assign below does not fail on a
    # dtype mismatch (e.g. float64 NumPy input vs. a float32 variable).
    Y = tf.Variable(tf.zeros((N, out_h, out_w, C), dtype=X.dtype))

    for i in range(out_h):
        top = i * s_h
        for j in range(out_w):
            left = j * s_w
            window = X[:, top:top + p_h, left:left + p_w, :]
            Y[:, i, j, :].assign(
                tf.math.reduce_max(window, axis=(1, 2), keepdims=False))
    return Y

# 实战 Quick, Draw! Google涂鸦识别比赛

In [6]:
import tensorflow as tf
# Show which TensorFlow version this notebook runs against.
print(tf.__version__)

2.2.0


评估指标 Mean Average Precision

In [37]:
def apk(actual, predicted, k=10):
    """Average precision at k for one sample.

    Args:
        actual: collection of relevant items.
        predicted: ranked sequence of predicted items (best first).
        k: only the first ``k`` predictions are scored.

    Returns:
        Sum of precision-at-rank over every rank where a new relevant item
        appears, divided by ``min(len(actual), k)``; 0.0 when ``actual`` is
        empty.
    """
    top_k = predicted[:k] if len(predicted) > k else predicted

    hits = 0.0
    precision_sum = 0.0
    for rank, guess in enumerate(top_k, start=1):
        # A repeated prediction only counts the first time it appears.
        if guess in actual and guess not in top_k[:rank - 1]:
            hits += 1.0
            precision_sum += hits / rank

    if actual:
        return precision_sum / min(len(actual), k)
    return 0.0

#### 数据shuffle
数据的读取方式
* 转成TFRecords：会变大，读取速度快
* TextLineDataset方式读取CSV文件：**需要对drawing数据进行编码，变成image像素数据**

In [None]:
# Redistribute every category's training CSV across NCSVS shard files so that
# each shard contains rows from all categories.
s = Simplified('./data/')
NCSVS = 100
categories = s.list_all_categories()
print(len(categories))

# Wrap the list (not the enumerate object) so tqdm knows the total length.
for y, cat in enumerate(tqdm(categories)):
    df = s.read_training_csv(cat)
    df['y'] = y  # integer class label = position of the category in the list
    # Shard index derived from key_id; values fall in [0, NCSVS).
    df['cv'] = (df.key_id // 10 ** 7) % NCSVS
    for k in range(NCSVS):
        filename = './shuffle_data/train_k{}.csv'.format(k)
        chunk = df[df.cv == k]
        chunk = chunk.drop(['key_id'], axis=1)
        if y == 0:
            # First category creates the file and writes the header row.
            chunk.to_csv(filename, index=False)
        else:
            # Later categories append without repeating the header.
            chunk.to_csv(filename, mode='a', header=False, index=False)

In [None]:
# Serialise rendered drawings and their labels into a TFRecord file.
# NOTE(review): `tfrecord_file`, `fileList` and `draw_cv2` are not defined in
# this cell -- confirm they exist earlier in the notebook.
with tf.io.TFRecordWriter(tfrecord_file) as writer:
    for filename in fileList[:1]:
        df = pd.read_csv(filename)
        df['drawing'] = df['drawing'].apply(json.loads)
        for drawing, label in zip(df['drawing'], df['y']):
            img = draw_cv2(drawing, BASE_SIZE=128, size=128, lw=6)
            # tobytes() replaces the deprecated ndarray.tostring().
            img_bytes = img.tobytes()
            feature = {
                # BytesList expects a list of byte strings; passing the raw
                # bytes object would be iterated as individual integers.
                'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_bytes])),
                # Cast the NumPy scalar to a plain int for the protobuf field.
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[int(label)])),
            }
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())

In [3]:
# testline
def draw_cs2(raw_strokes, size=64, lw=6):
    """Rasterise a serialised drawing onto a 256x256 canvas and resize it.

    Args:
        raw_strokes: object whose ``.numpy()`` returns the serialised stroke
            list (presumably an eager string tensor -- confirm against the
            caller). Each stroke is ``[xs, ys]``.
        size: side length of the returned square image.
        lw: line thickness passed to ``cv2.line``.

    Returns:
        ``uint8`` array of shape (size, size) with strokes drawn in 255.
    """
    import json  # local import keeps this notebook cell self-contained

    # SECURITY: the original used eval() on text coming from the dataset,
    # which executes arbitrary code. The strokes are nested lists of numbers,
    # so json.loads parses them without executing anything.
    strokes = json.loads(raw_strokes.numpy())
    img = np.zeros((256, 256), np.uint8)
    for stroke in strokes:
        xs, ys = stroke[0], stroke[1]
        # Connect consecutive points; a stroke of n points has n - 1 segments.
        for i in range(len(xs) - 1):
            cv2.line(img, (xs[i], ys[i]), (xs[i + 1], ys[i + 1]), 255, lw)
    return cv2.resize(img, (size, size))

In [4]:
def tf_draw_cv2(image, label):
    """Map function: render a drawing eagerly and one-hot encode its label.

    Args:
        image: the serialised drawing passed through to ``draw_cv2``.
        label: integer class index.

    Returns:
        ``(image, label)`` with static shapes (64, 64, 1) and (NCATS,).
    """
    # py_function runs draw_cv2 eagerly and hands the result back as a tensor.
    [image] = tf.py_function(draw_cv2, [image], [tf.float32])
    image = tf.reshape(image, (64, 64, 1))
    label = tf.one_hot(label, depth=NCATS)
    # py_function drops static shape information, so restore it explicitly.
    image.set_shape((64, 64, 1))
    # Use NCATS rather than a literal so the shape always matches the one-hot depth.
    label.set_shape((NCATS,))
    return image, label

In [None]:
# Read one gzip-compressed CSV shard line by line, skipping the header row.
train_ds = tf.data.TextLineDataset(fileList[2], compression_type='GZIP').skip(1).map(parse_csv, num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_ds = train_ds.map(tf_draw_cv2, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# Shuffle individual examples, batch them, and prefetch LAST so that whole
# batches are prepared in the background while the previous batch is consumed.
train_ds = train_ds.shuffle(3000).batch(1024).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

**py_function**的作用：

这是因为目前tf.data.Dataset.map函数里头的计算是在计算图模式(Graph mode)下执行，所以里头的Tensors并不会有Eager Execution下才有的numpy属性。

解法是使用tf.py_function将我们定义的Python函数（例如上面的绘图函数draw_cv2）包成一个以eager模式执行的TensorFlow Operation，这样函数内部就可以调用Tensor的numpy()方法。

### from_generator方法

gen必须是一个可调用对象，其返回值必须支持iter()协议（即返回一个可迭代对象，例如生成器）。

In [8]:
def gen():
    """Yield ``(n, [1] * n)`` for n = 1, 2, 3, ... without end."""
    for length in itertools.count(start=1):
        ones = [1] * length
        yield length, ones

In [9]:
class DataLoader(object):
    """Build a ``tf.data.Dataset`` of rendered drawings from shuffled CSV shards.

    Calling an instance returns a shuffled, batched dataset backed by an
    endless Python generator that re-reads the shard files in random order.

    Args:
        resize_height: height of the images fed to the model.
        resize_width: width of the images fed to the model.
        batch_size: number of examples per batch.
        fileList: paths of CSV shards with a JSON ``drawing`` column and an
            integer ``y`` label column.
        size: side length of the square canvas the strokes are drawn on
            before resizing.
        lw: line thickness used when drawing strokes.
        n_labels: number of classes for the one-hot labels.
    """

    def __init__(self, resize_height=64, resize_width=64, batch_size=512, fileList=None, size=256, lw=6, n_labels=340):
        self.resize_height = resize_height
        self.resize_width = resize_width
        self.batch_size = batch_size
        self.fileList = fileList
        self.size = size
        self.lw = lw
        self.n_labels = n_labels

    @staticmethod
    def _render(strokes, canvas_size, lw, out_height, out_width):
        """Draw one stroke list and return a float32 (out_height, out_width, 1) image."""
        canvas = np.zeros((canvas_size, canvas_size), np.uint8)
        for stroke in strokes:
            xs, ys = stroke[0], stroke[1]
            for i in range(len(xs) - 1):
                cv2.line(canvas, (xs[i], ys[i]), (xs[i + 1], ys[i + 1]), 255, lw)
        if (out_height, out_width) != (canvas_size, canvas_size):
            # cv2.resize takes the target size as (width, height).
            canvas = cv2.resize(canvas, (out_width, out_height))
        # Pixel values stay in the 0..255 range; only a channel axis is added.
        return canvas.astype(np.float32)[:, :, np.newaxis]

    def __call__(self):
        def _generator(size, lw):
            # from_generator forwards `args` as NumPy scalars; OpenCV needs
            # plain Python ints.
            size, lw = int(size), int(lw)
            while True:
                for filename in np.random.permutation(self.fileList):
                    df = pd.read_csv(filename)
                    df['drawing'] = df['drawing'].apply(json.loads)
                    y = tf.keras.utils.to_categorical(df.y, num_classes=self.n_labels)
                    # Render lazily, one row at a time, instead of allocating
                    # an array for the whole shard up front.
                    for strokes, y_i in zip(df['drawing'], y):
                        x_i = self._render(strokes, size, lw,
                                           self.resize_height, self.resize_width)
                        yield (x_i, y_i)

        dataset = tf.data.Dataset.from_generator(generator=_generator,
                                                output_types=(tf.dtypes.float32, tf.dtypes.int32),
                                                output_shapes=((self.resize_height, self.resize_width, 1), (self.n_labels, )),
                                                args=(self.size, self.lw))
        # Shuffle single examples, then batch, and prefetch last so complete
        # batches are prepared while the previous one is being consumed.
        dataset = dataset.shuffle(buffer_size=10240).batch(self.batch_size)
        dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
        return dataset

**MobileNetV2**（注意：下面的示例代码实际加载的是 MobileNet v1，即 `tf.keras.applications.mobilenet.MobileNet`）

In [10]:
# Instantiate the Keras MobileNet application with its classification head.
# Every argument is spelled out so the configuration is visible at a glance.
model = tf.keras.applications.mobilenet.MobileNet(
    input_shape=None,
    input_tensor=None,
    alpha=1.0,
    depth_multiplier=1,
    dropout=1e-3,
    include_top=True,
    pooling=None,
    classes=1000,
    weights='imagenet',
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf.h5


Baseline构建
* 数据处理：shuffle
* 数据读取：from_generator
* 建模方法：MobileNetV2
* 参数调优：图片大小、batch_size等

In [12]:
import glob
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import cv2
import json
import matplotlib.pyplot as plt
# Limit CUDA to the device with index 1 for this process.
# NOTE(review): this runs after `import tensorflow`; it presumably still takes
# effect because no GPU has been initialised yet -- confirm.
os.environ['CUDA_VISIBLE_DEVICES']='1'

In [13]:
# Grayscale image

def draw_cs2(raw_strokes, size=256, lw=6, time_color=True):
    """Draw strokes on a BASE_SIZE x BASE_SIZE grayscale canvas.

    ``BASE_SIZE`` is read from module scope.

    Args:
        raw_strokes: sequence of strokes, each indexable as ``[xs, ys]``.
        size: side length of the returned image; the canvas is resized only
            when this differs from ``BASE_SIZE``.
        lw: line thickness passed to ``cv2.line``.
        time_color: when true, stroke ``t`` is drawn with intensity
            ``255 - min(t, 10) * 13`` so later strokes are darker; otherwise
            every stroke uses 255.

    Returns:
        ``uint8`` image of shape (size, size).
    """
    canvas = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)
    for order, stroke in enumerate(raw_strokes):
        # The shade depends only on the stroke's position in the drawing.
        if time_color:
            shade = 255 - min(order, 10) * 13
        else:
            shade = 255
        xs = stroke[0]
        for k in range(len(xs) - 1):
            start = (xs[k], stroke[1][k])
            end = (xs[k + 1], stroke[1][k + 1])
            cv2.line(canvas, start, end, shade, lw)
    if size == BASE_SIZE:
        return canvas
    return cv2.resize(canvas, (size, size))