# 基于CNN的MNIST手写数字识别
## 1. 加载Overlay

In [1]:
from pynq import Overlay
import numpy as np
from pynq import Xlnk
import struct
from scipy.misc import imread
import cv2

# Load the CNN overlay from the bitstream file that sits next to this notebook.
bitstream_path = './mnist_cnn.bit'
overlay = Overlay(bitstream_path)
print("Overlay downloaded successfully!")



Overlay downloaded successfully!


## 2. 定义IP核驱动及其他功能函数

In [2]:
# Allocator object used below for the buffers handed to the IP cores.
xlnk = Xlnk()

# Handles to the pooling and convolution IP cores exposed by the overlay.
pool_ip = overlay.Pool_0
conv_ip = overlay.Conv_0
    
# Driver function for the convolution IP core
def hwConv(conv, Kx, Ky, Sx, Sy, mode, relu_en, feat_in, W, bias, feat_out):
    """Program the convolution IP's registers, start it and wait for completion.

    Parameters
    ----------
    conv : object with ``write(offset, value)`` and ``read(offset)`` methods.
    Kx, Ky : kernel height and kernel width.
    Sx, Sy : vertical and horizontal stride.
    mode : written to register 0x50; selects whether the input is padded.
    relu_en : written to register 0x58; selects whether ReLU is applied.
    feat_in : input buffer; ``shape`` is read as (height, width, channels)
        and ``physical_address`` is passed to the core.
    W, bias : weight and bias buffers; only ``physical_address`` is used.
    feat_out : output buffer; ``shape[2]`` supplies the output channel count
        and ``physical_address`` is passed to the core.

    Returns ``None``. The call spins on register 0 until bit 1 is set, so it
    never returns if the core does not raise that bit.
    """
    set_reg = conv.write
    in_shape = feat_in.shape

    # Feature-map geometry
    set_reg(0x10, in_shape[2])                 # input channels
    set_reg(0x18, in_shape[0])                 # input height
    set_reg(0x20, in_shape[1])                 # input width
    set_reg(0x28, feat_out.shape[2])           # output channels

    # Kernel, stride and mode switches
    set_reg(0x30, Kx)                          # kernel height
    set_reg(0x38, Ky)                          # kernel width
    set_reg(0x40, Sx)                          # vertical stride
    set_reg(0x48, Sy)                          # horizontal stride
    set_reg(0x50, mode)                        # padding selector
    set_reg(0x58, relu_en)                     # ReLU enable

    # Physical addresses of the buffers the core reads and writes
    set_reg(0x60, feat_in.physical_address)    # input feature map
    set_reg(0x68, W.physical_address)          # weights
    set_reg(0x70, bias.physical_address)       # bias
    set_reg(0x78, feat_out.physical_address)   # output feature map

    # Keep bit 7 of the control register as it is and set bit 0 to start.
    # NOTE(review): presumably the HLS ap_ctrl layout (bit 0 start, bit 1 done,
    # bit 7 auto-restart) -- confirm against the IP's generated driver header.
    set_reg(0, (conv.read(0) & 0x80) | 0x01)

    # Busy-wait until bit 1 of the control register is set.
    while True:
        status = conv.read(0)
        if status & 0x02:
            break
    
# Driver function for the pooling IP core
def hwPool(pool, Kx, Ky, mode, feat_in, feat_out):
    """Program the pooling IP's registers, start it and wait for completion.

    Parameters
    ----------
    pool : object with ``write(offset, value)`` and ``read(offset)`` methods.
    Kx, Ky : pooling window height and width.
    mode : pooling type (0: MEAN, 1: MIN, 2: MAX).
    feat_in : input buffer; ``shape`` is read as (height, width, channels)
        and ``physical_address`` is passed to the core.
    feat_out : output buffer; only ``physical_address`` is used.

    Returns ``None``. The call spins on register 0 until bit 1 is set, so it
    never returns if the core does not raise that bit.
    """
    set_reg = pool.write
    in_shape = feat_in.shape

    set_reg(0x10, in_shape[2])                 # input channels
    set_reg(0x18, in_shape[0])                 # input height
    set_reg(0x20, in_shape[1])                 # input width
    set_reg(0x28, Kx)                          # pooling window height
    set_reg(0x30, Ky)                          # pooling window width
    set_reg(0x38, mode)                        # pooling type (0: MEAN, 1: MIN, 2: MAX)
    set_reg(0x40, feat_in.physical_address)    # input feature map address
    set_reg(0x48, feat_out.physical_address)   # output feature map address

    # Keep bit 7 of the control register as it is and set bit 0 to start.
    # NOTE(review): presumably the HLS ap_ctrl layout (bit 0 start, bit 1 done,
    # bit 7 auto-restart) -- confirm against the IP's generated driver header.
    set_reg(0, (pool.read(0) & 0x80) | 0x01)

    # Busy-wait until bit 1 of the control register is set.
    finished = False
    while not finished:
        finished = bool(pool.read(0) & 0x02)

def readbinfile(filename, size):
    """Read ``size`` 32-bit floats from the start of a binary file.

    Values are decoded with the ``struct`` format ``"f"`` in native byte
    order, i.e. 4 bytes per value.

    Parameters
    ----------
    filename : path of the file to read.
    size : number of float values to read; values <= 0 yield an empty array.

    Returns
    -------
    numpy.ndarray
        One-dimensional, writable ``float32`` array with ``size`` elements.

    Raises
    ------
    struct.error
        If the file holds fewer than ``size`` values.
    OSError
        If the file cannot be opened.
    """
    count = max(size, 0)
    nbytes = 4 * count

    # The context manager closes the file even when decoding fails.
    with open(filename, "rb") as f:
        raw = f.read(nbytes)

    if len(raw) != nbytes:
        raise struct.error(
            "%s: expected %d bytes (%d float32 values) but only %d available"
            % (filename, nbytes, count, len(raw))
        )

    # Decode everything in a single call instead of one unpack per value.
    values = struct.unpack("%df" % count, raw)
    return np.array(values, dtype = np.float32)

## 3. CNN各层输入输出的基本参数

In [3]:
##################################################
# Conv1: square kernel, padding chosen so the output keeps the input size
IN_CH1, IN_HEIGHT1, IN_WIDTH1 = 1, 28, 28

KERNEL_W1 = 3
STRIDE1   = 1
RELU_EN1  = 1

PADDING1 = (KERNEL_W1 - 1) // 2

OUT_CH1     = 16
OUT_HEIGHT1 = (IN_HEIGHT1 + 2*PADDING1 - KERNEL_W1) // STRIDE1 + 1   # 28
OUT_WIDTH1  = (IN_WIDTH1  + 2*PADDING1 - KERNEL_W1) // STRIDE1 + 1   # 28

##################################################
# Pool1: consumes the Conv1 output
MODE11 = 2  # pooling type: 0 = MEAN, 1 = MIN, 2 = MAX
IN_CH11, IN_HEIGHT11, IN_WIDTH11 = OUT_CH1, OUT_HEIGHT1, OUT_WIDTH1   # 16, 28, 28

KERNEL_W11 = 2

OUT_CH11     = IN_CH11                     # 16
OUT_HEIGHT11 = IN_HEIGHT11 // KERNEL_W11   # 14
OUT_WIDTH11  = IN_WIDTH11  // KERNEL_W11   # 14

##################################################
# Conv2: consumes the Pool1 output
IN_CH2, IN_HEIGHT2, IN_WIDTH2 = OUT_CH11, OUT_HEIGHT11, OUT_WIDTH11   # 16, 14, 14

KERNEL_W2 = 3
STRIDE2   = 1
RELU_EN2  = 1

PADDING2 = (KERNEL_W2 - 1) // 2

OUT_CH2     = 32
OUT_HEIGHT2 = (IN_HEIGHT2 + 2*PADDING2 - KERNEL_W2) // STRIDE2 + 1   # 14
OUT_WIDTH2  = (IN_WIDTH2  + 2*PADDING2 - KERNEL_W2) // STRIDE2 + 1   # 14

##################################################
# Pool2: consumes the Conv2 output
MODE21 = 2  # pooling type: 0 = MEAN, 1 = MIN, 2 = MAX
IN_CH21, IN_HEIGHT21, IN_WIDTH21 = OUT_CH2, OUT_HEIGHT2, OUT_WIDTH2   # 32, 14, 14

KERNEL_W21 = 2

OUT_CH21     = IN_CH21                     # 32
OUT_HEIGHT21 = IN_HEIGHT21 // KERNEL_W21   # 7
OUT_WIDTH21  = IN_WIDTH21  // KERNEL_W21   # 7

##################################################
# FC1: expressed as an unpadded convolution whose kernel covers the whole input
IN_CH3, IN_HEIGHT3, IN_WIDTH3 = OUT_CH21, OUT_HEIGHT21, OUT_WIDTH21   # 32, 7, 7

KERNEL_W3 = 7
STRIDE3   = 1
RELU_EN3  = 1

OUT_CH3     = 128
OUT_HEIGHT3 = (IN_HEIGHT3 - KERNEL_W3) // STRIDE3 + 1   # 1
OUT_WIDTH3  = (IN_WIDTH3  - KERNEL_W3) // STRIDE3 + 1   # 1

##################################################
# FC2: expressed as an unpadded 1x1 convolution
IN_CH4, IN_HEIGHT4, IN_WIDTH4 = OUT_CH3, OUT_HEIGHT3, OUT_WIDTH3   # 128, 1, 1

KERNEL_W4 = 1
STRIDE4   = 1
RELU_EN4  = 1

OUT_CH4     = 10
OUT_HEIGHT4 = (IN_HEIGHT4 - KERNEL_W4) // STRIDE4 + 1   # 1
OUT_WIDTH4  = (IN_WIDTH4  - KERNEL_W4) // STRIDE4 + 1   # 1

## 4. 读取网络参数

In [4]:
def _cma_float32(shape):
    """Allocate a float32 buffer via xlnk.cma_array with cacheable = 0."""
    return xlnk.cma_array(shape = shape, cacheable = 0, dtype = np.float32)

# Input image, laid out as (height, width, channels)
image = _cma_float32((IN_HEIGHT1, IN_WIDTH1, IN_CH1))

# Conv1 / Pool1: weights are (kernel, kernel, in_channels, out_channels)
W_conv1 = _cma_float32((KERNEL_W1, KERNEL_W1, IN_CH1, OUT_CH1))
b_conv1 = _cma_float32((OUT_CH1))
h_conv1 = _cma_float32((OUT_HEIGHT1, OUT_WIDTH1, OUT_CH1))
h_pool1 = _cma_float32((OUT_HEIGHT11, OUT_WIDTH11, OUT_CH11))

# Conv2 / Pool2
W_conv2 = _cma_float32((KERNEL_W2, KERNEL_W2, IN_CH2, OUT_CH2))
b_conv2 = _cma_float32((OUT_CH2))
h_conv2 = _cma_float32((OUT_HEIGHT2, OUT_WIDTH2, OUT_CH2))
h_pool2 = _cma_float32((OUT_HEIGHT21, OUT_WIDTH21, OUT_CH21))

# FC1
W_fc1 = _cma_float32((KERNEL_W3, KERNEL_W3, IN_CH3, OUT_CH3))
b_fc1 = _cma_float32((OUT_CH3))
h_fc1 = _cma_float32((OUT_HEIGHT3, OUT_WIDTH3, OUT_CH3))

# FC2
W_fc2 = _cma_float32((KERNEL_W4, KERNEL_W4, IN_CH4, OUT_CH4))
b_fc2 = _cma_float32((OUT_CH4))
h_fc2 = _cma_float32((OUT_HEIGHT4, OUT_WIDTH4, OUT_CH4))

# Read weights and biases from the pre-trained parameter files
def _load_param(dst, path, shape):
    """Read float32 values from ``path`` and copy them into the buffer ``dst``.

    ``shape`` is a tuple; its product is the number of values read and the
    host array is reshaped to it before the copy. Returns the host array.
    """
    host = readbinfile(path, int(np.prod(shape))).reshape(shape)
    # Take the byte count from the data actually read, so the copy length can
    # never disagree with the array being copied.
    xlnk.cma_memcopy(dst, host, host.nbytes)
    return host

print("Conv1:\tloading weight... ", end = "")
w_conv1 = _load_param(W_conv1, "./data/W_conv1.bin", (KERNEL_W1, KERNEL_W1, IN_CH1, OUT_CH1))
print("done")

print("\tloading bias... ", end = "")
B_conv1 = _load_param(b_conv1, "./data/b_conv1.bin", (OUT_CH1,))
print("done")

print("Conv2:\tloading weight... ", end = "")
w_conv2 = _load_param(W_conv2, "./data/W_conv2.bin", (KERNEL_W2, KERNEL_W2, IN_CH2, OUT_CH2))
print("done")

print("\tloading bias... ", end = "")
# All OUT_CH2 bias values are copied to the device buffer.
B_conv2 = _load_param(b_conv2, "./data/b_conv2.bin", (OUT_CH2,))
print("done")

print("FC1:\tloading weight... ", end = "")
w_fc1 = _load_param(W_fc1, "./data/W_fc1.bin", (KERNEL_W3, KERNEL_W3, IN_CH3, OUT_CH3))
print("done")

print("\tloading bias... ", end = "")
B_fc1 = _load_param(b_fc1, "./data/b_fc1.bin", (OUT_CH3,))
print("done")

print("FC2:\tloading weight... ", end = "")
w_fc2 = _load_param(W_fc2, "./data/W_fc2.bin", (KERNEL_W4, KERNEL_W4, IN_CH4, OUT_CH4))
print("done")

print("\tloading bias... ", end = "")
B_fc2 = _load_param(b_fc2, "./data/b_fc2.bin", (OUT_CH4,))
print("done")

print("CNN loaded successfully!")

Conv1:	loading weight... done
	loading bias... done
Conv2:	loading weight... done
	loading bias... done
FC1:	loading weight... done
	loading bias... done
FC2:	loading weight... done
	loading bias... done
CNN loaded successfully!


## 5. CNN硬件推理

In [5]:
image_path = "./data/1.jpg"

# cv2.imread signals a missing or undecodable file by returning None rather
# than raising, so check before using the result.
image1 = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if image1 is None:
    raise OSError("cv2.imread could not read image file: " + image_path)
image1 = image1.astype(np.float32)

if image1.shape[0] < IN_HEIGHT1 or image1.shape[1] < IN_WIDTH1:
    raise ValueError(
        "image %s is %dx%d, need at least %dx%d"
        % (image_path, image1.shape[0], image1.shape[1], IN_HEIGHT1, IN_WIDTH1)
    )

# Invert and scale the top-left IN_HEIGHT1 x IN_WIDTH1 pixels (255 -> 0.0,
# 0 -> 1.0), then write the same plane into every input channel.
pixels = (255 - image1[:IN_HEIGHT1, :IN_WIDTH1]) / 255
for ch in range(IN_CH1):
    image[:, :, ch] = pixels

print("Finish reading image.")

# Conv1 (mode = 1, the padding selector) followed by Pool1
hwConv(conv_ip, KERNEL_W1, KERNEL_W1, STRIDE1, STRIDE1,
       mode = 1, relu_en = RELU_EN1,
       feat_in = image, W = W_conv1, bias = b_conv1, feat_out = h_conv1)
hwPool(pool_ip, KERNEL_W11, KERNEL_W11,
       mode = MODE11, feat_in = h_conv1, feat_out = h_pool1)

# Conv2 (mode = 1) followed by Pool2
hwConv(conv_ip, KERNEL_W2, KERNEL_W2, STRIDE2, STRIDE2,
       mode = 1, relu_en = RELU_EN2,
       feat_in = h_pool1, W = W_conv2, bias = b_conv2, feat_out = h_conv2)
hwPool(pool_ip, KERNEL_W21, KERNEL_W21,
       mode = MODE21, feat_in = h_conv2, feat_out = h_pool2)

# FC1, run on the convolution core with mode = 0
hwConv(conv_ip, KERNEL_W3, KERNEL_W3, STRIDE3, STRIDE3,
       mode = 0, relu_en = RELU_EN3,
       feat_in = h_pool2, W = W_fc1, bias = b_fc1, feat_out = h_fc1)

# FC2, run on the convolution core with mode = 0
hwConv(conv_ip, KERNEL_W4, KERNEL_W4, STRIDE4, STRIDE4,
       mode = 0, relu_en = RELU_EN4,
       feat_in = h_fc1, W = W_fc2, bias = b_fc2, feat_out = h_fc2)

# Pick the output channel with the largest score; on ties the lowest index
# wins because the comparison is strict.
scores = h_fc2[0][0]
result, MAX = 0, scores[0]
for ch in range(1, OUT_CH4):
    if scores[ch] > MAX:
        result, MAX = ch, scores[ch]

print("The image was recognized as " + str(result))

Finish reading image.
The image was recognized as 2
