# 定义网络并读取

从事先训练好的网络中读取参数，并做量化操作。
其中的onet作为原来没有量化的网络，而net会被量化成8bit的网络。

In [11]:
import torch 
import numpy as np
from torch.utils.data import DataLoader
from torchvision.datasets import mnist
from torch import nn
from torch.autograd import Variable
from torch import  optim
from torchvision import transforms
from torch.quantization import QuantStub, DeQuantStub,QConfig
import os
import time
import sys
import torch.quantization
import time

class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.quant = QuantStub()
        self.dequant = DeQuantStub()
        self.layer1 = nn.Sequential(
                nn.Conv2d(1,16,kernel_size=3), # 16, 26 ,26
                #nn.BatchNorm2d(16),
                nn.ReLU(inplace=True))
        
        self.layer2 = nn.Sequential(
                nn.Conv2d(16,32,kernel_size=3),# 32, 24, 24
                #nn.BatchNorm2d(16),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2,stride=2)) # 32,12,12
        
        self.layer3 = nn.Sequential(
                nn.Conv2d(32,64,kernel_size=3), # 64,10,10
                #nn.BatchNorm2d(16),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2,stride=2))  # 64, 5,5

        self.layer4 = nn.Sequential(
                nn.Conv2d(64,64,kernel_size=3), # 64,3,3
                #nn.BatchNorm2d(16),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3,stride=3))  # 64,1,1

        self.fc = nn.Sequential(
                nn.Linear(64,32),
                nn.ReLU(inplace=True),
                nn.Linear(32,10))
        
    def forward(self,x):
        in_size = x.size(0)
        x = self.quant(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(in_size, -1)
        x = self.fc(x)
        x = self.dequant(x)

        return x

    
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

# 预处理=>将各种预处理组合在一起
data_tf = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize([0.5],[0.5])])
 
train_set = mnist.MNIST('./data',train=True,transform=data_tf,download=True)
test_set = mnist.MNIST('./data',train=False,transform=data_tf,download=True)
 
train_data = DataLoader(train_set,batch_size=1,shuffle=True)
test_data = DataLoader(test_set,batch_size=1,shuffle=False)

onet = CNN()
net = CNN()
net.load_state_dict(torch.load("./CNNnet.pt"))
onet.load_state_dict(torch.load("./CNNnet.pt"))
print("net")

my_qconfig = QConfig(activation= torch.quantization.default_observer.with_args(dtype=torch.quint8), weight=torch.quantization.default_observer.with_args(dtype=torch.qint8))
net.qconfig = torch.quantization.default_qconfig

torch.quantization.prepare(net, inplace=True)
torch.quantization.convert(net, inplace=True)

print_size_of_model(net)
print_size_of_model(onet)

net
Size (MB): 0.071677
Size (MB): 0.254867


## 观察网络结构
查看网络中各层的参数列表可以看到网络的组成，我们将量化后的参数提取出来。

In [12]:
print("=====quant NN composition=====")
for name in net.state_dict():
    print(name)

print("\n=====NN composition=====")
for name in onet.state_dict():
    print(name)
    
qweight1 = net.state_dict()['layer1.0.weight']
oweight1 = onet.state_dict()['layer1.0.weight']

qweight2 = net.state_dict()['layer2.0.weight']
oweight2 = onet.state_dict()['layer2.0.weight']

qweight3 = net.state_dict()['layer3.0.weight']
oweight3 = onet.state_dict()['layer3.0.weight']

qweight4 = net.state_dict()['layer4.0.weight']
oweight4 = onet.state_dict()['layer4.0.weight']

qbias1 = net.state_dict()['layer1.0.bias']
obias1 = onet.state_dict()['layer1.0.bias']

qbias2 = net.state_dict()['layer2.0.bias']
obias2 = onet.state_dict()['layer2.0.bias']

qbias3 = net.state_dict()['layer3.0.bias']
obias3 = onet.state_dict()['layer3.0.bias']

qbias4 = net.state_dict()['layer4.0.bias']
obias4 = onet.state_dict()['layer4.0.bias']

qfcweight1 = net.state_dict()['fc.0._packed_params._packed_params'][0]
ofcweight1 = onet.state_dict()['fc.0.weight']

qfcweight2 = net.state_dict()['fc.2._packed_params._packed_params'][0]
ofcweight2 = onet.state_dict()['fc.2.weight']

qfcbias1 = net.state_dict()['fc.0._packed_params._packed_params'][1]
ofcbias1 = onet.state_dict()['fc.0.bias']

qfcbias2 = net.state_dict()['fc.0._packed_params._packed_params'][1]
ofcbias2 = onet.state_dict()['fc.2.bias']

=====quant NN composition=====
quant.scale
quant.zero_point
layer1.0.weight
layer1.0.bias
layer1.0.scale
layer1.0.zero_point
layer2.0.weight
layer2.0.bias
layer2.0.scale
layer2.0.zero_point
layer3.0.weight
layer3.0.bias
layer3.0.scale
layer3.0.zero_point
layer4.0.weight
layer4.0.bias
layer4.0.scale
layer4.0.zero_point
fc.0.scale
fc.0.zero_point
fc.0._packed_params.dtype
fc.0._packed_params._packed_params
fc.2.scale
fc.2.zero_point
fc.2._packed_params.dtype
fc.2._packed_params._packed_params

=====NN composition=====
layer1.0.weight
layer1.0.bias
layer2.0.weight
layer2.0.bias
layer3.0.weight
layer3.0.bias
layer4.0.weight
layer4.0.bias
fc.0.weight
fc.0.bias
fc.2.weight
fc.2.bias


## 提取量化信息
我们将网络中的每一层的zero_point和scale值取出来，并向下取整到2的幂次。进行重新量化

In [13]:
print("qweight1",qweight1.q_zero_point(),1/qweight1.q_scale())
print("qweight2",qweight2.q_zero_point(),1/qweight2.q_scale())
print("qweight3",qweight3.q_zero_point(),1/qweight3.q_scale())
print("qweight4",qweight4.q_zero_point(),1/qweight4.q_scale())
print("qfcweight1",qfcweight1.q_zero_point(),1/qfcweight1.q_scale())
print("qfcbias1",qfcweight2.q_zero_point(),1/qfcweight2.q_scale())

qweight1 0 193.4951654222813
qweight2 0 455.6926789416391
qweight3 0 567.5108962261097
qweight4 0 484.9417163364051
qfcweight1 0 296.82317966874973
qfcbias1 0 183.76869356878268


In [14]:
oweight1 = oweight1*128
obias1 = obias1*128

oweight2 = oweight2*256
obias2 = obias2*256

oweight3 = oweight3*512
obias3 = obias3*512

oweight4 = oweight4*256
obias4 = obias4*256

ofcweight1 = ofcweight1*256
ofcbias1 = ofcbias1*256

ofcweight2 = ofcweight2*128
ofcbias2 = ofcbias2*128

oweight1 = torch.round(oweight1)
oweight2 = torch.round(oweight2)
oweight3 = torch.round(oweight3)
oweight4 = torch.round(oweight4)
obias1 = torch.round(obias1)
obias2 = torch.round(obias2)
obias3 = torch.round(obias3)
obias4 = torch.round(obias4)

ofcweight1 = torch.round(ofcweight1)
ofcweight2 = torch.round(ofcweight2)
ofcbias1 = torch.round(ofcbias1)
ofcbias2 = torch.round(ofcbias2)


oweight1 = oweight1.to(torch.int16)
oweight2 = oweight2.to(torch.int16)
oweight3 = oweight3.to(torch.int16)
oweight4 = oweight4.to(torch.int16)
obias1 = obias1.to(torch.int16)
obias2 = obias2.to(torch.int16)
obias3 = obias3.to(torch.int16)
obias4 = obias4.to(torch.int16)

ofcweight1 = ofcweight1.to(torch.int16)
ofcweight2 = ofcweight2.to(torch.int16)
ofcbias1 = ofcbias1.to(torch.int16)
ofcbias2 = ofcbias2.to(torch.int16)


In [15]:
i = 0;
for img , label in train_data:
    if (i < 1):
        #print(img)
        #img = img.reshape(img.size(0),-1)
        img = Variable(img)
        label = Variable(label)
        img = img.view(1,28,28)
        for m in range(28):
            for n in range(28):
                if(img[0][m][n].item() < 0.5):
                    img[0][m][n] = 0
                else:
                    img[0][m][n] = 1
        i = i+1
#         print(img)
    else:
        break


## 对比量化后的网络和之前的网络的精度对比
将测试集输入给被8bit量化的网络中运行。

In [71]:
#quantized net

i = 0
j = 0
k = 0
for img , label in test_data:
        #print(img)
        #img = img.reshape(img.size(0),-1)
        img = Variable(img)
        label = Variable(label)
        start =  time.time()
        out = net(img)
        out1 = onet(img)

        _ , pred = out.max(1)
        _ , pred1 = out1.max(1)
        if(pred1 == label):
            k = k + 1
        if(pred == label):
            j = j + 1
        i = i + 1
print("k ",k," j ",j," i ",i)

k  9889  j  8447  i  10000


## 保存权重信息

In [16]:

def save_4D(FileName,P_tensor):
    file = open(FileName,'w',encoding = 'utf-8')
    print(P_tensor.size)
    for i in range(P_tensor.size(0)):
        file.write('{')
        for j in range(P_tensor.size(1)):
            file.write('{')
            for k in range(P_tensor.size(2)):
                file.write('{')
                for l in range(P_tensor.size(3)):
                    file.write(str(P_tensor[i][j][k][l].item()))
                    if(l != P_tensor.size(3)-1):
                        file.write(',')
                file.write('}')
                if(k != P_tensor.size(2)-1):
                    file.write(',')
            file.write('}')
            if(j != P_tensor.size(1)-1):
                file.write(',')
        file.write('}')
        if(i != P_tensor.size(0)-1):
            file.write(',\n')
    file.write('}')
    file.close()
    
def save_1D(FileName,P_tensor):
    file = open(FileName,'w',encoding = 'utf-8')

    file.write('{')
    for i in range(P_tensor.size(0)):
        file.write(str(P_tensor[i].item()))
        if(i != P_tensor.size(0)-1):
            file.write(',')
    file.write('}')
    file.close()
    
def save_2D(FileName,P_tensor):
    file = open(FileName,'w',encoding = 'utf-8')

    print(ofcweight1.size())
    file.write('{')
    for i in range(P_tensor.size(0)):
        file.write('{')
        for j in range(P_tensor.size(1)):
            file.write(str(P_tensor[i][j].item()))
            if(j != P_tensor.size(1)-1):
                file.write(',')
        file.write('}')
        if (i != P_tensor.size(0)-1):
            file.write(',\n')
    file.write('}')
    file.close()
    

def save_3D(FileName,P_tensor):
    file = open(FileName,'w',encoding = 'utf-8')
    file.write('{')
    for i in range(P_tensor.size(1)):
        file.write('{')
        for j in range(P_tensor.size(2)):
            file.write('{')
            for k in range(P_tensor.size(0)):
                file.write(str(P_tensor[k][i][j].item()))
                if(k != P_tensor.size(0)-1):
                    file.write(',')
            file.write('}')
            if(j != P_tensor.size(2)-1):
                file.write(',\n')
        file.write('}')
        if(i != P_tensor.size(1)-1):
            file.write(',\n')
    file.write('}')
    file.close()

In [17]:
np.save("./fpga/oweight1",  oweight1)
np.save("./fpga/oweight2",  oweight2)
np.save("./fpga/oweight3",  oweight3)
np.save("./fpga/oweight4",  oweight4)
np.save("./fpga/obias1",  obias1)
np.save("./fpga/obias2",  obias2)
np.save("./fpga/obias3",  obias3)
np.save("./fpga/obias4",  obias4)
np.save("./fpga/ofcweight1",  ofcweight1)
np.save("./fpga/ofcweight2",  ofcweight2)
np.save("./fpga/ofcbias1",  ofcbias1)
np.save("./fpga/ofcbias2",  ofcbias2)
save_4D('Cw1.txt',oweight1)
save_4D('Cw2.txt',oweight2)
save_4D('Cw3.txt',oweight3)
save_4D('Cw4.txt',oweight4)
save_1D('Cb1.txt',obias1)
save_1D('Cb2.txt',obias2)
save_1D('Cb3.txt',obias3)
save_1D('Cb4.txt',obias4)
save_1D('Cfcb1.txt',ofcbias1)
save_1D('Cfcb2.txt',ofcbias2)
save_2D('Cfcw1.txt',ofcweight1)
save_2D('Cfcw2.txt',ofcweight2)

<built-in method size of Tensor object at 0x7ff19c78ee10>
<built-in method size of Tensor object at 0x7ff19c78e6c0>
<built-in method size of Tensor object at 0x7ff19c78eab0>
<built-in method size of Tensor object at 0x7ff19c78e7e0>
torch.Size([32, 64])
torch.Size([32, 64])


## 测试数据到片上
将MNIST数据转为整数形式存放，之后可以放置到PYNQ板子上进行测试。

In [19]:
x = np.empty([10000,28,28], dtype = int) 
y = np.empty([10000,1], dtype = int) 
i = 0
for img , label in test_data:
        img = Variable(img)
        label = Variable(label)
        img = torch.round(img)+1
        x[i] = img
        y[i] = label
        i = i+1
np.save("img.npy",x)
np.save("label.npy",y)