In [None]:
import sys
sys.path.append('./data')

import argparse
import os
import random
import shutil
import time
import warnings
import math
from matplotlib import pyplot as plt
from dataset import flowerDataset
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim as optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.autograd import Variable
print("Pytorch Version: ", torch.__version__)

In [None]:
def uniform_quantize(k):
  """Build a k-bit uniform quantizer with a straight-through gradient.

  Args:
    k: Bit width selector. ``32`` returns the input unchanged, ``1`` applies
      ``torch.sign``, and any other value rounds ``input * n`` to the nearest
      integer and divides by ``n`` again, with ``n = 2 ** k - 1``.

  Returns:
    The ``apply`` callable of a freshly defined ``torch.autograd.Function``
    subclass; calling it on a tensor returns the quantized tensor.
  """
  class qfn(torch.autograd.Function):

    @staticmethod
    def forward(ctx, input):
      if k == 32:
        out = input
      elif k == 1:
        out = torch.sign(input)
      else:
        n = float(2 ** k - 1)
        out = torch.round(input * n) / n
      return out

    @staticmethod
    def backward(ctx, grad_output):
      # Straight-through estimator: the incoming gradient is passed on
      # unchanged, as if the rounding in forward() were the identity.
      grad_input = grad_output.clone()
      return grad_input

  # `apply` is looked up on the class itself: instantiating an autograd
  # Function (`qfn()`) is deprecated and emits a warning in current PyTorch.
  return qfn.apply


class weight_quantize_fn(nn.Module):
  """Quantize a weight tensor to ``w_bit`` bits.

  Behaviour by bit width:
    * ``w_bit == 32``: the weights are returned unchanged.
    * ``w_bit == 1``: the weights are divided by their mean absolute value
      ``E`` (detached from the graph), passed through the sign quantizer and
      mapped with ``(sign + 1) / 2 * E``.
    * otherwise: the weights go through ``tanh``, are divided by their
      largest absolute value, and are rounded by a ``w_bit - 1`` bit uniform
      quantizer.

  Args:
    w_bit: Weight bit width; must be in ``1..8`` or exactly ``32``.
  """
  def __init__(self, w_bit):
    super(weight_quantize_fn, self).__init__()
    assert 1 <= w_bit <= 8 or w_bit == 32
    self.w_bit = w_bit
    # The 1-bit branch of forward() needs the sign quantizer (k=1). Using the
    # generic `w_bit - 1` there would request k=0, whose level count
    # 2**0 - 1 is zero, so round(x * 0) / 0 turns every weight into NaN.
    self.uniform_q = uniform_quantize(k=1 if w_bit == 1 else w_bit - 1)

  def forward(self, x):
    """Return the quantized version of weight tensor ``x``."""
    if self.w_bit == 32:
      weight_q = x
    elif self.w_bit == 1:
      # Scale factor is detached so it does not receive gradients.
      E = torch.mean(torch.abs(x)).detach()
      weight_q = (self.uniform_q(x / E) + 1) / 2 * E
    else:
      weight = torch.tanh(x)
      # Normalise so the largest magnitude is 1 before rounding.
      weight = weight / torch.max(torch.abs(weight))
      weight_q = self.uniform_q(weight)
    return weight_q


def conv2d_Q_fn(w_bit):
  """Return an ``nn.Conv2d`` subclass whose weights are quantized to ``w_bit`` bits.

  The returned class takes the same constructor arguments as the signature
  below and quantizes ``self.weight`` with ``weight_quantize_fn`` on every
  forward pass; the stored weight itself is left untouched.
  """
  class Conv2d_Q(nn.Conv2d):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
      super().__init__(
          in_channels,
          out_channels,
          kernel_size,
          stride,
          padding,
          dilation,
          groups,
          bias,
      )
      self.w_bit = w_bit
      self.quantize_fn = weight_quantize_fn(w_bit=w_bit)

    def forward(self, input, order=None):
      # `order` is accepted but not used by this implementation.
      quantized_weight = self.quantize_fn(self.weight)
      return F.conv2d(
          input,
          quantized_weight,
          self.bias,
          self.stride,
          self.padding,
          self.dilation,
          self.groups,
      )

  return Conv2d_Q


class activation_quantize_fn(nn.Module):
  """Quantize activations to ``a_bit`` bits.

  With ``a_bit == 32`` the input passes through unchanged. For any other
  width the input is clamped to ``[0, 1]`` and rounded by an ``a_bit`` bit
  uniform quantizer.

  Args:
    a_bit: Activation bit width; must be at most 8 or exactly 32.
  """
  def __init__(self, a_bit):
    super().__init__()
    assert a_bit == 32 or a_bit <= 8
    self.a_bit = a_bit
    self.uniform_q = uniform_quantize(k=a_bit)

  def forward(self, x):
    """Return the quantized activations for ``x``."""
    if self.a_bit == 32:
      return x
    clipped = x.clamp(0, 1)
    return self.uniform_q(clipped)

class ConvNet(nn.Module):
    """Quantized CNN: five conv/BN/activation/pool stages plus a 1x1 conv head.

    Each stage uses a 4-bit-weight 3x3 convolution, batch norm, a 4-bit
    activation quantizer and a 2x2 max-pool. The head is an 8-bit-weight 1x1
    convolution with 16 outputs followed by a softmax over dim 1.

    The flattening in ``forward`` hard-codes ``16 * 5 * 5`` features, which is
    what the five pooling stages produce for a 160x160 input.
    """

    def __init__(self):
        super().__init__()
        W_BIT = 4
        A_BIT = 4

        # (in_channels, out_channels) of the five feature stages, in order.
        stage_channels = [(3, 16), (16, 32), (32, 32), (32, 32), (32, 16)]
        stages = []
        for c_in, c_out in stage_channels:
            stages.extend([
                conv2d_Q_fn(W_BIT)(c_in, c_out, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                activation_quantize_fn(A_BIT),
                nn.MaxPool2d(2, stride=2),
            ])
        self.layers = nn.Sequential(*stages)

        # Classifier head, expressed as a 1x1 convolution over the flattened features.
        self.conv1 = conv2d_Q_fn(8)(16*5*5, 16, kernel_size=1, stride=1, padding=0, bias=False)
        self.Outact = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax scores of shape (batch, 16) for image batch ``x``."""
        features = self.layers(x)
        # Move channels last before flattening, so features are laid out in H, W, C order.
        flattened = features.permute(0, 2, 3, 1).reshape(-1, 16*5*5, 1, 1)
        scores = self.conv1(flattened).squeeze(3).squeeze(2)
        return self.Outact(scores)

In [None]:
# Global settings shared by the cells below (epoch count, batch size, ...).
MAX_EPOCH = 100     # number of iterations of each training loop
BATCH_SIZE = 36     # batch size handed to every DataLoader
LR = 1e-4           # learning rate handed to the Adam optimizer
log_interval = 6    # the loss is printed every `log_interval` batches
val_interval = 1    # not referenced by any cell visible in this notebook

In [None]:
# ============================ step 1/5: data ============================
split_dir = os.path.join(".", "data", "splitData")
train_dir, valid_dir = (os.path.join(split_dir, subset) for subset in ("train", "valid"))

# Pre-processing for the training set: resize to 160x160, then convert to a tensor.
train_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
])

# Pre-processing for the validation set (same steps as for training).
valid_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
])

# Dataset instances for both splits.
train_data = flowerDataset(data_dir=train_dir, transform=train_transform)
valid_data = flowerDataset(data_dir=valid_dir, transform=valid_transform)

# Batch iterators. Only the training set is shuffled; the validation loader
# keeps the dataset order.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=BATCH_SIZE)

In [None]:
# ============================ step 2/5: model ============================
# Build the network and move it to the GPU when one is available.
net = ConvNet()
if torch.cuda.is_available():
    net = net.cuda()

In [None]:
def test():
    """Smoke test: run a fresh ConvNet on a random 2x3x160x160 batch and print the result."""
    model = ConvNet()
    sample = torch.randn(2, 3, 160, 160)
    scores = model(sample)
    print(scores.size())
    print(scores)

test()

In [None]:
# ============================ step 3/5: loss function ============================
class _ProbabilityCrossEntropy(nn.Module):
    """Cross-entropy for a model whose output is already a probability vector.

    ConvNet.forward ends with nn.Softmax, while nn.CrossEntropyLoss expects
    unnormalised logits and applies log-softmax itself. Feeding probabilities
    to it therefore normalises twice and flattens the gradients. This loss
    takes the log of the probabilities and applies the negative log-likelihood
    instead, which equals the cross-entropy of the underlying logits.

    Args:
        eps: Lower clamp applied to the probabilities before the log, so that
            an exact 0 does not produce -inf.
    """

    def __init__(self, eps=1e-12):
        super().__init__()
        self.eps = eps

    def forward(self, probs, target):
        """Return the mean negative log-likelihood of ``target`` under ``probs``."""
        return F.nll_loss(torch.log(probs.clamp_min(self.eps)), target)


criterion = _ProbabilityCrossEntropy()


In [None]:
# ============================ step 4/5: optimizer ============================
# Adam over all parameters of `net`, using the learning rate from the settings cell.
optimizer = optim.Adam(params=net.parameters(), lr=LR, betas=(0.9, 0.99))

In [None]:
# ============================ step 5/5: training ============================
# Containers for plotting. train_curve receives the loss of every training
# batch; valid_curve stays empty because this cell runs no validation pass.
train_curve = list()
valid_curve = list()
net.train()
accurancy_global = 0.0

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first save.
os.makedirs('./weights', exist_ok=True)

for epoch in range(MAX_EPOCH):
    # Plain Python counters: accumulating `.item()` values keeps the accuracy
    # a float instead of a (possibly GPU-resident) tensor.
    correct = 0
    total = 0

    for i, data in enumerate(train_loader):
        img, label = data
        if torch.cuda.is_available():
            img = img.cuda()
            label = label.cuda()

        # Forward pass
        out = net(img)
        optimizer.zero_grad()  # reset gradients
        loss = criterion(out, label)  # compute the loss

        loss.backward()  # backward pass
        optimizer.step()  # parameter update
        train_curve.append(loss.item())
        if (i + 1) % log_interval == 0:
            print('epoch:{},loss:{:.4f}'.format(epoch + 1, loss.item()))
        _, predicted = torch.max(out.detach(), 1)
        total += label.size(0)

        correct += (predicted == label).sum().item()
    print("============================================")
    # Guard against an empty loader, which would otherwise divide by zero.
    accurancy = correct / total if total else 0.0
    # NOTE(review): the "best" checkpoint is selected on training accuracy;
    # valid_loader is not consulted anywhere in this cell.
    if accurancy > accurancy_global:
        torch.save(net.state_dict(), './weights/convnet_best.pt')
        print("准确率由：", accurancy_global, "上升至：", accurancy, "已更新并保存权值为weights/convnet_best.pt")
        accurancy_global = accurancy
    print('第%d个epoch的识别准确率为：%d%%' % (epoch + 1, 100*accurancy))

torch.save(net.state_dict(), './weights/convnet_last.pt')
print("训练完毕，权重已保存为：weights/convnet_last.pt")

In [None]:
# Second training pass: runs another MAX_EPOCH epochs on the same network and
# optimizer. It continues from `accurancy_global` and appends to `train_curve`,
# both created by the previous training cell.
net.train()

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first save.
os.makedirs('./weights', exist_ok=True)

for epoch in range(MAX_EPOCH):
    # Plain Python counters: accumulating `.item()` values keeps the accuracy
    # a float instead of a (possibly GPU-resident) tensor.
    correct = 0
    total = 0

    for i, data in enumerate(train_loader):
        img, label = data
        if torch.cuda.is_available():
            img = img.cuda()
            label = label.cuda()

        # Forward pass
        out = net(img)
        optimizer.zero_grad()  # reset gradients
        loss = criterion(out, label)  # compute the loss

        loss.backward()  # backward pass
        optimizer.step()  # parameter update
        train_curve.append(loss.item())
        if (i + 1) % log_interval == 0:
            print('epoch:{},loss:{:.4f}'.format(epoch + 1, loss.item()))
        _, predicted = torch.max(out.detach(), 1)
        total += label.size(0)

        correct += (predicted == label).sum().item()
    print("============================================")
    # Guard against an empty loader, which would otherwise divide by zero.
    accurancy = correct / total if total else 0.0
    # NOTE(review): the "best" checkpoint is selected on training accuracy;
    # valid_loader is not consulted anywhere in this cell.
    if accurancy > accurancy_global:
        torch.save(net.state_dict(), './weights/convnet_best.pt')
        print("准确率由：", accurancy_global, "上升至：", accurancy, "已更新并保存权值为weights/convnet_best.pt")
        accurancy_global = accurancy
    print('第%d个epoch的识别准确率为：%d%%' % (epoch + 1, 100*accurancy))

torch.save(net.state_dict(), './weights/convnet_last.pt')
print("训练完毕，权重已保存为：weights/convnet_last.pt")

In [None]:
# Device used for evaluation: the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Alternative test location, kept disabled:
# split_dir=os.path.join(".","data","splitData")
# test_dir=os.path.join(split_dir,"test")
test_dir = os.path.join(".", "data", "rawData")

# Same pre-processing as for training: resize to 160x160, then convert to a tensor.
test_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
])

test_data = flowerDataset(data_dir=test_dir, transform=test_transform)
# Evaluation keeps the dataset order (no shuffling).
test_dataLoader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Evaluate the best checkpoint on the test loader.
# map_location lets a checkpoint written on a GPU machine be restored on a
# CPU-only one; the network is then placed on the same device as the batches.
net.load_state_dict(torch.load("./weights/convnet_best.pt", map_location=device))
net.to(device)
net.eval()
# Plain Python counters, so the final accuracy is a float rather than a tensor.
correct = 0
total = 0
with torch.no_grad():
    for i, data in enumerate(test_dataLoader):
        img, label = data
        img, label = img.to(device), label.to(device)
        print("label:", label)
        output = net(img)
        _, predicted = torch.max(output, 1)
        print("predict:", predicted)
        total += label.size(0)
        correct += (predicted == label).sum().item()

# Guard against an empty loader, which would otherwise divide by zero.
acc = correct / total * 100. if total else 0.0
print("Total:", total)
print("Accuracy:{}%".format(acc))

In [None]:
from Quantization import quantization, qnn_param_reader, qnn_mem_process
import json

def generate_config(model, in_shape):
    """Describe the conv / max-pool / linear layers of ``model`` as a dict.

    The layers are visited in ``model.modules()`` order while a running
    ``[channels, height, width]`` feature-map shape is propagated through
    every convolution and max-pool.

    Args:
        model: Module to inspect. Only direct subclasses of ``nn.Conv2d``
            (``type(m).__base__ is nn.Conv2d``), plain ``nn.MaxPool2d`` and
            plain ``nn.Linear`` modules are recorded.
        in_shape: ``[channels, height, width]`` of the network input, as a
            list or tuple. It is not modified.

    Returns:
        Dict with keys ``conv_<i>`` (``in_shape``, ``out_shape``, ``k``,
        ``s``, ``p``), ``pool_<i>`` (``in_shape``, ``p``, ``out_shape``) and
        ``linear_<i>`` (``in_len``, ``out_len``), each numbered from 0.
    """
    # Work on a private copy: the shape is updated in place below, and the
    # caller's sequence must not be changed (it may also be a tuple).
    feature_map_shape = list(in_shape)
    print(in_shape)
    dic = {}
    conv_cnt = 0
    pool_cnt = 0
    linear_cnt = 0
    for sub_module in model.modules():
        if type(sub_module).__base__ is torch.nn.Conv2d:
            k_h, k_w = sub_module.kernel_size
            s_h, s_w = sub_module.stride
            p_h, p_w = sub_module.padding

            conv_cur = {}
            conv_cur['in_shape'] = feature_map_shape[:]
            feature_map_shape[0] = sub_module.out_channels
            # Height and width each use their own kernel/stride/padding entry
            # (dilation is not taken into account).
            feature_map_shape[1] = (feature_map_shape[1] + 2 * p_h - k_h) // s_h + 1
            feature_map_shape[2] = (feature_map_shape[2] + 2 * p_w - k_w) // s_w + 1
            conv_cur['out_shape'] = feature_map_shape[:]
            # The config stores a single value per setting: the first entry.
            conv_cur['k'] = k_h
            conv_cur['s'] = s_h
            conv_cur['p'] = p_h

            dic['conv_' + str(conv_cnt)] = conv_cur
            conv_cnt = conv_cnt + 1

        elif type(sub_module) is torch.nn.MaxPool2d:
            # MaxPool2d keeps kernel_size exactly as it was passed (int or pair).
            pool_kernel = sub_module.kernel_size
            if isinstance(pool_kernel, int):
                pool_h = pool_w = pool_kernel
            else:
                pool_h, pool_w = pool_kernel

            pool_cur = {}
            pool_cur['in_shape'] = feature_map_shape[:]
            pool_cur['p'] = pool_kernel

            # Output size is computed as size // kernel, i.e. the pooling
            # stride and padding are not consulted.
            feature_map_shape[1] = feature_map_shape[1] // pool_h
            feature_map_shape[2] = feature_map_shape[2] // pool_w

            pool_cur['out_shape'] = feature_map_shape[:]

            dic['pool_' + str(pool_cnt)] = pool_cur
            pool_cnt = pool_cnt + 1

        elif type(sub_module) is torch.nn.Linear:
            linear_cur = {}
            linear_cur['in_len'] = sub_module.in_features
            linear_cur['out_len'] = sub_module.out_features

            dic['linear_' + str(linear_cnt)] = linear_cur
            linear_cnt = linear_cnt + 1

    return dic
    

def generate_params(model):
    """Collect the parameters of ``model`` as a flat, ordered dict of arrays.

    Modules are visited in ``model.modules()`` order and contribute:
      * direct subclasses of ``nn.Conv2d``: weight, then bias if present;
      * plain ``nn.Linear``: weight only (its bias is not exported);
      * plain ``nn.BatchNorm2d`` / ``nn.BatchNorm1d``: weight, bias,
        running mean, running variance, and ``eps`` (a Python float).

    Args:
        model: Module whose parameters are exported.

    Returns:
        Dict mapping ``'arr_0'``, ``'arr_1'``, ... to the collected values in
        visiting order.
    """
    def _as_numpy(tensor):
        # detach() drops the autograd graph and cpu() moves the data to host
        # memory, so the export also works for a model living on a GPU.
        return tensor.detach().cpu().numpy()

    values = []
    for sub_module in model.modules():
        if type(sub_module).__base__ is torch.nn.Conv2d:
            values.append(_as_numpy(sub_module.weight))
            if sub_module.bias is not None:
                values.append(_as_numpy(sub_module.bias))
        elif type(sub_module) is torch.nn.Linear:
            values.append(_as_numpy(sub_module.weight))
        elif type(sub_module) is torch.nn.BatchNorm2d or type(sub_module) is torch.nn.BatchNorm1d:
            values.append(_as_numpy(sub_module.weight))        # gamma
            values.append(_as_numpy(sub_module.bias))          # beta
            values.append(_as_numpy(sub_module.running_mean))  # mean
            values.append(_as_numpy(sub_module.running_var))   # var
            values.append(sub_module.eps)                      # eps

    # Keys are numbered in collection order: arr_0, arr_1, ...
    return {'arr_' + str(idx): value for idx, value in enumerate(values)}

In [None]:
# Rebuild the network on the CPU and restore the best checkpoint into it.
model = ConvNet()
best_state = torch.load('./weights/convnet_best.pt', map_location='cpu')
model.load_state_dict(best_state)

# Export every collected parameter array into a single .npz archive.
dic = generate_params(model)
np.savez('ConvNet_4w4a.npz', **dic)

# Export the layer description for a 3x160x160 input as JSON.
dic = generate_config(model, [3, 160, 160])
# indent=4 pretty-prints the JSON with four spaces per nesting level.
json_str = json.dumps(dic, indent=4)
with open('ConvNet_config.json', 'w') as json_file:
    json_file.write(json_str)

In [None]:
# Per-layer settings handed to QNNLayerMemProcess; position i in every list
# belongs to layer conv_i (conv_0 .. conv_5).
w_bit = [4, 4, 4, 4, 4, 8]       # passed as w_bit
in_bit = [8, 4, 4, 4, 4, 4]      # passed as in_bit
out_bit = [4, 4, 4, 4, 4, 32]    # passed as out_bit
l_shift = [6, 6, 6, 6, 6, 6]     # passed as l_shift
simd = [3, 16, 16, 16, 8, 8]     # passed as simd
pe = [16, 8, 8, 4, 4, 2]         # passed as pe

In [None]:
from Quantization.qnn_param_reader import QNNParamReader
from Quantization.qnn_mem_process import QNNLayerMemProcess

# Output folder for the generated HLS headers; created when missing.
target_dir_hls_param = 'param/hls/'
os.makedirs(target_dir_hls_param, exist_ok=True)

# These two handles are left open on purpose: the next cell writes the
# generated text into them and closes them.
hls_param_file = open(target_dir_hls_param + 'param.h', 'w')
hls_config_file = open(target_dir_hls_param + 'config.h', 'w')

# The JSON config is only read once, so its handle is closed right away
# instead of staying open for the rest of the session.
with open('ConvNet_config.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)
reader = QNNParamReader('ConvNet_4w4a.npz')

In [None]:
def _make_layer_processer(idx):
    """Build the QNNLayerMemProcess for layer ``conv_<idx>`` from the per-layer tables."""
    return QNNLayerMemProcess('conv_' + str(idx), reader, config, w_bit=w_bit[idx], in_bit=in_bit[idx],
                              out_bit=out_bit[idx], l_shift=l_shift[idx], pe=pe[idx], simd=simd[idx])


# The last entry of the per-layer tables is the final convolution, which goes
# through the dedicated last_* methods; all earlier entries use conv().
last_conv_idx = len(w_bit) - 1

# try/finally guarantees both header files are closed even when processing a
# layer raises, so no partially written handle stays open.
try:
    # conv_0 .. conv_(last - 1)
    for i in range(last_conv_idx):
        processer = _make_layer_processer(i)
        w, inc, bias = processer.conv()
        param_str = processer.layer_param_to_init_str(w, inc, bias)
        config_str = processer.conv_config_str()
        hls_param_file.write(param_str)
        hls_config_file.write(config_str)

    # Final convolution
    processer = _make_layer_processer(last_conv_idx)
    w = processer.last_conv()
    param_str = processer.last_layer_param_to_init_str(w)
    config_str = processer.last_conv_config_str()
    hls_param_file.write(param_str)
    hls_config_file.write(config_str)
finally:
    hls_param_file.close()
    hls_config_file.close()