## 基于CANN构建VGG17

### 1 实验目标
- 基于CANN高性能算子库构建VGG17网络
- 加载提供的权重（实验四得到的VGG17权重），编译graph得到离线模型，并利用实验三的测试样例对构建的模型进行推理验证，得到推理结果。

### 2 依赖环境
- 操作系统：Ubuntu x86
- 编译器：g++
- 芯片：Ascend310
- python及依赖的库：python3.7.5
- 已完成昇腾AI软件栈在开发环境上的部署（CANN环境，需要完成驱动及CANN软件的安装）。

### 3 构建算子

#### Conv2D  

// Prototype of the Conv2D operator (quoted for reference).
REG_OP(Conv2D) \
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) // data input \
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) // convolution kernel input \
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) // optional input: bias of the convolution kernel \
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) // optional input offset_w; still kept in the operator list \
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) \
.REQUIRED_ATTR(strides, ListInt) // required attribute strides \
.REQUIRED_ATTR(pads, ListInt) // required attribute pads \
.ATTR(dilations, ListInt, {1, 1, 1, 1}) // optional attribute dilations, default {1, 1, 1, 1} \
.ATTR(groups, Int, 1) // optional attribute groups, default 1 \
.ATTR(data_format, String, "NHWC") // optional attribute data_format (layout of the input data), default "NHWC" \
.ATTR(offset_x, Int, 0) // optional attribute offset_x, default 0 \
.OP_END_FACTORY_REG(Conv2D) // end of operator registration

从Conv2D算子原型定义可以看到，Conv2D算子包括：
- 两个必选输入（INPUT）：x 和 filter
- 两个可选输入（OPTIONAL_INPUT）：bias 和 offset_w
- 两个必选属性（REQUIRED_ATTR）：strides、pads
- 四个可选属性（ATTR）：dilations、groups、data_format、offset_x


// Builds a Conv2D operator whose filter comes from a Const operator loaded
// from "<kPath><conv_name>.weight.bin".
//
// weight_shape: shape of the filter tensor; weight_shape.GetShapeSize() *
//               sizeof(float) bytes are requested from the weight file.
// conv_name:    name of the Conv2D operator; also the prefix of the weight
//               file name and of the Const operator ("<conv_name>_weight").
// data:         upstream operator connected to input x.
// Returns the configured Conv2D operator. When the weight file cannot be
// loaded, a message is printed and construction continues regardless.
Operator GenConv2dOp(Shape weight_shape,string conv_name,Operator data){

    // Filter weights: a float tensor described in NCHW format.
    TensorDesc filter_desc(weight_shape, FORMAT_NCHW, DT_FLOAT);
    Tensor filter_tensor(filter_desc);
    // Number of bytes expected in the weight file.
    uint32_t filter_bytes = weight_shape.GetShapeSize() * sizeof(float);
    // Fill the tensor from the .bin file; a failure is only reported.
    if (!GetConstTensorFromBin(kPath+conv_name+".weight.bin", filter_tensor, filter_bytes)) {
        cout << __LINE__ << "GetConstTensorFromBin Failed!" << endl;
    }

    // Wrap the tensor in a Const operator through its "value" attribute.
    op::Const filter_const(conv_name+"_weight");
    filter_const.set_attr_value(filter_tensor);

    // The convolution itself: stride 1, padding 1, dilation 1, NCHW layout.
    op::Conv2D conv(conv_name);
    conv.set_input_x(data);                    // previous operator feeds x
    conv.set_input_filter(filter_const);       // kernel weights
    conv.set_attr_strides({ 1, 1, 1, 1 });
    conv.set_attr_pads({ 1, 1, 1, 1 });
    conv.set_attr_dilations({ 1, 1, 1, 1 });
    conv.set_attr_data_format("NCHW");

    // Input x, filter and output y all get the same descriptor:
    // default-constructed shape, NCHW format, float data type.
    TensorDesc nchw_float_desc(ge::Shape(), FORMAT_NCHW, DT_FLOAT);
    conv.update_input_desc_x(nchw_float_desc);
    conv.update_input_desc_filter(nchw_float_desc);
    conv.update_output_desc_y(nchw_float_desc);

    return conv;
}


#### Data

// Prototype of the Data operator (quoted for reference).
REG_OP(Data) \
.INPUT(x, TensorType::ALL()) // input x; every data type is accepted \
.OUTPUT(y, TensorType::ALL()) // output y; every data type is accepted \
.ATTR(index, Int, 0) // operator attribute index, default 0 \
.OP_END_FACTORY_REG(Data) // end of operator registration

代码如下：

    // Creates the graph's Data operator and describes its input/output tensor.
    auto shape_data = vector<int64_t>({1,3,224,224});// input shape [N,C,H,W] (matches FORMAT_NCHW below); batch size is 1 for inference
    TensorDesc desc_data(ge::Shape(shape_data), FORMAT_NCHW, DT_FLOAT); // tensor descriptor built from shape_data: NCHW format, float data

    // Instantiate the Data operator, named "data"
    auto data = op::Data("data");
    data.update_input_desc_x(desc_data); // set the shape, format and dtype of the data operator's input x
    data.update_output_desc_y(desc_data); // set the shape, format and dtype of the data operator's output y



#### Const

// Prototype of the Const operator (quoted for reference).
REG_OP(Const) \
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) // output y; the listed data types are supported \
.ATTR(value, Tensor, Tensor()) // attribute value: the tensor holding the constant's data \
.OP_END_FACTORY_REG(Const) // end of operator definition

#### MaxPool

// Prototype of the MaxPool operator (quoted for reference): one input x, one
// output y, required attributes ksize, strides and padding, and an optional
// data_format attribute that defaults to "NHWC".
// NOTE(review): GenMaxpoolOp in this document instantiates op::MaxPoolV3 and
// sets pads, padding_mode, global_pooling and ceil_mode, none of which appear
// in this prototype - confirm whether the MaxPoolV3 prototype was intended here.
REG_OP(MaxPool) \
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8})) \
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8})) \
.REQUIRED_ATTR(ksize, ListInt) \
.REQUIRED_ATTR(strides, ListInt) \
.REQUIRED_ATTR(padding, String) \
.ATTR(data_format, String, "NHWC") \
.OP_END_FACTORY_REG(MaxPool)

代码如下：

// Builds a MaxPoolV3 operator with a 2x2 window and stride 2 on the last two
// dimensions, no padding, NCHW data format.
//
// pool_name: name of the pooling operator.
// data:      upstream operator connected to input x.
// Returns the configured MaxPoolV3 operator.
Operator GenMaxpoolOp(string pool_name,Operator data){

    op::MaxPoolV3 pool(pool_name);
    pool.set_input_x(data);
    // One entry per tensor dimension. NOTE(review): with data_format "NCHW"
    // the entries are presumably ordered (N, C, H, W), i.e. a 2x2 window over
    // H and W - the earlier comment listed an NHWC ordering; confirm.
    pool.set_attr_ksize({1,1,2,2});
    pool.set_attr_strides({1,1,2,2});
    pool.set_attr_pads({0,0,0,0});
    pool.set_attr_data_format("NCHW");
    // The padding mode is set explicitly to "CALCULATED".
    pool.set_attr_padding_mode("CALCULATED");
    pool.set_attr_global_pooling(false);
    // ceil_mode false: do not round up when the output shape is computed.
    pool.set_attr_ceil_mode(false);

    // Input and output share one descriptor: default shape, NCHW, float.
    TensorDesc io_desc(ge::Shape(), FORMAT_NCHW, DT_FLOAT);
    pool.update_input_desc_x(io_desc);
    pool.update_output_desc_y(io_desc);
    return pool;
}


#### Relu


// Prototype of the Relu operator (quoted for reference): one input x and one
// output y supporting the same list of data types; no attributes.
REG_OP(Relu) \
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, DT_INT32, DT_INT16, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8})) \
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, DT_INT32, DT_INT16, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8})) \
.OP_END_FACTORY_REG(Relu)

代码如下：

// Builds a Relu operator fed by the "y" output of the given upstream operator.
//
// relu_name: name of the Relu operator.
// data:      upstream operator; its output named "y" is connected to input x.
// Returns the configured Relu operator.
Operator GenReluOp(string relu_name,Operator data){

    op::Relu activation(relu_name);
    // Relu is placed after BatchNorm, which exposes several outputs, so the
    // source output is named explicitly ("y") to keep the graph unambiguous.
    activation.set_input_x(data, "y");

    // Same descriptor for input and output: default shape, ND format, float.
    TensorDesc io_desc(ge::Shape(), FORMAT_ND, DT_FLOAT);
    activation.update_input_desc_x(io_desc);
    activation.update_output_desc_y(io_desc);
    return activation;
}

#### BatchNorm

// Prototype of the BatchNorm operator (quoted for reference): required inputs
// x, scale and offset; optional inputs mean and variance; outputs y,
// batch_mean, batch_variance and reserve_space_1..3; attributes epsilon
// (default 0.0001), data_format (default "NHWC") and is_training (default true).
REG_OP(BatchNorm) \
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) \
.INPUT(scale, TensorType({DT_FLOAT})) \
.INPUT(offset, TensorType({DT_FLOAT})) \
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) \
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) \
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) \
.OUTPUT(batch_mean, TensorType({DT_FLOAT})) \
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) \
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) \
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) \
.OUTPUT(reserve_space_3, TensorType({DT_FLOAT})) \
.ATTR(epsilon, Float, 0.0001) .ATTR(data_format, String, "NHWC") .ATTR(is_training, Bool, true) \
.OP_END_FACTORY_REG(BatchNorm)

// Builds a BatchNorm operator in inference mode (is_training = false) whose
// offset, scale, mean and variance inputs are Const operators loaded from
// "<kPath><bn_name>.{beta,gamma,moving_mean,moving_variance}.bin".
//
// weight_shape: shape shared by the four parameter tensors; each file is read
//               as weight_shape.GetShapeSize() * sizeof(float) bytes.
// bn_name:      name of the BatchNorm operator; also the prefix of the weight
//               files and of the four Const operators.
// data:         upstream operator connected to input x.
// Returns the configured BatchNorm operator. A failed load is only reported
// on stdout; construction continues regardless.
Operator GenBNOp(Shape weight_shape,string bn_name, Operator data){
    // One float/ND descriptor and one byte length serve all four parameters.
    TensorDesc param_desc(weight_shape, FORMAT_ND, DT_FLOAT);
    uint32_t param_bytes = weight_shape.GetShapeSize() * sizeof(float);

    // offset: stored as "beta" in the weight files.
    Tensor beta_tensor(param_desc);
    if (!GetConstTensorFromBin(kPath+bn_name+".beta.bin", beta_tensor, param_bytes)) {
        cout << __LINE__ << "GetConstTensorFromBin Failed!" << endl;
    }

    // scale: stored as "gamma" in the weight files.
    Tensor gamma_tensor(param_desc);
    if (!GetConstTensorFromBin(kPath+bn_name+".gamma.bin", gamma_tensor, param_bytes)) {
        cout << __LINE__ << "GetConstTensorFromBin Failed!" << endl;
    }

    // mean: stored as "moving_mean".
    Tensor moving_mean_tensor(param_desc);
    if (!GetConstTensorFromBin(kPath+bn_name+".moving_mean.bin", moving_mean_tensor, param_bytes)) {
        cout << __LINE__ << "GetConstTensorFromBin Failed!" << endl;
    }

    // variance: stored as "moving_variance".
    Tensor moving_var_tensor(param_desc);
    if (!GetConstTensorFromBin(kPath+bn_name+".moving_variance.bin", moving_var_tensor, param_bytes)) {
        cout << __LINE__ << "GetConstTensorFromBin Failed!" << endl;
    }

    // Expose each loaded tensor to the graph through a Const operator.
    op::Const beta_const(bn_name+"_beta");
    beta_const.set_attr_value(beta_tensor);
    op::Const gamma_const(bn_name+"_gamma");
    gamma_const.set_attr_value(gamma_tensor);
    op::Const mean_const(bn_name+"_mean");
    mean_const.set_attr_value(moving_mean_tensor);
    op::Const var_const(bn_name+"_variance");
    var_const.set_attr_value(moving_var_tensor);

    // Assemble the BatchNorm operator itself.
    op::BatchNorm bn(bn_name);
    bn.set_input_x(data);
    bn.set_input_offset(beta_const);
    bn.set_input_scale(gamma_const);
    bn.set_input_mean(mean_const);
    bn.set_input_variance(var_const);
    bn.set_attr_data_format("NCHW");
    bn.set_attr_is_training(false);     // inference, not training

    // Every updated input/output uses the same descriptor:
    // default-constructed shape, NCHW format, float data type.
    TensorDesc nchw_float_desc(ge::Shape(), FORMAT_NCHW, DT_FLOAT);
    bn.update_input_desc_x(nchw_float_desc);
    bn.update_input_desc_scale(nchw_float_desc);
    bn.update_input_desc_offset(nchw_float_desc);
    bn.update_input_desc_mean(nchw_float_desc);
    bn.update_input_desc_variance(nchw_float_desc);

    bn.update_output_desc_y(nchw_float_desc);
    bn.update_output_desc_batch_mean(nchw_float_desc);
    bn.update_output_desc_batch_variance(nchw_float_desc);

    return bn;
}

#### Flatten

// Prototype of the Flatten operator (quoted for reference): one input x, one
// output y, and an optional attribute axis that defaults to 1.
// NOTE(review): GenFlattenOp in this document instantiates op::FlattenV2, not
// op::Flatten - confirm which prototype was meant to be shown here.
REG_OP(Flatten) \
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT, DT_FLOAT16})) \
.OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT, DT_FLOAT16})) \
.ATTR(axis, Int, 1) \
.OP_END_FACTORY_REG(Flatten)

代码如下：

// Builds a FlattenV2 operator fed by the given upstream operator.
//
// flatten_name: name of the flatten operator.
// data:         upstream operator connected to input x.
// Returns the configured FlattenV2 operator.
Operator GenFlattenOp(string flatten_name,Operator data){
    op::FlattenV2 flat(flatten_name);
    flat.set_input_x(data);

    // Input and output share one descriptor: default shape, ND format, float.
    TensorDesc io_desc(ge::Shape(), FORMAT_ND, DT_FLOAT);
    flat.update_input_desc_x(io_desc);
    flat.update_output_desc_y(io_desc);

    return flat;
}

#### Dense

// Prototype of the MatMulV2 operator (quoted for reference): required inputs
// x1 and x2; optional inputs bias and offset_w; output y; attributes
// transpose_x1 (default false), transpose_x2 (default false) and offset_x
// (default 0).
REG_OP(MatMulV2) \
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) \
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) \
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) \
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) \
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) \
.ATTR(transpose_x1, Bool, false) \
.ATTR(transpose_x2, Bool, false) \
.ATTR(offset_x, Int, 0) \
.OP_END_FACTORY_REG(MatMulV2)

代码如下：

// Builds a dense (fully connected) layer as a MatMulV2 operator with a bias.
// The weight matrix and the bias vector are Const operators loaded from
// "<kPath><dense_name>.weight.bin" and "<kPath><dense_name>.bias.bin".
//
// input_channel:  second dimension of the stored weight matrix.
// output_channel: first dimension of the stored weight matrix and length of
//                 the bias vector.
// dense_name:     prefix of the weight files and of the created operators
//                 ("<dense_name>_weight", "_bias", "_matmul").
// data:           upstream operator connected to input x1.
// Returns the configured MatMulV2 operator. A failed load is only reported
// on stdout; construction continues regardless.
Operator GenDenseOp(uint32_t input_channel,uint32_t output_channel,string dense_name,Operator data){

    // Weight matrix, stored as [output_channel, input_channel].
    auto weight_dims = ge::Shape({output_channel, input_channel});
    TensorDesc weight_desc(weight_dims, FORMAT_ND, DT_FLOAT);
    Tensor weight_tensor(weight_desc);
    uint32_t weight_bytes = weight_dims.GetShapeSize() * sizeof(float);
    if (!GetConstTensorFromBin(kPath + dense_name+".weight.bin", weight_tensor, weight_bytes)) {
        cout << __LINE__ << "GetConstTensorFromBin Failed!" << endl;
    }
    op::Const weight_const(dense_name+"_weight");
    weight_const.set_attr_value(weight_tensor);

    // Bias vector of length output_channel, fed to the optional bias input.
    auto bias_dims = ge::Shape({ output_channel });
    TensorDesc bias_desc(bias_dims, FORMAT_ND, DT_FLOAT);
    Tensor bias_tensor(bias_desc);
    uint32_t bias_bytes = bias_dims.GetShapeSize() * sizeof(float);
    if (!GetConstTensorFromBin(kPath + dense_name+".bias.bin", bias_tensor, bias_bytes)) {
        cout << __LINE__ << "GetConstTensorFromBin Failed!" << endl;
    }
    op::Const bias_const(dense_name+"_bias");
    bias_const.set_attr_value(bias_tensor);

    // MatMulV2 with three inputs: the incoming data (x1), the weight matrix
    // (x2) and the bias. transpose_x2 is enabled because the weights are
    // stored as [output_channel, input_channel].
    op::MatMulV2 dense(dense_name+"_matmul");
    dense.set_input_x1(data);
    dense.set_input_x2(weight_const);
    dense.set_attr_transpose_x2(true);
    dense.set_input_bias(bias_const);

    // All inputs and the output share one descriptor: default shape, ND, float.
    TensorDesc nd_float_desc(ge::Shape(), FORMAT_ND, DT_FLOAT);
    dense.update_input_desc_x1(nd_float_desc);
    dense.update_input_desc_x2(nd_float_desc);
    dense.update_input_desc_bias(nd_float_desc);
    dense.update_output_desc_y(nd_float_desc);

    return dense;
}

#### Softmax

// Prototype of the SoftmaxV2 operator (quoted for reference): one input x, one
// output y, and an optional attribute axes that defaults to {-1}.
REG_OP(SoftmaxV2) \
.INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT16, DT_FLOAT})) \
.OUTPUT(y, TensorType({DT_DOUBLE, DT_FLOAT16, DT_FLOAT})) \
.ATTR(axes, ListInt, {-1}) \
.OP_END_FACTORY_REG(SoftmaxV2)

代码如下：

// Builds a SoftmaxV2 operator fed by the given upstream operator. The axes
// attribute is not set here, so the operator keeps its own default.
//
// flatten_name: name given to the softmax operator (the parameter name is
//               historical; it does not refer to a Flatten operator).
// data:         upstream operator connected to input x.
// Returns the configured SoftmaxV2 operator.
Operator GenSoftmaxOp(string flatten_name, Operator data){

    // op::Softmax was tried as an alternative by the author and left disabled.
    op::SoftmaxV2 softmax_op(flatten_name);
    softmax_op.set_input_x(data);
    return softmax_op;

}

In [7]:
! scripts/testcase_300.sh

mkdir -p out
g++ .//src/main.cpp -I /usr/local/Ascend/ascend-toolkit/latest/opp/op_proto/built-in/inc -I /usr/local/Ascend/ascend-toolkit/latest/atc/include/graph -I /usr/local/Ascend/ascend-toolkit/latest/atc/include/ge -I /usr/local/Ascend/ascend-toolkit/latest/atc/include/parser -I /usr/local/Ascend/ascend-toolkit/latest/atc/include  -L /usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub -lgraph -lge_compiler -lfmk_parser  -std=c++11 -g -Wall -D_GLIBCXX_USE_CXX11_ABI=0 -o ./out/ir_build 
[01m[K.//src/main.cpp:[m[K In function ‘[01m[Kge::Operator GenConv2dOp(ge::Shape, std::string, ge::Operator)[m[K’:
     auto conv_weight = op::Const(conv_name+"_weight"[01;35m[K)[m[K
                                                     [01;35m[K^[m[K
In file included from [01m[K/usr/local/Ascend/ascend-toolkit/latest/opp/op_proto/built-in/inc/aipp.h:24:0[m[K,
                 from [01m[K/usr/local/Ascend/ascend-toolkit/latest/opp/op_proto/built-in/inc/all_ops.h:24[m[K,
   