# SPACEMIT ONNXRUNTIME 神经网络部署软件栈 WorkShop

<img src="compare.png" alt="compare" style="width:1280px;"/>

## Step1 准备模型（host端）

<img src="onnx.png" alt="onnx" style="width:1280px;"/>

### Pytorch官方预训练模型

In [1]:
from torchvision.io import read_image
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights

img = read_image("/onnx/tools/redwolf.jpg")

# Step 1: Initialize model with the best available weights
weights = MobileNet_V2_Weights.DEFAULT
model = mobilenet_v2(weights=weights)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = preprocess(img).unsqueeze(0)

# Step 4: Use the model and print the predicted category
prediction = model(batch).squeeze(0).softmax(0)
class_id = prediction.argmax().item()
score = prediction[class_id].item()
category_name = weights.meta["categories"][class_id]
print(f"{category_name}: {100 * score:.1f}%")

coyote: 23.9%




### Step1.1 转换pt格式到onnx格式

In [2]:
import torch
import torchvision

# step1 设置网络输入的形状
dummy_input = torch.randn(1, 3, 224, 224, device="cpu")
model = torchvision.models.mobilenet_v2(pretrained=True).cpu()
# step2 设置网络输入输出的name
input_names = [ "input" ]
output_names = [ "output" ]
# step3 导出onnx模型
torch.onnx.export(
    model,
    dummy_input,
    "/onnx/play_yard/mobilenet_v2.onnx",
    verbose=False,
    input_names=input_names,
    output_names=output_names,
    export_params=True)



verbose: False, log level: Level.ERROR



In [3]:
import IPython
import threading
import time
import os
#!pip install netron

def display_netron(path):
    os.system(f'netron {path}')

thread = threading.Thread(target=display_netron, args=("/onnx/play_yard/mobilenet_v2.onnx",))
thread.start()
time.sleep(1)

Serving '/onnx/play_yard/mobilenet_v2.onnx' at http://localhost:8080


In [4]:
display(IPython.display.IFrame(f"http://localhost:8080", width=1000, height=1000))

### Step1.2 量化ONNX模型

In [4]:
import sys
sys.path.append("/onnx/onnxruntime-inference-examples/quantization/image_classification/cpu/")

from onnxruntime.quantization import QuantFormat, QuantType, quantize_static
import resnet50_data_reader

# step1 指定网络与calibration数据集、calibration数据预处理方式
input_model_path = "/onnx/play_yard/mobilenet_v2.onnx"
output_model_path = "/onnx/play_yard/mobilenet_v2_QDQ.onnx"
calibration_dataset_path = "/onnx/onnxruntime-inference-examples/quantization/image_classification/cpu/test_images"
dr = resnet50_data_reader.ResNet50DataReader(calibration_dataset_path, input_model_path)

# step2 设置参数并量化
quantize_static(
    input_model_path,
    output_model_path,
    dr,
    quant_format=QuantFormat.QDQ,
    per_channel=True,
    weight_type=QuantType.QInt8,
)
print("Calibrated and quantized model saved.")

# step3 检查量化精度损失(TODO:)



Calibrated and quantized model saved.


In [6]:
def display_netron(path):
    os.system(f'netron {path}')

thread = threading.Thread(target=display_netron, args=("/onnx/play_yard/mobilenet_v2_QDQ.onnx",))
thread.start()
time.sleep(1)

Serving '/onnx/play_yard/mobilenet_v2_QDQ.onnx' at http://localhost:8081


In [7]:
display(IPython.display.IFrame(f"http://localhost:8081", width=1000, height=1000))

## Step2 部署执行（device端）

<img src="mobile.png" alt="mobile" style="width:640px;"/>

### Step2.1 CPP代码编写

```
  //*************************************************************************
  // 设置log等级
  Ort::Env env(ORT_LOGGING_LEVEL_FATAL, "test");
  //*************************************************************************
  // 创建ort session
  Ort::SessionOptions session_options;
  session_options.AppendExecutionProvider("XNNPACK");
  session_options.SetIntraOpNumThreads(1);
  Ort::Session session(env, model_path, session_options);
  //*************************************************************************
  // 打印网络的输入信息 (node names, types, shape etc.)
  Ort::AllocatorWithDefaultOptions allocator;
  // print number of model input nodes
  const size_t num_input_nodes = session.GetInputCount();
  std::vector<Ort::AllocatedStringPtr> input_names_ptr;
  std::vector<const char*> input_node_names;
  input_names_ptr.reserve(num_input_nodes);
  input_node_names.reserve(num_input_nodes);
  std::vector<int64_t> input_node_dims;  // simplify... this model has only 1 input node {1, 3, 224, 224}.
                                         // Otherwise need vector<vector<>>

  std::cout << "Number of inputs = " << num_input_nodes << std::endl;

  // iterate over all input nodes
  for (size_t i = 0; i < num_input_nodes; i++) {
    // print input node names
    auto input_name = session.GetInputNameAllocated(i, allocator);
    std::cout << "Input " << i << " : name =" << input_name.get() << std::endl;
    input_node_names.push_back(input_name.get());
    input_names_ptr.push_back(std::move(input_name));

    // print input node types
    auto type_info = session.GetInputTypeInfo(i);
    auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType type = tensor_info.GetElementType();
    std::cout << "Input " << i << " : type = " << type << std::endl;

    // print input shapes/dims
    input_node_dims = tensor_info.GetShape();
    std::cout << "Input " << i << " : num_dims = " << input_node_dims.size() << '\n';
    for (size_t j = 0; j < input_node_dims.size(); j++) {
      std::cout << "Input " << i << " : dim[" << j << "] =" << input_node_dims[j] << '\n';
    }
    std::cout << std::flush;
    input_node_dims[0] = 1;
  }

  constexpr size_t input_tensor_size = 224 * 224 * 3;  // simplify ... using known dim values to calculate size
                                                       // use OrtGetTensorShapeElementCount() to get official size!

  std::vector<float> input_tensor_values(input_tensor_size);
  //*************************************************************************
  // 获取并打印网络的输出 (node names, types, shape etc.)
  auto output_node_names_ptr = session.GetOutputNameAllocated(0, allocator);
  auto output_node_names = output_node_names_ptr.get();
  std::cout << "output_node_names: " << output_node_names <<std::endl;
  //*************************************************************************
  // 初始化网络输入的数据
  cnpy::NpyArray arr = cnpy::npy_load("image.npy");
  float* loaded_data = arr.data<float>();
  std::cout << arr.shape[0] << " x " << arr.shape[1] << " x " << arr.shape[2] << "\n";
  for (unsigned int i = 0; i < input_tensor_size; i++) input_tensor_values[i] = static_cast<float>(loaded_data[i]);
  // create input tensor object from data values
  auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
  auto input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size,
                                                            input_node_dims.data(), 4);
  assert(input_tensor.IsTensor());
  std::vector<Ort::Value> output_tensors;
  //*************************************************************************
  // 网络推理计算
    struct timeval start_time, stop_time;
    gettimeofday(&start_time, nullptr);
    // score model & input tensor, get back output tensor
    output_tensors =
        session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, &output_node_names, 1);
    assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
    gettimeofday(&stop_time, nullptr);
    double inference_time_ms = (get_us(stop_time) - get_us(start_time)) / 1000;
    std::cout << "Inference time for frame " << ": "
                  << inference_time_ms << " ms"
                  << " XNNPACKrun: " << " - " << "ms" << std::endl;
  //*************************************************************************
  // 获取网络输出
  float* floatarr = output_tensors.front().GetTensorMutableData<float>();
  std::string last_label = "None";
  int argmax = -1;
  //*************************************************************************
  // 获取最大置信度与对应的标签，打印分类结果
  auto it = std::max_element(floatarr, floatarr+1001);
  argmax = std::distance(floatarr, it);
  float prob_threshold = 0.2;
  if ((argmax < labels.size()) && (*it > prob_threshold)) {
    std::cout << "label: " << labels[argmax] << " with probability " << *it
              << std::endl;
    last_label = labels[argmax];
  }
  std::cout << std::flush;
```

### Step2.2 部署执行

In [5]:
# 重命名准备好的模型
!cp mobilenet_v2_QDQ.onnx model.onnx

# 拷贝预编译ONNXRUNTIME的动态库，并设置RPATH
!cp /onnx/onnxruntime/build/Linux/RelWithDebInfo/libonnxruntime.so.1.15.1 .
!patchelf --set-rpath /onnx/play_yard/ libonnxruntime.so.1.15.1
!patchelf --set-interpreter /onnx/play_yard/ld-linux-riscv64-lp64d.so.1 libonnxruntime.so.1.15.1

# 拷贝预编译的CPP Demo，并设置RPATH
!cp /onnx/onnxruntime-inference-examples/c_cxx/build/squeezenet/capi_test .
!patchelf --set-interpreter /onnx/play_yard/ld-linux-riscv64-lp64d.so.1 capi_test
!patchelf --set-rpath /onnx/play_yard/ capi_test

# qemu下仿真测试
!qemu-riscv64 capi_test


cannot find section .interp
Load lable done... 
open failed(-1)
tcm init err(-1)
tcm_malloc alloc failed(524288)
tcm alloc failed!
malloc successfully!(0x740f80)(524288)
Number of inputs = 1
Input 0 : name =input
Input 0 : type = 1
Input 0 : num_dims = 4
Input 0 : dim[0] =1
Input 0 : dim[1] =3
Input 0 : dim[2] =224
Input 0 : dim[3] =224
output_node_names: output
224 x 224 x 3
903168, 2816, 12544, 27, 32, 12544, 72, 32, 127872, 8, 1, 2816, 1, 1
401408, 768, 12544, 32, 16, 12544, 32, 16, 130048, 4, 1, 768, 1, 1
200704, 3072, 12544, 16, 96, 12544, 16, 96, 128000, 2, 1, 3072, 1, 1
301056, 2688, 3136, 96, 24, 3136, 96, 24, 127488, 3, 1, 2688, 1, 1
75264, 5760, 3136, 24, 144, 3136, 24, 144, 75264, 1, 1, 5760, 1, 1
451584, 3840, 3136, 144, 24, 3136, 144, 24, 126720, 4, 1, 3840, 1, 1
75264, 5760, 3136, 24, 144, 3136, 24, 144, 75264, 1, 1, 5760, 1, 1
112896, 5120, 784, 144, 32, 784, 144, 32, 112896, 1, 1, 5120, 1, 1
25088, 9216, 784, 32, 192, 784, 32, 192, 25088, 1, 1, 9216, 1, 1
150528, 6656, 