# Chapter-7 Deploying ONNX Models on Edge Devices

#### In this notebook, we will learn about the ONNX Runtime C++ API and see how ONNX Runtime can be used to deploy models on edge devices.

In [1]:
# Download ONNX Runtime Prebuilt binaries
!wget https://github.com/microsoft/onnxruntime/releases/download/v1.21.0/onnxruntime-linux-x64-1.21.0.tgz
!tar -xzvf onnxruntime-linux-x64-1.21.0.tgz

!mkdir deps/
!mkdir data/

%cd deps
# Download image processing helper libraries
!wget https://raw.githubusercontent.com/nothings/stb/master/stb_image.h -O stb_image.h
!wget https://raw.githubusercontent.com/nothings/stb/master/stb_image_resize2.h -O stb_image_resize2.h

%cd ../data
# Download ResNet50 model from Onnx model zoo
!wget https://github.com/onnx/models/raw/refs/heads/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx

# Download a sample image of cat
!wget https://huggingface.co/spaces/ClassCat/ViT-ImageNet-Classification/resolve/main/samples/cat.jpg

%cd ../

--2025-04-03 21:49:12--  https://github.com/microsoft/onnxruntime/releases/download/v1.21.0/onnxruntime-linux-x64-1.21.0.tgz
Resolving github.com (github.com)... 20.207.73.82
Connecting to github.com (github.com)|20.207.73.82|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/156939672/d9f524e2-f059-49ca-a237-afa886d0f73e?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20250403%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250403T161810Z&X-Amz-Expires=300&X-Amz-Signature=7698ae6a2e028603cf97411d3373a98c34dfea39b914605a9022c51afee6a731&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Donnxruntime-linux-x64-1.21.0.tgz&response-content-type=application%2Foctet-stream [following]
--2025-04-03 21:49:12--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/156939672/d9f524e2-f059-49ca-a237-afa886d0f73e?X-Am

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]



2025-04-03 21:49:17 (5.04 MB/s) - ‘stb_image.h’ saved [283010/283010]

--2025-04-03 21:49:17--  https://raw.githubusercontent.com/nothings/stb/master/stb_image_resize2.h
185.199.110.133, 185.199.109.133, 185.199.111.133, ...tent.com)... 
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 452529 (442K) [text/plain]
Saving to: ‘stb_image_resize2.h’


2025-04-03 21:49:17 (5.65 MB/s) - ‘stb_image_resize2.h’ saved [452529/452529]

/mnt/d/Meet/Company/Orange Eva Publication/Jupyter Notebook/Ultimate-ONNX-for-Optimizing-Deep-Learning-Models/Chapter-7/data
--2025-04-03 21:49:17--  https://github.com/onnx/models/raw/refs/heads/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx
20.207.73.82thub.com (github.com)... 
connected. to github.com (github.com)|20.207.73.82|:443... 
HTTP request sent, awaiting response... 302 Found
Location: https://media.githubusercontent.com/med

In [2]:
# C++ source of a minimal ONNX Runtime image classifier. It loads ./data/cat.jpg,
# resizes it to 224x224, normalizes it with the ImageNet mean/std, runs the
# ResNet50 model and prints the index of the highest-scoring class.
# NOTE: the text lives in a regular (non-raw) Python string, so avoid backslash
# escapes inside the C++ code.
cpp_content = """
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include <onnxruntime_cxx_api.h>

#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_RESIZE2_IMPLEMENTATION
#include <stb_image.h>
#include <stb_image_resize2.h>

// ImageNet mean and std values
const float mean[] = {0.485f, 0.456f, 0.406f};
const float std_dev[] = {0.229f, 0.224f, 0.225f};

// Loads the image at imagePath, resizes it to 224x224 RGB and writes the
// normalized pixels into inputTensorValues in CHW layout (3 * 224 * 224 floats).
// Throws std::runtime_error if the image cannot be loaded or resized.
void preprocessImage(const std::string& imagePath, std::vector<float>& inputTensorValues) {
    const int target_width = 224, target_height = 224, target_channels = 3;

    int width = 0, height = 0, channels = 0;
    // Request 3 channels so the decoded buffer is always RGB
    unsigned char* img = stbi_load(imagePath.c_str(), &width, &height, &channels, target_channels);

    if (!img) {
        throw std::runtime_error("Failed to load image: " + imagePath);
    }

    std::vector<unsigned char> resized_img(target_width * target_height * target_channels);

    // Resize image to 224x224; the source buffer is not needed afterwards
    unsigned char* resized = stbir_resize_uint8_linear(img, width, height, 0,
                                 resized_img.data(), target_width, target_height, 0, STBIR_RGB);
    stbi_image_free(img);

    if (!resized) {
        throw std::runtime_error("Failed to resize image: " + imagePath);
    }

    // Guarantee the destination can hold the whole tensor before writing into it
    inputTensorValues.resize(static_cast<std::size_t>(target_channels) * target_height * target_width);

    // Convert to normalized float format (CHW layout)
    for (int c = 0; c < target_channels; c++) {  // Loop over channels
        for (int h = 0; h < target_height; h++) {
            for (int w = 0; w < target_width; w++) {
                int idx = (h * target_width + w) * target_channels + c;  // HWC index
                inputTensorValues[c * target_width * target_height + h * target_width + w] =
                    ((float)resized_img[idx] / 255.0f - mean[c]) / std_dev[c];  // Normalize & Standardize
            }
        }
    }
}

int main() {
    int model_height = 224;
    int model_width = 224;
    int num_channels = 3;
    std::string model_path = "./data/resnet50-v2-7.onnx";
    std::string img_path = "./data/cat.jpg";

    try {
        Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ONNXRuntimeTest");
        Ort::SessionOptions session_options;

        // Load ResNet50 model
        Ort::Session session(env, model_path.c_str(), session_options);
        Ort::AllocatorWithDefaultOptions allocator;

        // Get input details
        auto input_name = session.GetInputNameAllocated(0, allocator);
        std::vector<int64_t> input_shape = {1, num_channels, model_height, model_width}; // Batch size 1

        // Preprocess image
        std::vector<float> input_tensor_values(num_channels * model_height * model_width, 0.0f);
        preprocessImage(img_path, input_tensor_values);

        // Create input tensor (wraps input_tensor_values without copying, so the vector must outlive it)
        Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(),
                            input_tensor_values.size(), input_shape.data(), input_shape.size());

        // Run inference
        std::vector<const char*> input_names = {input_name.get()};
        auto output_name = session.GetOutputNameAllocated(0, allocator);
        std::vector<const char*> output_names = {output_name.get()};
        std::vector<Ort::Value> output_tensors = session.Run(Ort::RunOptions{nullptr}, input_names.data(),
                                                             &input_tensor, 1, output_names.data(), 1);

        // Print top-1 predicted class index
        float* output_data = output_tensors[0].GetTensorMutableData<float>();

        // Take the number of scores from the output tensor instead of hard-coding it
        const std::size_t num_classes = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
        if (num_classes == 0) {
            throw std::runtime_error("Model produced an empty output tensor");
        }

        int predicted_class = static_cast<int>(std::max_element(output_data, output_data + num_classes) - output_data);
        std::cout << "Predicted Class: " << predicted_class << std::endl;

    } catch (const std::exception& e) {
        // Ort::Exception derives from std::exception, so this also covers
        // the std::runtime_error thrown by preprocessImage
        std::cerr << "Error: " << e.what() << std::endl;
        return -1;
    }

    return 0;
}
"""

# Write the C++ source to disk so the following cell can compile it.
# write() emits the whole string in one call; writelines() expects an iterable
# of lines and would iterate over the string one character at a time.
with open("resnet_inference.cpp", "w", encoding="utf-8") as source_file:
    source_file.write(cpp_content)

The above C++ code loads the image of a cat shown below.

![Image of Cat](https://huggingface.co/spaces/ClassCat/ViT-ImageNet-Classification/resolve/main/samples/cat.jpg)

After the image is loaded, it is resized to 224x224 pixels and normalized using the ImageNet per-channel mean and standard deviation. The preprocessed image is then passed to the ONNX Runtime session for prediction. The model is expected to classify it as class 282, which corresponds to "tiger cat," the closest ImageNet match to a cat.

In [3]:
# Compile resnet_inference.cpp into the `resnet_inference` executable.
#   -I...            adds the ONNX Runtime headers and the stb headers in ./deps/ to the include path
#   -L... -lonnxruntime  links against the ONNX Runtime library from the prebuilt package
#                        (the library is listed after the source file on the command line)
!g++ -std=c++17 -I./onnxruntime-linux-x64-1.21.0/include -I./deps/ -L./onnxruntime-linux-x64-1.21.0/lib -o resnet_inference resnet_inference.cpp -lonnxruntime 

In [4]:
# Run the inference binary.
# The executable was linked dynamically against ONNX Runtime, so the directory holding
# the library is prepended to LD_LIBRARY_PATH for this one command only.
!LD_LIBRARY_PATH=./onnxruntime-linux-x64-1.21.0/lib:$LD_LIBRARY_PATH ./resnet_inference

Predicted Class: 282
