# Chapter-8 End to End Execution of YOLOv12

#### In this notebook, we will use the concepts which we learned in the earlier chapters and try to play with YOLOv12 model in an end-to-end manner.

### Step-1 Exporting YOLOv12 to ONNX

In [1]:
# Clone YOLOv12 repository
!git clone https://github.com/sunsmarterjie/yolov12.git
%cd yolov12

fatal: destination path 'yolov12' already exists and is not an empty directory.
/mnt/d/Meet/Company/Orange Eva Publication/Jupyter Notebook/Ultimate-ONNX-for-Optimizing-Deep-Learning-Models/Chapter-8/yolov12


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
# Below are the minimal requirements as we don
req = """
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.2.2
torchvision==0.17.2
onnx==1.14.0
PyYAML==6.0.1
# scipy==1.13.0
onnxslim==0.1.31
onnxruntime==1.18.0
opencv-python==4.9.0.80
# psutil==5.9.8
# py-cpuinfo==9.0.0
huggingface-hub==0.23.2
safetensors==0.4.3
numpy==1.26.4
matplotlib"""

with open("requirements_minimul.txt", "w") as f:
    f.write(req)

In [3]:
!pip install -r requirements_minimul.txt
!pip install thop --no-deps

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu


In [4]:
# Run YOLOv12 model in Pytorch first.
from ultralytics import YOLO

model = YOLO('yolov12n.pt')
res = model.predict()

for i, r in enumerate(res):
    r.save(f"res_{i}.jpg")

FlashAttention is not available on this device. Using scaled_dot_product_attention instead.
Downloading https://github.com/sunsmarterjie/yolov12/releases/download/turbo/yolov12n.pt to 'yolov12n.pt'...


100%|██████████████████████████████████████████████████████████████████████████████| 5.26M/5.26M [00:00<00:00, 14.3MB/s]



image 1/2 /mnt/d/Meet/Company/Orange Eva Publication/Jupyter Notebook/Ultimate-ONNX-for-Optimizing-Deep-Learning-Models/Chapter-8/yolov12/ultralytics/assets/bus.jpg: 640x480 4 persons, 1 bus, 234.6ms
image 2/2 /mnt/d/Meet/Company/Orange Eva Publication/Jupyter Notebook/Ultimate-ONNX-for-Optimizing-Deep-Learning-Models/Chapter-8/yolov12/ultralytics/assets/zidane.jpg: 384x640 2 persons, 1 tie, 124.8ms
Speed: 6.6ms preprocess, 179.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)


In [5]:
from IPython.display import display, HTML

def display_side_by_side(img_path1, img_path2, caption1="Image 1", caption2="Image 2", 
                        width=512, title="Image Comparison"):
    """
    Display two images side by side with centered captions and a title.
    
    Parameters:
    - img_path1: Path to the first image
    - img_path2: Path to the second image
    - caption1: Caption for the first image
    - caption2: Caption for the second image
    - width: Display width for each image in pixels
    - title: Title for the entire comparison
    """
    html = f"""
    <div style="text-align: center; margin: 20px 0;">
        <h3>{title}</h3>
        <table style="margin: 0 auto; border-collapse: collapse;">
            <tr>
                <td style="padding: 10px; text-align: center; vertical-align: top;">
                    <div style="width: {width}px; margin: 0 auto;">
                        <img src="{img_path1}" alt="{caption1}" style="max-width: 100%; height: auto; display: block; margin: 0 auto;"/>
                        <p style="text-align: center; margin-top: 8px; font-weight: bold;">{caption1}</p>
                    </div>
                </td>
                <td style="padding: 10px; text-align: center; vertical-align: top;">
                    <div style="width: {width}px; margin: 0 auto;">
                        <img src="{img_path2}" alt="{caption2}" style="max-width: 100%; height: auto; display: block; margin: 0 auto;"/>
                        <p style="text-align: center; margin-top: 8px; font-weight: bold;">{caption2}</p>
                    </div>
                </td>
            </tr>
        </table>
    </div>
    """
    display(HTML(html))

In [6]:
display_side_by_side(
    "./yolov12/ultralytics/assets/zidane.jpg",
    "./yolov12/res_1.jpg",
    "Original Image",
    "Image with detections",
    title="Pytorch Outputs"
)

0,1
Original Image,Image with detections


In [7]:
# Export to ONNX
from ultralytics import YOLO

model = YOLO('yolov12n.pt')
model.export(format="onnx", device="cpu", simplify=True, nms=False)

Ultralytics 8.3.63 🚀 Python-3.10.12 torch-2.2.2+cpu CPU (AMD Ryzen 5 5600H with Radeon Graphics)
YOLOv12n summary (fused): 376 layers, 2,542,440 parameters, 0 gradients, 6.0 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov12n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.3 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.0 opset 17...
[34m[1mONNX:[0m slimming with onnxslim 0.1.31...
[34m[1mONNX:[0m export success ✅ 9.5s, saved as 'yolov12n.onnx' (10.0 MB)

Export complete (10.4s)
Results saved to [1m/mnt/d/Meet/Company/Orange Eva Publication/Jupyter Notebook/Ultimate-ONNX-for-Optimizing-Deep-Learning-Models/Chapter-8/yolov12[0m
Predict:         yolo predict task=detect model=yolov12n.onnx imgsz=640  
Validate:        yolo val task=detect model=yolov12n.onnx imgsz=640 data=None  
Visualize:       https://netron.app


'yolov12n.onnx'

### Step-2 Execute FP32 model using ONNX Runtime

In [None]:
%cd ../

In [9]:
import cv2
from yolo_helper.inference import YOLOInference
from yolo_helper.visualization import draw_detections
from yolo_helper.postprocessing import get_class_names

# Configuration
MODEL_PATH = "yolov12/yolov12n.onnx"
IMAGE_PATH = "./yolov12/ultralytics/assets/zidane.jpg"
CLASS_NAMES = get_class_names()  # COCO class names
OUTPUT_PATH = "output_fp32.jpg"

# Initialize detector
detector = YOLOInference(MODEL_PATH)

# Run inference
result = detector(IMAGE_PATH, conf_thresh=0.25, iou_thresh=0.45)

if result is not None:
    image, detections = result
    
    # Draw and save results
    output_image = draw_detections(image, detections, CLASS_NAMES)
    cv2.imwrite(OUTPUT_PATH, output_image)
    print(f"Saved results to {OUTPUT_PATH}")
else:
    print("No detections found")

Inference time: 0.128s
Saved results to output_fp32.jpg


### Step-3 Apply Static Quantization to model 

In [10]:
# Download coco-val2017 dataset for calibration.

command = """
# Create datasets directory if it doesn't exist
mkdir -p ./datasets

# Download coco-val2017 dataset only if zip file doesn't exist
if [ ! -f ./datasets/val2017.zip ]; then
    wget http://images.cocodataset.org/zips/val2017.zip -O ./datasets/val2017.zip
else
    echo "val2017.zip already exists - skipping download"
fi

# Extract only if the directory doesn't exist
if [ ! -d ./datasets/val2017 ]; then
    unzip ./datasets/val2017.zip -d ./datasets/
else
    echo "val2017 directory already exists - skipping extraction"
fi
"""

with open("coco_val_2017_download.sh", "w") as f:
    f.writelines(command)
    
!./coco_val_2017_download.sh

val2017.zip already exists - skipping download
val2017 directory already exists - skipping extraction


In [11]:
import os
from yolo_helper.dataloader import COCODataset, YoloDataReader
from onnxruntime.quantization import quantize_static, quantize_dynamic, QuantType, CalibrationMethod
from onnxruntime.quantization.shape_inference import quant_pre_process

dataset = COCODataset("./datasets/val2017", sample=100)
data_reader = YoloDataReader(dataset)

fp32_path = "./yolov12/yolov12n.onnx"
fp32_path_preproc = "./yolov12/yolov12n_preproc.onnx"
int8_path_dynamic_quant = "./yolov12/yolov12n_int8_dynamic_quant.onnx"
int8_path_static_quant = "./yolov12/yolov12n_int8_static_quant.onnx"

# Firstly, apply shape inference and onnxruntime model optimization before quantizing the model.
quant_pre_process(fp32_path, fp32_path_preproc, skip_symbolic_shape=True)

# Apply dynamic quantization
quantized_model = quantize_dynamic(
    model_input=fp32_path_preproc,        # Input ONNX model
    model_output=int8_path_dynamic_quant,       # Output quantized model
    weight_type=QuantType.QInt8          # Quantize only weights to int8, activations will be quantize during runtime
)

print(f"Dynamic Quantized model saved at: {int8_path_dynamic_quant}")


# Apply static quantization
quantize_static(
    model_input=fp32_path_preproc,          # Input ONNX model
    model_output=int8_path_static_quant,    # Output quantized model
    calibration_data_reader=data_reader,
    weight_type=QuantType.QInt8,            # Quantize weights to int8
    activation_type=QuantType.QInt8,        # Quantize activations to int8
    calibrate_method=CalibrationMethod.MinMax,
    extra_options={"CalibStridedMinMax": 4}    # Process 4 images at a time for calibration
)

print(f"Static Quantized model saved at: {int8_path_static_quant}")

# Compare model sizes
fp32_size = os.path.getsize(fp32_path) / 1024 / 1024
dynamic_quant_size = os.path.getsize(int8_path_dynamic_quant) / 1024 / 1024
static_quant_size = os.path.getsize(int8_path_static_quant) / 1024 / 1024

print(f"FP32 Model Size: {fp32_size:.2f} MB")
print(f"Dynamic Quantized Model Size: {dynamic_quant_size:.2f} MB")
print(f"Static Quantized Model Size: {static_quant_size:.2f} MB")

Dynamic Quantized model saved at: ./yolov12/yolov12n_int8_dynamic_quant.onnx
Static Quantized model saved at: ./yolov12/yolov12n_int8_static_quant.onnx
FP32 Model Size: 10.02 MB
Dynamic Quantized Model Size: 2.96 MB
Static Quantized Model Size: 3.12 MB


In [12]:
# Run Static Quantized Model

# Configuration
MODEL_PATH = int8_path_static_quant
IMAGE_PATH = "./yolov12/ultralytics/assets/zidane.jpg"
CLASS_NAMES = get_class_names()  # COCO class names
OUTPUT_PATH = "./output_int8_static.jpg"

# Initialize detector
detector = YOLOInference(MODEL_PATH)

# Run inference with reduced conf_threshold.
result = detector(IMAGE_PATH, conf_thresh=0.05, iou_thresh=0.45)

if result is not None:
    image, detections = result
    
    # Draw and save results
    output_image = draw_detections(image, detections, CLASS_NAMES)
    cv2.imwrite(OUTPUT_PATH, output_image)
    print(f"Saved results to {OUTPUT_PATH}")
else:
    print("No detections found")

Inference time: 0.239s
No detections found


In [13]:
# Run Dynamic Quantized Model

# Configuration
MODEL_PATH = int8_path_dynamic_quant
IMAGE_PATH = "./yolov12/ultralytics/assets/zidane.jpg"
CLASS_NAMES = get_class_names()  # COCO class names
OUTPUT_PATH = "./output_int8_dynamic.jpg"

# Initialize detector
detector = YOLOInference(MODEL_PATH)

# Run inference
result = detector(IMAGE_PATH, conf_thresh=0.25, iou_thresh=0.45)

if result is not None:
    image, detections = result
    
    # Draw and save results
    output_image = draw_detections(image, detections, CLASS_NAMES)
    cv2.imwrite(OUTPUT_PATH, output_image)
    print(f"Saved results to {OUTPUT_PATH}")
else:
    print("No detections found")

NotImplemented: [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an implementation for ConvInteger(10) node with name '/model.0/conv/Conv_quant'

### Step-4 Debug Static Quantized model

In [17]:
int8_path_static_quant_w_concat_fp32 = "./yolov12/yolov12n_int8_static_quant_w_concat_fp32.onnx"

# Rewind the datareader so that it can be used again.
data_reader.rewind()

# Add last Concat node which concatenates boxes prediction and scores prediction.
# Boxes prediction is in [0-640] range, while scores prediction is in [0-1] range. 
# This makes it difficult to quantize. Hence, keeping this node in FP32 precision
# recovers the lost accuracy.
quantize_static(
    model_input=fp32_path_preproc,          # Input ONNX model
    model_output=int8_path_static_quant_w_concat_fp32,    # Output quantized model
    calibration_data_reader=data_reader,
    weight_type=QuantType.QInt8,            # Quantize weights to int8
    activation_type=QuantType.QInt8,        # Quantize activations to int8
    calibrate_method=CalibrationMethod.MinMax,
    nodes_to_exclude=["/model.21/Concat_5"],
    extra_options={"CalibStridedMinMax": 4}    # Process 4 images at a time for calibration
)

print(f"Updated Static Quantized model saved at: {int8_path_static_quant_w_concat_fp32}")

Updated Static Quantized model saved at: ./yolov12/yolov12n_int8_static_quant_w_concat_fp32.onnx


In [18]:
# Run Updated Static Quantized Model

# Configuration
MODEL_PATH = int8_path_static_quant_w_concat_fp32
IMAGE_PATH = "./yolov12/ultralytics/assets/zidane.jpg"
CLASS_NAMES = get_class_names()  # COCO class names
OUTPUT_PATH = "./output_int8_static_w_concat_fp32.jpg"

# Initialize detector
detector = YOLOInference(MODEL_PATH)

# Run inference with 0.25 conf_threshold
result = detector(IMAGE_PATH, conf_thresh=0.25, iou_thresh=0.45)

if result is not None:
    image, detections = result
    
    # Draw and save results
    output_image = draw_detections(image, detections, CLASS_NAMES)
    cv2.imwrite(OUTPUT_PATH, output_image)
    print(f"Saved results to {OUTPUT_PATH}")
else:
    print("No detections found")

Inference time: 0.382s
Saved results to ./output_int8_static_w_concat_fp32.jpg


In [19]:
# Example usage:
display_side_by_side(
    "./yolov12/ultralytics/assets/zidane.jpg",
    "./output_int8_static_w_concat_fp32.jpg",
    "Original Image",
    "Image with detections",
    title="ONNX Runtime Static Quantized Outputs"
)

0,1
Original Image,Image with detections
