# Installation

In [None]:
!pip install ultralytics
!pip install onnxconverter-common

# Model Export

In [None]:
import numpy as np
import cv2
import urllib.request
from PIL import Image
import torch
import torchvision.transforms as transforms
from ultralytics import YOLO
import torchvision

# Load the YOLO model
# NOTE: the next cell rebinds `model` from "yolov8n.pt", so this instance is
# not the one that gets exported.
model = YOLO('/content/yolov8n.pt')

In [None]:
from ultralytics import YOLO

# Instantiate the YOLOv8-nano checkpoint that will be exported
model = YOLO("yolov8n.pt")

# Export to ONNX with a fixed (non-dynamic) 640 input, opset 17, simplified graph.
# Later cells read the result as "yolov8n.onnx".
model.export(
    format="onnx",
    imgsz=640,
    opset=17,
    dynamic=False,
    simplify=True,
)

In [None]:
# !yolo export model=yolov8n.pt imgsz=640 format=openvino


Preprocess the model before quantization

In [None]:
# Run onnxruntime's pre-quantization preprocessing module on the exported model.
# The static-quantization cells below read its output, yolov8n_infer.onnx.
!python -m onnxruntime.quantization.preprocess --input yolov8n.onnx --output yolov8n_infer.onnx


# Static Quantization - QOperator

In [None]:
!git clone https://github.com/EliSchwartz/imagenet-sample-images.git

In [None]:
# Delete the clone's git metadata; only the image files in the directory are
# used for calibration (presumably done to free disk space — confirm).
!rm -rf /content/imagenet-sample-images/.git

In [None]:
import numpy as np
from onnxruntime.quantization import CalibrationDataReader, quantize_static, QuantType, QuantFormat
from PIL import Image
import os
import random

class ImageCalibrationReader(CalibrationDataReader):
    """Single-pass calibration reader that feeds preprocessed images to quantize_static.

    Each call to ``get_next`` yields ``{input_name: tensor}`` where ``tensor`` is a
    float32 array of shape (1, 3, height, width) with values scaled to [0, 1].

    Args:
        image_dir: Directory whose top-level .png/.jpg/.jpeg files (case-insensitive)
            are used as calibration images.
        num_samples: Maximum number of images to use; if the directory holds more,
            a random subset of this size is drawn.
        input_name: Name of the model input the tensors are fed to.
        input_size: Target (width, height) passed to PIL's resize.
        seed: Optional seed for the subset draw. ``None`` keeps the original
            behaviour of using the global ``random`` state.

    Raises:
        ValueError: If ``image_dir`` contains no matching image files.
    """

    def __init__(self, image_dir, num_samples=100, input_name="images",
                 input_size=(640, 640), seed=None):
        # Sort so the candidate list does not depend on os.listdir's arbitrary order
        self.image_files = sorted(
            os.path.join(image_dir, f) for f in os.listdir(image_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        if not self.image_files:
            # Fail here with a clear message instead of deep inside calibration
            raise ValueError(f"No .png/.jpg/.jpeg images found in {image_dir!r}")

        # Randomly sample images (reproducibly when a seed is given)
        if len(self.image_files) > num_samples:
            rng = random.Random(seed) if seed is not None else random
            self.image_files = rng.sample(self.image_files, num_samples)

        self.idx = 0
        self.input_name = input_name
        self.input_size = tuple(input_size)

    def preprocess(self, image_path):
        """Load one image and return it as a (1, 3, H, W) float32 array in [0, 1]."""
        # The context manager closes the underlying file as soon as the pixels
        # have been copied by convert(); the original left the handle to the GC.
        with Image.open(image_path) as raw:
            image = raw.convert('RGB').resize(self.input_size, Image.Resampling.BILINEAR)

        # Convert to numpy and normalize
        image_np = np.array(image).astype(np.float32) / 255.0

        # HWC to CHW format
        image_np = np.transpose(image_np, (2, 0, 1))

        # Add batch dimension
        return np.expand_dims(image_np, axis=0)

    def get_next(self):
        """Return the next input dict, or None once every image has been consumed.

        Images that fail to load or preprocess are reported and skipped.
        """
        # Iterative skip: no recursion, however many consecutive images fail
        while self.idx < len(self.image_files):
            image_path = self.image_files[self.idx]
            self.idx += 1
            try:
                return {self.input_name: self.preprocess(image_path)}
            except Exception as e:
                print(f"Error processing image {image_path}: {str(e)}")
        return None

In [None]:
# Reader over the cloned sample images, capped at 100 calibration samples
calibration_data_reader = ImageCalibrationReader("/content/imagenet-sample-images", num_samples=100)

For more accurate results, keep the operations in the postprocessing subgraph in floating-point precision by listing them in the `nodes_to_exclude` parameter. You can visualize the model graph with https://netron.app to find the names of the nodes in the postprocessing subgraph.



In [None]:
# Nodes to exclude: post-processing operations that are left in floating point.
# The names belong to this particular exported graph; re-check them (e.g. in
# Netron) if the export settings change.
nodes_to_exclude = [
    '/model.22/Concat_3', '/model.22/Split', '/model.22/Sigmoid',
    '/model.22/dfl/Reshape', '/model.22/dfl/Transpose', '/model.22/dfl/Softmax',
    '/model.22/dfl/conv/Conv', '/model.22/dfl/Reshape_1', '/model.22/Slice_1',
    '/model.22/Slice', '/model.22/Add_1','/model.22/Add_2',  '/model.22/Sub', '/model.22/Div_1',
    '/model.22/Concat_4', '/model.22/Mul_2', '/model.22/Concat_5'
]

# Perform static quantization (QOperator format).
# The calibration reader is single-pass: re-create calibration_data_reader
# before running this cell a second time.
try:
    quantize_static(
        model_input='yolov8n_infer.onnx',
        model_output="yolov8n_st_quant.onnx",
        weight_type=QuantType.QInt8,
        activation_type=QuantType.QUInt8,
        calibration_data_reader=calibration_data_reader,
        quant_format=QuantFormat.QOperator,
        nodes_to_exclude=nodes_to_exclude,
        per_channel=False,
        reduce_range=True,
    )
except Exception as e:
    # Report, then re-raise: swallowing the error would hide the traceback and
    # let the notebook continue without the quantized model having been written.
    print(f"Quantization failed: {str(e)}")
    raise
else:
    print("Quantization completed successfully!")

# Static Quantization - QDQ

In [None]:
import numpy as np
from onnxruntime.quantization import CalibrationDataReader, quantize_static, QuantType, QuantFormat
from PIL import Image
import os
import random

class ImageCalibrationReader(CalibrationDataReader):
    """Single-pass calibration reader that feeds preprocessed images to quantize_static.

    Each call to ``get_next`` yields ``{input_name: tensor}`` where ``tensor`` is a
    float32 array of shape (1, 3, height, width) with values scaled to [0, 1].

    Args:
        image_dir: Directory whose top-level .png/.jpg/.jpeg files (case-insensitive)
            are used as calibration images.
        num_samples: Maximum number of images to use; if the directory holds more,
            a random subset of this size is drawn.
        input_name: Name of the model input the tensors are fed to.
        input_size: Target (width, height) passed to PIL's resize.
        seed: Optional seed for the subset draw. ``None`` keeps the original
            behaviour of using the global ``random`` state.

    Raises:
        ValueError: If ``image_dir`` contains no matching image files.
    """

    def __init__(self, image_dir, num_samples=100, input_name="images",
                 input_size=(640, 640), seed=None):
        # Sort so the candidate list does not depend on os.listdir's arbitrary order
        self.image_files = sorted(
            os.path.join(image_dir, f) for f in os.listdir(image_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        if not self.image_files:
            # Fail here with a clear message instead of deep inside calibration
            raise ValueError(f"No .png/.jpg/.jpeg images found in {image_dir!r}")

        # Randomly sample images (reproducibly when a seed is given)
        if len(self.image_files) > num_samples:
            rng = random.Random(seed) if seed is not None else random
            self.image_files = rng.sample(self.image_files, num_samples)

        self.idx = 0
        self.input_name = input_name
        self.input_size = tuple(input_size)

    def preprocess(self, image_path):
        """Load one image and return it as a (1, 3, H, W) float32 array in [0, 1]."""
        # The context manager closes the underlying file as soon as the pixels
        # have been copied by convert(); the original left the handle to the GC.
        with Image.open(image_path) as raw:
            image = raw.convert('RGB').resize(self.input_size, Image.Resampling.BILINEAR)

        # Convert to numpy and normalize
        image_np = np.array(image).astype(np.float32) / 255.0

        # HWC to CHW format
        image_np = np.transpose(image_np, (2, 0, 1))

        # Add batch dimension
        return np.expand_dims(image_np, axis=0)

    def get_next(self):
        """Return the next input dict, or None once every image has been consumed.

        Images that fail to load or preprocess are reported and skipped.
        """
        # Iterative skip: no recursion, however many consecutive images fail
        while self.idx < len(self.image_files):
            image_path = self.image_files[self.idx]
            self.idx += 1
            try:
                return {self.input_name: self.preprocess(image_path)}
            except Exception as e:
                print(f"Error processing image {image_path}: {str(e)}")
        return None

In [None]:
# A reader only moves forward through its images, so the QDQ run gets its own
# fresh instance (again capped at 100 calibration samples).
calibration_data_reader = ImageCalibrationReader("/content/imagenet-sample-images", num_samples=100)

In [None]:
# Nodes to exclude: post-processing operations that are left in floating point.
# The names belong to this particular exported graph; re-check them (e.g. in
# Netron) if the export settings change.
nodes_to_exclude = [
    '/model.22/Concat_3', '/model.22/Split', '/model.22/Sigmoid',
    '/model.22/dfl/Reshape', '/model.22/dfl/Transpose', '/model.22/dfl/Softmax',
    '/model.22/dfl/conv/Conv', '/model.22/dfl/Reshape_1', '/model.22/Slice_1',
    '/model.22/Slice', '/model.22/Add_1','/model.22/Add_2',  '/model.22/Sub', '/model.22/Div_1',
    '/model.22/Concat_4', '/model.22/Mul_2', '/model.22/Concat_5'
]

# Perform static quantization (QDQ format).
# The calibration reader is single-pass: re-create calibration_data_reader
# before running this cell a second time.
try:
    quantize_static(
        model_input='yolov8n_infer.onnx',
        model_output="yolov8n_st_quant_qdq.onnx",
        weight_type=QuantType.QInt8,
        activation_type=QuantType.QUInt8,
        calibration_data_reader=calibration_data_reader,
        quant_format=QuantFormat.QDQ,
        nodes_to_exclude=nodes_to_exclude,
        per_channel=False,
        reduce_range=True,
    )
except Exception as e:
    # Report, then re-raise: swallowing the error would hide the traceback and
    # let the notebook continue without the quantized model having been written.
    print(f"Quantization failed: {str(e)}")
    raise
else:
    print("Quantization completed successfully!")

# Dynamic Quantization

In [None]:
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

# Dynamic quantization of the FP32 export; no calibration reader is passed here.
input_model_path, output_model_path = 'yolov8n.onnx', 'yolov8n_dy_quant.onnx'
quantize_dynamic(
    input_model_path,
    output_model_path,
    reduce_range=True,
    weight_type=QuantType.QUInt8,
)

# Float 16 conversion

In [None]:
import onnx
from onnxconverter_common import float16

# Load the model
# (the FP32 ONNX file that the export cell earlier in the notebook produces)
model_fp32 = onnx.load("yolov8n.onnx")

# Convert to FP16
# NOTE(review): called with default options only; confirm whether the model's
# inputs/outputs should remain float32 for whatever consumes this file.
model_fp16 = float16.convert_float_to_float16(model_fp32)

# Save the FP16 model
onnx.save(model_fp16, "yolov8n_fp16.onnx")

print("Converted yolov8n.onnx to yolov8n_fp16.onnx")


In [None]:
# import onnxruntime as ort
# import numpy as np
# from PIL import Image
# import time

# def preprocess_image(image_path):
#     """Loads and preprocesses an image for inference."""
#     image = Image.open(image_path).convert('RGB')
#     image = image.resize((640, 640), Image.Resampling.BILINEAR)

#     # Convert to numpy and normalize
#     image_np = np.array(image).astype(np.float32) / 255.0

#     # HWC to CHW format
#     image_np = np.transpose(image_np, (2, 0, 1))

#     # Add batch dimension
#     return np.expand_dims(image_np, axis=0)

# def run_inference(model_path, input_tensor):
#     """Runs inference on the given ONNX model and measures time taken."""
#     session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
#     input_name = session.get_inputs()[0].name

#     # Start time measurement
#     start_time = time.time()

#     # Run inference
#     outputs = session.run(None, {input_name: input_tensor})

#     # End time measurement
#     end_time = time.time()

#     # Calculate time taken
#     time_taken = end_time - start_time
#     return outputs, time_taken

# # Load test image
# image_path = "/content/imagenet-sample-images/n01440764_tench.JPEG"  # Replace with your test image
# input_tensor = preprocess_image(image_path)

# # Run inference on original model
# orig_outputs, orig_time = run_inference("yolov8n.onnx", input_tensor)

# # Run inference on quantized model
# quant_outputs, quant_time = run_inference("yolov8n_st_quant.onnx", input_tensor)

# # Print output and timing results
# print("Original Model Output:", orig_outputs[0].flatten()[:10])  # Print first 10 values
# print("Quantized Model Output:", quant_outputs[0].flatten()[:10])  # Print first 10 values

# # Compute mean absolute difference
# difference = np.abs(orig_outputs[0] - quant_outputs[0])
# print(f"Mean Absolute Difference: {np.mean(difference)}")

# # Print time taken for each model
# print(f"Time taken for original model: {orig_time:.4f} seconds")
# print(f"Time taken for quantized model: {quant_time:.4f} seconds")
