In [1]:
from onnxsim import simplify
import onnx

# Load your ONNX model
model = onnx.load("model_quant.onnx")

# Simplify
model_simplified, check = simplify(model)

assert check, "Simplified ONNX model could not be validated"

# Save it
onnx.save(model_simplified, "model_quant_simplified.onnx")


In [None]:
!

In [4]:
from onnxruntime.quantization import quantize_dynamic, QuantType

quantize_dynamic(
    model_input="model_preprocessed.onnx",
    model_output="model_quant4.onnx",
    weight_type=QuantType.QInt4 # or QuantType.QUInt8
)


In [12]:
from onnxconverter_common import float16

import onnx
model = onnx.load("model_preprocessed.onnx")
model_fp16 = float16.convert_float_to_float16(model)
onnx.save(model_fp16, "model_fp16.onnx")




In [14]:
model = onnx.load("model_fp16.onnx")
node_types = set(node.op_type for node in model.graph.node)
print("Ops used in model:", node_types)

Ops used in model: {'MaxPool', 'Shape', 'Slice', 'Sigmoid', 'GlobalAveragePool', 'Resize', 'Relu', 'Add', 'Mul', 'Unsqueeze', 'Gather', 'Div', 'Sub', 'Concat', 'Conv', 'Cast'}


In [22]:
import os
import cv2
import numpy as np
from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType

# Calibration DataReader
class ImageFolderReader(CalibrationDataReader):
    def __init__(self, folder, input_name, size=(224, 224)):
        self.data = []
        self.input_name = input_name
        for fname in os.listdir(folder):
            path = os.path.join(folder, fname)
            img = cv2.imread(path)
            if img is None:
                continue
            img = img.transpose(2, 0, 1)  # HWC -> CHW
            self.data.append({input_name: np.expand_dims(img, axis=0)})
        self.iterator = iter(self.data)

    def get_next(self):
        return next(self.iterator, None)

# Paths
input_model = "model_preprocessed.onnx"
output_model = "model_quantized.onnx"
calibration_folder = "../DATA/dataset/test/images"  # Path to your calibration image folder

# Get input name from the model
import onnxruntime as ort
session = ort.InferenceSession(input_model)
input_name = session.get_inputs()[0].name  # Get the input name

# Quantize the model
quantize_static(
    model_input=input_model,
    model_output=output_model,
    calibration_data_reader=ImageFolderReader(calibration_folder, input_name),
    quant_format=QuantType.QUInt8,  # QDQ (Quantize-Dequantize) format for best compatibility
    weight_type=QuantType.QUInt8,  # Quantize weights to int8
    activation_type=QuantType.QUInt8,  # Quantize activations to int8
    
)

print(f"Quantized model saved to: {output_model}")


Quantized model saved to: model_quantized.onnx


In [23]:
import numpy as np
import onnxruntime as ort
onnx_orig= "/home/adelb/Downloads/comp_200eps_abloss_model3b_3c_se_resnext50_32x4d_512.onnx"
onnx_simp= 'model_quantized.onnx'


sample = np.random.randint(0, 256, (2, 3, 1024, 1024), dtype= np.uint8)

orig_session = ort.InferenceSession(
    onnx_orig, providers=["CPUExecutionProvider"]
)

simp_session= ort.InferenceSession(
    onnx_simp, providers=["CPUExecutionProvider"]
)

# compute ONNX Runtime output prediction
ort_inputs = {"input": sample}
orig_outputs = orig_session.run(output_names=None, input_feed=ort_inputs)

simp_outputs = simp_session.run(output_names=None, input_feed=ort_inputs)



# compare ONNX Runtime and PyTorch results
d= np.testing.assert_allclose(simp_outputs[0], orig_outputs[0], rtol=1e-2, atol=1e-3)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

AssertionError: 
Not equal to tolerance rtol=0.01, atol=0.001

Mismatched elements: 8688498 / 9437184 (92.1%)
Max absolute difference: 4.8479233
Max relative difference: 7497466.5
 x: array([[[[  3.016465,   3.016465,   3.016465, ...,   2.681303,
            2.34614 ,   2.34614 ],
         [  3.016465,   3.351628,   3.686791, ...,   2.681303,...
 y: array([[[[  2.266474,   2.399349,   2.532224, ...,   2.688377,
            2.538295,   2.388207],
         [  2.324796,   2.547629,   2.770462, ...,   2.554197,...