In [55]:
import onnxruntime as ort
import numpy as np
from PIL import Image
import time

import numpy as np

def softmax(x):
    """Return the softmax of ``x`` computed along its last axis.

    The maximum is subtracted per slice of the last axis (the same axis that
    is normalised) before exponentiating. Subtracting a single global maximum
    instead lets a row whose values are far below that maximum underflow to
    all zeros, which then yields ``0 / 0 = nan`` for that row.

    Args:
        x: Array-like of scores; any shape accepted by ``np.asarray``.

    Returns:
        ``np.ndarray`` of the same shape as ``x`` whose entries along the last
        axis are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Shift each slice by its own maximum so the largest exponent is exp(0).
    e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e_x / e_x.sum(axis=-1, keepdims=True)

def load_model(onnx_file_path):
    """Open the ONNX model at ``onnx_file_path`` as an ONNX Runtime session.

    Only the CPU execution provider is requested.

    Returns:
        The created ``ort.InferenceSession``.
    """
    cpu_only = ['CPUExecutionProvider']
    return ort.InferenceSession(onnx_file_path, providers=cpu_only)

def run_model(session, input_data):
    """Feed ``input_data`` to the session's first input and fetch every output.

    Prints the number of outputs (prefixed with "22") and the input name
    before running.

    Args:
        session: Object exposing ``get_inputs()``, ``get_outputs()`` and
            ``run(output_names, feed_dict)``.
        input_data: Value bound to the first input of the session.

    Returns:
        Whatever ``session.run`` returns for all output names.
    """
    feed_key = session.get_inputs()[0].name
    fetch = []
    for node in session.get_outputs():
        fetch.append(node.name)
    print("22", len(fetch))
    print(feed_key)
    return session.run(fetch, {feed_key: input_data})

# if __name__ == "__main__":
#     onnx_file_path = "./assets/resnet50_csv_26_inf_900x1200.onnx"
#     session = load_model(onnx_file_path)
#     image_path = './images/validation_image_cmu/10.6.3.9 Flat margin.jpg'
#     target_size = (1200, 900)
#     image = Image.open(image_path).convert('RGB')
#     image = image.resize(target_size)
#     image_array = np.asarray(image)
#     image_array = image_array.astype(np.float32) / 255.0
#     image_array = np.expand_dims(image_array, axis=0)
    
#     # print("Model Input:", image_array)
    
#     start_time = time.time()
#     results = run_model(session, image_array)
#     end_time = time.time()
    
#     print('Inference session.get_outputs():',results)
#     print('Inference Time:', end_time - start_time, 'seconds')


**Works for both the quantized INT8 and the unquantized model**

In [66]:
import cv2

def preprocess_image(x, mode='caffe'):
    
    x = x.astype(np.float32)

    if mode == 'tf':
        x /= 127.5
        x -= 1.
    elif mode == 'caffe':
        x -= [103.939, 116.779, 123.68]

    return x

def resize_image(img, min_side=900, max_side=1200):
    """Rescale ``img`` uniformly using the factor from ``compute_resize_scale``.

    Args:
        img: Image array; its ``shape`` is passed to ``compute_resize_scale``.
        min_side: Forwarded to ``compute_resize_scale``.
        max_side: Forwarded to ``compute_resize_scale``.

    Returns:
        Tuple ``(resized_image, scale)`` where ``scale`` is the factor applied
        to both axes.
    """
    factor = compute_resize_scale(img.shape, min_side=min_side, max_side=max_side)

    # No explicit output size is given; OpenCV derives it from fx / fy.
    resized = cv2.resize(img, None, fx=factor, fy=factor)
    return resized, factor

def compute_resize_scale(image_shape, min_side=900, max_side=1200):
    """Compute the uniform scale factor that fits an image into the side limits.

    The factor first maps the smaller of (rows, cols) onto ``min_side``. If
    that would push the larger side past ``max_side`` (large aspect ratios),
    the factor is instead chosen so the larger side equals ``max_side``.

    Args:
        image_shape: Sequence whose first two entries are (rows, cols). Any
            further entries (e.g. a channel count) are ignored, so both
            ``(H, W)`` and ``(H, W, C)`` shapes are accepted.
        min_side: Target length for the smaller side.
        max_side: Upper bound for the larger side.

    Returns:
        The scale factor as a float.

    Raises:
        ValueError: If ``image_shape`` has fewer than two entries.
    """
    # Only the spatial dimensions matter; slicing keeps 2-D shapes working.
    rows, cols = image_shape[:2]

    smallest_side = min(rows, cols)
    largest_side = max(rows, cols)

    # Rescale so the smallest side becomes min_side ...
    scale = min_side / smallest_side

    # ... unless that makes the largest side exceed max_side.
    if largest_side * scale > max_side:
        scale = max_side / largest_side

    return scale

In [67]:
# Sample validation image used by the inference cells below.
image_path_ = './images/validation_image_cmu/10.6.3.9 Flat margin.jpg'

# Load with PIL and force a 3-channel RGB image.
image = Image.open(image_path_).convert('RGB')

# NOTE(review): rotate() is called without expand=True, so PIL presumably keeps
# the original canvas size and crops/pads the rotated content - confirm this
# matches how the model's training images were prepared.
image = image.rotate(90)
image = np.asarray(image)
# Reverse the channel axis (RGB -> BGR); .copy() materialises the reversed view.
image = image[:, :, ::-1].copy()
# Default 'caffe' mode: float32 cast plus per-channel offset subtraction.
image = preprocess_image(image)
# image__ is the resized array fed to the sessions; scale is the factor applied.
image__, scale = resize_image(image)
image__.shape

(900, 1200, 3)

In [68]:
def run_model_(session, input_data):
    """Run ``session`` on ``input_data`` and return all of its outputs.

    The data is bound to the session's first input. Before running, the
    number of outputs (prefixed with "22") and the input name are printed.

    Args:
        session: Object exposing ``get_inputs()``, ``get_outputs()`` and
            ``run(output_names, feed_dict)``.
        input_data: Value bound to the first input.

    Returns:
        The result of ``session.run`` for every output name.
    """
    first_input_name = session.get_inputs()[0].name
    wanted_outputs = list(map(lambda out: out.name, session.get_outputs()))
    print("22", len(wanted_outputs))
    print(first_input_name)
    feed = {first_input_name: input_data}
    return session.run(wanted_outputs, feed)

**Unquantized Inference**

In [69]:
# CPU-only ONNX Runtime session for the unquantized model.
onnx_file_path = "./assets/resnet50_csv_26_inf_900x1200.onnx"
session_1 = ort.InferenceSession(onnx_file_path, providers=['CPUExecutionProvider'])

In [70]:
# Time a single forward pass of the unquantized model. perf_counter() is a
# monotonic, high-resolution clock intended for measuring short durations,
# unlike time.time(), which follows the (adjustable) wall clock.
start_time = time.perf_counter()
# expand_dims adds the leading batch axis to the preprocessed image.
results = run_model_(session_1, np.expand_dims(image__, axis=0))
end_time = time.perf_counter()

print('Inference session.get_outputs():',results)
print('Inference Time:', end_time - start_time, 'seconds')

22 3
input_1:0
Inference session.get_outputs(): [array([[[ 1.8300137e+02,  1.6337952e+01,  1.0949430e+03,  9.0000000e+02],
        [ 2.1348184e+02,  0.0000000e+00,  1.1051562e+03,  9.0000000e+02],
        [-1.0000000e+00, -1.0000000e+00, -1.0000000e+00, -1.0000000e+00],
        ...,
        [-1.0000000e+00, -1.0000000e+00, -1.0000000e+00, -1.0000000e+00],
        [-1.0000000e+00, -1.0000000e+00, -1.0000000e+00, -1.0000000e+00],
        [-1.0000000e+00, -1.0000000e+00, -1.0000000e+00, -1.0000000e+00]]],
      dtype=float32), array([[ 0.54041857,  0.24755007, -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        

**Quantized Inference**

In [71]:
# CPU-only ONNX Runtime session for the quantized model file.
onnx_file_path_quant = "./assets/resnet50_csv_26_inf_900x1200_Quantized.onnx"
session_quant = ort.InferenceSession(onnx_file_path_quant, providers=['CPUExecutionProvider'])

In [72]:
# Time a single forward pass of the quantized model. perf_counter() is a
# monotonic, high-resolution clock intended for measuring short durations,
# unlike time.time(), which follows the (adjustable) wall clock.
start_time = time.perf_counter()
# expand_dims adds the leading batch axis to the preprocessed image.
results = run_model_(session_quant, np.expand_dims(image__, axis=0))
end_time = time.perf_counter()

print('Inference session.get_outputs():',results)
print('Inference Time:', end_time - start_time, 'seconds')

22 3
input_1:0
Inference session.get_outputs(): [array([[[ 1.8355899e+02,  1.7561951e+01,  1.0942911e+03,  9.0000000e+02],
        [ 2.1642825e+02,  0.0000000e+00,  1.1028279e+03,  9.0000000e+02],
        [-1.0000000e+00, -1.0000000e+00, -1.0000000e+00, -1.0000000e+00],
        ...,
        [-1.0000000e+00, -1.0000000e+00, -1.0000000e+00, -1.0000000e+00],
        [-1.0000000e+00, -1.0000000e+00, -1.0000000e+00, -1.0000000e+00],
        [-1.0000000e+00, -1.0000000e+00, -1.0000000e+00, -1.0000000e+00]]],
      dtype=float32), array([[ 0.5345413 ,  0.24685106, -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        

**FP16 Inference**

In [73]:
# CPU-only ONNX Runtime session for the FP16 model file.
# Creating this session logged constant-folding warnings about missing CPU
# kernels for some Add/Less nodes (see the cell output).
onnx_file_path_fp16 = "./assets/resnet50_csv_26_inf_900x1200_FP16.onnx"
session_fp16 = ort.InferenceSession(onnx_file_path_fp16, providers=['CPUExecutionProvider'])

2023-11-24 12:37:50.719250 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_0/add'
2023-11-24 12:37:50.719479 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_0/add_1'
2023-11-24 12:37:50.719776 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_1/add'
2023-11-24 12:37:50.719955 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_1/add_1'
2023-11-24 12:37:50.720264 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_2/add'
2023-11-24 12:37:50.720395 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_2/add_1'
2023-11-24 12:37:50.72

hors_3/add'
2023-11-24 12:37:50.787318 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_3/add_1'
2023-11-24 12:37:50.787378 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Less node 'copy__76/filtered_detections/map/while/Less_1'
2023-11-24 12:37:50.814232 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_0/add'
2023-11-24 12:37:50.814377 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_0/add_1'
2023-11-24 12:37:50.814599 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add node 'anchors_1/add'
2023-11-24 12:37:50.814668 [W:onnxruntime:, constant_folding.cc:212 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Add 

In [74]:
# Time a single forward pass of the FP16 model. perf_counter() is a
# monotonic, high-resolution clock intended for measuring short durations,
# unlike time.time(), which follows the (adjustable) wall clock.
start_time = time.perf_counter()
# The input is cast to float16 for this model; the cast and the added batch
# axis are both inside the timed region.
results = run_model_(session_fp16, np.expand_dims(image__.astype(np.float16), axis=0))
end_time = time.perf_counter()

print('Inference session.get_outputs():',results)
print('Inference Time:', end_time - start_time, 'seconds')

22 3
input_1:0
Inference session.get_outputs(): [array([[[ 1.830e+02,  1.672e+01,  1.094e+03,  9.000e+02],
        [ 2.141e+02,  0.000e+00,  1.105e+03,  9.000e+02],
        [-1.000e+00, -1.000e+00, -1.000e+00, -1.000e+00],
        ...,
        [-1.000e+00, -1.000e+00, -1.000e+00, -1.000e+00],
        [-1.000e+00, -1.000e+00, -1.000e+00, -1.000e+00],
        [-1.000e+00, -1.000e+00, -1.000e+00, -1.000e+00]]], dtype=float16), array([[ 0.541 ,  0.2477, -1.    , -1.    , -1.    , -1.    , -1.    ,
        -1.    , -1.    , -1.    , -1.    , -1.    , -1.    , -1.    ,
        -1.    , -1.    , -1.    , -1.    , -1.    , -1.    , -1.    ,
        -1.    , -1.    , -1.    , -1.    , -1.    , -1.    , -1.    ,
        -1.    , -1.    , -1.    , -1.    , -1.    , -1.    , -1.    ,
        -1.    , -1.    , -1.    , -1.    , -1.    , -1.    , -1.    ,
        -1.    , -1.    , -1.    , -1.    , -1.    , -1.    , -1.    ,
        -1.    , -1.    , -1.    , -1.    , -1.    , -1.    , -1.    ,
    

In [None]:
def run_inference_pc_cpu(model_path, image_paths):
    """Run the detector on each image and report its top-scoring detection.

    NOTE(review): ``load_model`` is called here with ``backbone_name``, which
    the ``load_model`` defined in this notebook does not accept, and ``models``
    is not imported anywhere in the visible cells. Presumably both are meant to
    come from a Keras detection package - confirm and add the import before
    running this cell.

    Args:
        model_path: Path handed to ``load_model``.
        image_paths: Iterable of image file paths.

    Returns:
        A list with one dict per image, holding:
        ``'isCancerDetected'`` - label of the highest-scoring detection
        (label ids: 0 = 'non_cancer', 1 = 'cancer');
        ``'InferenceTime'`` - seconds spent in ``predict_on_batch``;
        ``'Confidence2'`` - score of that detection.
    """
    model = load_model(model_path, backbone_name='resnet50')
    try:
        model = models.convert_model(model)
    except Exception:
        # Best-effort conversion: keep the loaded model if conversion fails.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        print("Model is likely already an inference model")
    results = []

    for image_path in image_paths:
        # Load and preprocess the image; the context manager closes the file
        # handle once the RGB copy has been made.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        image = image.rotate(90)

        image = np.asarray(image)
        # Reverse the channel axis (RGB -> BGR) before preprocessing.
        image = image[:, :, ::-1].copy()
        image = preprocess_image(image)
        image, _ = resize_image(image)

        # Run the inference, timing only the forward pass with a monotonic clock.
        start = time.perf_counter()
        _, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
        inference_time = time.perf_counter() - start

        # Index of the highest score in the first (only) batch element.
        best = np.argmax(scores[0])
        predicted_label = labels[0][best]
        confidence = scores[0][best]

        results.append({'isCancerDetected': predicted_label, 'InferenceTime': inference_time, 'Confidence2': confidence})
    return results

**Quantization to INT8**

In [6]:
from onnxruntime.quantization import quantize_dynamic, QuantType

# Source (FP32) model and destination path for the dynamically quantized model.
model_fp32 = './assets/resnet50_csv_26_inf_900x1200.onnx'
model_quant = './assets/resnet50_csv_26_inf_900x1200_Quantized.onnx'

# Dynamic quantization with unsigned 8-bit weights; the result is written to model_quant.
# NOTE(review): quantize_dynamic presumably returns None, so `quantized_model`
# would not hold a model object - confirm before using it.
quantized_model = quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8)



**Conversion to FP16 (half precision)**

In [52]:
import onnx
from onnxconverter_common import float16

# Load the FP32 ONNX model, convert it to float16, and save it under a new name.
model = onnx.load("./assets/resnet50_csv_26_inf_900x1200.onnx")
model_fp16 = float16.convert_float_to_float16(model)
onnx.save(model_fp16, "./assets/resnet50_csv_26_inf_900x1200_FP16.onnx")



In [76]:
import onnxruntime as ort
import numpy as np
from PIL import Image
import time

import numpy as np

def softmax(x):
    """Return the softmax of ``x`` computed along its last axis.

    Each slice of the last axis is shifted by its own maximum before
    exponentiating. Shifting by one global maximum instead can make a slice
    whose values are far below that maximum underflow to all zeros, which
    produces ``0 / 0 = nan`` when it is normalised.

    Args:
        x: Array-like of scores; any shape accepted by ``np.asarray``.

    Returns:
        ``np.ndarray`` of the same shape as ``x`` whose entries along the last
        axis are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Per-slice maximum keeps the largest exponent at exp(0) in every slice.
    e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e_x / e_x.sum(axis=-1, keepdims=True)


def load_model(onnx_file_path):
    """Build an ONNX Runtime session for the model stored at ``onnx_file_path``.

    The session is created with the CPU execution provider only.

    Returns:
        The created ``ort.InferenceSession``.
    """
    providers = ['CPUExecutionProvider']
    return ort.InferenceSession(onnx_file_path, providers=providers)

def run_model(session, input_data):
    """Bind ``input_data`` to the session's first input and fetch all outputs.

    Prints the output count (prefixed with "22") and the input name first.

    Args:
        session: Object exposing ``get_inputs()``, ``get_outputs()`` and
            ``run(output_names, feed_dict)``.
        input_data: Value bound to the first input of the session.

    Returns:
        Whatever ``session.run`` returns for all output names.
    """
    feed_key = session.get_inputs()[0].name
    fetch = []
    for node in session.get_outputs():
        fetch.append(node.name)
    print("22", len(fetch))
    print(feed_key)
    return session.run(fetch, {feed_key: input_data})

if __name__ == "__main__":
    # Scratch driver: model loading, image preparation and the run_model call
    # are all commented out below, so nothing is actually inferred here.
    # onnx_file_path = "./assets/resnet50_csv_26_inf_900x1200_Quantized.onnx"
    # session = load_model(onnx_file_path)
    # image_path = './images/validation_image_cmu/10.6.3.9 Flat margin.jpg'
    # target_size = (1200, 900)
    # image = Image.open(image_path).convert('RGB')
    # image = image.resize(target_size)
    # image_array = np.asarray(image)
    # image_array = image_array.astype(np.float32) / 255.0
    # image_array = np.expand_dims(image_array, axis=0)
    
    # # print("Model Input:", image_array)
    
    # With the inference call disabled, the two timestamps are taken back to
    # back, so the printed "Inference Time" only reflects clock-call overhead.
    start_time = time.time()
    # results = run_model(session, image_array)
    end_time = time.time()
    
    # print('Inference session.get_outputs():',results)
    print('Inference Time:', end_time - start_time, 'seconds')

Inference Time: 9.5367431640625e-07 seconds
