# Result of speed comparison
jump to
1. [raw pytorch + cpu](#rcpu)
2. [onnx + static + cpu](#onnxs)
3. [onnx + dynamic + cpu](#onnxd)
4. [outputs comparison](#out)

In [18]:
# install onnx / onnxruntim / efficientnet first
import onnxruntime
import torch
import torch.nn as nn
from torchvision import models
import os
import numpy as np
import cv2
import time
from efficientnet_pytorch import EfficientNet

In [30]:
def pytorch_to_onnx(model, model_path, input_shape=(1, 3, 224, 224), opset_version=11):        
    input_data = torch.randn(input_shape)
    torch.onnx.export(model.eval(), input_data, model_path, opset_version=opset_version)
    print('Export Finished, now Checking ONNX Model')    
    get_and_check_onnx_model(model_path)


def pytorch_to_onnx_dynamic(model, model_path, input_shape=(1, 3, 224, 224), opset_version=11):        
    input_data = torch.randn(input_shape)
    input_name = "input.1"
    output_name = "output"
    input_names = [input_name]
    output_names = [output_name]
    dynamic_axes = {input_name: {2:'width', 3:'height'}, output_name : {1:'classes'}}
    torch.onnx.export(model, input_data, model_path, input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, opset_version=opset_version)
    print('Export Finished, now Checking ONNX Model')    
    get_and_check_onnx_model(model_path)



def time_calculator(func):
    def wrap(*args, **kwargs):
        start = time.time()
        func(*args, **kwargs)
        end = time.time()
        time_spend = end - start
        print('time_spend: ', time_spend)
        return time_spend
    return wrap
    
def get_and_check_onnx_model(model_path):
    import onnx
    onnx_model = onnx.load(model_path)
    onnx.checker.check_model(onnx_model)
    print('if nothing showed, then the model is fine.')
    return onnx_model

def set_efficient_model_ready(model):
    model.set_swish(memory_efficient=False)
    return model


def _calculate_dhdw_half(h, w):
    """Calculate difference of h or w in order to get a square """
    if h > w:
        dh_half = int(0.1*h/2)
        dw_half = int((h+2*dh_half-w)/2)
    else:
        dw_half = int(0.1*w/2)
        dh_half = int((w+2*dw_half-h)/2)
    return dh_half, dw_half
    
def preprocess_onnx(image):
    # 加邊框
    h, w, c = image.shape
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    dh_half, dw_half = _calculate_dhdw_half(h, w)
    image = cv2.copyMakeBorder(image, dh_half, dh_half, dw_half, dw_half, cv2.BORDER_REPLICATE)
    image = cv2.resize(image, (248, 248))[12:236, 12:236]/255.0
    image = image.transpose(2, 0, 1).astype(np.float32)[np.newaxis,:]
    return image

def preprocess_pt(image):
    # 加邊框
    h, w, c = image.shape
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    dh_half, dw_half = _calculate_dhdw_half(h, w)
    image = cv2.copyMakeBorder(image, dh_half, dh_half, dw_half, dw_half, cv2.BORDER_REPLICATE)
    image = cv2.resize(image, (248, 248))[12:236, 12:236]
    tensor = torch.tensor(image, dtype=torch.float)    
    return tensor.permute(2, 0, 1).div(255.0).unsqueeze(0)

def show_onnx_session_io_name(session):
    session.get_modelmeta()
    first_input_name = session.get_inputs()[0].name
    first_output_name = session.get_outputs()[0].name
    print(path, first_input_name, first_output_name)
    
def onnxruntime_inference(image, ort_session):
    image = preprocess_onnx(image)
    ort_inputs = {ort_session.get_inputs()[0].name: image}
    ort_outs = ort_session.run(None, ort_inputs)
    return ort_outs

In [28]:
!pip install efficientnet_pytorch
from efficientnet_pytorch import EfficientNet
import torch
import torch.nn as nn
import torchvision.models as models
import cv2
import os
import numpy as np
import time



def set_efficient_model_ready(model):
    model.set_swish(memory_efficient=False)
    return model

model_name_list = ["resnet50", "resnet101", "efficientnet-b4", "efficientnet-b5", "efficientnet-b6", "efficientnet-b7"]
model_path_list = []
dynamic_model_path_list = []
model_list = []
class_num = 801
for name in model_name_list:
    if name.startswith("res"):
        model = getattr(models, name)(pretrained=True)
        num_input_fts = model.fc.in_features
        model.fc = nn.Linear(num_input_fts, class_num)
        torch.manual_seed(0)
        w = torch.randn(class_num, num_input_fts)
        b = torch.randn(class_num)
        model.fc.bias.data = b
        model.fc.weight.data = w
    else:
        model = EfficientNet.from_pretrained(name)
        model = set_efficient_model_ready(model)
        num_input_fts = model._fc.in_features
        model._fc = nn.Linear(num_input_fts, class_num)
        torch.manual_seed(0)        
        w = torch.randn(class_num, num_input_fts)
        b = torch.randn(class_num)
        model._fc.bias.data = b
        model._fc.weight.data = w
    print(name)
    print(w[0][:5])
    model_path = f"ONNX_MODELS/{name}.onnx"
    dynamic_model_path = f"ONNX_MODELS/{name}_dynamic.onnx"
    model_path_list.append(model_path)
    dynamic_model_path_list.append(dynamic_model_path)
    model_list.append(model)
    if os.path.exists(model_path):
        continue
    pytorch_to_onnx(model.eval(), model_path, opset_version=10)
    pytorch_to_onnx_dynamic(model.eval(), dynamic_model_path, opset_version=10)


resnet50
tensor([-1.1258, -1.1524, -0.2506, -0.4339,  0.8487])
resnet101
tensor([-1.1258, -1.1524, -0.2506, -0.4339,  0.8487])
Loaded pretrained weights for efficientnet-b4
efficientnet-b4
tensor([-1.1258, -1.1524, -0.2506, -0.4339,  0.8487])
Loaded pretrained weights for efficientnet-b5
efficientnet-b5
tensor([-1.1258, -1.1524, -0.2506, -0.4339,  0.8487])
Loaded pretrained weights for efficientnet-b6
efficientnet-b6
tensor([-1.1258, -1.1524, -0.2506, -0.4339,  0.8487])
Loaded pretrained weights for efficientnet-b7
efficientnet-b7
tensor([-1.1258, -1.1524, -0.2506, -0.4339,  0.8487])


# Raw Pytorch Speed
<a href="#rcpu"></a>

In [29]:
n = 10
raw_outputs = []
for name, model in zip(model_name_list, model_list):
    model.cpu()
    model.eval() 
    start = time.time()
    for _ in range(n):
        test_image = cv2.imread("sample.jpg")
        tensor = preprocess_pt(test_image)
        output = model(tensor.cpu())        
        score, pred = torch.max(output, dim=1)
    end = time.time()
    raw_outputs.append(output)
    print(name, (end-start)/n)

resnet50 0.19793853759765626
resnet101 0.3345116853713989
efficientnet-b4 0.17874245643615722
efficientnet-b5 0.2523103952407837
efficientnet-b6 0.3270812273025513
efficientnet-b7 0.4499534606933594


# Onnx Static Speed
<a href="#onnxs"></a>

In [12]:
onnx_outputs = []
n = 10
for name, model_path in zip(model_name_list, model_path_list):
    
    session = onnxruntime.InferenceSession(model_path)
#     show_onnx_session_io_name(session)
    start = time.time()
    for _ in range(n):
        test_image = cv2.imread("sample.jpg")
        outputs = onnxruntime_inference(test_image, session)
    end = time.time()
    onnx_outputs.append(outputs)
    print(name, model_path, (end-start)/n)

resnet50 ONNX_MODELS/resnet50.onnx 0.11168470382690429
resnet101 ONNX_MODELS/resnet101.onnx 0.20166232585906982
efficientnet-b4 ONNX_MODELS/efficientnet-b4.onnx 0.10989341735839844
efficientnet-b5 ONNX_MODELS/efficientnet-b5.onnx 0.1590277910232544
efficientnet-b6 ONNX_MODELS/efficientnet-b6.onnx 0.21927280426025392
efficientnet-b7 ONNX_MODELS/efficientnet-b7.onnx 0.3087084531784058


# Onnx Dynamic Speed
<a href="#onnxd"></a>

In [9]:
onnx_dynamic_outputs = []
n = 10
for name, model_path in zip(model_name_list, dynamic_model_path_list):    
    session = onnxruntime.InferenceSession(model_path)
#     show_onnx_session_io_name(session)
    start = time.time()
    for _ in range(n):
        test_image = cv2.imread("sample.jpg")
        outputs = onnxruntime_inference(test_image, session)
    end = time.time()
    onnx_dynamic_outputs.append(outputs)
    print(name, model_path, (end-start)/n)

resnet50 ONNX_MODELS/resnet50_dynamic.onnx 0.11474838256835937
resnet101 ONNX_MODELS/resnet101_dynamic.onnx 0.20631396770477295
efficientnet-b4 ONNX_MODELS/efficientnet-b4_dynamic.onnx 0.11047964096069336
efficientnet-b5 ONNX_MODELS/efficientnet-b5_dynamic.onnx 0.15632407665252684
efficientnet-b6 ONNX_MODELS/efficientnet-b6_dynamic.onnx 0.21250839233398439
efficientnet-b7 ONNX_MODELS/efficientnet-b7_dynamic.onnx 0.30537655353546145


# Output comparison
<a href="#out"></a>

In [13]:
def postprocess_onnx(onnx_outputs):
    return onnx_outputs[0][0]

def postprocess_pt_cpu(pt_outputs):
    return pt_outputs.detach().numpy()[0]

In [17]:
for name, raw, onnx_, onnx_d in zip(model_name_list, raw_outputs, onnx_outputs, onnx_dynamic_outputs):
    raw = postprocess_pt_cpu(raw)
    onnx_ = postprocess_onnx(onnx_)
    onnx_d = postprocess_onnx(onnx_d)
    mse = np.mean((raw - onnx_)**2)
    print(name, ", error: ", mse)

resnet50 , error:  7.369016e-10
resnet101 , error:  3.8520462e-10
efficientnet-b4 , error:  3.3588207e-10
efficientnet-b5 , error:  4.901697e-10
efficientnet-b6 , error:  3.3645428e-10
efficientnet-b7 , error:  9.3104985e-10
