In [33]:
import sys
sys.path.append('/Users/jack/Documents/Concurrency')
from multiprocessing_practice.setup_logger import logger
import time
import numpy
import onnxruntime as rt

# load dataset
# https://www.onnxruntime.ai/python/tutorial.html
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris()
X, y = iris.data, iris.target
# Pin the seed so the held-out rows (the benchmark input used by run()) are the
# same on every Restart & Run All instead of a fresh random split each time.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Path of the ONNX model file, resolved relative to the current working directory.
filename = 'logreg_iris.onnx'

def run():
    """Load the ONNX model at ``filename`` and run it once on ``X_test``.

    A fresh ``InferenceSession`` is created on every call, so the measured
    cost of a call includes loading the model as well as the inference itself.

    Returns:
        The first output returned by the session for the ``X_test`` batch.
    """
    # https://www.onnxruntime.ai/docs/how-to/tune-performance.html
    sess_options = rt.SessionOptions()
    sess_options.intra_op_num_threads = 1
    sess = rt.InferenceSession(filename, sess_options=sess_options)
    input_name = sess.get_inputs()[0].name

    # The input is cast to float32 before being fed to the session
    # (presumably the dtype the model input expects -- TODO confirm).
    # Return the result instead of dropping it in an unused local.
    return sess.run(None, {input_name: X_test.astype(numpy.float32)})[0]

# inference: time 1000 back-to-back calls of run()
logger.debug('Started')
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted while the loop is running.
begin = time.perf_counter()
for _ in range(1000):
    run()
end = time.perf_counter()
logger.debug('Finished')
logger.info(f'Time Elapsed : {end-begin}')

2021-06-20 13:46:37 | MainThread |[36m DEBUG    [0m| root | Started
2021-06-20 13:46:38 | MainThread |[36m DEBUG    [0m| root | Finished


In [5]:
# file:///Users/jack/Downloads/PyTorch_Bert-Squad_OnnxRuntime_CPU.ipynb.html
import sys
sys.path.append('/Users/jack/Documents/Concurrency')
from multiprocessing_practice.setup_logger import logger
import time
import numpy
import torch
import onnxruntime as rt

# Second dimension every input tensor is reshaped to in run(): (1, max_seq_length).
max_seq_length = 128
# Number of dataset entries the inference loop below iterates over.
total_samples = 20

# NOTE(review): torch.load unpickles the file -- only load files from a trusted source.
dataset = torch.load('onnx_models/tensor_dataset.pt')

# Path of the ONNX model handed to rt.InferenceSession in run().
filename = 'onnx_models/optimized_model_cpu.onnx'

def run(i):
    """Run the ONNX model on entry ``i`` of ``dataset`` and return its outputs.

    The first three items of the entry are fed as ``input_ids``,
    ``input_mask`` and ``segment_ids``, each reshaped to
    ``(1, max_seq_length)``. A new ``InferenceSession`` is created on every
    call, so each call pays the model-loading cost again.

    Args:
        i: Index into ``dataset``.

    Returns:
        Whatever ``InferenceSession.run(None, ...)`` returns for this entry.
    """
    sample = dataset[i]
    input_names = ('input_ids', 'input_mask', 'segment_ids')
    # Feed names are matched to the entry's items by position.
    feed = {
        name: sample[position].cpu().reshape(1, max_seq_length).numpy()
        for position, name in enumerate(input_names)
    }

    options = rt.SessionOptions()
    options.intra_op_num_threads = 1
    session = rt.InferenceSession(filename, sess_options=options)

    outputs = session.run(None, feed)
    logger.debug(f'Finished inference {i}')
    return outputs

# inference: run the samples one after another in this process
preds = []
logger.debug('Started')
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted while the loop is running.
begin = time.perf_counter()
for i in range(total_samples):
    pred = run(i)
    preds.append(pred)
end = time.perf_counter()
logger.debug('Finished')
logger.info(f'Time Elapsed : {end-begin}')
logger.info(f'Get {len(preds)} predictions')

2021-06-20 15:00:32 | MainThread |[36m DEBUG    [0m| root | Started
2021-06-20 15:00:33 | MainThread |[36m DEBUG    [0m| root | Finished inference 0
2021-06-20 15:00:34 | MainThread |[36m DEBUG    [0m| root | Finished inference 1
2021-06-20 15:00:34 | MainThread |[36m DEBUG    [0m| root | Finished inference 2
2021-06-20 15:00:35 | MainThread |[36m DEBUG    [0m| root | Finished inference 3
2021-06-20 15:00:36 | MainThread |[36m DEBUG    [0m| root | Finished inference 4
2021-06-20 15:00:37 | MainThread |[36m DEBUG    [0m| root | Finished inference 5
2021-06-20 15:00:38 | MainThread |[36m DEBUG    [0m| root | Finished inference 6
2021-06-20 15:00:39 | MainThread |[36m DEBUG    [0m| root | Finished inference 7
2021-06-20 15:00:39 | MainThread |[36m DEBUG    [0m| root | Finished inference 8
2021-06-20 15:00:40 | MainThread |[36m DEBUG    [0m| root | Finished inference 9
2021-06-20 15:00:41 | MainThread |[36m DEBUG    [0m| root | Finished inference 10
2021-06-20 15:00

The example above reloads the model (creates a new `InferenceSession`) every time the `run` function is called.

In [2]:
# multiprocessing
import sys
sys.path.append('/Users/jack/Documents/Concurrency')
from multiprocessing_practice.setup_logger import logger
import time
import numpy
import torch
import onnxruntime as rt
import multiprocessing as mp
from multiprocessing import Pool
import psutil

# Second dimension every input tensor is reshaped to in run(): (1, max_seq_length).
max_seq_length = 128
# Number of dataset indices handed to pool.map below.
total_samples = 20

# NOTE(review): torch.load unpickles the file -- only load files from a trusted source.
dataset = torch.load('onnx_models/tensor_dataset.pt')
# Path of the ONNX model handed to rt.InferenceSession in run().
filename = 'onnx_models/optimized_model_cpu.onnx'

def run(i):
    """Worker task: run the ONNX model on entry ``i`` of ``dataset``.

    Builds the ``input_ids`` / ``input_mask`` / ``segment_ids`` feed from the
    first three items of the entry (each reshaped to ``(1, max_seq_length)``),
    creates a new ``InferenceSession`` for this call, and runs it.

    Args:
        i: Index into ``dataset``.

    Returns:
        Whatever ``InferenceSession.run(None, ...)`` returns for this entry.
    """
    entry = dataset[i]

    def as_model_input(tensor):
        # CPU tensor -> (1, max_seq_length) numpy array
        return tensor.cpu().reshape(1, max_seq_length).numpy()

    feed = {
        'input_ids': as_model_input(entry[0]),
        'input_mask': as_model_input(entry[1]),
        'segment_ids': as_model_input(entry[2]),
    }

    options = rt.SessionOptions()
    options.intra_op_num_threads = 1
    session = rt.InferenceSession(filename, sess_options=options)

    result = session.run(None, feed)
    logger.debug(f'Finished inference {i}')
    return result

# Size the pool from psutil's CPU count with logical=False.
num_cpus = psutil.cpu_count(logical=False)

# Request the 'fork' start method. A RuntimeError (e.g. when this cell is
# re-run and the start method has already been set) is logged as a warning
# instead of aborting the cell.
try:
    mp.set_start_method('fork')
except RuntimeError as start_method_error:
    logger.warning(start_method_error)

pool = mp.Pool(processes=num_cpus)

# inference: distribute the sample indices across the worker pool
logger.debug('Started')
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted while the workers are running.
begin = time.perf_counter()
preds = pool.map(run, range(total_samples))
end = time.perf_counter()
logger.debug('Finished')
logger.info(f'Time Elapsed : {end-begin}')
logger.info(f'Get {len(preds)} predictions')

2021-06-20 14:57:10 | MainThread |[36m DEBUG    [0m| root | Started
2021-06-20 14:57:16 | MainThread |[36m DEBUG    [0m| root | Finished inference 7
2021-06-20 14:57:16 | MainThread |[36m DEBUG    [0m| root | Finished inference 3
2021-06-20 14:57:16 | MainThread |[36m DEBUG    [0m| root | Finished inference 2
2021-06-20 14:57:16 | MainThread |[36m DEBUG    [0m| root | Finished inference 1
2021-06-20 14:57:16 | MainThread |[36m DEBUG    [0m| root | Finished inference 0
2021-06-20 14:57:16 | MainThread |[36m DEBUG    [0m| root | Finished inference 6
2021-06-20 14:57:16 | MainThread |[36m DEBUG    [0m| root | Finished inference 4
2021-06-20 14:57:16 | MainThread |[36m DEBUG    [0m| root | Finished inference 5
2021-06-20 14:57:18 | MainThread |[36m DEBUG    [0m| root | Finished inference 8
2021-06-20 14:57:18 | MainThread |[36m DEBUG    [0m| root | Finished inference 9
2021-06-20 14:57:18 | MainThread |[36m DEBUG    [0m| root | Finished inference 11
2021-06-20 14:57