In [1]:
import sys
import numpy as np

import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

if sys.version_info >= (3, 0):
    import queue
else:
    import Queue as queue

In [2]:
# User-tunable settings for this notebook.

# enable verbose output for grpcclient
verbose = False

# number of events to send in one inference request
batch_size = 8

# url of inference server. use 8001 for grpcclient and 8000 for httpclient
url = "agc-triton-inference-server:8001"

# input csv file
test_events = "testevents.csv"

# name of ML model to use (make sure it is loaded properly)
model_name = "binary_classifier"

# specify model version if necessary
model_vers = ""

# number of batches to process (number of events will be num_batches*batch_size)
num_batches = 5

In [3]:
# Build the gRPC client object through which every request in this notebook
# is sent to the inference server at `url`.
triton_client = grpcclient.InferenceServerClient(url=url, verbose=verbose)

In [4]:
# Ask the server for the metadata of the configured model/version.
model_metadata = triton_client.get_model_metadata(model_name=model_name,
                                                  model_version=model_vers)

In [5]:
model_config = triton_client.get_model_config(model_name=model_name, 
                                              model_version=model_vers)

In [6]:
model_config = model_config.config

In [7]:
# Only the first declared input and the first declared output are used.
input_metadata = model_metadata.inputs[0]
output_metadata = model_metadata.outputs[0]
input_config = model_config.input[0]

# A positive max_batch_size is taken to mean the model accepts batched input.
max_batch_size = model_config.max_batch_size
input_batch_dim = max_batch_size > 0

# Tensor names used when building requests and reading results.
input_name = input_metadata.name
output_name = output_metadata.name

# The feature count is read from shape[1] when batching is enabled and from
# shape[0] otherwise (int(True) == 1, int(False) == 0).
# NOTE(review): presumably the metadata shape carries a leading batch
# dimension for batched models -- confirm against the server's metadata.
n_features = input_metadata.shape[int(input_batch_dim)]

# NOTE: `format` shadows the Python builtin of the same name for the rest of
# the notebook; the name is kept as-is because other cells may rely on it.
format = input_config.format
dtype = input_metadata.datatype

In [8]:
# load data
# ndmin=2 guarantees a 2-D array even when the CSV holds a single row (or a
# single column). Without it np.loadtxt returns a 1-D array in that case, and
# the later `data.shape[0]` event count and `data[a:b, :]` slicing break.
data = np.loadtxt(test_events, dtype=np.float32, delimiter=',', ndmin=2)

In [9]:
# Work out how many batches can actually be sent.

# number of rows (events) in the loaded data
data_length = data.shape[0]

# Upper bound on the batch count: events divided by batch_size, rounded up so
# that a trailing partial batch is counted as well.
batches_exact = data_length / batch_size
max_num_batches = int(np.ceil(batches_exact))

# Clamp the requested batch count so it never exceeds what the data provides.
num_batches = np.minimum(max_num_batches, num_batches)

In [11]:
# send inference requests
# `client` is only an alias for the grpcclient module, so bind it once
# instead of on every loop iteration.
client = grpcclient

# The requested output tensor is the same for every batch; build it once.
output = client.InferRequestedOutput(output_name)

startind = 0

for i in range(num_batches):
    # Rows belonging to the current batch. Slicing past the end of `data`
    # simply yields fewer rows, so a final partial batch is sent as-is.
    data_current = data[startind:startind + batch_size, :]
    startind += batch_size

    # Describe the input tensor (name, shape, datatype) and attach the data.
    inpt = [client.InferInput(input_name, data_current.shape, dtype)]
    inpt[0].set_data_from_numpy(data_current)

    # Pass model_vers explicitly so inference targets the same model version
    # that the metadata/config queries above were made against.
    results = triton_client.infer(model_name=model_name,
                                  inputs=inpt,
                                  model_version=model_vers,
                                  outputs=[output])

    # Fetch the output tensor as a numpy array and print it rounded and
    # transposed.
    inference_output = results.as_numpy(output_name)
    print(f"Inference Results for Batch {i}: ", np.round(inference_output).T)

Inference Results for Batch 0:  [[1. 0. 0. 0. 0. 1. 1. 0.]]
Inference Results for Batch 1:  [[0. 0. 1. 1. 1. 0. 0. 0.]]
Inference Results for Batch 2:  [[0. 0. 1. 0. 0. 0. 1. 0.]]
Inference Results for Batch 3:  [[1. 0. 1. 1. 0. 0. 0. 1.]]
Inference Results for Batch 4:  [[1. 1. 0. 0. 1. 1. 1. 0.]]
