# Example Notebook: Using RAPIDS + PyTorch with Triton

This notebook calls an ensemble model that uses RAPIDS + PyTorch with Triton.


<img src="notebook_images/ensemble_rapids_simple.jpg" width="300" height="400">

### Client Setup

In [None]:
# Uncomment to install the Triton client libraries into the kernel's environment.
# %pip install nvidia-pyindex
# %pip install tritonclient[all]

### Import Libraries

In [None]:
import numpy as np
import json

import grpc
from tritonclient.grpc import service_pb2
from tritonclient.grpc import service_pb2_grpc
import tritonclient.grpc as grpcclient
from functools import partial

### Connect to the Triton End-to-End Model

In [None]:
# Address of the Triton server's gRPC endpoint used by every request below.
url = 'localhost:8001'

# High-level Triton gRPC client.
triton_client = grpcclient.InferenceServerClient(url=url, verbose=False)

# Raw gRPC channel and service stub, used for the model-metadata request.
channel = grpc.insecure_channel(url)
grpc_stub = service_pb2_grpc.GRPCInferenceServiceStub(channel)

In [None]:
# Query the server for the metadata of version '1' of the model named below
# and print the raw response.
preprocessing_model = 'end_to_end_onnx'
request = service_pb2.ModelMetadataRequest(
    name=preprocessing_model,
    version='1',
)
response = grpc_stub.ModelMetadata(request)
print("model metadata:\n{}".format(response))

## Send Request to Model 

### Prepare Input 

In [None]:
# Sample product reviews; the trailing multiplier repeats the list and can be
# raised to build a larger batch.
log_ls = [
    'The product is great',
    'This product is bad',
    'This product is good',
    'This product is really bad',
] * 1
# Encode every review as UTF-8 bytes before packing it into the array.
log_ls = [review.encode('utf-8') for review in log_ls]
# Arrange the reviews as a single row: shape (1, number_of_reviews).
log_ar = np.array(log_ls).reshape(1, len(log_ls))

### Request Sending Function

In [None]:
def send_preprocess_request(log_ar, model_name='rapids_tokenizer'):
    """Send an array of byte strings to a Triton model and return its result.

    The array is sent as the ``product_reviews`` input (declared as ``BYTES``)
    and the ``input_ids`` and ``attention_mask`` outputs are requested.

    Parameters
    ----------
    log_ar : numpy.ndarray
        Array of byte strings to send; ``log_ar.shape`` is used as the shape
        of the input tensor.
    model_name : str, optional
        Name of the Triton model to call. Defaults to ``'rapids_tokenizer'``.

    Returns
    -------
    The result object returned by ``InferenceServerClient.infer``; call
    ``.as_numpy(<output name>)`` on it to read a requested output.

    Notes
    -----
    Connects to the module-level ``url``. A new client is created for every
    call, so connection setup is part of each call's cost.
    """
    input_grpc = grpcclient.InferInput("product_reviews", log_ar.shape, "BYTES")
    input_grpc.set_data_from_numpy(log_ar)

    outputs = [
        grpcclient.InferRequestedOutput('input_ids'),
        grpcclient.InferRequestedOutput('attention_mask'),
    ]

    # The context manager closes the per-call client as soon as the request
    # finishes instead of leaving cleanup to garbage collection; this matters
    # when the function is called many times in a row (e.g. under %%timeit).
    with grpcclient.InferenceServerClient(url=url, verbose=False) as client:
        return client.infer(model_name=model_name,
                            inputs=[input_grpc],
                            outputs=outputs)

In [None]:
%%timeit
# Times one tokenizer request per loop. Each call to send_preprocess_request
# constructs its own client, so connection setup is included in the timing.
# squeeze() drops the size-1 leading axis of log_ar, so a 1-D array is sent.
# NOTE(review): names assigned inside a %%timeit cell are presumably not kept
# in the notebook namespace afterwards -- confirm before relying on `output`.
output = send_preprocess_request(log_ar.squeeze())

In [None]:
def send_inference_requet(log_ar, model_name='end_to_end_pytorch'):
    """Send an array of byte strings to a Triton model and return its result.

    The array is sent as the ``product_reviews`` input (declared as ``BYTES``)
    and the ``preds`` output is requested.

    Parameters
    ----------
    log_ar : numpy.ndarray
        Array of byte strings to send; ``log_ar.shape`` is used as the shape
        of the input tensor.
    model_name : str, optional
        Name of the Triton model to call. Defaults to ``'end_to_end_pytorch'``.

    Returns
    -------
    The result object returned by ``InferenceServerClient.infer``; call
    ``.as_numpy('preds')`` on it to read the predictions.

    Notes
    -----
    Connects to the module-level ``url``. A new client is created for every
    call, so connection setup is part of each call's cost.
    """
    input_grpc = grpcclient.InferInput("product_reviews", log_ar.shape, "BYTES")
    input_grpc.set_data_from_numpy(log_ar)

    outputs = [grpcclient.InferRequestedOutput('preds')]

    # The context manager closes the per-call client as soon as the request
    # finishes instead of leaving cleanup to garbage collection; this matters
    # when the function is called many times in a row (e.g. under %%timeit).
    with grpcclient.InferenceServerClient(url=url, verbose=False) as client:
        return client.infer(model_name=model_name,
                            inputs=[input_grpc],
                            outputs=outputs)


# Correctly spelled alias; the original (misspelled) name is kept so existing
# calls continue to work.
send_inference_request = send_inference_requet


In [None]:
%%timeit
# Times one request per loop against the 'end_to_end_onnx' model. Each call
# constructs its own client, so connection setup is included in the timing.
output = send_inference_requet(log_ar,'end_to_end_onnx')
# The predictions themselves are displayed in the "Predictions" section below,
# outside of the timed cell.

In [None]:
%%timeit
# Times one request per loop against the 'end_to_end_pytorch' model. Each call
# constructs its own client, so connection setup is included in the timing.
output = send_inference_requet(log_ar,'end_to_end_pytorch')

##  Predictions

##### A prediction of 1 means positive, 0 means negative

In [None]:
# Run the reviews through the 'end_to_end_pytorch' model and display its
# 'preds' output as a NumPy array.
output = send_inference_requet(log_ar, model_name='end_to_end_pytorch')
output.as_numpy('preds')

In [None]:
# Run the same reviews through the 'end_to_end_onnx' model and display its
# 'preds' output as a NumPy array.
output = send_inference_requet(log_ar, model_name='end_to_end_onnx')
output.as_numpy('preds')