# gRPC Inference

### Setup

In [1]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel rather than whichever pip is first on PATH.
%pip install grpcio==1.56.0 grpcio-tools==1.33.2 protobuf==3.20.3


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Show the installed versions of the gRPC and protobuf packages.
!pip list | grep -E 'grpcio|protobuf'

grpcio                          1.56.0
grpcio-tools                    1.33.2
protobuf                        3.20.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
# Address of the gRPC inference endpoint (combined as "<host>:<port>" when the channel is opened).
grpc_host = "modelmesh-serving"
grpc_port = 8033

# Names under which the individual models are addressed in requests.
textencoder_model_name = "textencoder"
unet_model_name = "unet"
vaeencoder_model_name = "vaeencoder"
vaedecoder_model_name = "vaedecoder"

### Inspecting the gRPC Endpoint

Let's check out the gRPC endpoint's model metadata.

In [4]:
import grpc
import grpc_predict_v2_pb2
import grpc_predict_v2_pb2_grpc


# Open an insecure channel to the endpoint and create the inference-service stub.
# `stub` is reused by the request function defined later in the notebook.
channel = grpc.insecure_channel(f"{grpc_host}:{grpc_port}")
stub = grpc_predict_v2_pb2_grpc.GRPCInferenceServiceStub(channel)

# Models whose metadata is printed. Append unet_model_name,
# vaeencoder_model_name and/or vaedecoder_model_name to inspect those as well.
models_to_inspect = [textencoder_model_name]

for model_name in models_to_inspect:
    request = grpc_predict_v2_pb2.ModelMetadataRequest(name=model_name)
    response = stub.ModelMetadata(request)
    print(response)


name: "textencoder__isvc-23786653ad"
versions: "1"
platform: "onnxruntime_onnx"
inputs {
  name: "input_ids"
  datatype: "INT32"
  shape: -1
  shape: -1
}
outputs {
  name: "pooler_output"
  datatype: "FP32"
  shape: -1
  shape: 1024
}
outputs {
  name: "last_hidden_state"
  datatype: "FP32"
  shape: -1
  shape: -1
  shape: 1024
}



### Request Function

The function below builds a gRPC inference request for the text encoder, submits it, and returns the resulting embeddings as a tensor.

In [5]:
import torch

def textencoder_grpc_request(input_arr):
    """Send token ids to the text-encoder model over gRPC and return its hidden states.

    Builds a ``ModelInferRequest`` containing a single INT32 input tensor named
    ``input_ids`` with declared shape ``[1, 77]``, submits it through the
    module-level ``stub``, and decodes the ``last_hidden_state`` output.

    Args:
        input_arr: Array of token ids exposing ``.flatten()`` (e.g. a NumPy
            array). It is flattened before being sent; presumably it holds 77
            ids to match the declared shape -- TODO confirm against the caller.

    Returns:
        A ``torch.Tensor`` built from the float32 output buffer, reshaped to
        ``[-1, 77, 1024]``.

    Side effects:
        Prints the dtype, shape and values of the decoded (flat) output array.
    """
    # Imported locally: the notebook's `import numpy as np` lives in a later
    # cell than this definition, so the global name may not exist yet.
    import numpy as np

    # request content building
    # InferInputTensor is a nested message class, so it is reached through the
    # class itself rather than through a throwaway request instance.
    input_ids = grpc_predict_v2_pb2.ModelInferRequest.InferInputTensor()
    input_ids.name = "input_ids"
    input_ids.datatype = "INT32"
    input_ids.shape.extend([1, 77])
    input_ids.contents.int_contents.extend(input_arr.flatten())

    # request building
    request = grpc_predict_v2_pb2.ModelInferRequest()
    request.model_name = textencoder_model_name
    request.inputs.extend([input_ids])

    response = stub.ModelInfer(request)

    # The model exposes two outputs (pooler_output and last_hidden_state), so
    # pick the wanted one by name instead of relying on its position. Entries of
    # raw_output_contents are expected to follow the order of response.outputs
    # (per the inference protocol). If the name is not reported, fall back to
    # the first entry, which is what this function previously always used.
    wanted_output = "last_hidden_state"
    output_names = [output.name for output in response.outputs]
    output_index = output_names.index(wanted_output) if wanted_output in output_names else 0

    text_embeddings = np.frombuffer(
        response.raw_output_contents[output_index], dtype=np.float32
    )
    print(text_embeddings.dtype)
    print(text_embeddings.shape)
    print(text_embeddings)
    return torch.tensor(text_embeddings.reshape([-1, 77, 1024]))

### Run the Request

In [6]:
import numpy as np

from IPython.display import Image

# Load the saved text-encoder input from disk.
# (To inspect it, print text_encoder_args.dtype, .shape, or the array itself.)
text_encoder_args = np.load("text_encoder_args.npy")

# Send the input through the gRPC endpoint and keep the returned tensor.
text_embeddings = textencoder_grpc_request(text_encoder_args)

float32
(78848,)
[-0.31344315 -0.44757485 -0.00821633 ... -0.23510835  0.2653347
  0.9652109 ]


In [7]:
# Show dtype, shape and contents of the returned tensor, one per line.
print(text_embeddings.dtype, text_embeddings.shape, text_embeddings, sep="\n")

torch.float32
torch.Size([1, 77, 1024])
tensor([[[-0.3134, -0.4476, -0.0082,  ...,  0.2542, -0.0324, -0.2960],
         [ 0.1997, -1.6939, -0.8946,  ...,  0.4660, -0.0960, -2.1482],
         [ 1.0233, -0.7342, -2.5381,  ...,  0.8945, -0.0587, -1.0723],
         ...,
         [ 0.8669, -1.5203,  0.1674,  ..., -0.1391, -0.1165,  0.4508],
         [ 0.8882, -1.5138,  0.1458,  ..., -0.4148, -0.1800,  0.5422],
         [ 0.5453, -2.2350, -0.4391,  ..., -0.2351,  0.2653,  0.9652]]])
