# gRPC Inference

### Setup

In [None]:
!pip install grpcio==1.56.0 grpcio-tools==1.33.2 protobuf==3.20.3

In [None]:
# List the installed packages whose names match "grpcio" or "protobuf" so the
# versions in use can be checked.
!pip list | grep -e grpcio -e protobuf

In [None]:
# Host name and port used to open the gRPC channel to the inference service.
grpc_host = "modelmesh-serving"
grpc_port = 8033

# Names under which the individual models are addressed on the endpoint.
textencoder_model_name = "textencoder"
unet_model_name = "unet"
vaeencoder_model_name = "vaeencoder"
vaedecoder_model_name = "vaedecoder"

### Inspecting the gRPC Endpoint

Let's check out the gRPC endpoint's model metadata.

In [None]:
import grpc
import grpc_predict_v2_pb2
import grpc_predict_v2_pb2_grpc


# Models whose metadata is printed below. Only the text encoder is queried by
# default; append unet_model_name, vaeencoder_model_name, or
# vaedecoder_model_name to this list to inspect the other models as well.
models_to_inspect = [textencoder_model_name]

# Open a channel (created with grpc.insecure_channel, i.e. without channel
# credentials) to the configured host/port and build the inference service
# stub. `stub` is reused by later cells to submit inference requests.
channel = grpc.insecure_channel(f"{grpc_host}:{grpc_port}")
stub = grpc_predict_v2_pb2_grpc.GRPCInferenceServiceStub(channel)

# Ask the endpoint for each selected model's metadata and print the reply.
for model_name in models_to_inspect:
    request = grpc_predict_v2_pb2.ModelMetadataRequest(name=model_name)
    response = stub.ModelMetadata(request)
    print(response)


### Request Function

The function below builds a gRPC inference request for the text encoder model, submits it, and returns the model output as a tensor.

In [None]:
import numpy as np
import torch


def textencoder_grpc_request(input_arr):
    """Send token ids to the text encoder model over gRPC and return its output.

    Args:
        input_arr: Array-like of integer token ids. The values are flattened
            before being sent, and the request always declares the tensor
            shape as [1, 77], so it should hold 77 values.

    Returns:
        A torch tensor built from the first raw output buffer of the response,
        interpreted as float32 and reshaped to (-1, 77, 768).
    """
    # Describe the single input tensor of the request. The nested message type
    # is taken from the class directly; no throwaway request instance needed.
    input_tensor = grpc_predict_v2_pb2.ModelInferRequest.InferInputTensor()
    input_tensor.name = "input_ids"
    input_tensor.datatype = "INT32"
    input_tensor.shape.extend([1, 77])
    # Convert to plain Python ints so lists and numpy arrays are both accepted.
    token_ids = np.asarray(input_arr).flatten().tolist()
    input_tensor.contents.int_contents.extend(token_ids)

    # Assemble the inference request addressed to the text encoder model.
    request = grpc_predict_v2_pb2.ModelInferRequest()
    request.model_name = textencoder_model_name
    request.inputs.extend([input_tensor])

    # Submit the request and decode the first raw output buffer as float32.
    response = stub.ModelInfer(request)
    text_embeddings = np.frombuffer(response.raw_output_contents[0], dtype=np.float32)

    return torch.tensor(text_embeddings.reshape([-1, 77, 768]))

### Run the Request

In [None]:
import numpy as np

from IPython.display import Image

# Pre-tokenized input for the text encoder: one leading id, the prompt ids,
# and the id 49407 repeated to fill the sequence up to 77 entries.
# NOTE(review): the ids presumably come from the model's tokenizer — confirm.
leading_id = 49406
fill_id = 49407
prompt_ids = [320, 1125, 539, 320, 8745, 11798, 1929, 525, 518, 2117]
sequence_length = 77

fill_count = sequence_length - 1 - len(prompt_ids)
text_inputs = np.array([[leading_id, *prompt_ids, *([fill_id] * fill_count)]])


# Submit the token ids to the text encoder over gRPC and keep the returned
# tensor for inspection in the next cell.
text_embeddings = textencoder_grpc_request(text_inputs)

In [None]:
# Show the dtype, the shape, and the values of the returned tensor, one per line.
print(text_embeddings.dtype, text_embeddings.shape, text_embeddings, sep="\n")