# gRPC Inference

### Setup

In [1]:
# Show which grpcio / protobuf packages are currently installed.
!pip list | grep -E 'grpcio|protobuf'

grpcio                          1.56.0
grpcio-tools                    1.33.2
protobuf                        3.20.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Install the pinned gRPC / protobuf versions. The %pip magic (rather than !pip)
# installs into the environment of the running kernel, not whichever `pip`
# happens to be first on the shell PATH.
%pip install grpcio==1.56.0 grpcio-tools==1.33.2 protobuf==3.20.3


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
# Confirm the grpcio / protobuf versions that are installed after the install step.
!pip list | grep -E 'grpcio|protobuf'

grpcio                          1.56.0
grpcio-tools                    1.33.2
protobuf                        3.20.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
# Host and port of the gRPC inference endpoint used by the cells below.
grpc_host = "modelmesh-serving"
grpc_port = 8033

# Names under which the individual models are requested from the endpoint.
textencoder_model_name = "textencoder"
unet_model_name = "unet"
vaeencoder_model_name = "vaeencoder"
vaedecoder_model_name = "vaedecoder"

### Inspecting the gRPC Endpoint

Let's check out the gRPC endpoint's model metadata.

In [5]:
import grpc
import grpc_predict_v2_pb2
import grpc_predict_v2_pb2_grpc


# Open an insecure (plaintext) channel to the endpoint and create the
# inference-service stub; `stub` is reused by later cells.
channel = grpc.insecure_channel(f"{grpc_host}:{grpc_port}")
stub = grpc_predict_v2_pb2_grpc.GRPCInferenceServiceStub(channel)

# Request and print the metadata of each listed model. Only the UNet is
# queried here; add textencoder_model_name, vaeencoder_model_name or
# vaedecoder_model_name to the tuple to inspect those models as well.
for model_name in (unet_model_name,):
    request = grpc_predict_v2_pb2.ModelMetadataRequest(name=model_name)
    response = stub.ModelMetadata(request)
    print(response)


name: "unet__isvc-78cd96e258"
versions: "1"
platform: "onnxruntime_onnx"
inputs {
  name: "encoder_hidden_states"
  datatype: "FP32"
  shape: -1
  shape: -1
  shape: 1024
}
inputs {
  name: "timestep"
  datatype: "INT64"
  shape: -1
  shape: 1
}
inputs {
  name: "sample"
  datatype: "FP32"
  shape: -1
  shape: -1
  shape: -1
  shape: -1
}
outputs {
  name: "out_sample"
  datatype: "FP32"
  shape: -1
  shape: -1
  shape: -1
  shape: -1
}



### Request Function

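Define a helper that builds a gRPC inference request for the UNet model, submits it to the endpoint, and returns the model output as a tensor.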

In [6]:
import numpy as np
import torch


def _as_numpy(values, dtype):
    """Return `values` as a NumPy array of `dtype`; accepts torch tensors or array-likes."""
    if isinstance(values, torch.Tensor):
        # NumPy cannot convert tensors that track gradients or live off-host.
        values = values.detach().cpu().numpy()
    return np.asarray(values, dtype=dtype)


def _make_input_tensor(name, datatype, array, contents_field):
    """Build one InferInputTensor whose declared shape matches `array`."""
    # The nested message class is reachable from the request class itself;
    # no throwaway ModelInferRequest instance is needed.
    tensor = grpc_predict_v2_pb2.ModelInferRequest.InferInputTensor()
    tensor.name = name
    tensor.datatype = datatype
    tensor.shape.extend(array.shape)
    # Send the data flattened in row-major order as native Python scalars.
    getattr(tensor.contents, contents_field).extend(array.ravel().tolist())
    return tensor


def unet_grpc_request(encoder_hidden_states, timestep, sample):
    """Run one UNet inference call over gRPC and return the raw output.

    The declared tensor shapes are taken from the arguments themselves instead
    of being hard-coded, so the function works for any batch / latent size.

    Args:
        encoder_hidden_states: Array-like or torch tensor, sent as the FP32
            input "encoder_hidden_states" (the model metadata lists it as
            3-D with a last dimension of 1024).
        timestep: Array-like or torch tensor of integers, sent as the INT64
            input "timestep". It is reshaped to (N, 1) to match the
            two-dimensional [-1, 1] shape listed in the model metadata.
        sample: Array-like or torch tensor, sent as the FP32 input "sample"
            (4-D according to the model metadata).

    Returns:
        A 1-D float32 torch tensor holding the first raw output buffer of the
        response. The output is returned flat; reshape it on the caller side
        if needed, e.g. `out.reshape(-1, 4, 64, 64)`.

    Side effects:
        Prints dtype, shape and values of the decoded output as a diagnostic.
    """
    hidden = _as_numpy(encoder_hidden_states, np.float32)
    steps = _as_numpy(timestep, np.int64).reshape(-1, 1)
    latents = _as_numpy(sample, np.float32)

    request = grpc_predict_v2_pb2.ModelInferRequest()
    request.model_name = unet_model_name
    request.inputs.extend([
        _make_input_tensor("encoder_hidden_states", "FP32", hidden, "fp32_contents"),
        _make_input_tensor("timestep", "INT64", steps, "int64_contents"),
        _make_input_tensor("sample", "FP32", latents, "fp32_contents"),
    ])

    response = stub.ModelInfer(request)
    # The output arrives as raw bytes; interpret the first buffer as float32.
    out_sample = np.frombuffer(response.raw_output_contents[0], dtype=np.float32)
    print(out_sample.dtype)
    print(out_sample.shape)
    print(out_sample)
    return torch.tensor(out_sample)

### Run the Request

In [7]:
import numpy as np
import torch

from IPython.display import Image

# Load the three saved input arrays from the working directory.
latent_model_input = np.load("latent_model_input.npy")
encoder_hidden_states = np.load("encoder_hidden_states.npy")
timestep = np.load("t.npy")

# Report dtype, shape and contents of each array, in the order they were loaded.
for loaded in (latent_model_input, encoder_hidden_states, timestep):
    print(loaded.dtype)
    print(loaded.shape)
    print(loaded)

float32
(2, 4, 64, 64)
[[[[-0.8897597  -0.81190264 -0.5321495  ... -0.8871557   1.2300298
    -1.2197485 ]
   [-0.4000458   0.21738213  0.317678   ...  0.13193801  0.8565978
     0.9220387 ]
   [-0.67728937 -0.48643437 -0.70665723 ... -0.6259676  -0.61032146
    -0.50187075]
   ...
   [-0.8353103  -0.45297387 -1.6257945  ... -1.2519588  -0.49927092
    -0.3828153 ]
   [-0.31360683 -0.6438074  -1.1611359  ... -0.455653   -0.5686751
    -0.3175882 ]
   [-0.61284566 -1.1918628  -1.1147459  ... -1.4896585  -1.164741
    -0.70370245]]

  [[ 0.09438869 -0.29805583 -0.48049027 ... -0.07705937 -0.1229637
     0.6576069 ]
   [-0.654859   -0.77021813 -0.82511634 ...  0.24248192  0.32008582
     0.985103  ]
   [-0.0546063  -0.34833267  0.24034634 ... -0.4311598  -0.9319254
    -0.30207822]
   ...
   [-1.2818838  -0.08484254 -0.50462127 ... -0.26499438 -0.88069105
    -0.24047701]
   [-1.4174095  -0.15417747 -0.02103706 ... -0.44481596 -0.33733866
    -0.2445958 ]
   [-0.8281576  -0.06538612 -0.60

In [8]:
import torch
import numpy as np


# Send the loaded inputs to the UNet model.
# NOTE(review): a fixed timestep tensor [1, 1] is passed here rather than the
# `timestep` array loaded from t.npy — confirm this is intentional.
out_sample = unet_grpc_request(
    encoder_hidden_states=encoder_hidden_states,
    timestep=torch.tensor([1, 1]),
    sample=latent_model_input,
)

float32
(32768,)
[ 0.01648445 -0.3543526  -0.0860291  ...  0.2566524  -0.10423775
 -0.2969734 ]


In [9]:
# Show dtype, shape and values of the returned tensor, one per line.
print(out_sample.dtype, out_sample.shape, out_sample, sep="\n")

torch.float32
torch.Size([32768])
tensor([ 0.0165, -0.3544, -0.0860,  ...,  0.2567, -0.1042, -0.2970])
