Reference: https://pythontutorials.eu/deep-learning/image-classification/

## Save model to filesystem

In [1]:
from keras.applications.mobilenet_v2 import MobileNetV2

# Instantiate MobileNetV2 with its ImageNet weights; `model` is written to disk in the next cell.
model = MobileNetV2(weights='imagenet')

2024-03-18 13:17:30.958528: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-18 13:17:30.959276: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-18 13:17:30.963567: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-18 13:17:31.011937: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Save the model in TensorFlow's SavedModel format under model/mobilenet_v2.
# The directory name is the same "mobilenet_v2" used as the model name in the MLServer requests further down.
import tensorflow as tf
tf.saved_model.save(model, 'model/mobilenet_v2')

INFO:tensorflow:Assets written to: model/mobilenet_v2/assets


INFO:tensorflow:Assets written to: model/mobilenet_v2/assets


## Load model from the filesystem

In [3]:
# Reload the SavedModel from the directory written above; its signature is used for inference below.
loaded_model=tf.saved_model.load('model/mobilenet_v2')


In [4]:
# Inference signature: "serving_default" is the callable used below to run predictions.
# Its result is indexed by output name ('output_0' in the inference cell).
signature = loaded_model.signatures["serving_default"]

## Prepare Input 

The image `data/boef.jpg` is loaded, resized to 224×224, and converted into a NumPy ndarray with a leading batch dimension.

In [5]:
import numpy as np
from PIL import Image
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# Read the picture from disk, resized to 224x224.
img = load_img('data/boef.jpg', target_size=(224, 224))

# Convert the PIL image to a NumPy array and prepend a batch axis in one step,
# because the model expects a 4D input.
img_array = img_to_array(img)[np.newaxis, ...]



In [6]:
from keras.applications.mobilenet_v2 import preprocess_input

# preprocess_input overwrites a float NumPy input in place. Pass a copy so that
# `img_array` keeps the raw pixel values and re-running this cell does not
# rescale data that has already been scaled.
preprocessed_img_array = preprocess_input(img_array.copy())

## Get predictions from the Model

In [7]:
# Call the serving signature on the preprocessed batch; the result is indexed by output name.
result = signature(tf.convert_to_tensor(preprocessed_img_array))
print(result)
# 'output_0' is the (1, 1000) float32 tensor shown in the printed result; keep it as a NumPy array.
predictions = result['output_0'].numpy()

{'output_0': <tf.Tensor: shape=(1, 1000), dtype=float32, numpy=
array([[2.97425540e-05, 1.49535947e-04, 6.46366825e-05, 2.41630078e-05,
        1.23810078e-05, 4.99482594e-05, 2.09212922e-05, 5.64167494e-05,
        8.87669230e-05, 1.48241743e-05, 5.99497980e-05, 1.00155463e-04,
        9.64530955e-06, 1.46131761e-05, 2.96990474e-05, 1.02922786e-05,
        1.38889918e-05, 1.75212863e-05, 1.51923714e-05, 2.63074580e-05,
        2.23107363e-05, 1.83268639e-05, 3.39019862e-05, 4.56150156e-05,
        1.78380378e-05, 6.53904281e-05, 2.41485395e-05, 2.03414329e-05,
        4.05538522e-05, 2.68821022e-05, 2.95622776e-05, 3.07345726e-05,
        2.68368676e-05, 3.62167702e-05, 4.56950402e-05, 2.70987312e-05,
        8.97565114e-05, 1.90525625e-05, 1.92482530e-05, 1.89615148e-05,
        1.07471051e-05, 1.91563031e-05, 4.87090147e-05, 2.91436736e-05,
        8.49843491e-06, 5.47949494e-05, 2.35121934e-05, 3.94863173e-06,
        1.51520326e-05, 3.23202366e-05, 7.40126916e-06, 9.00560553e-06,


## Analyze predictions

In [8]:
# Index of the highest-scoring output for the single image in the batch.
output_neuron = predictions[0].argmax()
print('Most active neuron: {} ({:.2f}%)'.format(output_neuron, 100 * predictions[0, output_neuron]))

Most active neuron: 155 (82.17%)


In [9]:
from keras.applications.mobilenet_v2 import decode_predictions

# Each decoded entry has three fields; print the label and its score as a
# percentage for the first (only) image in the batch.
for class_id, label, probability in decode_predictions(predictions)[0]:
    percent = 100 * probability
    print('- {} ({:.2f}%)'.format(label, percent))

- Shih-Tzu (82.17%)
- Pekinese (8.71%)
- Lhasa (3.47%)
- Tibetan_terrier (0.77%)
- Old_English_sheepdog (0.28%)


## Convert input to MLServer InferenceRequest format

In [10]:
# Inference request: wrap the preprocessed image batch as a single input named
# "payload", encoded with MLServer's NumpyCodec.
from mlserver.types import InferenceRequest
from mlserver.codecs import NumpyCodec
import requests  # used by the REST cell further down
inference_request = InferenceRequest(
    inputs=[NumpyCodec.encode_input(name="payload", payload=preprocessed_img_array)]
)

In [11]:
# Persist the REST request body as JSON.
import json
with open('inference_request.json', 'w') as request_file:
    json.dump(inference_request.dict(), request_file)

## For gRPC

In [19]:
import mlserver.grpc.converters as converters

# Convert the InferenceRequest built above into its gRPC counterpart,
# addressed to the model named "mobilenet_v2".
inference_request_g = converters.ModelInferRequestConverter.from_types(
    inference_request, model_name="mobilenet_v2"
)

# Display the resulting type (dataplane_pb2.ModelInferRequest in the recorded output).
type(inference_request_g)

dataplane_pb2.ModelInferRequest

In [36]:
# Import the json_format submodule explicitly: `from google import protobuf`
# alone does not load it, so `protobuf.json_format` only resolves when some
# other package happens to have imported it earlier in the session.
from google import protobuf
from google.protobuf import json_format

# Turn the gRPC request message into a plain dict so it can be serialised as JSON.
grpc_input = json_format.MessageToDict(inference_request_g)

grpc_input

{'modelName': 'mobilenet_v2',
 'inputs': [{'name': 'payload',
   'datatype': 'FP32',
   'shape': ['1', '224', '224', '3'],
   'parameters': {'content_type': {'stringParam': 'np'}},
   'contents': {'fp32Contents': [-0.31764704,
     -0.16862744,
     -0.1372549,
     -0.29411763,
     -0.1372549,
     -0.082352936,
     -0.3333333,
     -0.15294117,
     -0.09019607,
     -0.6313726,
     -0.4823529,
     -0.4352941,
     -0.6392157,
     -0.5529412,
     -0.5372549,
     -0.5058824,
     -0.4588235,
     -0.49019605,
     -0.44313723,
     -0.40392154,
     -0.4588235,
     -0.44313723,
     -0.3960784,
     -0.47450978,
     -0.49019605,
     -0.45098037,
     -0.4980392,
     -0.6156863,
     -0.5686275,
     -0.58431375,
     -0.58431375,
     -0.52156866,
     -0.5058824,
     -0.60784316,
     -0.52156866,
     -0.49019605,
     -0.3333333,
     -0.21568626,
     -0.1607843,
     -0.4352941,
     -0.2862745,
     -0.23137254,
     -0.42745095,
     -0.27058822,
     -0.19999999,
 

In [37]:
# Persist the gRPC request dict as JSON.
import json
with open('inference_request_grpc.json', 'w') as grpc_request_file:
    json.dump(grpc_input, grpc_request_file)

## Sending input programmatically to MLServer 

## REST

In [29]:


# REST inference endpoint of the MLServer instance for the "mobilenet_v2" model.
inference_url = "http://localhost:8080/v2/models/mobilenet_v2/infer"
print("Sending Inference Request...")
# Without a timeout, requests waits indefinitely if the server is down or stalls.
res = requests.post(inference_url, json=inference_request.dict(), timeout=60)
print("Got Response...")

# Fail loudly on an HTTP error status instead of treating an error body as a prediction.
res.raise_for_status()
print(res.json())



Sending Inference Request...
Got Response...
{'model_name': 'mobilenet_v2', 'id': '07919d33-7bb6-4a23-a608-5ec664d5c555', 'parameters': {}, 'outputs': [{'name': 'output-0', 'shape': [1, 1000], 'datatype': 'FP32', 'parameters': {'content_type': 'np'}, 'data': [2.9742554033873603e-05, 0.00014953594654798508, 6.463668250944465e-05, 2.4163007765309885e-05, 1.2381007763906382e-05, 4.99482594022993e-05, 2.092129216180183e-05, 5.6416749430354685e-05, 8.876692299963906e-05, 1.4824174286331981e-05, 5.994979801471345e-05, 0.00010015546286012977, 9.645309546613134e-06, 1.4613176062994171e-05, 2.9699047445319593e-05, 1.0292278602719307e-05, 1.3888991816202179e-05, 1.752128628140781e-05, 1.5192371392913628e-05, 2.630745802889578e-05, 2.231073631264735e-05, 1.8326863937545568e-05, 3.3901986171258613e-05, 4.561501555144787e-05, 1.7838037820183672e-05, 6.539042806252837e-05, 2.414853952359408e-05, 2.0341432900750078e-05, 4.055385215906426e-05, 2.6882102247327566e-05, 2.956227763206698e-05, 3.073457264

## gRPC

In [33]:
import grpc
import mlserver.grpc.converters as converters
import mlserver.grpc.dataplane_pb2_grpc as dataplane
from pprint import PrettyPrinter
from mlserver.codecs import NumpyRequestCodec

# NOTE(review): `pp` is not used anywhere in this cell.
pp = PrettyPrinter(indent=1)
# Open an insecure channel to localhost:8081 and build the inference service stub on it.
# NOTE(review): the channel is never closed in this cell and the call below sets no deadline — confirm this is intended.
grpc_channel = grpc.insecure_channel("localhost:8081")
grpc_stub = dataplane.GRPCInferenceServiceStub(grpc_channel)

# Send the gRPC request built earlier; `response` is decoded in the next cell.
response = grpc_stub.ModelInfer(inference_request_g)

print("full response:\n")
print(response)


full response:

model_name: "mobilenet_v2"
outputs {
  name: "output-0"
  datatype: "FP32"
  shape: 1
  shape: 1000
  parameters {
    key: "content_type"
    value {
      string_param: "np"
    }
  }
  contents {
    fp32_contents: 2.9742554e-05
    fp32_contents: 0.000149535947
    fp32_contents: 6.46366825e-05
    fp32_contents: 2.41630078e-05
    fp32_contents: 1.23810078e-05
    fp32_contents: 4.99482594e-05
    fp32_contents: 2.09212922e-05
    fp32_contents: 5.64167494e-05
    fp32_contents: 8.8766923e-05
    fp32_contents: 1.48241743e-05
    fp32_contents: 5.9949798e-05
    fp32_contents: 0.000100155463
    fp32_contents: 9.64531e-06
    fp32_contents: 1.46131761e-05
    fp32_contents: 2.96990474e-05
    fp32_contents: 1.02922786e-05
    fp32_contents: 1.38889918e-05
    fp32_contents: 1.75212863e-05
    fp32_contents: 1.51923714e-05
    fp32_contents: 2.6307458e-05
    fp32_contents: 2.23107363e-05
    fp32_contents: 1.83268639e-05
    fp32_contents: 3.39019862e-05
    fp32_c

In [35]:
# Retrieve the output as a NumPy ndarray: convert the gRPC reply with
# ModelInferResponseConverter, then decode it with NumpyRequestCodec.
inference_response = converters.ModelInferResponseConverter.to_types(response)
ndarrayOutput = NumpyRequestCodec.decode_response(inference_response)
print(ndarrayOutput)


[[2.97425540e-05 1.49535947e-04 6.46366825e-05 2.41630078e-05
  1.23810078e-05 4.99482594e-05 2.09212922e-05 5.64167494e-05
  8.87669230e-05 1.48241743e-05 5.99497980e-05 1.00155463e-04
  9.64530955e-06 1.46131761e-05 2.96990474e-05 1.02922786e-05
  1.38889918e-05 1.75212863e-05 1.51923714e-05 2.63074580e-05
  2.23107363e-05 1.83268639e-05 3.39019862e-05 4.56150156e-05
  1.78380378e-05 6.53904281e-05 2.41485395e-05 2.03414329e-05
  4.05538522e-05 2.68821022e-05 2.95622776e-05 3.07345726e-05
  2.68368676e-05 3.62167702e-05 4.56950402e-05 2.70987312e-05
  8.97565114e-05 1.90525625e-05 1.92482530e-05 1.89615148e-05
  1.07471051e-05 1.91563031e-05 4.87090147e-05 2.91436736e-05
  8.49843491e-06 5.47949494e-05 2.35121934e-05 3.94863173e-06
  1.51520326e-05 3.23202366e-05 7.40126916e-06 9.00560553e-06
  2.02446481e-05 5.98887709e-05 4.56162161e-06 3.06172587e-05
  4.34249414e-05 3.10940159e-05 1.23228047e-05 1.51832910e-05
  2.39097808e-05 1.41078735e-05 2.17590386e-05 2.53568105e-05
  4.6645