In [None]:
# VGG16 inference in TensorFlow Lite

In [13]:
import tensorflow as tf
import timeit
import sys
import psutil
import time
import numpy as np

from keras.applications.vgg16 import VGG16
from keras.utils import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import decode_predictions

In [14]:
# Number of repetitions passed to timeit for the preprocess, inference and
# postprocess stages; the reported per-stage times are the timeit totals
# divided by this value.
number_of_samples = 1

# returns an ndarray
def preprocess(image):
    """Load an image file and turn it into a single-image batch for VGG16.

    The file is loaded at 224x224, converted to an array, given a leading
    axis of length 1 and run through the Keras VGG16 ``preprocess_input``.

    Args:
        image: Path of the image file, handed to ``load_img``.

    Returns:
        The result of ``preprocess_input`` on the batched array.
    """
    loaded = load_img(image, target_size=(224, 224))
    pixels = img_to_array(loaded)
    # Prepend a batch axis of size 1 in front of the three existing axes.
    batch = pixels.reshape((1,) + pixels.shape)
    return preprocess_input(batch)

# yhat maps output names to arrays; yhat[outputname] is np.array((1,1000), dtype="float32")
def postprocess(yhat, outputname, top=3):
    """Print the highest-scoring ImageNet labels for one model output.

    Args:
        yhat: Mapping from output name to prediction array (the value
            returned by ``forward``).
        outputname: Key in ``yhat`` of the output to decode.
        top: Number of labels to print. Defaults to 3, the value that was
            previously hard-coded.

    Returns:
        None. The decoded entries are written to stdout, one per line.
    """
    scores = yhat[outputname]
    # Convert the probabilities to class labels; decode_predictions yields
    # one list of entries per sample, and only the first sample is printed.
    decoded = decode_predictions(scores, top=top)
    for entry in decoded[0]:
        print(entry)

# returns key1 in {key1: value1, key2: value2 ...}
def get_first_key(dictionary):
    """Return the first key produced when iterating ``dictionary``.

    ``None`` is returned when iteration yields nothing (e.g. an empty dict).
    """
    return next(iter(dictionary), None)

# returns a tuple (runner, inputname, outputname)
def get_runner_and_io_names(interpreter):
    """Return the signature runner plus first input/output names of a model.

    The first signature listed by ``interpreter.get_signature_list()`` is
    used, and only the first entry of its ``'inputs'`` and ``'outputs'``
    lists is reported.

    Args:
        interpreter: Object exposing ``get_signature_list()`` and
            ``get_signature_runner(key)``.

    Returns:
        A tuple ``(runner, inputname, outputname)``.

    Raises:
        ValueError: If the interpreter reports no signatures at all.
    """
    # signature list is of the form {key: {'inputs': [...], 'outputs': [...]}}
    signature_dict = interpreter.get_signature_list()
    if not signature_dict:
        # Fail here with a clear message rather than passing a None key on
        # to get_signature_runner and the dictionary lookup below.
        raise ValueError("model exposes no signatures; cannot build a signature runner")
    first_key = next(iter(signature_dict))
    io_dict = signature_dict[first_key]
    runner = interpreter.get_signature_runner(first_key)
    return (runner, io_dict['inputs'][0], io_dict['outputs'][0])

# tflite based forward inference
def forward(image_array, runner, inputname):
    """Run one forward pass through a signature runner.

    Args:
        image_array: Input to feed to the model.
        runner: Callable that accepts the model inputs as keyword arguments.
        inputname: Name of the input under which ``image_array`` is passed.

    Returns:
        Whatever ``runner`` returns for this input.
    """
    # Pass the input under the caller-supplied name; a hard-coded
    # ``input_1=`` keyword only works for models whose input has that name.
    return runner(**{inputname: image_array})

In [None]:
# Turn on TensorFlow's device-placement logging.
tf.debugging.set_log_device_placement(True)
# Load the TFLite model from disk and allocate its tensors. The benchmark cell
# that follows builds a fresh interpreter under the same name.
interpreter = tf.lite.Interpreter(model_path="vgg16.tflite")
interpreter.allocate_tensors()

In [None]:
# Benchmark one end-to-end run. Tensor allocation, preprocessing, inference and
# postprocessing are timed separately, and resident memory (RSS, in MB) is
# sampled before/after allocate_tensors() and once more at the end.
# perf_counter() is monotonic, so the elapsed time cannot be skewed by system
# clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()

# A fresh interpreter is created so that allocate_tensors() is measured on an
# interpreter that has not been allocated yet.
interpreter = tf.lite.Interpreter(model_path="vgg16.tflite")
mem_occupied_bfr_alloc = psutil.Process().memory_info().rss / (1024 * 1024)
allocation_time = timeit.timeit(interpreter.allocate_tensors, number=1)
mem_occupied_aft_alloc = psutil.Process().memory_info().rss / (1024 * 1024)
runner, inputname, outputname = get_runner_and_io_names(interpreter)
preprocess_time = timeit.timeit(lambda: preprocess('images/mug.jpg'), number=number_of_samples)

# Each stage is timed first and then run once more to obtain the value the
# next stage consumes (timeit discards return values).
image_array = preprocess('images/mug.jpg')
inference_time = timeit.timeit(lambda: forward(image_array, runner, inputname), number=number_of_samples)
output = forward(image_array, runner, inputname)
# postprocess prints its result, so the timed runs also write the labels to
# the cell output before the final call below.
post_time = timeit.timeit(lambda: postprocess(output, outputname), number=number_of_samples)
postprocess(output, outputname)
resident_mem_occupied = psutil.Process().memory_info().rss / (1024 * 1024)

end_time = time.perf_counter()

print(f"Number of samples: {number_of_samples}")
print(f"Run time: {end_time - start_time}s")
print()
print("== Time ==")
print(f"allocate_tensors: {allocation_time}s")
print(f"preprocess: {preprocess_time/number_of_samples}s")
print(f"inference: {inference_time/number_of_samples}s")
print(f"postprocess: {post_time/number_of_samples}s")
print()
print("== Memory ==")
print(f"total memory occupied: {resident_mem_occupied} MB")
print(f"memory occupied before alloc: {mem_occupied_bfr_alloc} MB")
print(f"memory occupied after alloc: {mem_occupied_aft_alloc} MB")
# NOTE(review): this delta spans allocate_tensors() only; the Interpreter is
# constructed before the first RSS sample. Confirm the label matches intent.
print(f"memory occupied by model: {mem_occupied_aft_alloc - mem_occupied_bfr_alloc} MB")

In [None]:
# Single inference run without timing. Relies on `interpreter` having been
# created (and its tensors allocated) by an earlier cell.
runner, inputname, outputname = get_runner_and_io_names(interpreter)
image_array = preprocess('images/mug.jpg')
output = forward(image_array, runner, inputname)
# Print the decoded labels for the image.
postprocess(output, outputname)