In [1]:
import numpy as np
import PIL.Image

import tensorflow as tf
import coremltools as ct



In [2]:
# Target size in pixels; passed to load_image as resize_to=(Width, Height).
Height = 320  
Width = 320

def load_image(path, resize_to=None):
    """Open an image file and return it as a float32 array plus the PIL image.

    Args:
        path: Location of the image file, handed to ``PIL.Image.open``.
        resize_to: Optional ``(Width, Height)`` tuple. When given, the image
            is resized to exactly that size with Lanczos resampling.

    Returns:
        A ``(img_np, img)`` tuple: the pixel data converted to a ``float32``
        numpy array, and the (possibly resized) PIL image it was built from.
    """
    img = PIL.Image.open(path)
    if resize_to is not None:
        # Image.ANTIALIAS was only an alias of LANCZOS and was removed in
        # Pillow 10; LANCZOS selects the same filter on old and new releases.
        img = img.resize(resize_to, PIL.Image.LANCZOS)
    img_np = np.array(img).astype(np.float32)
    return img_np, img

In [3]:
# Load the test image, resized to (Width, Height).
img_np, img = load_image('test_images/image2.jpg', resize_to=(Width, Height))

# Scale by 1/127.5 and shift by -1 (maps 0..255 onto -1..1), then prepend a
# batch axis before handing the array to the model.
model_input = np.expand_dims(img_np / 127.5 - 1.0, axis=0)

model = ct.models.MLModel('MobileDet_4_outputs.mlmodel')
out_dict = model.predict({'image': model_input})

In [4]:
# Run the 2-output model (the one verified with the Xcode model preview).
# This model is fed the PIL image directly rather than a normalised array.

model_2 = ct.models.MLModel('MobileDet.mlmodel')
pil_input = {'image': img}
out_dict_2 = model_2.predict(pil_input)

In [5]:
# check if bounding boxes match: show the 'coordinates' output of the
# 4-output model and of the 2-output model side by side for visual comparison
out_dict['coordinates'], out_dict_2['coordinates']

(array([[[0.35302734, 0.38208008, 0.01686096, 0.02557373],
         [0.4338379 , 0.39282227, 0.01980591, 0.0231781 ],
         [0.06713867, 0.5942383 , 0.02168274, 0.0713501 ],
         [0.21789551, 0.28466797, 0.02227783, 0.05194092],
         [0.39697266, 0.5805664 , 0.01860046, 0.02972412],
         [0.46655273, 0.12683105, 0.06091309, 0.08190918],
         [0.02911377, 0.5961914 , 0.01695251, 0.05224609],
         [0.17797852, 0.8574219 , 0.0453186 , 0.17919922],
         [0.10083008, 0.76171875, 0.03689575, 0.15405273],
         [0.5620117 , 0.42919922, 0.01029205, 0.01647949]]], dtype=float32),
 array([[0.35302734, 0.38208008, 0.01686096, 0.02563477],
        [0.4338379 , 0.39282227, 0.01980591, 0.0231781 ],
        [0.06713867, 0.5942383 , 0.02168274, 0.0713501 ],
        [0.21789551, 0.28466797, 0.02224731, 0.05194092],
        [0.39697266, 0.5805664 , 0.01856995, 0.02978516],
        [0.46655273, 0.12683105, 0.06088257, 0.08190918],
        [0.02911377, 0.5961914 , 0.01695251,

In [6]:
# Reference inference with the original TensorFlow SavedModel

original_model = tf.saved_model.load('mobiledet')
input_nodes = ['image_tensor:0']
output_nodes = [
    'detection_boxes:0',
    'detection_classes:0',
    'detection_scores:0',
    'num_detections:0',
]

# Build a callable that maps the input tensor to the listed output tensors.
p = original_model.prune(input_nodes, output_nodes)

# The resized PIL image as a uint8 tensor, with a leading batch axis added.
img_uint8 = tf.convert_to_tensor(np.asarray(img), dtype=tf.uint8)
img_tensor = tf.expand_dims(img_uint8, 0)
p(img_tensor)

[<tf.Tensor: shape=(1, 10, 4), dtype=float32, numpy=
 array([[[0.36930528, 0.3446542 , 0.3948696 , 0.36154133],
         [0.38126433, 0.42392153, 0.40443414, 0.44371176],
         [0.55859756, 0.05629784, 0.62990713, 0.07799782],
         [0.258843  , 0.20680143, 0.3107981 , 0.22903194],
         [0.56560236, 0.38757455, 0.59533376, 0.40617454],
         [0.08592385, 0.4358106 , 0.16783425, 0.49678382],
         [0.5697846 , 0.02065526, 0.6220425 , 0.03757773],
         [0.26443386, 0.2014876 , 0.30679524, 0.22032481],
         [0.7677408 , 0.15532103, 0.9469773 , 0.20065379],
         [0.6850095 , 0.08234983, 0.8390647 , 0.11924426]]], dtype=float32)>,
 <tf.Tensor: shape=(1, 10), dtype=float32, numpy=array([[38., 38.,  1., 38.,  1., 38.,  1., 38.,  1.,  1.]], dtype=float32)>,
 <tf.Tensor: shape=(1, 10), dtype=float32, numpy=
 array([[0.74776036, 0.7410878 , 0.71043926, 0.7037864 , 0.69625336,
         0.6919317 , 0.68733567, 0.6803323 , 0.6753018 , 0.621736  ]],
       dtype=float32)>

In [7]:
# Class of each box: index of the highest confidence along axis 2.

confidence = out_dict['confidence']
tf.math.argmax(confidence, axis=2)

<tf.Tensor: shape=(1, 10), dtype=int64, numpy=array([[38, 38,  1, 38,  1, 38,  1,  1,  1, 38]])>

In [8]:
# to get bounding boxes in the TensorFlow corner layout
# TensorFlow/TF Lite (y1, x1, y2, x2)
# Core ML (x, y, x_len, y_len), with (x, y) the centre of the box

for box in out_dict['coordinates'][0]:
    # Convert to built-in floats first: arithmetic on NumPy float32 scalars
    # gives different precision and a different printed repr depending on the
    # NumPy version's scalar promotion rules.
    x, y, x_len, y_len = (float(v) for v in box[:4])
    x1 = x - x_len / 2
    y1 = y - y_len / 2
    x2 = x + x_len / 2
    y2 = y + y_len / 2
    print([y1, x1, y2, x2])

[0.369293212890625, 0.34459686279296875, 0.394866943359375, 0.36145782470703125]
[0.38123321533203125, 0.4239349365234375, 0.40441131591796875, 0.4437408447265625]
[0.558563232421875, 0.05629730224609375, 0.629913330078125, 0.07798004150390625]
[0.258697509765625, 0.206756591796875, 0.310638427734375, 0.229034423828125]
[0.565704345703125, 0.38767242431640625, 0.595428466796875, 0.40627288818359375]
[0.08587646484375, 0.43609619140625, 0.16778564453125, 0.49700927734375]
[0.570068359375, 0.02063751220703125, 0.622314453125, 0.03759002685546875]
[0.767822265625, 0.1553192138671875, 0.947021484375, 0.2006378173828125]
[0.6846923828125, 0.0823822021484375, 0.8387451171875, 0.1192779541015625]
[0.42095947265625, 0.5568656921386719, 0.43743896484375, 0.5671577453613281]
