In [None]:
import urllib, os, sys, zipfile
from os.path import dirname

import numpy as np

import tensorflow as tf
from tensorflow.core.framework import graph_pb2

In [None]:
# Load the frozen graph: read the serialized GraphDef protobuf from disk,
# parse it, and import it into a fresh TensorFlow graph.
model_path = "./mobilenet_ssd_iot2040/graph.pb"
with open(model_path, 'rb') as f:
    print("Reading model from disk...")
    serialized = f.read()

tf.reset_default_graph()
original_gdef = tf.GraphDef()
original_gdef.ParseFromString(serialized)

g = tf.Graph()
with g.as_default():
    print("Importing model in TF...")
    # name='' imports the nodes without adding a name-scope prefix
    tf.import_graph_def(original_gdef, name='')

print("Done.")

Strip the unused subgraphs and save the result as another frozen TF model

In [None]:
from tensorflow.python.tools import strip_unused_lib
from tensorflow.python.framework import dtypes
from tensorflow.python.platform import gfile

In [None]:
# Keep only the subgraph between the preprocessed-image node and the two
# concat outputs; the cut-off input is replaced by a float32 placeholder.
feature_input_nodes = ['Preprocessor/sub']
feature_output_nodes = ['concat', 'concat_1']
placeholder_dtype_enum = dtypes.float32.as_datatype_enum

gdef = strip_unused_lib.strip_unused(
    input_graph_def=original_gdef,
    input_node_names=feature_input_nodes,
    output_node_names=feature_output_nodes,
    placeholder_type_enum=placeholder_dtype_enum,
)

Save the feature extractor to an output file

In [None]:
# Serialize the stripped GraphDef and write it next to the original model.
frozen_model_file = "./mobilenet_ssd_iot2040/feature_extractor.pb"
serialized_feature_extractor = gdef.SerializeToString()
with gfile.GFile(frozen_model_file, "wb") as f:
    f.write(serialized_feature_extractor)

Now we have a TF model ready to be converted to CoreML

In [None]:
import tfcoreml

# Convert the frozen feature extractor to a CoreML model and write the
# .mlmodel file. This may take a while.
#
# Image-input options that were left disabled in this notebook:
#     red_bias = -1,
#     green_bias = -1,
#     blue_bias = -1,
#     image_scale = 2.0/255.0,
#     image_input_names = "Preprocessor/sub:0",
conversion_kwargs = {
    "tf_model_path": frozen_model_file,
    "mlmodel_path": "./ssd_mobilenet_iot2040.mlmodel",
    # batch size is 1
    "input_name_shape_dict": {"Preprocessor/sub:0": [1, 300, 300, 3]},
    "output_feature_names": ['concat:0', 'concat_1:0'],
}
coreml_model = tfcoreml.convert(**conversion_kwargs)

Now that we have converted the model to CoreML, we can test its numerical correctness by comparing it with the TensorFlow model.

In [None]:
import PIL.Image
import requests
from io import BytesIO
from matplotlib.pyplot import imshow
# Open the test image and display it inline.
# NOTE(review): this directory ("mobilenet_ssd_iot2040_v1") differs from the
# "./mobilenet_ssd_iot2040" directory used for the model files - confirm.
test_image_path = "mobilenet_ssd_iot2040_v1/test_image.jpg"
img = PIL.Image.open(test_image_path)
imshow(np.asarray(img))

In [None]:
# Preprocess the image: resize it to the 300x300 network input and rescale the
# pixel values to [-1, 1].
# LANCZOS is the filter the old ANTIALIAS alias pointed to; the alias itself
# was deprecated and later removed from Pillow, so use the real name.
img = img.resize((300, 300), PIL.Image.LANCZOS)
img_array = np.array(img).astype(np.float32) * 2.0 / 255 - 1
batch_img_array = img_array[None, :, :, :]  # prepend the batch axis

# Evaluate TF
tf.reset_default_graph()
# tf.import_graph_def() returns None unless return_elements is passed, so the
# graph is created explicitly and the stripped model is imported into it.
g = tf.Graph()
with g.as_default():
    # gdef - stripped model; the "import" name scope is what the
    # "import/" prefixes below refer to
    tf.import_graph_def(gdef, name="import")

tf_input_name = 'Preprocessor/sub:0'
tf_output_names = ['concat:0', 'concat_1:0']
# concat:0 are the bounding-box encodings of the 1917 anchor boxes
# concat_1:0 are the confidence scores of 91 classes of anchor boxes
with tf.Session(graph=g) as sess:
    image_input_tensor = sess.graph.get_tensor_by_name("import/" + tf_input_name)
    tf_output_tensors = [sess.graph.get_tensor_by_name("import/" + output_name)
                         for output_name in tf_output_names]
    tf_output_values = sess.run(
        tf_output_tensors,
        feed_dict={image_input_tensor: batch_img_array}
    )
    tf_box_encodings, tf_scores = tf_output_values

In [None]:
# Show the shapes of the two TensorFlow outputs (box encodings, then scores)
tf_box_shape, tf_score_shape = tf_box_encodings.shape, tf_scores.shape
print(tf_box_shape, tf_score_shape)

In [None]:
# Display the slice of the TF scores at index 0 of the first two axes
# (presumably the per-class scores of the first anchor box - confirm against
# the printed shape).
tf_scores[0, 0, :]

Now we evaluate the CoreML model and compare its results against those of the TensorFlow model. CoreML uses 5D arrays to represent rank-1 to rank-5 tensors. The 5 axes are in the order (S,B,C,H,W), where S is the sequence length, B is the batch size, C is the number of channels, H is the height and W is the width. This data layout usually differs from TensorFlow's default layout, where a rank-4 tensor for convolutional nets usually uses the (B,H,W,C) layout. To make a comparison, one of the results must be transposed.

In [None]:
import coremltools

def _to_coreml_name(tf_tensor_name):
    """Turn a TF tensor name into the CoreML feature name (':' and '/' become '__')."""
    return tf_tensor_name.replace(':', '__').replace('/', '__')


# CoreML expects an input of shape [1, 1, 3, 300, 300]: move the channel axis
# to the front, then prepend two singleton axes.
print("Preparing image input for CoreML...")
channels_first_img = img_array.transpose(2, 0, 1)
img_array_coreml = channels_first_img[np.newaxis, np.newaxis, ...]
print("Importing model into CoreML...")
mlmodel = coremltools.models.MLModel("./ssd_mobilenet_iot2040.mlmodel")

# Note the '__0' suffix: ':0' is rewritten to '__0' so that the Swift/Obj-C
# code generated for the MLModel is semantically correct.
coreml_input_name = _to_coreml_name(tf_input_name)
coreml_output_names = list(map(_to_coreml_name, tf_output_names))
coreml_input = {coreml_input_name: img_array_coreml}

# With useCPUOnly == True the relative error should be around 0.001.
# With useCPUOnly == False on GPU-enabled devices, larger relative errors are
# expected because lower-precision arithmetic is used.
print("Predicting with CoreML...")
coreml_outputs_dict = mlmodel.predict(coreml_input, useCPUOnly=True)
coreml_outputs = []
for feature_name in coreml_output_names:
    coreml_outputs.append(coreml_outputs_dict[feature_name])
coreml_box_encodings, coreml_scores = coreml_outputs
print("Done")

In [None]:
# Show the CoreML output shapes, then the scores at index 0 of every axis
# except the third.
coreml_box_shape, coreml_score_shape = coreml_box_encodings.shape, coreml_scores.shape
print(coreml_box_shape, coreml_score_shape)
coreml_first_scores = coreml_scores[0, 0, :, 0, 0]
print(coreml_first_scores)

In [None]:
# Now we compare the differences of two results
def max_relative_error(x, y):
    """Return the largest element-wise relative error between ``x`` and ``y``.

    The error of each element is ``|x - y| / max(|x|, |y|, 1)``. Clamping the
    denominator at 1 means that differences between small values are
    reported as absolute errors instead of being blown up.

    Args:
        x, y: Arrays (or array-likes) of broadcast-compatible shapes.

    Returns:
        The maximum relative error over all elements, as a NumPy scalar.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # Use magnitudes: with signed values a pair of large negative numbers
    # would produce a negative maximum, get clamped to 1, and the result
    # would silently become an absolute error.
    den = np.maximum(np.abs(x), np.abs(y))
    den = np.maximum(den, 1)
    rel_err = np.abs(x - y) / den
    return np.max(rel_err)

# Drop the singleton axes from every output, then swap the two remaining axes
# of the TensorFlow results so that they line up with the CoreML layout.
coreml_boxes_2d = coreml_box_encodings.squeeze()
tf_boxes_2d = tf_box_encodings.squeeze().transpose(1, 0)
rel_error_box = max_relative_error(coreml_boxes_2d, tf_boxes_2d)

coreml_scores_2d = coreml_scores.squeeze()
tf_scores_2d = tf_scores.squeeze().transpose(1, 0)
rel_error_score = max_relative_error(coreml_scores_2d, tf_scores_2d)

In [None]:
# Report the maximum relative error of each output
for message, value in (
    ('Max relative error on box encoding: %f', rel_error_box),
    ('Max relative error on scores: %f', rel_error_score),
):
    print(message % value)