## Imports

Import everything needed for TensorFlow and Core ML.

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __builtin__ import any as b_any

import math
import os
os.environ["CUDA_VISIBLE_DEVICES"]=""
import numpy as np
from PIL import Image

import tensorflow as tf

import configuration
import inference_wrapper
import sys
sys.path.insert(0, 'im2txt/inference_utils')
sys.path.insert(0, 'im2txt/ops')
import caption_generator
import image_processing
import vocabulary

import urllib, os, sys, zipfile
from os.path import dirname
from tensorflow.core.framework import graph_pb2
from tensorflow.python.tools.freeze_graph import freeze_graph
from tensorflow.python.tools import strip_unused_lib
from tensorflow.python.framework import dtypes
from tensorflow.python.platform import gfile
import tfcoreml
import configuration
from coremltools.proto import NeuralNetwork_pb2

In [2]:
# Turn on debugging on error: with this enabled, IPython drops into the pdb
# post-mortem debugger automatically whenever a cell raises an exception.
%pdb on

Automatic pdb calling has been turned ON


## Create the models

Create the TensorFlow model and strip all unused nodes.

In [3]:
# Checkpoint that the graph variables are restored from.
checkpoint_file = './trainlogIncNEW/model.ckpt'

# Intermediate graph: written after stripping unused nodes, read by the freeze
# step. It is kept at a different path from frozen_model_file on purpose, so
# that the freeze step never overwrites its own input and a failed freeze
# cannot leave an unfrozen graph sitting under the frozen model's name.
pre_frozen_model_file = './pre_frozen_model_textgenNEW.pb'

# Final graph with the checkpoint values baked in; this is what gets converted.
frozen_model_file = './frozen_model_textgenNEW.pb'

# Which nodes we want to input for the network
# Use ['image_feed'] for just Memeception
input_node_names = ['seq_embeddings', 'lstm/state_feed']

# Which nodes we want to output from the network
# Use ['lstm/initial_state'] for just Memeception
output_node_names = ['softmax', 'lstm/state']

# Depth of the beam search. It is also used as the middle dimension of the
# input shapes handed to the CoreML converter.
beam_size = 2

In [4]:
# Assemble the inference model inside a dedicated graph. Building it also
# yields `restore_fn`, which is later called with a session to load the
# checkpoint. Once construction is done the graph is finalized so that no
# further ops can be added to it.
g = tf.Graph()
with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(
        configuration.ModelConfig(),
        checkpoint_file,
    )
g.finalize()

INFO:tensorflow:Building model.
About to decide if splitting
splitting mat mul
(1, 812)
Tensor("lstm/basic_lstm_cell/concat:0", shape=(1, 812), dtype=float32)
(812, 2048)
<tf.Variable 'lstm/basic_lstm_cell/kernel:0' shape=(812, 2048) dtype=float32_ref>
___
FIRST (1, 2048)
{'num_or_size_splits': 4, 'value': <tf.Tensor 'lstm/basic_lstm_cell/BiasAdd:0' shape=(1, 2048) dtype=float32>, 'axis': 1}
new_h Tensor("lstm/basic_lstm_cell/Mul_2:0", shape=(1, 512), dtype=float32)
new_state LSTMStateTuple(c=<tf.Tensor 'lstm/basic_lstm_cell/Add_1:0' shape=(1, 512) dtype=float32>, h=<tf.Tensor 'lstm/basic_lstm_cell/Mul_2:0' shape=(1, 512) dtype=float32>)
About to decide if splitting
splitting mat mul
(2, 812)
Tensor("lstm/basic_lstm_cell/Squeeze_8:0", shape=(2, 812), dtype=float32)
(812, 2048)
<tf.Variable 'lstm/basic_lstm_cell/kernel:0' shape=(812, 2048) dtype=float32_ref>
___
('NUMSPLITS', 2)
{'num_or_size_splits': 2, 'value': <tf.Tensor 'lstm/basic_lstm_cell/Squeeze_8:0' shape=(2, 812) dtype=float32

In [5]:
# Serialize the graph to disk in binary form, then read it straight back into
# a standalone GraphDef that the stripping step can work on.

tf_model_path = './log/pre_graph_textgenNEW.pb'
# Directory and file name are taken from tf_model_path so the location that is
# written and the location that is read cannot drift apart.
tf.train.write_graph(
    g,
    os.path.dirname(tf_model_path),
    os.path.basename(tf_model_path),
    as_text=False,
)

with open(tf_model_path, 'rb') as f:
    serialized = f.read()

tf.reset_default_graph()
original_gdef = tf.GraphDef()
# Kept as the final expression of the cell: its return value is what the
# notebook displays as the cell output.
original_gdef.ParseFromString(serialized)

47087360

In [6]:
# Reduce the graph to what lies between the chosen input and output nodes,
# then persist the result for the freeze step.

gdef = strip_unused_lib.strip_unused(
    input_graph_def=original_gdef,
    input_node_names=input_node_names,
    output_node_names=output_node_names,
    placeholder_type_enum=dtypes.float32.as_datatype_enum,
)

# Binary protobuf on disk; this file is the input of the freeze step.
with gfile.GFile(pre_frozen_model_file, 'wb') as f:
    f.write(gdef.SerializeToString())

In [7]:
# Bake the checkpoint values into the stripped graph so the result is a
# single self-contained file.

freeze_graph(
    # What to read ...
    input_graph=pre_frozen_model_file,
    input_binary=True,
    input_saver='',
    input_checkpoint=checkpoint_file,
    # ... what to keep (passed as one comma-separated string) ...
    output_node_names=','.join(output_node_names),
    restore_op_name='save/restore_all',
    filename_tensor_name='save/Const:0',
    initializer_nodes='',
    clear_devices=True,
    # ... and where to write the frozen result.
    output_graph=frozen_model_file,
)

INFO:tensorflow:Restoring parameters from ./trainlogIncNEW/model.ckpt
INFO:tensorflow:Froze 4 variables.
Converted 4 variables to const ops.


## Verify the model

Check that the model produces sensible captions for the *One does not simply* meme.

In [8]:
# Instantiate the model configuration and read the vocabulary that is used to
# map predicted word ids back to words.

config = configuration.ModelConfig()

vocab_file = 'vocab4.txt'
vocab = vocabulary.Vocabulary(vocab_file)

INFO:tensorflow:Initializing vocabulary from file: vocab4.txt
INFO:tensorflow:Created vocabulary with 38521 words


In [9]:
# Generate captions on a hard-coded image.
#
# Each image is resized to 299x299 and rescaled from [0, 255] to [-1, 1]
# before it is handed to the beam search. The search is repeated several
# times per image and every caption it returns is printed.

NUM_CAPTION_RUNS = 5  # how many times the beam search is repeated per image

with tf.Session(graph=g) as sess:
    restore_fn(sess)
    generator = caption_generator.CaptionGenerator(
        model, vocab, beam_size=beam_size)
    for filename in ['memes/one-does-not-simply.jpg']:
        with tf.gfile.GFile(filename, "rb") as f:
            # Force three colour channels so that grayscale, RGBA or CMYK
            # files still end up as a (299, 299, 3) array.
            image = Image.open(f).convert('RGB')
            image = ((np.array(image.resize((299, 299))) / 255.0) - 0.5) * 2.0
        for run in range(NUM_CAPTION_RUNS):
            captions = generator.beam_search(sess, image)
            for caption in captions:
                # The first and last ids are skipped when printing
                # (presumably the start/end markers -- TODO confirm).
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(sentence)

INFO:tensorflow:Loading model from checkpoint: ./trainlogIncNEW/model.ckpt
INFO:tensorflow:Restoring parameters from ./trainlogIncNEW/model.ckpt
INFO:tensorflow:Successfully loaded checkpoint: model.ckpt
one does not simply take the top of the head
one does not simply take the top of the head to mordor
one does not simply go to the toilet
one does not simply go to the canteen
one does not simply get a letter
one does not simply get a straight guy
one does not simply put a trojan on his pc
one does not simply put a trojan on his head
one does not simply walk into mordor at 7
one does not simply walk into mordor at the bar


## Convert the model to CoreML

Specify the input tensor shapes and the output tensors of the graph to be used for the conversion.

In [10]:
# Destination of the converted model.
coreml_model_file = './Textgen_NEW.mlmodel'

# Shape of every input tensor, keyed by tensor name. The middle dimension
# follows the beam size.
# If using Memeception, add 'image_feed:0': [299, 299, 3]
input_tensor_shapes = {
    'lstm/state_feed:0': [1, beam_size, 1024],
    'seq_embeddings:0': [1, beam_size, 300],
}

In [11]:
# Tensor names are the node names with the output index ':0' appended.
output_tensor_names = ['%s:0' % node for node in output_node_names]

In [12]:
# Convert the frozen TensorFlow graph into a CoreML model file. The returned
# model object is kept so that predictions can be run on it afterwards.
coreml_model = tfcoreml.convert(
    tf_model_path=frozen_model_file,
    mlmodel_path=coreml_model_file,
    input_name_shape_dict=input_tensor_shapes,
    output_feature_names=output_tensor_names,
    add_custom_layers=True,
)

Shapes not found for 69 tensors. Executing graph to determine shapes. 
Automatic shape interpretation succeeded for input blob lstm/state_feed:0
1/107: Analysing op name: seq_embeddings ( type:  Placeholder )
Skipping name of placeholder
2/107: Analysing op name: lstm/basic_lstm_cell/kernel ( type:  Const )
3/107: Analysing op name: lstm/basic_lstm_cell/kernel/read ( type:  Identity )
4/107: Analysing op name: lstm/basic_lstm_cell/bias ( type:  Const )
5/107: Analysing op name: lstm/basic_lstm_cell/bias/read ( type:  Identity )
6/107: Analysing op name: lstm/state_feed ( type:  Placeholder )
Skipping name of placeholder
7/107: Analysing op name: lstm/ExpandDims/dim ( type:  Const )
8/107: Analysing op name: lstm/ExpandDims ( type:  ExpandDims )
9/107: Analysing op name: lstm/split/split_dim ( type:  Const )
10/107: Analysing op name: lstm/split ( type:  Split )
11/107: Analysing op name: lstm/Squeeze ( type:  Squeeze )
12/107: Analysing op name: lstm/Squeeze_1 ( type:  Squeeze )
13/107

## Test the model

Run predictable, randomly seeded inputs through both models and see where the disparities are.

In [13]:
# Build identical random inputs for the TensorFlow and the CoreML model.
#
# A fixed seed and a local generator make the inputs reproducible across
# kernel restarts, so the two models can be compared on the same numbers
# every time without touching NumPy's global random state.
RANDOM_SEED = 0
rng = np.random.RandomState(RANDOM_SEED)

# One random vector per input, duplicated so that both beam entries carry the
# same values (the duplication is hard-coded for a beam of two).
# The *_tf arrays have shape (1, 2, dim); the *_ml arrays hold the same
# values with shape (dim, 1, 2).
seq_rand = rng.rand(300)
seq_embeddings_tf = np.array([[seq_rand, seq_rand]])
seq_embeddings_ml = np.array([[[sr, sr]] for sr in seq_rand])

state_rand = rng.rand(1024)
state_feed_tf = np.array([[state_rand, state_rand]])
state_feed_ml = np.array([[[sr, sr]] for sr in state_rand])

In [14]:
# Run the converted model on the random inputs, CPU only. The feature names
# used here look like the TensorFlow tensor names with '/' and ':' replaced
# by '__' -- NOTE(review): confirm against the converter output.
coreml_inputs = {
    'lstm__state_feed__0': state_feed_ml,
    'seq_embeddings__0': seq_embeddings_ml,
}
coreml_output = coreml_model.predict(coreml_inputs, useCPUOnly=True)
# One-off checks that were used while debugging:
# print(coreml_output['lstm__state__0'].shape)
# print(coreml_output['softmax__0'].shape)
# print(coreml_output['softmax__0'].reshape(38521, 1, 2))
# print(coreml_output)
def print_ml(ml):
    """Print every entry of a name -> array mapping in sorted key order.

    For each key three lines are printed: the key itself, the ``shape`` of
    the value stored under it, and the value.
    """
    ordered_keys = sorted(ml.keys())
    for key in ordered_keys:
        entry = ml[key]
        print(key)
        print(entry.shape)
        print(entry)
# Dump name, shape and values of every CoreML output for the comparison
print_ml(coreml_output)

lstm__state__0
(812, 1, 1024, 1, 2)
[[[[[ 1.28964019  0.9222219 ]]

   [[ 1.15617633  0.58406985]]

   [[ 0.49004108  0.16452032]]

   ...

   [[ 0.05512157 -0.04562627]]

   [[ 0.26553747  0.14197578]]

   [[ 0.06053435  0.03637091]]]]



 [[[[ 0.9222219   0.9222219 ]]

   [[ 0.58406985  0.58406985]]

   [[ 0.16452032  0.16452032]]

   ...

   [[-0.04562627 -0.04562627]]

   [[ 0.14197578  0.14197578]]

   [[ 0.03637091  0.03637091]]]]



 [[[[ 0.9222219   0.9222219 ]]

   [[ 0.58406985  0.58406985]]

   [[ 0.16452032  0.16452032]]

   ...

   [[-0.04562627 -0.04562627]]

   [[ 0.14197578  0.14197578]]

   [[ 0.03637091  0.03637091]]]]



 ...



 [[[[ 0.9222219   0.9222219 ]]

   [[ 0.58406985  0.58406985]]

   [[ 0.16452032  0.16452032]]

   ...

   [[-0.04562627 -0.04562627]]

   [[ 0.14197578  0.14197578]]

   [[ 0.03637091  0.03637091]]]]



 [[[[ 0.9222219   0.9222219 ]]

   [[ 0.58406985  0.58406985]]

   [[ 0.16452032  0.16452032]]

   ...

   [[-0.04562627 -0.04562627]]

   [

In [19]:
with tf.Session(graph=g) as sess:
    # Restore the checkpoint into this session before running anything.
    restore_fn(sess)
    # NOTE: despite the name, these are the tensors that get *fetched*.
    input_names = ['lstm/state:0', 'softmax:0']
    output_values = sess.run(
        fetches=input_names,
        feed_dict={
            "seq_embeddings:0": seq_embeddings_tf,
            "lstm/state_feed:0": state_feed_tf,
            # Not fed in this run:
            #"input_feed:0": input_feed,
            #"seq_embedding/embedding_map:0": self.embedding_map
        })
    # Report the fetched tensors in alphabetical order of their names.
    for index in sorted(range(len(input_names)),
                        key=lambda position: input_names[position]):
        value = input_names[index]
        print(value)
        print(output_values[index].shape)
        print(output_values[index])

INFO:tensorflow:Loading model from checkpoint: ./trainlogIncNEW/model.ckpt
INFO:tensorflow:Restoring parameters from ./trainlogIncNEW/model.ckpt
INFO:tensorflow:Successfully loaded checkpoint: model.ckpt
lstm/state:0
(1, 2, 1024)
[[[ 1.1081882   0.13651441 -0.8467241  ...  0.01476664  0.15899816
    0.591206  ]
  [ 1.1081882   0.13651441 -0.8467241  ...  0.01476664  0.15899816
    0.591206  ]]]
softmax:0
(2, 1, 38521)
[[[1.5861393e-05 8.5379388e-03 6.5156026e-03 ... 6.5134992e-03
   9.9487688e-06 6.8167374e-02]]

 [[1.5861393e-05 8.5379388e-03 6.5156026e-03 ... 6.5134992e-03
   9.9487688e-06 6.8167374e-02]]]
