## Imports

Import the modules needed for TensorFlow and Core ML.

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __builtin__ import any as b_any

import math
import os
os.environ["CUDA_VISIBLE_DEVICES"]=""
import numpy as np
from PIL import Image

import tensorflow as tf

import configuration
import inference_wrapper
import sys
sys.path.insert(0, 'im2txt/inference_utils')
sys.path.insert(0, 'im2txt/ops')
import caption_generator
import image_processing
import vocabulary

import urllib, os, sys, zipfile
from os.path import dirname
from tensorflow.core.framework import graph_pb2
from tensorflow.python.tools.freeze_graph import freeze_graph
from tensorflow.python.tools import strip_unused_lib
from tensorflow.python.framework import dtypes
from tensorflow.python.platform import gfile
import tfcoreml
import configuration
from coremltools.proto import NeuralNetwork_pb2

In [2]:
# Turn OFF automatic pdb invocation on uncaught exceptions
%pdb off

Automatic pdb calling has been turned OFF


## Create the models

Create the Tensorflow model and strip all unused nodes

In [3]:
# Checkpoint whose variables get frozen into the graph.
checkpoint_file = './trainlogIncNEW/model.ckpt-1000000'
# Stripped (not yet frozen) GraphDef. Deliberately a different file than the
# frozen model so that freezing never reads from and overwrites the same path.
pre_frozen_model_file = './pre_frozen_model_textgenCUSTOM.pb'
# Final frozen GraphDef; this is the file handed to the CoreML converter.
frozen_model_file = './frozen_model_textgenCUSTOM.pb'

# Which nodes we want to input for the network
# Use ['image_feed'] for just Memeception
input_node_names = ['seq_embeddings', 'lstm/state_feed']

# Which nodes we want to output from the network
# Use ['lstm/initial_state'] for just Memeception
output_node_names = ['softmax', 'lstm/state']

# Set the depth of the beam search
beam_size = 2

In [4]:
# Construct the inference graph inside its own tf.Graph.

g = tf.Graph()
with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    model_config = configuration.ModelConfig()
    # restore_fn is called later with a session to load the checkpoint weights.
    restore_fn = model.build_graph_from_config(model_config, checkpoint_file)
# No more ops may be added to the graph after this point.
g.finalize()

INFO:tensorflow:Building model.
About to decide if splitting
FIRST (1, 2048)
{'num_or_size_splits': 4, 'value': <tf.Tensor 'lstm/basic_lstm_cell/BiasAdd:0' shape=(1, 2048) dtype=float32>, 'axis': 1}
new_h Tensor("lstm/basic_lstm_cell/Mul_2:0", shape=(1, 512), dtype=float32)
new_state LSTMStateTuple(c=<tf.Tensor 'lstm/basic_lstm_cell/Add_1:0' shape=(1, 512) dtype=float32>, h=<tf.Tensor 'lstm/basic_lstm_cell/Mul_2:0' shape=(1, 512) dtype=float32>)
About to decide if splitting
SECOND (2, 2048)
{'num_or_size_splits': 4, 'value': <tf.Tensor 'lstm/basic_lstm_cell/BiasAdd_1:0' shape=(2, 2048) dtype=float32>, 'axis': 1}
new_h Tensor("lstm/basic_lstm_cell/Mul_5:0", shape=(1, 2, 512), dtype=float32)
new_state LSTMStateTuple(c=<tf.Tensor 'lstm/basic_lstm_cell/Add_3:0' shape=(1, 2, 512) dtype=float32>, h=<tf.Tensor 'lstm/basic_lstm_cell/Mul_5:0' shape=(1, 2, 512) dtype=float32>)
lstm_outputs Tensor("lstm/basic_lstm_cell/Mul_5:0", shape=(1, 2, 512), dtype=float32)
state_output LSTMStateTuple(c=<tf.

In [5]:
# Serialize the inference graph to disk, then read it back as a GraphDef

tf_model_path = './log/pre_graph_textgenCUSTOM.pb'
# Derive directory and file name from the single path above.
graph_dir, graph_name = os.path.split(tf_model_path)
tf.train.write_graph(g, graph_dir, graph_name, as_text=False)

with open(tf_model_path, 'rb') as graph_file:
    serialized = graph_file.read()

tf.reset_default_graph()
original_gdef = tf.GraphDef()
# Last expression of the cell: its return value is what the cell displays.
original_gdef.ParseFromString(serialized)

47082275

In [6]:
# Keep only the sub-graph between the chosen input and output nodes,
# then store the result on disk.

float_placeholder_enum = dtypes.float32.as_datatype_enum
gdef = strip_unused_lib.strip_unused(
    input_graph_def=original_gdef,
    input_node_names=input_node_names,
    output_node_names=output_node_names,
    placeholder_type_enum=float_placeholder_enum,
)
# Binary serialization of the stripped GraphDef.
with gfile.GFile(pre_frozen_model_file, 'wb') as stripped_file:
    stripped_file.write(gdef.SerializeToString())

In [7]:
# Bake the checkpoint's variable values into the stripped graph

output_nodes_csv = ','.join(output_node_names)
freeze_graph(
    # What to read.
    input_graph=pre_frozen_model_file,
    input_binary=True,
    input_saver='',
    input_checkpoint=checkpoint_file,
    # What to keep and where to write it.
    output_node_names=output_nodes_csv,
    output_graph=frozen_model_file,
    # Saver-related node names and housekeeping flags.
    restore_op_name='save/restore_all',
    filename_tensor_name='save/Const:0',
    clear_devices=True,
    initializer_nodes='',
)

INFO:tensorflow:Restoring parameters from ./trainlogIncNEW/model.ckpt-1000000
INFO:tensorflow:Froze 4 variables.
Converted 4 variables to const ops.


## Verify the model

Check that the restored model produces plausible captions for a sample meme image (the cell below uses `memes/advice-god.jpg`).

In [8]:
# Configure the model and load the vocab

# Model configuration object (same class that was passed to build_graph_from_config).
config = configuration.ModelConfig()

# Vocabulary file; the resulting object is used below to map word ids back to words.
vocab_file ='vocab4.txt'
vocab = vocabulary.Vocabulary(vocab_file)

INFO:tensorflow:Initializing vocabulary from file: vocab4.txt
INFO:tensorflow:Created vocabulary with 38521 words


In [9]:
# Generate captions on a hard-coded image

num_rounds = 50  # how many times beam search is repeated for each image
image_files = ['memes/advice-god.jpg']

with tf.Session(graph=g) as sess:
    # Load the checkpoint weights into this session.
    restore_fn(sess)
    generator = caption_generator.CaptionGenerator(
        model, vocab, beam_size=beam_size)
    for filename in image_files:
        with tf.gfile.GFile(filename, "rb") as f:
            # Force three channels so grayscale / RGBA / CMYK files still yield
            # a (299, 299, 3) array; decoding happens while the file is open.
            rgb = Image.open(f).convert('RGB').resize((299, 299))
            # Rescale pixel values from [0, 255] to [-1, 1].
            image = ((np.array(rgb) / 255.0) - 0.5) * 2.0
        for _ in range(num_rounds):
            captions = generator.beam_search(sess, image)
            for caption in captions:
                # Skip the first and last ids of each sentence
                # (presumably the start/end tokens - confirm against the vocabulary).
                words = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                print(" ".join(words))

INFO:tensorflow:Loading model from checkpoint: ./trainlogIncNEW/model.ckpt-1000000
INFO:tensorflow:Restoring parameters from ./trainlogIncNEW/model.ckpt-1000000
INFO:tensorflow:Successfully loaded checkpoint: model.ckpt-1000000
god is the biggest badass
god is the biggest badass of the world
creates all the male character in the workplace
creates all the male character who wants to be able to be able to know what to do


KeyboardInterrupt: 

## Convert the model to CoreML

Specify the input tensor shapes, the output tensors to keep from the graph, and the path of the converted Core ML model.

In [10]:
# Define basic shapes
# If using Memeception, add 'image_feed:0': [299, 299, 3]
# Keys are tensor names (input node name + ':0'); the middle dimension is the beam size.
# NOTE(review): 300 is presumably the word-embedding size and 1024 the
# concatenated LSTM c/h state (2 x 512, going by the graph-building log) - confirm.
input_tensor_shapes = {
    'seq_embeddings:0': [1, beam_size, 300],
    'lstm/state_feed:0': [1, beam_size, 1024],
}

# Path the converted CoreML model is written to.
coreml_model_file = './Textgen_CUSTOM.mlmodel'

In [11]:
# Tensor names are the node names with the output index appended, e.g. 'softmax:0'.
output_tensor_names = ['{}:0'.format(node_name) for node_name in output_node_names]

In [27]:
# Weight matrices that convert_matmul embeds into the custom CoreML layers.
# NOTE(review): none of the visible cells writes 'weightLSTM' / 'weightFully';
# the files must already exist in the working directory - document how they are exported.
weightLSTM = np.loadtxt('weightLSTM')
weightFully = np.loadtxt('weightFully')

In [30]:

def convert_matmul(**kwargs):
    """Emit a CoreML custom layer (class name "MatMul") for a TensorFlow op.

    Intended to be registered through ``custom_conversion_functions`` of
    ``tfcoreml.convert``. The weights stored in the layer come from the
    module-level arrays ``weightLSTM`` (for the op named
    ``lstm/basic_lstm_cell/LSTMmatmul2``) or ``weightFully`` (for any other
    op); each row of the chosen array becomes one repeated ``weights`` entry
    of the custom layer parameters.

    Args:
        **kwargs: must contain
            ``op``: the op being converted; its ``name``, ``inputs[0].name``
                and ``outputs[0].name`` are read.
            ``nn_builder``: the network builder whose ``add_custom`` is called.
            Any other keyword (e.g. ``constant_inputs``) is accepted and ignored.

    Returns:
        None. The layer is added to ``nn_builder`` as a side effect.
    """
    tf_op = kwargs["op"]
    coreml_nn_builder = kwargs["nn_builder"]

    # Pick the weight matrix by op name; everything except the LSTM matmul
    # falls back to the fully-connected weights.
    if tf_op.name == 'lstm/basic_lstm_cell/LSTMmatmul2':
        W = weightLSTM
        print('LSTM')
    else:
        W = weightFully
        print('Fully')

    params = NeuralNetwork_pb2.CustomLayerParams()

    # The name of the Swift or Obj-C class that implements this layer.
    params.className = "MatMul"

    # The description is shown in Xcode's mlmodel viewer.
    params.description = "A fancy new matmul"

    # One repeated weight field per row of W.
    for weightvec in W:
        W_as_weights = params.weights.add()
        W_as_weights.floatValue.extend(map(float, weightvec))

    coreml_nn_builder.add_custom(name=tf_op.name,
                                 input_names=[tf_op.inputs[0].name],
                                 output_names=[tf_op.outputs[0].name],
                                 custom_proto_spec=params)


# Both custom matmul ops are routed through the same conversion function.
custom_converters = {
    "lstm/basic_lstm_cell/LSTMmatmul2": convert_matmul,
    "logits/Fullymatmul": convert_matmul,
}

# Convert the frozen TensorFlow graph and write the .mlmodel file.
coreml_model = tfcoreml.convert(
    tf_model_path=frozen_model_file,
    mlmodel_path=coreml_model_file,
    input_name_shape_dict=input_tensor_shapes,
    output_feature_names=output_tensor_names,
    add_custom_layers=True,
    custom_conversion_functions=custom_converters,
)

Shapes not found for 40 tensors. Executing graph to determine shapes. 
Automatic shape interpretation succeeded for input blob lstm/state_feed:0
1/52: Analysing op name: seq_embeddings ( type:  Placeholder )
Skipping name of placeholder
2/52: Analysing op name: lstm/basic_lstm_cell/kernel ( type:  Const )
3/52: Analysing op name: lstm/basic_lstm_cell/kernel/read ( type:  Identity )
4/52: Analysing op name: lstm/basic_lstm_cell/bias ( type:  Const )
5/52: Analysing op name: lstm/basic_lstm_cell/bias/read ( type:  Identity )
6/52: Analysing op name: lstm/state_feed ( type:  Placeholder )
Skipping name of placeholder
7/52: Analysing op name: lstm/ExpandDims/dim ( type:  Const )
8/52: Analysing op name: lstm/ExpandDims ( type:  ExpandDims )
9/52: Analysing op name: lstm/split/split_dim ( type:  Const )
10/52: Analysing op name: lstm/split ( type:  Split )
11/52: Analysing op name: lstm/Squeeze ( type:  Squeeze )
12/52: Analysing op name: lstm/Squeeze_1 ( type:  Squeeze )
13/52: Analysing o

## Test the model

Run the same randomly generated inputs through both the Core ML and the TensorFlow model and see where the outputs differ.

In [None]:
# Seed NumPy's global RNG so every run feeds the same "random" inputs to both
# models; without it the comparison is not reproducible.
np.random.seed(0)

# One random vector per input, duplicated for the two beams.
# NOTE(review): the two copies correspond to beam_size = 2 - keep them in sync.
seq_rand = np.random.rand(300)
seq_embeddings_tf = np.array([[seq_rand, seq_rand]])           # shape (1, 2, 300)
seq_embeddings_ml = np.array([[[sr, sr]] for sr in seq_rand])  # shape (300, 1, 2)

state_rand = np.random.rand(1024)
state_feed_tf = np.array([[state_rand, state_rand]])           # shape (1, 2, 1024)
state_feed_ml = np.array([[[sr, sr]] for sr in state_rand])    # shape (1024, 1, 2)

In [None]:
# Feed the random inputs to the converted CoreML model.
# NOTE(review): the keys look like the TF tensor names with '/' and ':' replaced
# by '__' (e.g. 'lstm/state_feed:0' -> 'lstm__state_feed__0') - confirm against the model spec.
coreml_inputs = {
    'seq_embeddings__0': seq_embeddings_ml,
    'lstm__state_feed__0': state_feed_ml,
}
# Run the prediction with the CPU-only flag set.
coreml_output = coreml_model.predict(coreml_inputs, useCPUOnly=True)
def print_ml(ml):
    """Print every entry of ``ml`` in sorted-key order.

    For each key three lines are written: the key, the ``shape`` of its
    value, and the value itself.
    """
    for blob_name in sorted(ml):
        blob = ml[blob_name]
        print(blob_name, blob.shape, blob, sep='\n')
# Dump name, shape and values of every CoreML output for comparison with the TensorFlow run.
print_ml(coreml_output)

In [None]:
with tf.Session(graph=g) as sess:
    # Restore the checkpoint weights into this session.
    restore_fn(sess)
    # Despite the name, these are the tensors being fetched (the outputs).
    input_names = ['lstm/state:0', 'softmax:0']
    feeds = {
        "lstm/state_feed:0": state_feed_tf,
        "seq_embeddings:0": seq_embeddings_tf,
    }
    output_values = sess.run(fetches=input_names, feed_dict=feeds)
    # Report name, shape and values, ordered by tensor name.
    for name, result in sorted(zip(input_names, output_values),
                               key=lambda pair: pair[0]):
        print(name)
        print(result.shape)
        print(result)

In [None]:
# Scratch check: a (1, 20) by (20, 45) matrix product has shape (1, 45).
np.matmul(np.random.rand(1, 20), np.random.rand(20, 45)).shape

In [None]:
# Scratch check: indexing away the leading axis of a (1, 2, 812) array leaves shape (2, 812).
np.random.rand(1, 2, 812)[0,:].shape