# DeepSpeech Conversion

This example demonstrates the workflow to download a publicly available 
TensorFlow model and convert it to Core ML format using the tfcoreml converter.

We use an open-source implementation of the DeepSpeech model (https://arxiv.org/abs/1412.5567) 
provided by Mozilla: https://github.com/mozilla/DeepSpeech.

Note that this notebook was tested using the following dependencies: 

```
tensorflow==1.14.0
coremltools==3.0
```

It will **NOT** work on TensorFlow 2.0+ because the `tf.contrib` module, which this notebook relies on, was deprecated and then removed in TensorFlow 2.0.

In [1]:
import tfcoreml
import numpy as np
import tensorflow as tf

In [2]:
# Download the TensorFlow implementation of the DeepSpeech model from https://github.com/mozilla/DeepSpeech
# by running the following shell commands:
# wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/deepspeech-0.4.1-models.tar.gz
# tar xvfz deepspeech-0.4.1-models.tar.gz

In [3]:
# Where the frozen TensorFlow graph is read from, and where the converted
# Core ML model is written to.
tfmodel_path = './models/output_graph.pb'
mlmodel_path = './deep_speech.mlmodel'

# Fixed shapes for the four graph inputs handed to the converter.
input_shapes = {
    'input_node': [1, 16, 19, 26],
    'input_lengths': [1],
    'previous_state_h__invar__': [1, 2048],
    'previous_state_c__invar__': [1, 2048],
}

# Run the conversion; the resulting model is also saved at `mlmodel_path`.
model = tfcoreml.convert(
    tf_model_path=tfmodel_path,
    mlmodel_path=mlmodel_path,
    input_name_shape_dict=input_shapes,
    output_feature_names=['logits'],
    minimum_ios_deployment_target='13',
)

# To inspect the converted Core ML network, uncomment the two lines below:
# from coremltools.models.neural_network.printer import print_network_spec_coding_style
# print_network_spec_coding_style(model.get_spec())

0 assert nodes deleted
['h2:0', 'lstm_fused_cell/kernel:0', 'lstm_fused_cell/kernel/read:0', 'lstm_fused_cell/SequenceMask/Const_1:0', 'lstm_fused_cell/ExpandDims_1/dim:0', 'lstm_fused_cell/ExpandDims_2/dim:0', 'Reshape/shape:0', 'lstm_fused_cell/SequenceMask/Const:0', 'h6:0', 'lstm_fused_cell/SequenceMask/Range:0', 'zeros/shape_as_tensor:0', 'Minimum_2/y:0', 'h5/read:0', 'lstm_fused_cell/ExpandDims/dim:0', 'lstm_fused_cell/range_1/delta:0', 'b1/read:0', 'lstm_fused_cell/range/delta:0', 'Minimum/y:0', 'h1/read:0', 'b5:0', 'lstm_fused_cell/concat_1/axis:0', 'lstm_fused_cell/range/start:0', 'b1:0', 'b2/read:0', 'lstm_fused_cell/Const:0', 'h5:0', 'zeros/Const:0', 'lstm_fused_cell/SequenceMask/Const_2:0', 'b3/read:0', 'lstm_fused_cell/zeros/shape_as_tensor:0', 'zeros:0', 'lstm_fused_cell/range_1/limit:0', 'Minimum_1/y:0', 'raw_logits/shape:0', 'lstm_fused_cell/bias/read:0', 'lstm_fused_cell/zeros:0', 'lstm_fused_cell/Tile/multiples:0', 'Reshape_1/shape:0', 'lstm_fused_cell/zeros/Const:0', 

[SSAConverter] [68/76] Converting op type: 'set_global', name: 'Assign_2', output_shape: (1, 2048).
[SSAConverter] [69/76] Converting op type: 'set_global', name: 'Assign_3', output_shape: (1, 2048).
[SSAConverter] [70/76] Converting op type: 'Add', name: 'Add_3', output_shape: (16, 2048).
[SSAConverter] [71/76] Converting op type: 'Relu', name: 'Relu_3', output_shape: (16, 2048).
[SSAConverter] [72/76] Converting op type: 'Minimum', name: 'Minimum_3', output_shape: (16, 2048).
[SSAConverter] [73/76] Converting op type: 'MatMul', name: 'MatMul_4', output_shape: (16, 29).
[SSAConverter] [74/76] Converting op type: 'Add', name: 'Add_4', output_shape: (16, 29).
[SSAConverter] [75/76] Converting op type: 'Reshape', name: 'raw_logits', output_shape: (16, 1, 29).
[SSAConverter] [76/76] Converting op type: 'Softmax', name: 'logits', output_shape: (16, 1, 29).
[Core ML Pass] 15 disconnected constants nodes deleted


In [4]:
# Generate some random data as inputs.
# A fixed seed on a local generator keeps the inputs -- and therefore the
# Core ML vs. TensorFlow comparison later in this notebook -- reproducible
# across kernel restarts, without touching NumPy's global random state.
SEED = 0
rng = np.random.RandomState(SEED)

# Shapes mirror the ones passed to the converter in `input_name_shape_dict`.
input_node = rng.rand(1, 16, 19, 26)
input_lengths = np.array([16], dtype=np.int32)
previous_state_c = rng.rand(1, 2048)
previous_state_h = rng.rand(1, 2048)

In [5]:
# Run the converted Core ML model on the generated inputs and keep only the
# 'logits' entry of the returned predictions.
coreml_inputs = {
    'input_node': input_node,
    'input_lengths': input_lengths,
    'previous_state_h__invar__': previous_state_h,
    'previous_state_c__invar__': previous_state_c,
}
out = model.predict(coreml_inputs)['logits']

# Materialise the prediction as a NumPy array for the comparison further down.
output = np.array(out)
# print(output)

In [6]:
# Optionally, we can verify that the predictions are consistent with TensorFlow's output.

# Imported only for its side effect; nothing from the module is referenced by
# name. Presumably this makes the contrib RNN ops used by the frozen graph
# available before the graph is imported -- TODO confirm.
import tensorflow.contrib.rnn  # noqa: F401

# Parse the frozen graph definition from disk.
with open(tfmodel_path, 'rb') as f:
    serialized = f.read()
original_gdef = tf.compat.v1.GraphDef()
original_gdef.ParseFromString(serialized)

# Import into a dedicated graph rather than the process-wide default graph,
# so that re-running this cell does not keep adding duplicate nodes.
tf_graph = tf.Graph()
with tf_graph.as_default():
    tf.import_graph_def(original_gdef, name="")

with tf.compat.v1.Session(graph=tf_graph) as sess:
    logits_tensor = tf_graph.get_tensor_by_name('Softmax:0')

    # Feed the same random inputs that were given to the Core ML model.
    feed_dict = {
        tf_graph.get_tensor_by_name('input_node:0'): input_node,
        tf_graph.get_tensor_by_name('input_lengths:0'): input_lengths,
        tf_graph.get_tensor_by_name('previous_state_c:0'): previous_state_c,
        tf_graph.get_tensor_by_name('previous_state_h:0'): previous_state_h,
    }
    tf_out = sess.run(logits_tensor, feed_dict=feed_dict)

tf_output = np.array(tf_out)

In [7]:
# The Core ML and TensorFlow outputs must have the same shape ...
np.testing.assert_array_equal(output.shape, tf_output.shape)
# ... and agree element-wise to two decimal places.
np.testing.assert_array_almost_equal(output.ravel(), tf_output.ravel(), decimal=2)