### Convert the model to tflite

In [1]:
import tensorflow as tf

In [2]:
model = tf.saved_model.load('sf_lite.pb')

In [3]:
converter = tf.lite.TFLiteConverter.from_saved_model("sf_lite.pb")

In [4]:
tflite_model = converter.convert()
open("sf_lite.tflite", "wb").write(tflite_model)

38956

### Model Compression

In [5]:
from __future__ import absolute_import, division, print_function, unicode_literals

import matplotlib.pylab as plt
import tensorflow as tf
#import tensorflow_hub as hub
import numpy as np

In [6]:
TFLITE_MODEL = "sf_lite.tflite"
TFLITE_QUANT_MODEL = "sf_quant_lite.tflite"

In [7]:
TFLITE_QUANT_MODEL = "sf_quant_lite.tflite"

In [8]:
def representative_dataset():
    for _ in range(100):
      data = np.random.rand(1, 12, 7)
      yield [data.astype(np.float32)]

In [9]:
import tensorflow as tf
converter = tf.lite.TFLiteConverter.from_saved_model("sf_lite.pb")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_quant_model = converter.convert()

In [10]:
open(TFLITE_QUANT_MODEL, "wb").write(tflite_quant_model)

30560

### Test Inference

In [11]:
gru_input = np.random.rand(1, 12, 7)

In [12]:
import numpy as np
import tensorflow as tf
import time

# Load the TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="sf_lite.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details[0]['shape'],output_details[0]['shape'])


results = []

for i in range(0,gru_input.shape[0]):
    print(i)
   
    model_interpreter_time = 0
    
    
    interpreter.allocate_tensors()
    #print(i)
    tmp = (gru_input[i])
    #print(tmp.shape)
    
    ## add one dimenstion and change data type
    tmp_expanded = np.expand_dims(tmp,axis=0)
    tmp_expanded = tmp_expanded.astype('float32')
    #print(tmp_expanded.shape)
    
    ## load data
    model_interpreter_start_time = time.time()
    input_tensor_z= tf.convert_to_tensor(tmp_expanded, np.float32)
    interpreter.set_tensor(input_details[0]['index'],input_tensor_z)
    
    ## invoke model
    interpreter.invoke()
    start_time = time.time()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    print('model time: ', (time.time()-start_time)*1000*1600)
    model_interpreter_time += time.time() - model_interpreter_start_time
    
    result = np.squeeze(output_data)
    results.append(result)
    #break
used_time = time.time() - start_time
tflite_pred= np.array(results).reshape(-1,1)

print('used_time:{}'.format(used_time*1000))
print('model_interpreter_time:{}'.format(model_interpreter_time*1000))

[ 1 12  7] [ 1 12  1]
0
model time:  21.7437744140625
used_time:0.20647048950195312
model_interpreter_time:1.4498233795166016


In [13]:
import numpy as np
import tensorflow as tf
import time

# Load the TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="sf_quant_lite.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details[0]['shape'],output_details[0]['shape'])


results = []

for i in range(0,gru_input.shape[0]):
    print(i)
    
    model_interpreter_time = 0
    
    
    interpreter.allocate_tensors()
    #print(i)
    tmp = (gru_input[i])
    #print(tmp.shape)
    
    ## add one dimenstion and change data type
    tmp_expanded = np.expand_dims(tmp,axis=0)
    tmp_expanded = tmp_expanded.astype('float32')
    #print(tmp_expanded.shape)
    
    ## load data
    model_interpreter_start_time = time.time()
    input_tensor_z= tf.convert_to_tensor(tmp_expanded, np.float32)
    interpreter.set_tensor(input_details[0]['index'],input_tensor_z)
    
    ## invoke model
    interpreter.invoke()
    start_time = time.time()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    print('model time: ', (time.time()-start_time)*1000*1600)
    model_interpreter_time += time.time() - model_interpreter_start_time
    
    result = np.squeeze(output_data)
    results.append(result)
    #break
used_time = time.time() - start_time
tflite_pred= np.array(results).reshape(-1,1)

print('used_time:{}'.format(used_time*1000))
print('model_interpreter_time:{}'.format(model_interpreter_time*1000))

[ 1 12  7] [ 1 12  1]
0
model time:  15.6402587890625
used_time:0.21505355834960938
model_interpreter_time:0.5826950073242188


### Conver the tflite model model to cc files used in Arduino

In [14]:
! xxd -i sf_quant_lite.tflite > sf_quant_lite.cc