## Sharing machine learning models

### Save and retrieve Scikit-learn model

In [22]:
# Train a model.
import os
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = RandomForestClassifier()
clr.fit(X_train, y_train)

# accuracy on test data with trained model
clr.score(X_test, y_test)

0.9473684210526315

In [23]:
# Convert into ONNX format
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)

if not os.path.exists('sklearn_model'):
    os.makedirs('sklearn_model')

# save trained model
with open("sklearn_model/rf_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())

In [24]:
# Compute the prediction with ONNX Runtime
import onnxruntime as rt
import numpy

# retrieve trained model
sess = rt.InferenceSession("sklearn_model/rf_iris.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# predict labels of test data
pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]

In [25]:
pred_onx

array([1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 2, 0, 1, 1, 2, 1, 1, 2, 2, 0, 0, 2,
       2, 1, 2, 0, 2, 0, 2, 0, 1, 2, 2, 2, 2, 2, 1, 0], dtype=int64)

In [26]:
from sklearn import metrics as score

# accuracy on test data using retrieved model
score.accuracy_score(y_test, pred_onx)

0.9473684210526315

## Save Tensorflow model as ONNX

In [27]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
import tf2onnx

In [28]:
## https://www.tensorflow.org/tutorials/quickstart/advanced

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add a channels dimension
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

In [29]:
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

In [30]:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

# Create an instance of the model
model = MyModel()

In [31]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

In [32]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [33]:
@tf.function
def train_step(images, labels):
  with tf.GradientTape() as tape:
    # training=True is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(images, training=True)
    loss = loss_object(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss(loss)
  train_accuracy(labels, predictions)


@tf.function
def test_step(images, labels):
  # training=False is only needed if there are layers with different
  # behavior during training versus inference (e.g. Dropout).
  predictions = model(images, training=False)
  t_loss = loss_object(labels, predictions)

  test_loss(t_loss)
  test_accuracy(labels, predictions)

In [34]:
EPOCHS = 5

for epoch in range(EPOCHS):
  # Reset the metrics at the start of the next epoch
  train_loss.reset_states()
  train_accuracy.reset_states()
  
  test_loss.reset_states()
  test_accuracy.reset_states()

  for images, labels in train_ds:
    train_step(images, labels)

  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  print(
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result()}, '
    f'Accuracy: {train_accuracy.result() * 100}, '
    f'Test Loss: {test_loss.result()}, '
    f'Test Accuracy: {test_accuracy.result() * 100}'
  )

Epoch 1, Loss: 0.1339331716299057, Accuracy: 95.92000579833984, Test Loss: 0.061983346939086914, Test Accuracy: 97.97000122070312
Epoch 2, Loss: 0.041492823511362076, Accuracy: 98.76333618164062, Test Loss: 0.04900696501135826, Test Accuracy: 98.32999420166016
Epoch 3, Loss: 0.022452985867857933, Accuracy: 99.28666687011719, Test Loss: 0.06442192941904068, Test Accuracy: 97.90999603271484
Epoch 4, Loss: 0.012921309098601341, Accuracy: 99.59666442871094, Test Loss: 0.058912117034196854, Test Accuracy: 98.36000061035156
Epoch 5, Loss: 0.009565617889165878, Accuracy: 99.69166564941406, Test Loss: 0.06853362917900085, Test Accuracy: 98.18000030517578


In [35]:
if not os.path.exists('tf_model'):
    os.makedirs('tf_model')

if not os.path.exists('onnx_loaded_model'):
    os.makedirs('onnx_loaded_model')

tf.saved_model.save(model, 'tf_model')

INFO:tensorflow:Assets written to: tf_model/assets


In [36]:
%%bash

python -m tf2onnx.convert --saved-model tf_model --output onnx_loaded_model/mnist_model.onnx --opset 7

2021-06-18 13:13:00,912 - INFO - Signatures found in model: [serving_default].
2021-06-18 13:13:00,912 - INFO - Output names: ['output_1']
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
2021-06-18 13:13:01,341 - INFO - Using tensorflow=2.4.1, onnx=1.9.0, tf2onnx=1.8.5/50049d
2021-06-18 13:13:01,341 - INFO - Using opset <onnx, 7>
2021-06-18 13:13:01,843 - INFO - Computed 0 values for constant folding
2021-06-18 13:13:02,848 - INFO - Optimizing ONNX model
2021-06-18 13:13:03,093 - INFO - After optimization: Cast -1 (1->0), Const +1 (7->8), Identity -5 (5->0), Reshape +1 (1->2), Transpose -1 (2->1)
2021-06-18 13:13:03,125 - INFO - 
2021-06-18 13:13:03,125 - INFO - Successfully converted TensorFlow model tf_model to ONNX
2021-06-18 13:13:03,125 - INFO - Model inputs: ['input_1:0', 'new_shape__12', 'const_fold_opt__13', 'StatefulPartitionedCall/my_model_1/dense_3/MatMul/ReadVariableOp:0', 

## Retrieve ONNX model as Tensorflow model

In [37]:
import onnx
from onnx_tf.backend import prepare

In [38]:
loaded_model = onnx.load("onnx_loaded_model/mnist_model.onnx")

In [39]:
tf_loaded_model = prepare(loaded_model)

In [40]:
import numpy as np

total_test_accuracy = []
total_test_loss = []

def predict_test(images, labels):
  # Reset the metrics at the start of the next epoch
  test_loss.reset_states()
  test_accuracy.reset_states()
  true_labels = []
  predicted_labels = []
  for i, item in enumerate(images):
      prediction = tf_loaded_model.run(item, training=False)
      pred = np.array(prediction).squeeze()
      true_labels.append([labels[i].numpy()])
      predicted_labels.append(pred)
  t_loss = loss_object(true_labels, predicted_labels)  
  return test_loss(t_loss), test_accuracy(labels, predicted_labels)


for test_images, test_labels in test_ds:
  ls, acc = predict_test(test_images, test_labels)
  loss = test_loss.result()
  accuracy = test_accuracy.result() * 100
  total_test_accuracy.append(accuracy.numpy())
  total_test_loss.append(loss.numpy())

print("Mean test loss: {}".format(np.mean(total_test_loss)))
print("Mean test accuracy: {}".format(np.mean(total_test_accuracy)))

Mean test loss: 0.06853365153074265
Mean test accuracy: 98.18290710449219
