In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython import display
%matplotlib inline

from sklearn.metrics import mean_squared_error
import joblib

# Generate data: $Y_1 = X_1 \cos(X_2)$, $Y_2 = X_1 \sin(X_2)$

In [34]:
# Size of the synthetic dataset (two million events).
n_events = 2_000_000
# Standard deviation of the zero-mean Gaussian used to draw X1.
feature_std = 0.7

In [35]:
# Fixed seed so that "Restart & Run All" regenerates the same dataset.
data_seed = 42
data_rng = np.random.default_rng(data_seed)

# X1 ~ Normal(0, feature_std) and X2 ~ Uniform(-pi, pi), each drawn as a
# column of shape (n_events, 1) to match the two single-feature model inputs.
X1 = data_rng.normal(0, feature_std, (n_events, 1))
X2 = data_rng.uniform(-np.pi, np.pi, (n_events, 1))

# Regression targets: Y1 = X1 cos(X2), Y2 = X1 sin(X2).
Y1, Y2 = X1 * np.cos(X2), X1 * np.sin(X2)

# TF.Keras Neural Network

## Custom layer definition: dense layer with tanh applied to the bias

In [36]:
from tensorflow import keras
import tensorflow as tf
class CustomLayer(keras.layers.Layer):
    """
    Custom dense layer that applies a tanh to its bias before adding it.

    The output is ``inputs @ w + tanh(b)``; no activation is applied here.

    Args:
        units: number of output units (size of the last output dimension).
        **kwargs: forwarded to ``keras.layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, units=32, **kwargs):
        # Forward the standard Layer keyword arguments so the layer can be
        # named and can be rebuilt from the config returned by get_config().
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel ``w`` and bias ``b`` once the input size is known."""
        self.w = self.add_weight(
            name="w",
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="b",
            shape=(self.units,),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        """Return ``inputs @ w + tanh(b)``."""
        return tf.matmul(inputs, self.w) + tf.math.tanh(self.b)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from config."""
        config = super().get_config()
        config.update({"units": self.units})
        return config
        

In [37]:
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Concatenate, LeakyReLU
from tensorflow.keras.models import Model

n_hidden = 3
n_nodes = 32

# Two single-feature inputs, concatenated along the last axis.
input1 = Input(shape=(X1.shape[1],))
input2 = Input(shape=(X2.shape[1],))
inputs = Concatenate(axis=-1)([input1, input2])

# The first hidden layer is a standard Dense + ReLU; the remaining
# n_hidden - 1 hidden layers are a CustomLayer followed by a LeakyReLU.
hidden = Dense(n_nodes, activation='relu')(inputs)
for layer_idx in range(1, n_hidden):
    custom_out = CustomLayer(n_nodes)(hidden)
    hidden = LeakyReLU()(custom_out)

# Two linear single-unit heads, one per target, sharing the same hidden stack.
predictionY1 = Dense(1, activation='linear')(hidden)
predictionY2 = Dense(1, activation='linear')(hidden)

nn_model = Model(
    inputs=[input1, input2],
    outputs=[predictionY1, predictionY2],
)
nn_model.compile(loss='mean_squared_error', optimizer='adam')


In [38]:
# Print the layer-by-layer structure of the model, including parameter counts.
nn_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 2)            0           input_3[0][0]                    
                                                                 input_4[0][0]                    
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 32)           96          concatenate_1[0][0]        

In [39]:
# Train for a single epoch on the full generated dataset, in batches of 1024 events.
nn_model.fit(x=[X1, X2], y=[Y1, Y2], epochs=1, batch_size=1024)



<tensorflow.python.keras.callbacks.History at 0x7f73e86f9a90>

In [40]:
# Predict on the same inputs that were used for training, then flatten each
# prediction array to 1-D.
Y1_pred, Y2_pred = nn_model.predict([X1, X2], batch_size=1024*4)
Y1_pred, Y2_pred = Y1_pred.ravel(), Y2_pred.ravel()

In [41]:
# Mean squared error of each output against its true target (evaluated on the training data).
mean_squared_error(y_pred=Y1_pred, y_true=Y1), mean_squared_error(y_pred=Y2_pred, y_true=Y2),

(0.000318922683635748, 0.0006225795475563963)

# Save NN model
## Note: saved as a TensorFlow SavedModel directory, not in HDF5 format

In [42]:
# Save under the path "nn_model" (no .h5 extension); the tf2onnx conversion
# step later reads this path as a SavedModel directory.
nn_model.save("nn_model")

INFO:tensorflow:Assets written to: nn_model/assets


# Convert to onnx format
## Note: use tf2onnx, not keras2onnx

In [43]:
!python -m tf2onnx.convert --saved-model nn_model/ --output nn_model.onnx

2022-03-17 13:17:34.148139: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/lib::/usr/local/cuda/lib64
2022-03-17 13:17:34.148185: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-03-17 13:17:35.999373: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2022-03-17 13:17:36.024846: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:05:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.582GHz coreCount: 28 deviceMemorySize: 10.91GiB deviceMemoryBandwidth: 451.17GiB/s
2022-03-17 13:17:36.025013: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcu

2022-03-17 13:17:36,843 - INFO - Using tensorflow=2.5.0, onnx=1.9.0, tf2onnx=1.8.5/50049d
2022-03-17 13:17:36,843 - INFO - Using opset <onnx, 9>
2022-03-17 13:17:36,848 - INFO - Computed 0 values for constant folding
2022-03-17 13:17:36,902 - INFO - Optimizing ONNX model
2022-03-17 13:17:36,993 - INFO - After optimization: Const -1 (11->10), Identity -10 (10->0)
2022-03-17 13:17:36,996 - INFO - 
2022-03-17 13:17:36,996 - INFO - Successfully converted TensorFlow model nn_model/ to ONNX
2022-03-17 13:17:36,996 - INFO - Model inputs: ['input_3:0', 'input_4:0']
2022-03-17 13:17:36,996 - INFO - Model outputs: ['dense_4', 'dense_5']
2022-03-17 13:17:36,996 - INFO - ONNX model is saved at nn_model.onnx


# Load model in ONNX RT for inference

In [44]:
import onnxruntime
# Create an ONNX Runtime inference session from the converted model file.
sess = onnxruntime.InferenceSession("nn_model.onnx",)
# NOTE: despite its name this holds the session's input descriptors, not
# strings; pred_ONNX reads the `.name` attribute of each entry.
input_names = sess.get_inputs()

def _natural_key(name):
    """Sort key that compares the digit runs in ``name`` numerically.

    With plain string ordering "input_10:0" sorts before "input_9:0"; with
    this key they are ordered 9, then 10.
    """
    import re

    chunks = re.split(r"(\d+)", name)
    # With a capturing group, re.split alternates text (even positions) and
    # digit runs (odd positions), so integers are only compared with integers.
    return [int(chunk) if pos % 2 else chunk for pos, chunk in enumerate(chunks)]


def pred_ONNX(X):
    """Run the ONNX Runtime session ``sess`` on a list of input arrays.

    Args:
        X: iterable of array-likes, one per model input. Each is converted to
            float32. The i-th element is fed to the i-th session input name,
            with the names taken in natural (numeric-aware) sorted order.
            This assumes, as the previous alphabetical sort did, that the
            name order matches the order of ``X``.

    Returns:
        The list returned by ``sess.run(None, feed)`` (one entry per model output).

    Raises:
        ValueError: if the number of arrays differs from the number of
            session inputs (``zip`` would otherwise silently drop the extras).
    """
    #
    # Mimic any training pre-processing here
    #

    data = [np.asarray(x, dtype=np.float32) for x in X]
    ordered_names = sorted((i_.name for i_ in input_names), key=_natural_key)
    if len(data) != len(ordered_names):
        raise ValueError(
            "pred_ONNX expected %d input arrays %s but got %d"
            % (len(ordered_names), ordered_names, len(data))
        )

    feed = dict(zip(ordered_names, data))
    pred = sess.run(None, feed)
    return pred


# Optionally configure the session (e.g. turn off multi-threading). The options
# must be set before they are passed to InferenceSession:
# sess_options = onnxruntime.SessionOptions()
# sess_options.inter_op_num_threads = 1
# sess_options.intra_op_num_threads = 1
# sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
# sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
# sess = onnxruntime.InferenceSession("nn_model.onnx", sess_options)

## Similar functions for Keras model prediction

In [45]:
from tensorflow.keras.models import load_model
# Rebind `nn_model` to the model reloaded from disk, so the prediction helpers
# below exercise the saved model rather than the in-memory one built earlier.
nn_model = load_model("nn_model")
def pred_Keras(X, batch_size=1024):
    """Predict with ``nn_model.predict``.

    Args:
        X: model inputs, passed unchanged to ``nn_model.predict``.
        batch_size: batch size forwarded to ``predict``. Defaults to 1024,
            the value that was previously hard-coded.

    Returns:
        Whatever ``nn_model.predict`` returns for ``X``.
    """
    #
    # Mimic any training pre-processing here
    #

    return nn_model.predict(X, batch_size=batch_size)

def pred_KerasNoTrain(X):
    """Run the model by calling it directly with ``training=False`` rather than via ``predict``."""
    #
    # Any pre-processing applied at training time belongs here
    #

    outputs = nn_model(X, training=False)
    return outputs

# Predict and compare ORT to Keras

In [46]:
# Keras predictions for both outputs; display the first five Y1 predictions.
y1_pred_k, y2_pred_k = pred_Keras([X1, X2])
y1_pred_k[:5]

array([[ 0.04793903],
       [-0.29843515],
       [-0.27908307],
       [ 0.02063516],
       [ 0.03614256]], dtype=float32)

In [47]:
# pred_ONNX expects a list of input arrays; for a single-input model pass [X].
# Display the first five Y1 predictions for comparison with the Keras values.
y1_pred_o, y2_pred_o = pred_ONNX([X1, X2])
y1_pred_o[:5]

array([[ 0.04793907],
       [-0.29843518],
       [-0.279083  ],
       [ 0.02063511],
       [ 0.03614252]], dtype=float32)

In [48]:
# Element-wise agreement between the Keras and ONNX Runtime predictions,
# using an absolute tolerance of 1e-5 (plus numpy's default relative tolerance).
y1_matches = np.allclose(y1_pred_k, y1_pred_o, atol=1e-05)
y2_matches = np.allclose(y2_pred_k, y2_pred_o, atol=1e-05)
print("ORT & Keras values same for all Y1: ", y1_matches,
      "\nORT & Keras values same for all Y2: ", y2_matches)

ORT & Keras values same for all Y1:  True 
ORT & Keras values same for all Y2:  True


In [50]:
# MSE of each output against its true target, for both inference back-ends.
keras_mse = (
    mean_squared_error(y_true=Y1, y_pred=y1_pred_k),
    mean_squared_error(y_true=Y2, y_pred=y2_pred_k),
)
ort_mse = (
    mean_squared_error(y_true=Y1, y_pred=y1_pred_o),
    mean_squared_error(y_true=Y2, y_pred=y2_pred_o),
)
print("MSE for Keras: ", *keras_mse)
print("MSE for ORT: ", *ort_mse)

MSE for Keras:  0.000318922683635748 0.0006225795475563963
MSE for ORT:  0.0003189226812967008 0.0006225795522400696


# Timing comparisons

In [51]:
# Number of repeated prediction calls in each timing loop below.
l = 15

In [52]:
%%time
# Time `l` full-dataset predictions through pred_Keras (nn_model.predict, batched).
for i in range(l):
    _ = pred_Keras([X1, X2])

CPU times: user 53 s, sys: 3.47 s, total: 56.5 s
Wall time: 43.2 s


In [53]:
%%time
# Time `l` full-dataset predictions through the ONNX Runtime session; each call
# feeds the whole arrays to the session in one go.
for i in range(l):
    __ = pred_ONNX([X1, X2])

CPU times: user 29.8 s, sys: 277 ms, total: 30 s
Wall time: 4.81 s


In [54]:
%%time
# Time `l` full-dataset predictions made by calling the Keras model directly
# with training=False (no `predict` batching loop).
for i in range(l):
    _ = pred_KerasNoTrain([X1, X2])

CPU times: user 26.5 s, sys: 15.8 s, total: 42.3 s
Wall time: 6.78 s


In [26]:
%%time
# Single-call timing of the Keras `predict` path.
_ = pred_Keras([X1, X2])

CPU times: user 4.58 s, sys: 271 ms, total: 4.85 s
Wall time: 10.3 s


In [28]:
%%time
# Single-call timing of the ONNX Runtime path.
__ = pred_ONNX([X1, X2])

CPU times: user 2.19 s, sys: 7.12 ms, total: 2.2 s
Wall time: 1.01 s


In [27]:
%%time
# Single-call timing of the direct Keras model call with training=False.
_ = pred_KerasNoTrain([X1, X2])

CPU times: user 1.42 s, sys: 1.06 s, total: 2.48 s
Wall time: 1.55 s
