In [None]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras import datasets, layers, models  # Import Keras modules
import matplotlib.pyplot as plt
import numpy as np

!pip install onnx onnxruntime-gpu tf2onnx

# Import ONNX-related libraries
import onnx
import onnxruntime
import tf2onnx

# Import evaluation metrics
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score

# Load CIFAR-10 dataset (Images are 32x32 RGB images, 10 classes)
(X_train, y_train), (X_test, y_test) = datasets.cifar10.load_data()

# Normalize pixel values to range [0,1] for better training stability.
# Cast to float32 before dividing: dividing the loaded pixel arrays directly by a
# Python float produces float64 arrays, which doubles the memory footprint for
# data that is fed to the model (and to ONNX Runtime) as float32 anyway.
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# Data augmentation: Helps improve model generalization.
# NOTE: defining this pipeline has no effect by itself; it must be applied
# explicitly to the training images (e.g. in the training input pipeline).
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),    # Randomly flip images horizontally
    layers.RandomRotation(0.1),         # Rotate images by a small angle
    layers.RandomZoom(0.1),             # Apply a small zoom
])

Collecting onnx
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.21.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting tf2onnx
  Downloading tf2onnx-1.16.1-py3-none-any.whl.metadata (1.3 kB)
Collecting coloredlogs (from onnxruntime-gpu)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting protobuf>=3.20.2 (from onnx)
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-gpu)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m89.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnxruntime_gpu-1.21.0-cp311-cp311-manylinux_2_27_x86_64.man

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [None]:
# Define CNN model for image classification.
# The input shape is declared with an explicit Input layer: passing
# `input_shape` to the first layer of a Sequential model makes Keras emit a
# warning, and an Input object is the form it asks for instead.
cnn = models.Sequential([
    layers.Input(shape=(32, 32, 3)),  # 32x32 RGB images

    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),  # Pooling reduces spatial dimensions

    layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),                # Flatten into 1D array for Dense layers
    layers.Dense(128, activation='relu'),  # Fully connected layer
    layers.Dense(10, activation='softmax') # Output layer (10 classes, softmax for classification)
])

# Compile the model
cnn.compile(optimizer='adam',          # Adam optimizer (adaptive learning rate)
            loss='sparse_categorical_crossentropy',  # Suitable for integer labels
            metrics=['accuracy'])       # Track accuracy

# Train the model for 10 epochs.
# The augmentation pipeline is applied to the training batches only, inside a
# tf.data input pipeline, so the random flip / rotation / zoom actually take
# effect during training while the saved model stays free of augmentation
# layers (the test data and the exported graph are left untouched).
def _augment(images, labels):
    """Run every layer of `data_augmentation` on a batch in training mode."""
    for layer in data_augmentation.layers:
        images = layer(images, training=True)
    return images, labels

train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train.astype("float32"), y_train))
    .shuffle(len(X_train))  # reshuffle the whole training set every epoch
    .batch(32)              # same batch size fit() uses by default for arrays
    .map(_augment, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)
cnn.fit(train_ds, epochs=10)

# Evaluate model performance on the (un-augmented) test data
cnn.evaluate(X_test, y_test)

# Save trained model in TensorFlow SavedModel format
tf.saved_model.save(cnn, "cnn_model")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 103ms/step - accuracy: 0.3535 - loss: 1.7524
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 94ms/step - accuracy: 0.5991 - loss: 1.1370
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 94ms/step - accuracy: 0.6659 - loss: 0.9549
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 95ms/step - accuracy: 0.7103 - loss: 0.8221
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 93ms/step - accuracy: 0.7407 - loss: 0.7414
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 94ms/step - accuracy: 0.7649 - loss: 0.6713
Epoch 7/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 105ms/step - accuracy: 0.7892 - loss: 0.5985
Epoch 8/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 95ms/step - accuracy: 0.8109 - loss: 0.539

In [None]:
# Convert to ONNX (Colab Safe).
# Runs tf2onnx's command-line converter in a shell on the "cnn_model" SavedModel
# directory and writes the result to "cnn_model.onnx" in the working directory.
!python -m tf2onnx.convert --saved-model cnn_model --output cnn_model.onnx

# Load the converted ONNX model and run the ONNX checker on it to validate the graph
onnx_model = onnx.load("cnn_model.onnx")
onnx.checker.check_model(onnx_model)

# Create an ONNX Runtime session for the converted file, restricted to the CPU
# execution provider; later cells reuse this session for inference and timing
ort_session = onnxruntime.InferenceSession("cnn_model.onnx", providers=['CPUExecutionProvider'])

def to_numpy(tensor):
    """Return `tensor.numpy()` if the object exposes a `numpy` attribute, else the object unchanged."""
    if not hasattr(tensor, 'numpy'):
        # Nothing to convert: hand the object back as-is
        return tensor
    return tensor.numpy()

# Build the ONNX Runtime feed: the test images as float32, keyed by the name of
# the session's first input
X_test_onnx = to_numpy(X_test).astype(np.float32)
input_name = ort_session.get_inputs()[0].name
ort_inputs = {input_name: X_test_onnx}

# Single inference pass over the prepared inputs (None = return every output)
ort_outs = ort_session.run(None, ort_inputs)

# Predicted class = index of the largest score in each row of the first output
y_pred = ort_outs[0].argmax(axis=1)

# Report accuracy, then the class-weighted F1 / precision / recall
print('Accuracy:', accuracy_score(y_test, y_pred))
for label, metric in (('F1 Score:', f1_score),
                      ('Precision:', precision_score),
                      ('Recall:', recall_score)):
    print(label, metric(y_test, y_pred, average='weighted'))

E0000 00:00:1741792384.380186    9936 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741792384.389329    9936 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-12 15:13:13,168 - INFO - Signatures found in model: [serving_default].
2025-03-12 15:13:13,169 - INFO - Output names: ['output_0']
I0000 00:00:1741792393.191644    9936 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
I0000 00:00:1741792393.191929    9936 single_machine.cc:361] Starting new session
I0000 00:00:1741792393.426366    9936 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
I0000 00:00:1741792393.426557    9936 single_machine.cc:361] Starting new session
2025-03-12 15:13:13,498 - INFO - Using tensorflow=2.18.0, onnx=1.17.0, tf2onnx=1.16.1/15c810
2

In [None]:
import time

# Benchmark ONNX Runtime by timing repeated passes over the prepared inputs.
N_RUNS = 100  # number of timed passes; each pass runs every row in ort_inputs

# One untimed pass so that one-off start-up costs do not skew the average
ort_session.run(None, ort_inputs)

# perf_counter is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted
start_time = time.perf_counter()
for _ in range(N_RUNS):
    ort_outs = ort_session.run(None, ort_inputs)
end_time = time.perf_counter()

avg_pass_time = (end_time - start_time) / N_RUNS
# This figure is the average for one full pass over the whole input batch
print(f"Average inference time: {avg_pass_time:.6f} seconds")
# Per-sample figure, derived from the number of rows fed to the session
print(f"Average inference time per sample: {avg_pass_time / len(X_test_onnx):.6f} seconds")


Average inference time: 5.710011 seconds


In [None]:
import time

# Compare per-sample inference time of the Keras model and the ONNX Runtime session.
# NOTE: predict() processes the data in mini-batches while the ONNX session
# receives all inputs in a single call, so the two figures are indicative only.

# Untimed warm-up: the first predict() call has one-off set-up cost that would
# otherwise be charged to TensorFlow's measurement. The ONNX session has already
# been executed by the earlier evaluation cell, so it is warm as well.
cnn.predict(X_test[:32], verbose=0)

# TensorFlow Model Inference
# perf_counter is the monotonic, high-resolution clock meant for timing intervals
start_tf = time.perf_counter()
cnn_predictions = cnn.predict(X_test)
end_tf = time.perf_counter()
print(f"TensorFlow Inference Time: {(end_tf - start_tf) / len(X_test):.6f} sec per sample")

# ONNX Model Inference
start_onnx = time.perf_counter()
ort_outs = ort_session.run(None, ort_inputs)
end_onnx = time.perf_counter()
print(f"ONNX Runtime Inference Time: {(end_onnx - start_onnx) / len(X_test):.6f} sec per sample")


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step
TensorFlow Inference Time: 0.000838 sec per sample
ONNX Runtime Inference Time: 0.001213 sec per sample


In [None]:
# Turn each runtime's score matrix into class predictions (index of the top score per row)
y_pred_tf = cnn_predictions.argmax(axis=1)
y_pred_onnx = ort_outs[0].argmax(axis=1)

# Accuracy of each runtime against the test labels
for label, predictions in (("TensorFlow Accuracy:", y_pred_tf),
                           ("ONNX Runtime Accuracy:", y_pred_onnx)):
    print(label, accuracy_score(y_test, predictions))

# Share of test samples (in percent) on which both runtimes predict the same class
n_agree = np.sum(y_pred_tf == y_pred_onnx)
matching = n_agree / len(y_test) * 100
print(f"Prediction Match: {matching:.2f}%")


TensorFlow Accuracy: 0.7326
ONNX Runtime Accuracy: 0.7326
Prediction Match: 100.00%
