## BigDL-Nano Keras Inference Example
---
This example shows how to use the BigDL-Nano TensorFlow Keras inference pipeline.

In [1]:
import os
from time import time

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from bigdl.nano.tf.keras import Model, Sequential
import numpy as np

### CIFAR10 Data Module
---
Import the existing data module from `keras.datasets` and normalize the images.<br>
See the [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) page for an overview of the whole dataset.

In [2]:
# Fetch the CIFAR10 train/test splits through the Keras dataset helper.
cifar10 = keras.datasets.cifar10
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Rescale pixel values by 1/255 and store them as float32.
test_images = test_images.astype("float32") / 255.0
train_images = train_images.astype("float32") / 255.0

# Keep a copy of the raw integer test labels before they are one-hot encoded.
y_test = test_labels

# Convert class vectors to binary class matrices.
test_labels = keras.utils.to_categorical(test_labels, num_classes=10)
train_labels = keras.utils.to_categorical(train_labels, num_classes=10)

### Implement ResNet-18 model and load model weights from hdf5 file
---

In [3]:
class BasicBlock(Model):
    """Residual block made of two 3x3 convolutions plus a shortcut connection.

        channels: number of filters used by every convolution in the block.
        downsample: when truthy, the first convolution uses stride 2 and the
            shortcut passes through a 1x1 stride-2 convolution followed by
            batch normalization; otherwise the input is added back unchanged.
    """

    def __init__(self, channels: int, downsample=False):
        super().__init__()
        # Options shared by every convolution created in this block.
        shared = dict(filters=channels, padding="same", kernel_initializer="he_normal")
        first_stride = 2 if downsample else 1

        # Stores the flag for now; replaced by the projection layers below when set.
        self.downsample = downsample

        # Sub-layers are assigned in a fixed order on purpose.
        # NOTE(review): weight loading from the HDF5 checkpoint presumably relies on
        # this order - confirm before reordering.
        self.conv1 = layers.Conv2D(kernel_size=(3, 3), strides=first_stride, **shared)
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.ReLU()
        self.conv2 = layers.Conv2D(kernel_size=(3, 3), strides=1, **shared)
        self.bn2 = layers.BatchNormalization()

        if downsample:
            # Projection shortcut: 1x1 convolution with stride 2, then batch norm.
            projection = [
                layers.Conv2D(kernel_size=(1, 1), strides=2, **shared),
                layers.BatchNormalization(),
            ]
            self.downsample = keras.Sequential(projection)

    def call(self, x):
        # Main branch: conv -> batch norm -> ReLU -> conv -> batch norm.
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))

        # Shortcut branch: projected input when a projection exists, else the input itself.
        shortcut = self.downsample(x) if self.downsample else x

        # Merge both branches and apply the final activation.
        return self.relu(main + shortcut)

In [4]:
class Resnet18(Model):
    """ResNet-18 style classifier assembled from BasicBlock stages.

    The network is a 3x3 convolution stem, four stages of two BasicBlocks each
    (64, 128, 256 and 512 filters; every stage after the first starts with a
    downsampling block), global average pooling and a softmax-activated dense
    layer.
    """

    def __init__(self, num_classes, **kwargs):
        """
            num_classes: number of classes in specific classification task.
        """
        super().__init__(**kwargs)

        def make_stage(width, shrink=False):
            # One stage = two BasicBlocks; only the first may downsample.
            return keras.Sequential([
                BasicBlock(width, downsample=shrink),
                BasicBlock(width),
            ])

        # Stem.
        self.conv1 = layers.Conv2D(64, kernel_size=(3, 3), strides=1, padding="same")
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.ReLU()

        # Residual stages.
        self.layer1 = make_stage(64)
        self.layer2 = make_stage(128, shrink=True)
        self.layer3 = make_stage(256, shrink=True)
        self.layer4 = make_stage(512, shrink=True)

        # Classification head.
        self.avgpool = layers.GlobalAveragePooling2D()
        self.flat = layers.Flatten()
        self.fc = layers.Dense(num_classes)
        self.activate = layers.Softmax()

    def call(self, x):
        # Stem: convolution, batch norm, ReLU.
        features = self.relu(self.bn1(self.conv1(x)))

        # Run the four residual stages in sequence.
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        # Head: pool, flatten, project to class scores, then softmax.
        pooled = self.flat(self.avgpool(features))
        scores = self.fc(pooled)
        return self.activate(scores)

### Define Inputs and Outputs of Model
---
Optimizing a Keras model with BigDL-Nano quantization requires the model's inputs and outputs. These two attributes currently cannot be set directly on a `Model` subclass, so we wrap the network with `keras.Model()` to define them.

In [5]:
# Settings used both to build the network and to declare its functional input.
NUM_CLASSES = 10
INPUT_SHAPE = (32, 32, 3)
WEIGHTS_PATH = "checkpoints/basic_resnet18.hdf5"

# Instantiate the subclassed network, create its variables, then restore the
# pretrained weights. It gets its own name so that `model` only ever refers to
# the functional wrapper built below.
resnet18 = Resnet18(NUM_CLASSES)
resnet18.build(input_shape=(None, *INPUT_SHAPE))
resnet18.load_weights(WEIGHTS_PATH)

# Wrap the network in a functional model so that its inputs and outputs are defined.
# NOTE(review): `quantize` is later called on this object, which is built with the
# plain `keras.Model` - confirm the installed BigDL-Nano version exposes it there.
inputs = keras.Input(shape=INPUT_SHAPE)
outputs = resnet18(inputs)
model = keras.Model(inputs, outputs)
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=['accuracy'])
model.summary()

2022-05-23 00:38:31.624950: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 resnet18 (Resnet18)         (None, 10)                11188362  
                                                                 
Total params: 11,188,362
Trainable params: 11,178,762
Non-trainable params: 9,600
_________________________________________________________________


### Measure basic model inference time and accuracy

In [6]:
# Time one full prediction pass over the test images with the FP32 model.
start = time()
model.predict(test_images, batch_size=64)
infer_time_model_basic = time() - start

# The model is compiled with one loss and one metric, so evaluate() returns
# [loss, accuracy]; unpack it so `acc_model_basic` holds the accuracy alone
# instead of the whole list.
loss_model_basic, acc_model_basic = model.evaluate(test_images, test_labels, verbose=1)



### Quantize Model
---
Use `Model.quantize` from `bigdl.nano.tf.keras` to calibrate a Keras model for post-training quantization.<br>
Here are the parameters used in the notebook:
```
    :param calib_dataset:  A tf.data.Dataset object for calibration. Required for
                            static quantization.
    :param val_dataset:    A tf.data.Dataset object for evaluation.
    :param batch:          Batch size of dataloader for both calib_dataset and val_dataset.
    :param metric:         A Metric object for evaluation.
    
    :param tuning_strategy:    'bayesian', 'basic', 'mse', 'sigopt'. Default: 'bayesian'.
    
```
Access more details from [Source](https://github.com/intel-analytics/BigDL/blob/main/python/nano/src/bigdl/nano/tf/quantization.py#L22)

In [7]:
# Wrap the in-memory arrays as tf.data datasets of (image, one-hot label) pairs.
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))

# Calibrate on the training split and validate on the test split, so the data
# used to pick quantization parameters is not the data used to judge them.
# The tuning metric is accuracy rather than cross-entropy.
# NOTE(review): this assumes the tuner's default accuracy criterion treats a
# higher metric value as better - confirm against the installed version.
model_quantized = model.quantize(calib_dataset=train_dataset,
                                 val_dataset=test_dataset,
                                 batch=64,
                                 metric=tf.keras.metrics.CategoricalAccuracy(),
                                 tuning_strategy='basic')

2022-05-23 00:39:02.285747: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2022-05-23 00:39:02.285865: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2022-05-23 00:39:02.295360: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1149] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.007ms.
  function_optimizer: function_optimizer did nothing. time = 0.002ms.

2022-05-23 00:39:02.830939: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2022-05-23 00:39:02.831076: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2022-05-23 00:39:02.989192: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:114

Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
2022-05-23 00:39:07 [INFO] Pass StripUnusedNodesOptimizer elapsed time: 1120.48 ms
2022-05-23 00:39:07 [INFO] Pass GraphCseOptimizer elapsed time: 276.26 ms
2022-05-23 00:39:08 [INFO] Pass FoldBatchNormNodesOptimizer elapsed time: 696.7 ms
2022-05-23 00:39:08 [INFO] Pass UpdateEnterOptimizer elapsed time: 0.92 ms
2022-05-23 00:39:08 [INFO] Pass ConvertLeakyReluOptimizer elapsed time: 1.56 ms
2022-05-23 00:39:08 [INFO] Pass ConvertAddToBiasAddOptimizer elapsed time: 1.9 ms
2022-05-23 00:39:08 [INFO] Pass FuseTransposeReshapeOptimizer elapsed time: 1.74 ms
2022-05-23 00:39:08 [INFO] Pass FuseConvWithMathOptimizer elapsed time: 1.74 ms
2022-05-23 00:39:08 [INFO] Pass ExpandDimsOptimizer elapsed time: 1.58 ms
2022-05-23 00:39:08 [INFO] Pass InjectDummyBiasAddOptimizer elapsed time: 1.81 ms
2022-05-23 00:39:08 [INFO] Pass MoveSqueezeAfterReluOptimizer elapsed time: 1.42 ms
2022-05-23 00:39:08 [INFO] Pass Pre Optimiza

2022-05-23 00:39:21 [INFO] Pass StripUnusedNodesOptimizer elapsed time: 5.79 ms
2022-05-23 00:39:21 [INFO] Pass RemoveTrainingNodesOptimizer elapsed time: 2.24 ms
2022-05-23 00:39:21 [INFO] Pass FoldBatchNormNodesOptimizer elapsed time: 2.04 ms
2022-05-23 00:39:21 [INFO] Pass MetaOpOptimizer elapsed time: 1.24 ms
2022-05-23 00:39:22 [INFO] Pass PostCseOptimizer elapsed time: 157.27 ms
2022-05-23 00:39:22 [INFO] |******Mixed Precision Statistics*****|
2022-05-23 00:39:22 [INFO] +-----------------+----------+--------+
2022-05-23 00:39:22 [INFO] |     Op Type     |  Total   |  INT8  |
2022-05-23 00:39:22 [INFO] +-----------------+----------+--------+
2022-05-23 00:39:22 [INFO] |      Conv2D     |    20    |   20   |
2022-05-23 00:39:22 [INFO] |      MatMul     |    1     |   1    |
2022-05-23 00:39:22 [INFO] |    QuantizeV2   |    2     |   2    |
2022-05-23 00:39:22 [INFO] |    Dequantize   |    2     |   2    |
2022-05-23 00:39:22 [INFO] +-----------------+----------+--------+
2022-05-2

### Inference with Quantized Model
---

In [8]:
# Time one inference pass of the quantized graph over the whole test set.
# NOTE(review): all test images are fed in a single batch here, whereas the FP32
# measurement uses batch_size=64 - keep that in mind when comparing timings.
start = time()
# Use the session directly instead of `with ... as sess`: leaving a `with` block
# closes the session, which would make this cell fail when run a second time.
sess = model_quantized.sess
out = sess.run(model_quantized.output_tensor,
               feed_dict={model_quantized.input_tensor[0]: test_images})
infer_time_model_quantized = time() - start

# Collapse whatever wrapping the fetch adds (e.g. a one-element list of outputs)
# into a (num_samples, num_classes) array of class probabilities.
probs = np.asarray(out)
probs = probs.reshape(-1, probs.shape[-1])

# Compare predicted class ids with true class ids; `test_labels` is one-hot,
# so argmax over the class axis recovers the integer label.
pred_classes = np.argmax(probs, axis=-1)
true_classes = np.argmax(test_labels, axis=-1)
acc_model_quantized = np.mean(pred_classes == true_classes)

tcmalloc: large alloc 1073741824 bytes == 0x55e795ed6000 @  0x7fcfb5323d3f 0x7fcfb535a0c0 0x7fcfb535d082 0x7fcfb535d243 0x7fcfa598f402 0x7fcf99d29eb0 0x7fcf99d49a93 0x7fcf99d4d9ea 0x7fcf99d4df69 0x7fcf99d4e2d1 0x7fcf99d42ce3 0x7fcf95408051 0x7fcf952631d5 0x7fcf94ff6087 0x7fcf94ff691e 0x7fcf94ff6b1d 0x7fcf9df49c92 0x7fcf9df5e7f0 0x7fcf9e00254d 0x7fcf9e0041b7 0x7fcf95409d7c 0x7fcf95393cec 0x7fcf9a75676e 0x7fcf9a7531f3 0x7fcf95ae5313 0x7fcfb52ba609 0x7fcfb51df163


In [9]:
# Plain-text table comparing FP32 and INT8 inference times.
template = """
|    Precision   | Inference Time(s) |
|      FP32      |       {:5.2f}       |
|      INT8      |       {:5.2f}       |
| Improvement(%) |       {:5.2f}       |
"""

# Percentage of the FP32 inference time saved by the quantized model.
time_saved_pct = (1 - infer_time_model_quantized / infer_time_model_basic) * 100

summary = template.format(infer_time_model_basic,
                          infer_time_model_quantized,
                          time_saved_pct)
print(summary)


|    Precision   | Inference Time(s) |
|      FP32      |        5.21       |
|      INT8      |        1.07       |
| Improvement(%) |       79.46       |

