## Bigdl-Nano Keras Inference Example
---
This example shows the usage of bigdl-nano tf.keras inference pipeline. 

In [1]:
import os
from time import time

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from bigdl.nano.tf.keras import Model, Sequential
import numpy as np

### CIFAR10 Data Module
---
Import the existing data module from keras.datasets and Normalize the images.<br>
You could access [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) for a view of the whole dataset.

In [2]:
cifar10 = keras.datasets.cifar10
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0

# Convert class vectors to binary class matrices.
train_labels = keras.utils.to_categorical(train_labels, 10)
y_test = test_labels
test_labels = keras.utils.to_categorical(test_labels, 10)

### Implement ResNet-18 model and load model weights from hdf5 file
---

In [3]:
class BasicBlock(Model):
    def __init__(self, channels:int, downsample = False):
        super().__init__()
        self.downsample = downsample
        self.conv1 = layers.Conv2D(filters=channels, strides=2 if downsample else 1, kernel_size=(3, 3),
                                         padding="same", kernel_initializer="he_normal")
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.ReLU()
        self.conv2 = layers.Conv2D(filters=channels, strides=1, kernel_size=(3, 3), 
                                         padding="same", kernel_initializer="he_normal")
        self.bn2 = keras.layers.BatchNormalization()
        if downsample:
            self.downsample = keras.Sequential([
                keras.layers.Conv2D(filters=channels, strides=2, kernel_size=(1, 1),
                                    padding="same", kernel_initializer="he_normal"),
                keras.layers.BatchNormalization()
            ])
    def call(self, x):
        identity = x
        
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.downsample:
            identity = self.downsample(x)
        
        out += identity
        out = self.relu(out)
        
        return out

In [4]:
def getModel(num_classes):
    inputs = keras.Input(shape=(32, 32, 3), dtype=tf.float32)
    conv1 = layers.Conv2D(64, kernel_size=(3, 3), strides=1, padding="same")(inputs)
    bn1 = layers.BatchNormalization()(conv1)
    relu = layers.ReLU()(bn1)
    
    layer1 = keras.Sequential([
            BasicBlock(64),
            BasicBlock(64)
        ])(relu)
    
    layer2 = keras.Sequential([
            BasicBlock(128, downsample=True),
            BasicBlock(128)
        ])(layer1)
    
    layer3 = keras.Sequential([
            BasicBlock(256, downsample=True),
            BasicBlock(256)
        ])(layer2)
    
    layer4 = keras.Sequential([
            BasicBlock(512, downsample=True),
            BasicBlock(512)
        ])(layer3)
    
    avgpool = layers.GlobalAveragePooling2D()(layer4)
    flat = layers.Flatten()(avgpool)
    fc = layers.Dense(num_classes)(flat)
    activate = layers.Softmax()(fc)
    
    return keras.Model(inputs=inputs, outputs=activate)

In [5]:
model = getModel(10)

model.load_weights("checkpoints/basic_resnet18.hdf5")
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=['accuracy'])
model.summary()

2022-05-19 03:30:48.941007: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 conv2d (Conv2D)             (None, 32, 32, 64)        1792      
                                                                 
 batch_normalization (BatchN  (None, 32, 32, 64)       256       
 ormalization)                                                   
                                                                 
 re_lu (ReLU)                (None, 32, 32, 64)        0         
                                                                 
 sequential (Sequential)     (None, 32, 32, 64)        148736    
                                                                 
 sequential_2 (Sequential)   (None, 16, 16, 128)       527488    
                                                             

### Measure basic model inference time and accuracy

In [6]:
start = time()
model.predict(test_images, batch_size=64)
infer_time_model_basic = time() - start
acc_model_basic = model.evaluate(test_images, test_labels, verbose=1)



### Quantize Model
---
Use Model.quantize from bigdl.nano.tf.keras to calibrate a keras model for post-training quantization.<br>
Here are the parameters used in the notebook:
```
    :param calib_dataset:  A tf.data.Dataset object for calibration. Required for
                            static quantization.
    :param val_dataset:    A tf.data.Dataset object for evaluation.
    :param batch:          Batch size of dataloader for both calib_dataset and val_dataset.
    :param metric:         A Metric object for evaluation.
    
    :param tuning_strategy:    'bayesian', 'basic', 'mse', 'sigopt'. Default: 'bayesian'.
    
```
Access more details from [Source](https://github.com/intel-analytics/BigDL/blob/main/python/nano/src/bigdl/nano/tf/quantization.py#L22)

In [7]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
model_quantized = model.quantize(calib_dataset=test_dataset,
                                 val_dataset=test_dataset,
                                 batch=64,
                                 metric=tf.keras.metrics.CategoricalCrossentropy(),
                                 tuning_strategy='basic')

2022-05-19 03:31:15.416102: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2022-05-19 03:31:15.416231: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2022-05-19 03:31:15.423254: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1149] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.005ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2022-05-19 03:31:15.911786: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2022-05-19 03:31:15.911928: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2022-05-19 03:31:16.062058: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:114

Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
2022-05-19 03:31:23 [INFO] Pass StripUnusedNodesOptimizer elapsed time: 2723.78 ms
2022-05-19 03:31:24 [INFO] Pass GraphCseOptimizer elapsed time: 706.19 ms
2022-05-19 03:31:26 [INFO] Pass FoldBatchNormNodesOptimizer elapsed time: 1892.39 ms
2022-05-19 03:31:26 [INFO] Pass UpdateEnterOptimizer elapsed time: 2.23 ms
2022-05-19 03:31:26 [INFO] Pass ConvertLeakyReluOptimizer elapsed time: 4.12 ms
2022-05-19 03:31:26 [INFO] Pass ConvertAddToBiasAddOptimizer elapsed time: 4.32 ms
2022-05-19 03:31:26 [INFO] Pass FuseTransposeReshapeOptimizer elapsed time: 4.55 ms
2022-05-19 03:31:26 [INFO] Pass FuseConvWithMathOptimizer elapsed time: 4.48 ms
2022-05-19 03:31:26 [INFO] Pass ExpandDimsOptimizer elapsed time: 3.5 ms
2022-05-19 03:31:26 [INFO] Pass InjectDummyBiasAddOptimizer elapsed time: 2.9 ms
2022-05-19 03:31:26 [INFO] Pass MoveSqueezeAfterReluOptimizer elapsed time: 1.8 ms
2022-05-19 03:31:26 [INFO] Pass Pre Optimiza

;model/sequential_2/basic_block_3/conv2d_8/Conv2D_eightbit_min_model/sequential_2/basic_block_2/re_lu_3/Relu_1__print__;__min:[0]
;model/sequential_2/basic_block_2/conv2d_6/Conv2D_eightbit_requant_range__print__;__requant_min:[0]
;model/sequential_2/basic_block_3/conv2d_8/Conv2D_eightbit_max_model/sequential_2/basic_block_2/re_lu_3/Relu_1__print__;__max:[15.5657043]
;model/sequential_2/basic_block_2/conv2d_6/Conv2D_eightbit_requant_range__print__;__requant_max:[15.5657043]
;model/sequential_2/basic_block_3/conv2d_9/Conv2D_eightbit_min_model/sequential_2/basic_block_3/re_lu_4/Relu__print__;__min:[0]
;model/sequential_2/basic_block_3/conv2d_8/Conv2D_eightbit_requant_range__print__;__requant_min:[0]
;model/sequential_2/basic_block_3/conv2d_9/Conv2D_eightbit_max_model/sequential_2/basic_block_3/re_lu_4/Relu__print__;__max:[7.74928427]
;model/sequential_2/basic_block_3/conv2d_8/Conv2D_eightbit_requant_range__print__;__requant_max:[7.74928427]
;model/sequential_4/basic_block_4/sequential_3/c

2022-05-19 03:31:56 [INFO] |      Accuracy      | 2.2458   |    2.2449     |     2.2449       |
2022-05-19 03:31:56 [INFO] | Duration (seconds) | 15.8979  |    8.0481     |     8.0481       |
2022-05-19 03:31:56 [INFO] +--------------------+----------+---------------+------------------+
2022-05-19 03:31:56 [INFO] Save tuning history to /home/projects/notebooks/nc_workspace/2022-05-19_03-31-14/./history.snapshot.
2022-05-19 03:31:56 [INFO] Specified timeout or max trials is reached! Found a quantized model which meet accuracy goal. Exit.
2022-05-19 03:31:56 [INFO] Save deploy yaml to /home/projects/notebooks/nc_workspace/2022-05-19_03-31-14/deploy.yaml


In [8]:
start = time()
with model_quantized.sess as sess:
    out = sess.run(model_quantized.output_tensor,
             feed_dict={model_quantized.input_tensor[0]: test_images})
infer_time_model_quantized = time() - start
acc_model_quantized = np.mean(np.equal(tf.argmax(out)[0], test_labels))

tcmalloc: large alloc 1073741824 bytes == 0x5584e0b98000 @  0x7fbf54de6680 0x7fbf54e07824 0x7fbf54e07b8a 0x7fbf45486402 0x7fbf39820eb0 0x7fbf39840a93 0x7fbf398449ea 0x7fbf39844f69 0x7fbf398452d1 0x7fbf39839ce3 0x7fbf34eff051 0x7fbf34d5a1d5 0x7fbf34aed087 0x7fbf34aed91e 0x7fbf34aedb1d 0x7fbf3da40c92 0x7fbf3da557f0 0x7fbf3daf954d 0x7fbf3dafb1b7 0x7fbf34f00d7c 0x7fbf34e8acec 0x7fbf3a24d76e 0x7fbf3a24a1f3 0x7fbf355dc313 0x7fbf54db1609 0x7fbf54cd6163


In [9]:
template = """
|    Precision   | Inference Time(s) |
|      FP32      |       {:5.2f}       |
|      INT8      |       {:5.2f}       |
| Improvement(%) |       {:5.2f}       |
"""
summary = template.format(
    infer_time_model_basic,
    infer_time_model_quantized,
    (1 - infer_time_model_quantized /infer_time_model_basic) * 100
)
print(summary)


|    Precision   | Inference Time(s) |
|      FP32      |        5.21       |
|      INT8      |        1.07       |
| Improvement(%) |       79.46       |

