## BigDL-Nano ONNX Runtime example
--- 
This example shows how to use BigDL-Nano to accelerate PyTorch inference with ONNX Runtime (OpenVINO and INT8 quantization are shown as well for comparison).

In [None]:
import os
from time import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from pl_bolts.datamodules import CIFAR10DataModule
from pl_bolts.transforms.dataset_normalizations import cifar10_normalization
from pytorch_lightning import LightningModule, seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.optim.lr_scheduler import OneCycleLR
from torchmetrics.functional import accuracy
from bigdl.nano.pytorch.trainer import Trainer
from bigdl.nano.pytorch.vision import transforms
import numpy as np
import pdb

### CIFAR10 Data Module
---
Import the existing data module from bolts and modify the train and test transforms.
You could access [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) for a view of the whole dataset.

In [None]:
def prepare_data(data_path, batch_size, num_workers):
    """Build the CIFAR10 data module used for training and evaluation.

    Training images go through random cropping and random horizontal
    flipping before tensor conversion and CIFAR10 normalization; validation
    and test images are only converted and normalized (both splits share the
    same transform object).

    :param data_path: directory passed to ``CIFAR10DataModule`` as ``data_dir``.
    :param batch_size: batch size forwarded to the data module.
    :param num_workers: worker count forwarded to the data module.
    :return: the configured ``CIFAR10DataModule``.
    """
    # Augmentation is applied to the training split only.
    augmentation = [
        transforms.RandomCrop(32, 4),
        transforms.RandomHorizontalFlip(),
    ]
    train_pipeline = transforms.Compose(
        augmentation + [transforms.ToTensor(), cifar10_normalization()]
    )
    eval_pipeline = transforms.Compose(
        [transforms.ToTensor(), cifar10_normalization()]
    )

    return CIFAR10DataModule(
        data_dir=data_path,
        batch_size=batch_size,
        num_workers=num_workers,
        train_transforms=train_pipeline,
        val_transforms=eval_pipeline,
        test_transforms=eval_pipeline,
    )

### Resnet
___
Modify the pre-existing Resnet architecture from TorchVision. The pre-existing architecture is based on ImageNet images (224x224) as input. So we need to modify it for CIFAR10 images (32x32).

In [None]:
def create_model():
    """Return a torchvision ResNet-18 with a 10-class head, adapted to small inputs.

    The network is created without pretrained weights. Its first convolution
    is replaced by a bias-free 3x3 / stride 1 / padding 1 convolution and its
    max-pool layer by an identity module.

    :return: the modified ``torchvision`` ResNet-18 module.
    """
    net = torchvision.models.resnet18(pretrained=False, num_classes=10)
    # Replace the stem convolution and drop the max-pool.
    stem = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    net.conv1 = stem
    net.maxpool = nn.Identity()
    return net

### Lightning Module
___
Check out the [configure_optimizers](https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#configure-optimizers) method to use custom Learning Rate schedulers. The OneCycleLR with SGD will get you to around 92-93% accuracy in 20-30 epochs and 93-94% accuracy in 40-50 epochs. Feel free to experiment with different LR schedules from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate

In [None]:
class LitResnet(LightningModule):
    """LightningModule wrapping the ResNet returned by ``create_model``.

    ``forward`` returns log-probabilities; the training, validation and test
    steps all use the negative log-likelihood loss on them.

    :param learning_rate: learning rate handed to the SGD optimizer.
    :param batch_size: batch size of the training loader. It is only used to
        convert the per-epoch sample count into a per-epoch batch count for
        the OneCycleLR schedule. Defaults to 64.
    :raises ValueError: if ``batch_size`` is not positive.
    """

    def __init__(self, learning_rate=0.05, batch_size=64):
        super().__init__()

        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        # Exposes the constructor arguments as self.hparams.<name>.
        self.save_hyperparameters()
        self.model = create_model()
        # torch.Tensor(64, 3, 32, 32) would hand out uninitialized memory
        # (possibly NaN/inf); a zero tensor of the same shape and dtype keeps
        # the example input deterministic.
        self.example_input_array = torch.zeros(64, 3, 32, 32)

    def forward(self, x):
        """Return log-softmax scores over the class dimension (dim 1)."""
        out = self.model(x)
        return F.log_softmax(out, dim=1)

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the NLL loss of one batch."""
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        self.log("train_loss", loss)
        return loss

    def evaluate(self, batch, stage=None):
        """Compute loss and accuracy for one batch and log them when ``stage`` is set.

        :param batch: an ``(inputs, targets)`` pair.
        :param stage: prefix for the logged metric names
            (``{stage}_loss`` / ``{stage}_acc``); nothing is logged when falsy.
        """
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)

        if stage:
            self.log(f"{stage}_loss", loss, prog_bar=True)
            self.log(f"{stage}_acc", acc, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` and ``val_acc`` for one validation batch."""
        self.evaluate(batch, "val")

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` and ``test_acc`` for one test batch."""
        self.evaluate(batch, "test")

    def configure_optimizers(self):
        """Return SGD (momentum 0.9, weight decay 5e-4) with a per-step OneCycleLR schedule."""
        optimizer = torch.optim.SGD(
            self.parameters(),
            lr=self.hparams.learning_rate,
            momentum=0.9,
            weight_decay=5e-4,
        )
        # The scheduler is stepped once per batch ("interval": "step"), so
        # steps_per_epoch must be a number of batches, not of samples. Using
        # the raw sample count stretched the cycle far beyond the actual run.
        # 45000 is the sample count hard-coded by this example
        # (NOTE(review): presumably the CIFAR10 training split - confirm
        # against the data module's validation split).
        # Rounding up keeps the schedule at least as long as the run, so
        # OneCycleLR is never stepped past its total number of steps.
        batch_size = self.hparams.batch_size
        steps_per_epoch = (45000 + batch_size - 1) // batch_size
        scheduler_dict = {
            "scheduler": OneCycleLR(
                optimizer,
                0.1,
                epochs=self.trainer.max_epochs,
                steps_per_epoch=steps_per_epoch,
            ),
            "interval": "step",
        }
        return {"optimizer": optimizer, "lr_scheduler": scheduler_dict}

In [None]:
# Fix the seed before any model or data object is created.
seed_everything(7)
# Dataset location: taken from the PATH_DATASETS environment variable,
# falling back to the current directory.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
BATCH_SIZE = 64
# Use half of the available CPUs as loader workers. os.cpu_count() returns
# None when the count cannot be determined, so fall back to 1 instead of
# failing on None / 2.
NUM_WORKERS = (os.cpu_count() or 1) // 2
data_module = prepare_data(PATH_DATASETS, BATCH_SIZE, NUM_WORKERS)

In [None]:
# Train the FP32 model, then record its test accuracy and a baseline
# inference time; the summary tables in later cells read `fp32_acc` and
# `infer_time`.
pl_model = LitResnet(learning_rate=0.05)
pl_model.datamodule = data_module
trainer = Trainer(num_processes=1,
                  use_ipex=False,
                  progress_bar_refresh_rate=10,
                  max_epochs=30,
                  logger=TensorBoardLogger("lightning_logs/", name="basic"),
                  callbacks=[LearningRateMonitor(logging_interval="step")])
trainer.fit(pl_model, datamodule=data_module)

# trainer.test returns one metrics dict per test dataloader; 'test_acc' is
# the key logged by LitResnet.test_step.
outputs = trainer.test(pl_model, datamodule=data_module)
fp32_acc = outputs[0]['test_acc'] * 100

# Baseline: time one plain PyTorch pass over the test set, mirroring the
# timing loops used for the accelerated models. eval() + no_grad() keep the
# pass inference-only (no BatchNorm statistics update, no autograd graph).
pl_model.eval()
start = time()
with torch.no_grad():
    for x, _ in data_module.test_dataloader():
        inference_res = pl_model(x)
infer_time = time() - start

### Get Accelerated Module
---
Use Trainer.trace from bigdl.nano.pytorch.trainer to convert a model into an accelerated module for inference.
The definition of trace is:
```
trace(model: nn.Module, input_sample=None, accelerator=None)

      :param model: A torch.nn.Module model, including pl.LightningModule.
      
      :param input_sample: A set of inputs for trace, defaults to None if you have traced before or
                             model is a LightningModule with an example_input_array.
                             
      :param accelerator: The accelerator to use, defaults to None meaning staying in Pytorch
                            backend. 'openvino' and 'onnxruntime' are supported for now.
                            
      :return: Model with different acceleration(OpenVINO/ONNX Runtime).
```
- *Note* <br>
trace is a class method. You should use your Trainer class to call it instead of the Trainer instance.

In [None]:
# Trace the trained model with the ONNX Runtime backend and time one pass
# over the test set. The loop iterates the data module's test loader, the
# same source the OpenVINO and INT8 timing loops use; the previously
# referenced `pred_loader` is not defined anywhere in this notebook.
onnx_model = Trainer.trace(pl_model, accelerator="onnxruntime")
start = time()
for x, _ in data_module.test_dataloader():
    inference_res_onnx = onnx_model(x)
onnx_infer_time = time() - start

In [None]:
# Trace the trained model with the OpenVINO backend, then measure the
# wall-clock time of one pass over the test loader.
openvino_model = Trainer.trace(pl_model, accelerator="openvino")

start = time()
for batch in data_module.test_dataloader():
    x, _ = batch  # labels are not needed for timing
    inference_res_openvino = openvino_model(x)
openvino_infer_time = time() - start

In [None]:
# Inference-time comparison table: one row per backend, times in seconds.
# The joined rows start and end with an empty entry so the rendered text
# begins and ends with a newline.
template = "\n".join([
    "",
    "|    Precision   | Inference Time(s) |",
    "|     Pytorch    |       {:5.2f}       |",
    "|      ONNX      |       {:5.2f}       |",
    "|    Openvino    |       {:5.2f}       |",
    "",
])
summary = template.format(infer_time, onnx_infer_time, openvino_infer_time)
print(summary)

### Calibrate Model
Use Trainer.quantize from bigdl.nano.pytorch.trainer to calibrate a PyTorch-Lightning model for post-training quantization. Here are some important parameters:
```
:param pl_model:         A Pytorch-Lightning model to be quantized.
:param calib_dataloader:         A torch.utils.data.dataloader.DataLoader object for calibration.     
                                 Required for static quantization.
:param approach:         'static' or 'dynamic'.
                         'static': post_training_static_quant,
                         'dynamic': post_training_dynamic_quant.
                          Default: 'static'.

```
Access more details from [Source](https://github.com/intel-analytics/BigDL/blob/main/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py)

In [None]:
# Quantize the trained model, using the test loader for calibration.
i8_model = trainer.quantize(pl_model, calib_dataloader=data_module.test_dataloader())

# Wall-clock time of one pass of the quantized model over the test loader.
start = time()
for batch in data_module.test_dataloader():
    x, _ = batch  # labels are not needed for timing
    inference_res_i8 = i8_model(x)
i8_inference_time = time() - start

# Accuracy of the quantized model on the test set, as a percentage.
outputs = trainer.test(i8_model, datamodule=data_module)
i8_metrics = outputs[0]
i8_acc = 100 * i8_metrics['test_acc']

In [None]:
# FP32 vs INT8 comparison table. The last row reports the relative
# inference-time reduction (in percent) and the accuracy difference
# (INT8 minus FP32).
template = "\n".join([
    "",
    "|    Precision   | Inference Time(s) | Accuracy(%) |",
    "|      FP32      |       {:5.2f}       |    {:5.2f}    |",
    "|      INT8      |       {:5.2f}       |    {:5.2f}    |",
    "| Improvement(%) |       {:5.2f}       |    {:5.2f}    |",
    "",
])
time_gain = (1 - i8_inference_time / infer_time) * 100
acc_delta = i8_acc - fp32_acc
summary = template.format(
    infer_time, fp32_acc,
    i8_inference_time, i8_acc,
    time_gain, acc_delta,
)
print(summary)