# Classification example inference with Ryzen AI

This example demonstrates the 5 steps of classification model inference on the embedded Neural Processing Unit (NPU) in your AMD Ryzen AI enabled PC. The steps are as follows:

In [None]:
# Before starting, be sure you've installed the requirements listed in the requirements.txt file:
!python -m pip install -r requirements.txt

### 1. Get Model
Here, we'll use the ResNet-50 model as an example. You may choose any torchvision classification model trained on ImageNet.

In [None]:
import torch
from torchvision.models import resnet50, ResNet50_Weights
from classification_utils import get_directories

# get_directories() yields two values; only the second (the models directory) is used here.
_, models_dir = get_directories()

# Fetch ResNet-50 from torchvision with the "IMAGENET1K_V2" weights and keep it on the CPU.
model = resnet50(weights="IMAGENET1K_V2").to("cpu")

# Serialize the whole module object into the models directory.
torch.save(model, str(models_dir / "resnet50.pt"))


### 2. Export to ONNX

Model inference with Ryzen AI is based on ONNX Runtime. The following code exports the PyTorch model to the ONNX (Open Neural Network Exchange) format. The ONNX file is required as input to the Vitis AI Quantizer.

In [None]:
# Prep for ONNX export: destination path and the tensor names used in the graph.
tmp_model_path = str(models_dir / "resnet50.onnx")
input_names = ['input']
output_names = ['output']

# Axis 0 of every named input/output is exported as a dynamic "batch_size" axis.
dynamic_axes = {
    tensor_name: {0: 'batch_size'}
    for tensor_name in input_names + output_names
}

# Example input (1 x 3 x 224 x 224) handed to the exporter.
dummy_inputs = torch.randn(1, 3, 224, 224)

# Write the ONNX file, embedding the trained parameters.
torch.onnx.export(
    model,
    dummy_inputs,
    tmp_model_path,
    export_params=True,
    opset_version=13,  # Recommended opset
    input_names=input_names,
    output_names=output_names,
    dynamic_axes=dynamic_axes,
)
    

### 3. Quantize Model

Using the static quantization method provided by the Vitis AI Quantizer and providing the newly exported ONNX model, we'll quantize the model to INT8. For more information on this quantization method, see [Vitis AI ONNX Quantization](https://ryzenai.docs.amd.com/en/latest/vai_quant/vai_q_onnx.html).

In [None]:
import os
import torch
import torchvision
from torch.utils.data import DataLoader

import onnx
import onnxruntime
from onnxruntime.quantization import CalibrationDataReader, QuantType, QuantFormat, CalibrationMethod, quantize_static
import vai_q_onnx

# Root folder of the ImageNet data used for calibration.
data_dir = "../../Ryzen-AI-Model-Zoo/dataset/imagenet"

# Source (unquantized) ONNX model and destination for the quantized model.
input_model_path = "models/resnet50.onnx"
output_model_path = "models/resnet50_quantized.onnx"

# Calibration-time preprocessing: resize to 256, center-crop to 224,
# convert to a tensor, then normalize each channel with the given mean/std.
preprocess = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(size=256),
        torchvision.transforms.CenterCrop(size=224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Calibration images are read from the "calib" subfolder of the dataset root.
calib_dir = os.path.join(data_dir, 'calib')
calib_dataset = torchvision.datasets.ImageFolder(
    root=calib_dir,
    transform=preprocess,
)

class ClassificationCalibrationDataReader(CalibrationDataReader):
    """Feed calibration batches to the quantizer one at a time.

    Args:
        calib_dir: Despite its name, this is passed straight to
            ``DataLoader`` and must therefore be a dataset whose items are
            ``(image, label)`` pairs, not a directory path. The name is
            kept for backward compatibility with existing callers.
        batch_size: Number of samples per calibration batch.
    """

    def __init__(self, calib_dir: "torch.utils.data.Dataset", batch_size: int = 1):
        super().__init__()
        # A single pass over the data; the iterator is consumed by get_next().
        self.iterator = iter(DataLoader(calib_dir, batch_size))

    def get_next(self) -> "dict | None":
        """Return the next model input feed, or ``None`` when data is exhausted.

        Returns:
            ``{"input": <numpy batch>}`` for the next batch, or ``None`` once
            the underlying iterator raises ``StopIteration``.

        Raises:
            Any error raised while loading or decoding a batch. These used to
            be swallowed, which silently shortened the calibration set.
        """
        try:
            images, _ = next(self.iterator)
        except StopIteration:
            # Only normal exhaustion ends the stream.
            return None
        return {"input": images.numpy()}


def classification_calibration_reader(calib_dir, batch_size=1):
    """Build a ClassificationCalibrationDataReader for the given data.

    Args:
        calib_dir: Forwarded unchanged as the reader's first argument.
        batch_size: Forwarded as the reader's ``batch_size``.

    Returns:
        The newly constructed ClassificationCalibrationDataReader.
    """
    reader = ClassificationCalibrationDataReader(calib_dir, batch_size=batch_size)
    return reader

# Build the calibration reader over the calibration dataset (default batch size of 1).
dr = classification_calibration_reader(calib_dataset)

# Statically quantize the exported ONNX model, using `dr` to supply calibration
# batches, and write the result to `output_model_path`.
vai_q_onnx.quantize_static(
    input_model_path,
    output_model_path,
    dr,
    quant_format=vai_q_onnx.QuantFormat.QDQ,
    calibrate_method=vai_q_onnx.PowerOfTwoMethod.MinMSE,
    activation_type=vai_q_onnx.QuantType.QUInt8,  # activation quantization type
    weight_type=vai_q_onnx.QuantType.QInt8,  # weight quantization type
    enable_ipu_cnn=True,  # NOTE(review): presumably selects the NPU (IPU) CNN flow; confirm in the vai_q_onnx docs
    extra_options={'ActivationSymmetric': True} 
)
print('Calibrated and quantized model saved at:', output_model_path)

### 4. Model inference on CPU / iGPU / NPU with single image

Now that we have successfully quantized the model, we will use ONNX Runtime to run inference on the CPU, iGPU, and NPU.
A single image is used to compare the execution time on the different processors. The image pre- and post-processing functions are defined below.

In [None]:
import numpy as np 
import json

# display images in notebook
import matplotlib.pyplot as plt
import torchvision.transforms as transforms 
from PIL import Image, ImageDraw, ImageFont

%matplotlib inline

def load_labels(path):
    """Load a JSON file of labels and return it as a NumPy array.

    The file is read as UTF-8 explicitly. Without this, ``open`` falls back
    to the platform default encoding (for example cp1252 on Windows), which
    can mis-decode or reject non-ASCII label text.

    Args:
        path: Path to the JSON file.

    Returns:
        ``numpy.ndarray`` built from the decoded JSON content, so it can be
        indexed with an integer or an array of indices.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    return np.asarray(data)

def preprocess(input):
    """Turn one image into a normalized, batched NumPy array.

    The image is converted to a tensor, resized to 224 x 224, normalized
    per channel with the mean/std below, given a leading batch axis of
    size 1, and returned as a NumPy array.

    Args:
        input: The image to transform (the name shadows the builtin but is
            part of the existing signature).

    Returns:
        NumPy array with a leading batch dimension of 1.
    """
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((224, 224)),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])
    batched = pipeline(input).unsqueeze(0)
    return batched.numpy()
    
def softmax(x):
    """Return the softmax of ``x`` computed over all of its elements.

    The array is flattened first, and the maximum is subtracted before
    exponentiation so large inputs do not overflow.

    Args:
        x: NumPy array of scores (any shape).

    Returns:
        1-D NumPy array of the same total size whose entries sum to 1.
    """
    flat = x.reshape(-1)
    shifted = flat - flat.max()
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=0)

def postprocess(result):
    """Convert raw model output into a flat list of probabilities.

    Args:
        result: Model output; it is converted to a NumPy array first.

    Returns:
        Python list holding the softmax over every element of ``result``.
    """
    scores = np.array(result)
    probabilities = softmax(scores)
    return probabilities.tolist()
# Label names, indexable by predicted class index.
labels = load_labels('data/imagenet-simple-labels.json')

# Force three-channel RGB so the 3-channel normalization in preprocess() also
# works when the image is swapped for a grayscale, RGBA, or CMYK file.
image = Image.open('data/dog.jpg').convert('RGB')

print("Image size: ", image.size)

# Show the image in the notebook without axis ticks.
plt.axis('off')
display_image = plt.imshow(image)

# Model-ready array reused by every inference cell below.
input_data = preprocess(image)

#### CPU Inference

In [None]:
import onnxruntime
import numpy as np
import time

# Specify the path to the quantized ONNX Model
onnx_model_path = "models/resnet50_quantized.onnx"

cpu_options = onnxruntime.SessionOptions()

# Create Inference Session to run the quantized model on the CPU
cpu_session = onnxruntime.InferenceSession(
    onnx_model_path,
    providers=['CPUExecutionProvider'],
    sess_options=cpu_options,
)

# perf_counter() is the high-resolution clock intended for timing short
# durations; time.time() is wall-clock and can be too coarse for millisecond runs.
# NOTE: this times a single (first) run, so it may include one-time start-up cost.
start = time.perf_counter()
cpu_outputs = cpu_session.run(None, {'input': input_data})
end = time.perf_counter()

# Convert raw outputs to probabilities and pick the most likely class.
cpu_results = postprocess(cpu_outputs)
inference_time = np.round((end - start) * 1000, 2)
idx = np.argmax(cpu_results)

print('----------------------------------------')
print('Final top prediction is: ' + labels[idx])
print('----------------------------------------')

print('----------------------------------------')
print('Inference time: ' + str(inference_time) + " ms")
print('----------------------------------------')

# Class indices ordered from most to least probable.
sort_idx = np.flip(np.squeeze(np.argsort(cpu_results)))
print('------------ Top 5 labels are: ----------------------------')
print(labels[sort_idx[:5]])
print('-----------------------------------------------------------')

#### iGPU Inference

We will use the ONNX Runtime DirectML execution provider (EP) to run inference on the AMD Radeon 780M iGPU.

In [None]:
# DML options
dml_options = onnxruntime.SessionOptions()

# Create Inference Session to run the quantized model on the iGPU.
# The session options are now actually handed to the session (they were
# previously created but unused), matching the CPU cell.
dml_session = onnxruntime.InferenceSession(
    onnx_model_path,
    sess_options=dml_options,
    providers=['DmlExecutionProvider'],
    provider_options=[{"device_id": "0"}],
)

# perf_counter() is the high-resolution clock intended for timing short
# durations; time.time() is wall-clock and can be too coarse for millisecond runs.
# NOTE: this times a single (first) run, so it may include one-time start-up cost.
start = time.perf_counter()
dml_outputs = dml_session.run(None, {'input': input_data})
end = time.perf_counter()

# Convert raw outputs to probabilities and pick the most likely class.
dml_results = postprocess(dml_outputs)
inference_time = np.round((end - start) * 1000, 2)
idx = np.argmax(dml_results)

print('----------------------------------------')
print('Final top prediction is: ' + labels[idx])
print('----------------------------------------')

print('----------------------------------------')
print('Inference time: ' + str(inference_time) + " ms")
print('----------------------------------------')

# Class indices ordered from most to least probable.
sort_idx = np.flip(np.squeeze(np.argsort(dml_results)))
print('------------ Top 5 labels are: ----------------------------')
print(labels[sort_idx[:5]])
print('-----------------------------------------------------------')

#### NPU Inference

In [None]:
# Compile and run

# Point to the config file path used for the VitisAI Execution Provider
config_file_path = "./vaip_config.json"
provider_options = [{
    'config_file': config_file_path,
    # NOTE(review): these two flags presumably make the run emit the trace
    # files consumed by the AI Analyzer step below; confirm in the Ryzen AI docs.
    'ai_analyzer_visualization': True,
    'ai_analyzer_profiling': True,
}]

# Create Inference Session to run the quantized model through the VitisAI EP
npu_session = onnxruntime.InferenceSession(
    onnx_model_path,
    providers=['VitisAIExecutionProvider'],
    provider_options=provider_options,
)

# perf_counter() is the high-resolution clock intended for timing short
# durations; time.time() is wall-clock and can be too coarse for millisecond runs.
# NOTE: this times a single (first) run, so it may include one-time start-up cost.
start = time.perf_counter()
npu_outputs = npu_session.run(None, {'input': input_data})
end = time.perf_counter()

# Convert raw outputs to probabilities and pick the most likely class.
npu_results = postprocess(npu_outputs)
inference_time = np.round((end - start) * 1000, 2)
idx = np.argmax(npu_results)

print('----------------------------------------')
print('Final top prediction is: ' + labels[idx])
print('----------------------------------------')

print('----------------------------------------')
print('Inference time: ' + str(inference_time) + " ms")
print('----------------------------------------')

# Class indices ordered from most to least probable.
sort_idx = np.flip(np.squeeze(np.argsort(npu_results)))
print('------------ Top 5 labels are: ----------------------------')
print(labels[sort_idx[:5]])
print('-----------------------------------------------------------')

### 5. Model Analysis on NPU

After NPU inference, several `.json` files are generated by the Ryzen AI tracing tool; these can be opened with the AI Analyzer for further optimization.

In [None]:
!aianalyzer ./ -p 8001