# AWS Neuron compilation on Yolov8

This notebook shows how to compile a YOLOv8 PyTorch model for AWS Inferentia (Inf1 instances) using the AWS Neuron SDK.

Reference: 
- Model Prediction with Ultralytics YOLO
    - https://docs.ultralytics.com/modes/predict/

In [1]:
%load_ext autoreload
%autoreload 2

import sys, os
# Report where the notebook is running, then add the parent directory to the
# import search path so that modules located there can be imported.
current_dir = os.getcwd()
print(current_dir)
parent_dir = os.path.abspath("..")
sys.path.append(parent_dir)

# for i in sys.path:
#     print(i)


/home/ubuntu/lab/03-yolo8-inf1/notebook


## 1. Neuron Compilation using the Native Neuron SDK

### Load yolo8 model using ultralytics Lib

In [2]:
from ultralytics import YOLO

# Load the checkpoint at ../model/yolov8n.pt through the Ultralytics YOLO
# wrapper, with the task explicitly set to "detect".
model = YOLO("../model/yolov8n.pt", task="detect")


  from .autonotebook import tqdm as notebook_tqdm


### Compile pytorch model to neuron model
- If this cell raises an error, skip it.

In [None]:
from utils.local_util import * 

In [4]:
import torch
import torch_neuron
import os

# Paths of the source PyTorch checkpoint and of the cached Neuron-compiled model
pt_model_path = '../model/yolov8n.pt'
neuron_model_path = "../model/traced_yolo8_model_neuron.pt"

# generate dummy input example (all ones) used both for tracing and for the smoke test below
batch_sizes = 1
input_shape = (batch_sizes, 3, 640, 640)
inputs_example = torch.ones(input_shape)  # or numpy array for TF, MX
print("input example shape: ", inputs_example.shape)


if os.path.exists(neuron_model_path):
    # A compiled artifact already exists: reuse it instead of re-tracing
    neuron_model = load_neuron_model(neuron_model_path)
    print(f"Loaded existing model from {neuron_model_path}")
else:
    # Trace the forward pass with the example input.
    # model.model is presumably the torch module wrapped by the YOLO object - TODO confirm.
    neuron_model = torch_neuron.trace(model.model.eval(), inputs_example)
    print(f"Compile and Load model from pytorch model, {pt_model_path}, and neuron model, {neuron_model_path}")
    # Persist the traced model. The previous code passed the undefined name `trace`
    # here, so a fresh compilation failed before anything was written to disk.
    save_neuron_model(model=neuron_model, path=neuron_model_path)
    # Announce the save only after it has actually completed
    print(f"Neuron model is saved at, {neuron_model_path}")

input example shape:  torch.Size([1, 3, 640, 640])
/home/ubuntu/lab/03-yolo8-inf1/model/traced_yolo8_model_neuron.pt is given
Loaded existing model from ../model/traced_yolo8_model_neuron.pt


### Inference on neuron model

##### Inference on dummy data

In [None]:
# Smoke test: feed the dummy example tensor through the Neuron model and
# report how many outputs come back and the shape of the first one.
result_neuron = neuron_model(inputs_example)
output_count = len(result_neuron)
first_output_shape = result_neuron[0].shape
print("result_neuron: ", output_count, ", shape: ", first_output_shape)

result_neuron:  2 , shape:  torch.Size([1, 84, 8400])




##### Inference on the bus image and post-processing

In [6]:
import cv2
import numpy as np
from ultralytics import YOLO

# Preprocess the test image with the project helper; the printed shape shows
# the array layout that is handed to the model.
image_path = "../test_image/bus.jpg"
preprocessed_image, original_size = preprocess_image(image_path)

print("preprocessed_image: ", preprocessed_image.shape)
print("original_size: ", original_size)

# Wrap the numpy array as a torch tensor and run it through the Neuron model
preprocessed_image_torch = torch.from_numpy(preprocessed_image)
result_neuron = neuron_model(preprocessed_image_torch)
first_output = result_neuron[0]
print("result_neuron: ", len(result_neuron), ", shape:", first_output.shape)

# The post-processing helper is given the first output as a numpy array
result_np = first_output.numpy()
print(result_np.shape)

# Keep the helper call as the final expression so the cell displays its return value
post_process_ultralytics(input_image=image_path, outputs=result_np)

preprocessed_image:  (1, 3, 640, 640)
original_size:  (1080, 810)
result_neuron:  2 , shape: torch.Size([1, 84, 8400])
(1, 84, 8400)


[{'class_id': 0,
  'class_name': 'person',
  'confidence': 0.8887587785720825,
  'box': [478.0, 226.0, 84.0, 296.0],
  'scale': 1.6875},
 {'class_id': 0,
  'class_name': 'person',
  'confidence': 0.8807970881462097,
  'box': [210.75, 241.0, 72.5, 266.0],
  'scale': 1.6875},
 {'class_id': 0,
  'class_name': 'person',
  'confidence': 0.8774768114089966,
  'box': [109.25, 236.0, 115.5, 300.0],
  'scale': 1.6875},
 {'class_id': 5,
  'class_name': 'bus',
  'confidence': 0.8459424376487732,
  'box': [97.0, 137.0, 458.0, 322.0],
  'scale': 1.6875},
 {'class_id': 0,
  'class_name': 'person',
  'confidence': 0.4234580993652344,
  'box': [79.875, 326.0, 34.25, 188.0],
  'scale': 1.6875}]

## 2. Compile and inference using ultralytics lib

### Load the PyTorch YOLOv8 model and compile it to a Neuron model

In [7]:
from ultralytics import YOLO

import os

# Source checkpoint and the Neuron artifact expected next to it
pt_model_path = '../model/yolov8n.pt'
neuron_model_path = '../model/yolov8n.neuron'

if not os.path.exists(neuron_model_path):
    # No compiled artifact yet: export the PyTorch checkpoint in the "neuron" format first
    mx = YOLO(pt_model_path)
    mx.export(format="neuron")
    status_message = f"Compile and Load model from pytorch model, {pt_model_path}, and neuron model, {neuron_model_path}"
else:
    # The compiled artifact is already on disk and only needs to be loaded
    status_message = f"Loaded existing model from {neuron_model_path}"

# In both cases the Neuron artifact is loaded as a detection model
m_inf = YOLO(neuron_model_path, task="detect")
print(status_message)



Loaded existing model from ../model/yolov8n.neuron


### inference on neuron model

In [8]:
# Options forwarded to predict(); add show=True here to also display the result
predict_options = {
    "save": True,
    "save_txt": True,
    "save_crop": True,
    "save_conf": True,
    "project": 'result_image',
}
results = m_inf.predict("../test_image/bus.jpg", **predict_options)


Loading ../model/yolov8n.neuron for Neuron (NeuronCore-v1) inference...

image 1/1 /home/ubuntu/lab/03-yolo8-inf1/notebook/../test_image/bus.jpg: 640x640 4 persons, 1 bus, 29.4ms
Speed: 5.7ms preprocess, 29.4ms inference, 74.9ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mresult_image/predict[0m
1 label saved to result_image/predict/labels


### Bounding Box information
Refer to the link 
- [Model Prediction with Ultralytics YOLO](https://docs.ultralytics.com/modes/predict/#working-with-results)

In [None]:
# Print the boxes attribute of every entry in the prediction results
for prediction in results:
    print(prediction.boxes)

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0., 0., 0., 5., 0.])
conf: tensor([0.8909, 0.8833, 0.8779, 0.8442, 0.4408])
data: tensor([[6.7083e+02, 3.8008e+02, 8.0986e+02, 8.7969e+02, 8.9086e-01, 0.0000e+00],
        [2.2162e+02, 4.0706e+02, 3.4353e+02, 8.5626e+02, 8.8332e-01, 0.0000e+00],
        [5.0671e+01, 3.9760e+02, 2.4420e+02, 9.0507e+02, 8.7790e-01, 0.0000e+00],
        [3.1541e+01, 2.3063e+02, 8.0153e+02, 7.7584e+02, 8.4424e-01, 5.0000e+00],
        [4.2298e-01, 5.4981e+02, 5.7900e+01, 8.6834e+02, 4.4076e-01, 0.0000e+00]])
id: None
is_track: False
orig_shape: (1080, 810)
shape: torch.Size([5, 6])
xywh: tensor([[740.3431, 629.8870, 139.0354, 499.6159],
        [282.5750, 631.6615, 121.9024, 449.1991],
        [147.4372, 651.3355, 193.5327, 507.4696],
        [416.5346, 503.2327, 769.9878, 545.2150],
        [ 29.1616, 709.0754,  57.4772, 318.5244]])
xywhn: tensor([[0.9140, 0.5832, 0.1716, 0.4626],
        [0.3489, 0.5849, 0.1505, 0.4159],
        [0.18

### Benchmarking

In [10]:
# # Image path
# image_path = "../test_image/bus.jpg"

# # Run the benchmark
# results = benchmark_inference(m_inf, image_path, 
#                               num_runs=50, num_warmup=10)

# # Print the results
# print(f"Average Inference Time: {results['average_time']:.2f} ms")
# print(f"Standard Deviation: {results['std_dev']:.2f} ms")
# print(f"Min Inference Time: {results['min_time']:.2f} ms")
# print(f"Max Inference Time: {results['max_time']:.2f} ms")

# # Draw a histogram (optional)
# import matplotlib.pyplot as plt

# plt.hist(results['all_times'], bins=20)
# plt.title('Inference Time Distribution')
# plt.xlabel('Time (ms)')
# plt.ylabel('Frequency')
# plt.show()

In [11]:
# print("result_inf2): \n", result)