In [1]:
import sys
sys.path.append('./YOLOv8-test')

### Inference test


In [2]:
import torch
import yaml
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
import time
import os
from tqdm import tqdm
from nets import nn
from utils.util import non_max_suppression
import onnx
import onnxruntime
from deepsparse import Engine
from ultralytics import YOLO

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
global batch_size, class_no
batch_size = 1
class_no = 80
torch.jit.enable_onednn_fusion(True)
def load_custom_model(weights_path, num_classes):
    model = nn.yolo_v8_n(num_classes).cpu()
    ckpt = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(ckpt['model'].float().state_dict(), strict=False)
    model.eval()
    return model.fuse()


def preprocess_images(image_folder, input_size):
    resize_transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
    ])

    image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
    batches = []
    
    preprocess_time = 0  # Timer for pre-processing

    for i in range(0, len(image_files), batch_size):
        batch = []
        for j in range(i, min(i + batch_size, len(image_files))):
            start_time = time.time()
            image = Image.open(os.path.join(image_folder, image_files[j]))
            tensor = resize_transform(image)
            tensor = tensor / 255
            end_time = time.time()
            preprocess_time += end_time - start_time
            
            batch.append(tensor)
        batches.append(torch.stack(batch))
    
    print(f"Preprocessing Time per image ({input_size}x{input_size}): {preprocess_time/batch_size:.4f} seconds")
    return batches

@torch.inference_mode()
def inference_pytorch_cpu(model, batches, num_warmup=5):
    # Warm-up runs
    torch.compile(model)
    for _ in range(num_warmup):
        with torch.no_grad():
            _ = model(batches[0])
    
    total_time = 0
    post_processing_time = 0  # Timer for post-processing
    
    for batch in tqdm(batches, desc="Processing images (PyTorch CPU)"):
        with torch.no_grad():
            start_time = time.time()
            pred = model(batch)
            end_time = time.time()
            inference_time = end_time - start_time
            total_time += inference_time
            
            # Post-processing (if any)
            start_time = time.time()
            pred = non_max_suppression(pred,classes=class_no)
            end_time = time.time()
            post_processing_time += end_time - start_time
            
    avg_inference_time = total_time / batch_size
    avg_post_processing_time = post_processing_time / batch_size
    
    print(f"Average Inference Time (PyTorch CPU): {avg_inference_time:.4f} seconds")
    print(f"Average Post-Processing Time (PyTorch CPU): {avg_post_processing_time:.4f} seconds")
    
    return avg_inference_time, avg_post_processing_time


def inference_onnx(onnx_path, batches, num_warmup=5):
    session = onnxruntime.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])
    input_name = session.get_inputs()[0].name

    # Warm-up runs
    for _ in range(num_warmup):
        _ = session.run(None, {input_name: batches[0].numpy()})

    total_time = 0
    post_processing_time = 0  # Timer for post-processing
    
    for batch in tqdm(batches, desc="Processing images (ONNX)"):
        start_time = time.time()
        pred = session.run(None, {input_name: batch.numpy()})
        end_time = time.time()
        inference_time = end_time - start_time
        total_time += inference_time
        
        # Post-processing (if any)
        start_time = time.time()
        non_max_suppression(torch.tensor(pred[0]),classes=class_no)
        end_time = time.time()
        post_processing_time += end_time - start_time
        
    avg_inference_time = total_time / batch_size
    avg_post_processing_time = post_processing_time / batch_size
    
    print(f"Average Inference Time (ONNX): {avg_inference_time:.4f} seconds")
    print(f"Average Post-Processing Time (ONNX): {avg_post_processing_time:.4f} seconds")
    
    return avg_inference_time, avg_post_processing_time

def inference_deepsparse(onnx_path, batches, num_warmup=5):
    pipe = Engine(onnx_path, batch_size=batch_size)

    for _ in range(num_warmup):
        _ = pipe([batches[0].numpy()])

    total_time = 0
    post_processing_time = 0  # Timer for post-processing
    
    for batch in tqdm(batches, desc="Processing images (DeepSparse)"):
        start_time = time.time()
        pred = pipe([batch.numpy()])
        end_time = time.time()
        inference_time = end_time - start_time
        total_time += inference_time
        
        start_time = time.time()
        non_max_suppression(torch.tensor(pred[0]),classes=class_no)
        end_time = time.time()
        post_processing_time += end_time - start_time
        
    avg_inference_time = total_time / batch_size
    avg_post_processing_time = post_processing_time / batch_size
    
    print(f"Average Inference Time (DeepSparse): {avg_inference_time:.4f} seconds")
    print(f"Average Post-Processing Time (DeepSparse): {avg_post_processing_time:.4f} seconds")
    
    return avg_inference_time, avg_post_processing_time

def inference_ultralytics(model, batches, num_warmup=5):
    # Warm-up runsYOLOv8-test/weights/yolov8_model_slim.onnx

    for _ in range(num_warmup):
        _ = model.model(batches[0])

    total_time = 0
    post_processing_time = 0  # Timer for post-processing
    
    for batch in tqdm(batches, desc="Processing images (DeepSparse)"):
        start_time = time.time()
        pred = model.model(batch)
        end_time = time.time()
        inference_time = end_time - start_time
        total_time += inference_time
        
        start_time = time.time()
        non_max_suppression(torch.tensor(pred[0]),classes=class_no)
        end_time = time.time()
        post_processing_time += end_time - start_time
        
    avg_inference_time = total_time / batch_size
    avg_post_processing_time = post_processing_time / batch_size
    
    print(f"Average Inference Time (Ultralytics.pt): {avg_inference_time:.4f} seconds")
    print(f"Average Post-Processing Time (Ultralytics.pt): {avg_post_processing_time:.4f} seconds")
    
    return avg_inference_time, avg_post_processing_time

def run_comparisons(input_size):
    print(f"\nRunning comparisons for {input_size}x{input_size} images:")
    
    # Prepare data
    batches = preprocess_images('./datasets/coco128/images/train2017', input_size)

    # Your custom YOLOv8 implementations
    dummy_input = torch.randn(1, 3, input_size, input_size)  
    custom_model = load_custom_model('./YOLOv8-test/weights/v8_n(1).pt', class_no)

    onnx_path_custom = f'./YOLOv8-test/weights/yolov8_custom_{input_size}.onnx'
    torch.onnx.export(custom_model, 
                  dummy_input, 
                  onnx_path_custom,
                  opset_version=13,
                  input_names=['input'],
                  output_names=['output'],
                  dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})
    
    # Ultralytics YOLOv8
    ultralytics_model = YOLO('yolov8n.pt')
    ultralytics_model.fuse()
    ultralytics_model.export(format="onnx", batch=1, imgsz=input_size,opset=13)

    # Run inferences with warm-up
    num_warmup = 10  # Number of warm-up runs
    pytorch_cpu_time = inference_pytorch_cpu(custom_model, batches, num_warmup)
    onnx_time = inference_onnx(onnx_path_custom, batches, num_warmup)
    deepsparse_time = inference_deepsparse(onnx_path_custom, batches, num_warmup)
    ultralytics_time = inference_ultralytics(ultralytics_model, batches, num_warmup)
    ultralytics_onnx_time = inference_onnx(f'./yolov8n.onnx', batches, num_warmup)
    ultralytics_deepsparse = inference_deepsparse(f'./yolov8n.onnx', batches, num_warmup)

    return {
        'pytorch_cpu': pytorch_cpu_time,
        'onnx': onnx_time,
        'deepsparse': deepsparse_time,
        'ultralytics': ultralytics_time,
        'ultralytics_onnx': ultralytics_onnx_time,
        'ultralytics_deepsparse': ultralytics_deepsparse
    }

def main():
    with open('./YOLOv8-test/utils/args.yaml', errors='ignore') as f:
        params = yaml.safe_load(f)

    results_1280 = run_comparisons(1280)
    results_640 = run_comparisons(640)
    results_256 = run_comparisons(256)
    

    print("\nComparison Results:")
    print("640x640 Images:")
    for model, times in results_640.items():
        print(f"{model}: Inference Time = {times[0]:.4f}s, Post-processing Time = {times[1]:.4f}s")
    
    print("\n256x256 Images:")
    for model, times in results_256.items():
        print(f"{model}: Inference Time = {times[0]:.4f}s, Post-processing Time = {times[1]:.4f}s")

if __name__ == "__main__":
    main()  


Running comparisons for 1280x1280 images:
Preprocessing Time per image (1280x1280): 2.4218 seconds


  ckpt = torch.load(weights_path, map_location='cpu')
  return torch.load(file, map_location='cpu'), file  # load
YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs
Ultralytics YOLOv8.0.124 🚀 Python-3.11.9 torch-2.4.0+cu124 CPU

[34m[1mPyTorch:[0m starting from yolov8n.pt with input shape (1, 3, 1280, 1280) BCHW and output shape(s) (1, 84, 33600) (6.2 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 13...
[34m[1mONNX:[0m export success ✅ 0.4s, saved as yolov8n.onnx (12.7 MB)

Export complete (0.8s)
Results saved to [1m/home/muhammadfasi/Downloads/YOLOV8[0m
Predict:         yolo predict task=detect model=yolov8n.onnx imgsz=1280 
Validate:        yolo val task=detect model=yolov8n.onnx imgsz=1280 data=coco.yaml 
Visualize:       https://netron.app
Processing images (PyTorch CPU): 100%|██████████| 127/127 [00:15<00:00,  8.04it/s]


Average Inference Time (PyTorch CPU): 15.6868 seconds
Average Post-Processing Time (PyTorch CPU): 0.0290 seconds


Processing images (ONNX): 100%|██████████| 127/127 [00:19<00:00,  6.46it/s]


Average Inference Time (ONNX): 17.0578 seconds
Average Post-Processing Time (ONNX): 2.5141 seconds


Processing images (DeepSparse): 100%|██████████| 127/127 [00:06<00:00, 18.83it/s]


Average Inference Time (DeepSparse): 6.5325 seconds
Average Post-Processing Time (DeepSparse): 0.1813 seconds


Processing images (DeepSparse): 100%|██████████| 127/127 [00:14<00:00,  8.59it/s]


Average Inference Time (Ultralytics.pt): 14.5750 seconds
Average Post-Processing Time (Ultralytics.pt): 0.1403 seconds


Processing images (ONNX): 100%|██████████| 127/127 [00:18<00:00,  6.94it/s]


Average Inference Time (ONNX): 16.6907 seconds
Average Post-Processing Time (ONNX): 1.5047 seconds


Processing images (DeepSparse): 100%|██████████| 127/127 [00:06<00:00, 18.72it/s]


Average Inference Time (DeepSparse): 6.5735 seconds
Average Post-Processing Time (DeepSparse): 0.1786 seconds

Running comparisons for 640x640 images:
Preprocessing Time per image (640x640): 0.4791 seconds


  ckpt = torch.load(weights_path, map_location='cpu')
  return torch.load(file, map_location='cpu'), file  # load
YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs
Ultralytics YOLOv8.0.124 🚀 Python-3.11.9 torch-2.4.0+cu124 CPU

[34m[1mPyTorch:[0m starting from yolov8n.pt with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 13...
[34m[1mONNX:[0m export success ✅ 0.3s, saved as yolov8n.onnx (12.2 MB)

Export complete (0.3s)
Results saved to [1m/home/muhammadfasi/Downloads/YOLOV8[0m
Predict:         yolo predict task=detect model=yolov8n.onnx imgsz=640 
Validate:        yolo val task=detect model=yolov8n.onnx imgsz=640 data=coco.yaml 
Visualize:       https://netron.app
Processing images (PyTorch CPU): 100%|██████████| 127/127 [00:03<00:00, 33.31it/s]


Average Inference Time (PyTorch CPU): 3.7760 seconds
Average Post-Processing Time (PyTorch CPU): 0.0175 seconds


Processing images (ONNX): 100%|██████████| 127/127 [00:05<00:00, 24.18it/s]


Average Inference Time (ONNX): 3.8394 seconds
Average Post-Processing Time (ONNX): 1.3650 seconds


Processing images (DeepSparse): 100%|██████████| 127/127 [00:02<00:00, 51.27it/s]


Average Inference Time (DeepSparse): 2.4012 seconds
Average Post-Processing Time (DeepSparse): 0.0647 seconds


Processing images (DeepSparse): 100%|██████████| 127/127 [00:03<00:00, 39.20it/s]


Average Inference Time (Ultralytics.pt): 3.1928 seconds
Average Post-Processing Time (Ultralytics.pt): 0.0320 seconds


Processing images (ONNX): 100%|██████████| 127/127 [00:05<00:00, 22.54it/s]


Average Inference Time (ONNX): 3.8457 seconds
Average Post-Processing Time (ONNX): 1.7374 seconds


Processing images (DeepSparse): 100%|██████████| 127/127 [00:02<00:00, 48.12it/s]


Average Inference Time (DeepSparse): 2.5676 seconds
Average Post-Processing Time (DeepSparse): 0.0598 seconds

Running comparisons for 256x256 images:
Preprocessing Time per image (256x256): 0.3121 seconds


  ckpt = torch.load(weights_path, map_location='cpu')
  return torch.load(file, map_location='cpu'), file  # load
YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs
Ultralytics YOLOv8.0.124 🚀 Python-3.11.9 torch-2.4.0+cu124 CPU

[34m[1mPyTorch:[0m starting from yolov8n.pt with input shape (1, 3, 256, 256) BCHW and output shape(s) (1, 84, 1344) (6.2 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 13...
[34m[1mONNX:[0m export success ✅ 0.2s, saved as yolov8n.onnx (12.1 MB)

Export complete (0.3s)
Results saved to [1m/home/muhammadfasi/Downloads/YOLOV8[0m
Predict:         yolo predict task=detect model=yolov8n.onnx imgsz=256 
Validate:        yolo val task=detect model=yolov8n.onnx imgsz=256 data=coco.yaml 
Visualize:       https://netron.app
Processing images (PyTorch CPU): 100%|██████████| 127/127 [00:01<00:00, 89.89it/s]


Average Inference Time (PyTorch CPU): 1.3930 seconds
Average Post-Processing Time (PyTorch CPU): 0.0104 seconds


Processing images (ONNX): 100%|██████████| 127/127 [00:03<00:00, 35.35it/s]


Average Inference Time (ONNX): 1.1541 seconds
Average Post-Processing Time (ONNX): 2.4086 seconds


Processing images (DeepSparse): 100%|██████████| 127/127 [00:01<00:00, 78.83it/s]


Average Inference Time (DeepSparse): 1.5794 seconds
Average Post-Processing Time (DeepSparse): 0.0244 seconds


Processing images (DeepSparse): 100%|██████████| 127/127 [00:01<00:00, 98.03it/s]


Average Inference Time (Ultralytics.pt): 1.2745 seconds
Average Post-Processing Time (Ultralytics.pt): 0.0151 seconds


Processing images (ONNX): 100%|██████████| 127/127 [00:03<00:00, 35.65it/s]


Average Inference Time (ONNX): 1.2994 seconds
Average Post-Processing Time (ONNX): 2.2389 seconds


Processing images (DeepSparse): 100%|██████████| 127/127 [00:01<00:00, 70.47it/s]

Average Inference Time (DeepSparse): 1.7650 seconds
Average Post-Processing Time (DeepSparse): 0.0296 seconds

Comparison Results:
640x640 Images:
pytorch_cpu: Inference Time = 3.7760s, Post-processing Time = 0.0175s
onnx: Inference Time = 3.8394s, Post-processing Time = 1.3650s
deepsparse: Inference Time = 2.4012s, Post-processing Time = 0.0647s
ultralytics: Inference Time = 3.1928s, Post-processing Time = 0.0320s
ultralytics_onnx: Inference Time = 3.8457s, Post-processing Time = 1.7374s
ultralytics_deepsparse: Inference Time = 2.5676s, Post-processing Time = 0.0598s

256x256 Images:
pytorch_cpu: Inference Time = 1.3930s, Post-processing Time = 0.0104s
onnx: Inference Time = 1.1541s, Post-processing Time = 2.4086s
deepsparse: Inference Time = 1.5794s, Post-processing Time = 0.0244s
ultralytics: Inference Time = 1.2745s, Post-processing Time = 0.0151s
ultralytics_onnx: Inference Time = 1.2994s, Post-processing Time = 2.2389s
ultralytics_deepsparse: Inference Time = 1.7650s, Post-proces




In [3]:
pip install onnxruntime

Note: you may need to restart the kernel to use updated packages.


**Heatmap**

In [None]:
import torch
import yaml
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
import time
import os
from tqdm import tqdm
from nets import nn
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt

def load_custom_model(weights_path, num_classes):
    model = nn.yolo_v8_n(num_classes).cuda()
    ckpt = torch.load(weights_path, map_location='cuda')
    model.load_state_dict(ckpt['model'].float().state_dict())
    model.eval()
    return model.fuse().half()

def preprocess_images(image_folder, input_size=640, batch_size=1):
    resize_transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor()
    ])
    
    image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
    batches = []
    
    for i in range(64, min(74, len(image_files)), batch_size):
        batch = []
        for j in range(i, min(i + batch_size, len(image_files))):
            if j == 62:
                continue
            image = Image.open(os.path.join(image_folder, image_files[j]))
            tensor = resize_transform(image)
            batch.append(tensor.half())
        
        batches.append(torch.stack(batch).cuda())
    return batches

def create_heatmap(feature_map, writer, layer_name, global_step):
    feature_map = feature_map.squeeze().cpu().numpy().mean(axis=0)
    
    plt.figure(figsize=(10, 10))
    plt.imshow(feature_map, cmap='hot', interpolation='nearest')
    plt.colorbar()
    plt.title(f'Heatmap of {layer_name}')
    plt.axis('off')
    
    # Save the figure as a static image
    output_dir = '/kaggle/working/heatmaps'
    os.makedirs(output_dir, exist_ok=True)
    plt.savefig(f'{output_dir}/{layer_name}_{global_step}.png')
    
    # Log to TensorBoard
    writer.add_figure(f'Heatmap/{layer_name}', plt.gcf(), global_step)
    plt.close()

def inference(params):
    writer = SummaryWriter('/kaggle/working/tensorboard_logs')
    
    custom_model = load_custom_model('/kaggle/input/yolov8/pytorch/default/1/v8_n(1).pt', len(params['names']))
    
    batches = preprocess_images('/kaggle/input/coco128/coco128/images/train2017')
    
    # Warmup
    for _ in range(5):
        custom_model(torch.randn(1, 3, 640, 640).cuda().half())
    
    # Define hooks
    activation = {}
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output
        return hook

    # Register hooks
    custom_model.net.p1.register_forward_hook(get_activation('p1'))
    custom_model.net.p2.register_forward_hook(get_activation('p2'))
    custom_model.net.p3.register_forward_hook(get_activation('p3'))
    custom_model.net.p4.register_forward_hook(get_activation('p4'))
    custom_model.net.p5.register_forward_hook(get_activation('p5'))
    
    global_step = 0
    for batch in tqdm(batches, desc="Processing images"):
        with torch.no_grad():
            # Forward pass
            custom_model(batch)
            
            # Create heatmaps for each backbone layer output
            for layer_name in ['p1', 'p2', 'p3', 'p4', 'p5']:
                create_heatmap(activation[layer_name], writer, f'Backbone_{layer_name.upper()}', global_step)
        
        global_step += 1
    
    writer.close()

def main():
    with open('/kaggle/working/YOLOv8-pt/utils/args.yaml', errors='ignore') as f:
        params = yaml.safe_load(f)
    
    inference(params)

if __name__ == "__main__":
    main()

In [None]:
%load_ext tensorboard


In [None]:
import argparse
import torch
import yaml
from ultralytics import YOLO
from PIL import Image, ImageDraw
import numpy as np
import torchvision.transforms as transforms

from nets import nn
from utils import util

def load_custom_model(weights_path, num_classes):
    model = nn.yolo_v8_n(num_classes).cuda()
    ckpt = torch.load(weights_path, map_location='cuda')
    model.load_state_dict(ckpt['model'].float().state_dict())
    model  # Use half precision
    model.eval()
    return model

def plot_boxes(image, boxes, color, label):
    draw = ImageDraw.Draw(image)
    for box in boxes:
        draw.rectangle(box, outline=color, width=2)
        draw.text((box[0], box[1]), label, fill=color)
    return image

def inference(params):
    # Load models
    custom_model = load_custom_model('/kaggle/input/yolov8/pytorch/default/1/v8_n(1).pt', len(params['names']))
    yolov8n_model = YOLO('yolov8n.pt')
    # Load image
    image = Image.open('/kaggle/input/testin2/image.png')
    original_size = image.size
    
    # Resize image
    input_size = 640  # Standard input size for YOLOv8
    resize_transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor()
    ])
    
    # Resize for custom model
    custom_input = resize_transform(image).unsqueeze(0).cuda()
    
    # Resize for YOLOv8n (it expects a numpy array)
    yolov8n_input = resize_transform(image).permute(1, 2, 0).numpy()
    
    # Inference with custom model
    custom_output = custom_model(custom_input)
    custom_results = util.non_max_suppression(custom_output, 0.25, 0.7)

    # Inference with YOLOv8n
    yolov8n_results = yolov8n_model.predict(custom_input)

    # Print results
    print("Custom Model Results:")
    print(custom_results)
    
    print("\nYOLOv8n Results:")
    print(yolov8n_results[0].boxes)

    # Plot results on original image
    result_image = image.copy()

    # Plot custom model results
    for det in custom_results[0]:
        box = det[:4].detach().cpu().numpy()
        # Rescale box to original image size
        box[0::2] *= original_size[0] / input_size
        box[1::2] *= original_size[1] / input_size
        result_image = plot_boxes(result_image, [box], "red", "Custom")

    # Plot YOLOv8n results
    for box in yolov8n_results[0].boxes.xyxy:
        box = box.cpu().numpy()
        # Rescale box to original image size
        box[0::2] *= original_size[0] / input_size
        box[1::2] *= original_size[1] / input_size
        result_image = plot_boxes(result_image, [box], "blue", "YOLOv8n")

    # Save the result image
    result_image.save('/kaggle/working/result.png')
    print("Result image saved as 'result.png'")

def main():
    parser = argparse.ArgumentParser()

    with open('/kaggle/working/YOLOv8-pt/utils/args.yaml', errors='ignore') as f:
        params = yaml.safe_load(f)
    inference(params)

if __name__ == "__main__":
    main()