# Test the Performance of Single Image vs Batch Inference

* https://github.com/ultralytics/ultralytics/issues/1310#issuecomment-1607435659
* https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html#using-profiler-to-analyze-memory-consumption

In [32]:
from ultralytics import YOLO
import time
import torch
from torch.profiler import profile, record_function, ProfilerActivity
from dataclasses import dataclass

model = YOLO('yolov8n-seg.yaml')

In [33]:
torch.cuda.is_available()

True

In [34]:
import numpy as np
for i in [2,4,8,16,32,256]:
    data = [np.random.rand(640, 640, 3).astype(np.float32) for n in range(i)]
    start_time = time.time()
    results = model.predict(data, verbose=False, show=False, stream=False, device='cuda')
    print("--- Batched (%d images) totalling %.2f seconds .. average of %.2f seconds per image---" % (i, time.time() - start_time, (time.time() - start_time)/i))
    data = [np.random.rand(640, 640, 3).astype(np.float32) for n in range(i)]
    start_time = time.time()
    for j in data:
        results = model.predict(j, verbose=False, show=False, stream=False, device='cuda')
    print("vs- Looped  (%d images) totalling %.2f seconds .. average of %.2f seconds per image---" % (i, time.time() - start_time, (time.time() - start_time)/i))
    print("---"*20)

--- Batched (2 images) totalling 0.08 seconds .. average of 0.04 seconds per image---
vs- Looped  (2 images) totalling 0.02 seconds .. average of 0.01 seconds per image---
------------------------------------------------------------
--- Batched (4 images) totalling 0.02 seconds .. average of 0.01 seconds per image---
vs- Looped  (4 images) totalling 0.03 seconds .. average of 0.01 seconds per image---
------------------------------------------------------------
--- Batched (8 images) totalling 0.04 seconds .. average of 0.01 seconds per image---
vs- Looped  (8 images) totalling 0.06 seconds .. average of 0.01 seconds per image---
------------------------------------------------------------
--- Batched (16 images) totalling 0.08 seconds .. average of 0.01 seconds per image---
vs- Looped  (16 images) totalling 0.12 seconds .. average of 0.01 seconds per image---
------------------------------------------------------------
--- Batched (32 images) totalling 0.16 seconds .. average of 0.00 