In [1]:
from models.common import DetectMultiBackend
from utils.general import non_max_suppression
from utils.torch_utils import smart_inference_mode

import numpy as np
import torch
from typing import List

In [13]:
class Inference:
    """Callable wrapper that runs a detection model on a batch of images.

    An instance loads the model once via ``DetectMultiBackend`` and, when
    called, converts a list of images into a single tensor batch, runs the
    model and filters the raw predictions with ``non_max_suppression``.
    """

    def __init__(
        self,
        model_path: str,
        config_path: str,
        device: torch.device,
        conf_thres=0.25,
        iou_thres=0.45,
        max_det=1000,
    ):
        """Load the model and remember the post-processing thresholds.

        Args:
            model_path: Weights location, forwarded to ``DetectMultiBackend``.
            config_path: Forwarded to ``DetectMultiBackend`` as ``data``
                (presumably the dataset YAML with class names; confirm).
            device: Device the model is loaded on and inputs are moved to.
            conf_thres: Confidence threshold handed to ``non_max_suppression``.
            iou_thres: IoU threshold handed to ``non_max_suppression``.
            max_det: ``max_det`` value handed to ``non_max_suppression``.
        """
        self.model = DetectMultiBackend(model_path, device=device, dnn=False, data=config_path, fp16=False)
        self.device = device
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres
        self.max_det = max_det

    @smart_inference_mode()
    def __call__(self, images: List[np.ndarray]):
        """Run detection on ``images`` and return the NMS-filtered predictions.

        Args:
            images: Equally shaped channel-last arrays, e.g. ``(H, W, C)``.
                NOTE(review): the ``/ 255.`` scaling assumes pixel values in
                0-255; channel order expected by the model is not checked
                here — confirm against how the weights were trained.

        Returns:
            Whatever ``non_max_suppression`` returns for the batch (in the
            YOLOv5 helpers, a list with one detections tensor per image), or
            an empty list when ``images`` is empty.

        Raises:
            ValueError: If the stacked images do not form a 4-D batch.
        """
        # np.stack raises on an empty sequence; an empty batch simply has no
        # predictions. len() is used so an ndarray of images works as well.
        if len(images) == 0:
            return []

        batch = np.stack(images)
        if batch.ndim != 4:
            # Without this check the permute below fails with an opaque
            # dimension-mismatch error (e.g. for grayscale (H, W) inputs).
            raise ValueError(
                f"expected images that stack to (N, H, W, C), got batch of shape {batch.shape}"
            )

        # Channel-last -> channel-first, then scale to [0, 1].
        x = torch.from_numpy(batch).float().to(self.device).permute(0, 3, 1, 2) / 255.
        pred = self.model(x, augment=False, visualize=False)
        # Positional None/False follow the original call (presumably the
        # `classes` and `agnostic` parameters — confirm against the helper).
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, None, False, max_det=self.max_det)
        return pred

In [15]:
# Build the detector once; keyword arguments make each value self-describing.
inference = Inference(
    model_path="best.pt",
    config_path="yolov5/data/lizard.yaml",
    device=torch.device("cpu"),
)

Fusing layers... 
Model summary: 212 layers, 20852934 parameters, 0 gradients, 47.9 GFLOPs


In [18]:
# Run the detector on the first ten tiles and display the per-tile results.
# NOTE(review): `tiles` is not defined in any cell visible here — make sure the
# cell that creates it runs earlier on a fresh kernel.
# NOTE(review): every tensor in the recorded output is empty (size (0, 6)), i.e.
# nothing passed the thresholds — confirm the tiles match the input format
# (channel order, value range, tile size) the weights were trained on.
pres = inference(tiles[:10])
pres

[tensor([], size=(0, 6)),
 tensor([], size=(0, 6)),
 tensor([], size=(0, 6)),
 tensor([], size=(0, 6)),
 tensor([], size=(0, 6)),
 tensor([], size=(0, 6)),
 tensor([], size=(0, 6)),
 tensor([], size=(0, 6)),
 tensor([], size=(0, 6)),
 tensor([], size=(0, 6))]

In [17]:
# Inspect the shape of the first tile (recorded output: (256, 256, 3)).
tiles[0].shape

(256, 256, 3)