In [40]:
import torchreid
from ultralytics import YOLO
from ultralytics.trackers.utils.matching import linear_assignment
import cv2

import numpy as np

In [41]:
# Appearance-embedding network: later cells call it on each detection crop
# to obtain the feature vector used for cross-camera matching.
extractor = torchreid.utils.FeatureExtractor(model_name='shufflenet_v2_x2_0', device='cuda')

Model: shufflenet_v2_x2_0
- params: 5,344,996
- flops: 379,562,752


In [42]:
# Object detector that supplies the bounding boxes for both camera frames.
# Ultralytics names the detection task "detect"; "detection" is not one of
# its task identifiers.
model = YOLO("yolov9c.pt", task="detect")

# Fuse layers for inference (the printed summary reports the fused model).
model.fuse()

YOLOv9c summary (fused): 384 layers, 25,380,928 parameters, 0 gradients, 102.7 GFLOPs


In [43]:
def load_frame(path, size=(1280, 720)):
    """Read an image from disk and resize it to ``size`` (width, height).

    The frame is returned exactly as OpenCV decodes it (BGR channel order);
    no colour conversion is applied here.

    Raises:
        OSError: if ``cv2.imread`` returns ``None``, which is how OpenCV
            reports a missing or undecodable file.
    """
    frame = cv2.imread(path)
    if frame is None:
        raise OSError(f"cv2.imread could not load {path!r} (missing or unreadable file)")
    return cv2.resize(frame, size)


# Frames stay in BGR: per the Ultralytics predict documentation, numpy inputs
# are expected in OpenCV's BGR layout, so no conversion is done before detection.
im1 = load_frame("images/c1.jpeg")
im2 = load_frame("images/c2.jpeg")

# Run the detector on both camera frames in a single batched call.
anno = model([im1, im2], device=0)

# One row of (x1, y1, x2, y2) box corners per detection, moved to host memory.
preds1 = anno[0].boxes.xyxy.cpu().numpy()
preds2 = anno[1].boxes.xyxy.cpu().numpy()


0: 384x640 4 bottles, 1 tv, 2 mouses, 1 keyboard, 13.8ms
1: 384x640 4 bottles, 1 tv, 1 mouse, 1 keyboard, 13.8ms
Speed: 1.0ms preprocess, 13.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


In [50]:
%%time
def extract_features(image, boxes):
    """Return one L2-normalised appearance embedding per bounding box.

    Args:
        image: BGR frame (as produced by ``cv2.imread``) the boxes refer to.
        boxes: iterable of ``(x1, y1, x2, y2)`` box corners in pixels.

    Returns:
        ``np.ndarray`` with one row per box, in the same order as ``boxes``,
        so row indices can be used to index back into ``boxes``.
    """
    features = []
    for box in boxes:
        x1, y1, x2, y2 = np.intp(box)
        crop = image[y1:y2, x1:x2, :]
        # torchreid's FeatureExtractor interprets ndarray input as RGB,
        # while OpenCV frames are BGR, so swap the channels first.
        crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)

        feat = extractor(crop)[0].cpu().numpy()
        # Unit-normalise so a plain dot product equals cosine similarity.
        features.append(feat / np.linalg.norm(feat))
    return np.array(features)


# Each camera's boxes must be cropped from that camera's own frame; the
# previous version cropped the camera-2 boxes out of im1.
cam1_features = extract_features(im1, preds1)
cam2_features = extract_features(im2, preds2)

CPU times: user 475 ms, sys: 9 μs, total: 475 ms
Wall time: 100 ms


In [45]:
# Pairwise cosine similarity: rows index camera-1 detections, columns index
# camera-2 detections (both feature sets are L2-normalised when they are built).
sim_matrix = cam1_features @ cam2_features.T

# linear_assignment minimises a cost and treats `thresh` as an upper bound on
# the cost of an accepted pair. Passing `-sim_matrix` with thresh=0.1 therefore
# admitted any pair with similarity >= -0.1. Using cosine distance (1 - sim)
# with thresh = 1 - min_similarity makes the gate "similarity >= 0.1".
min_similarity = 0.1
matched_indices, _, _ = linear_assignment(1.0 - sim_matrix, 1.0 - min_similarity)

In [46]:
# Annotate each accepted match on both frames with the same numeric label so
# corresponding objects can be identified visually. Drawing is done in place
# on im1 / im2.
for idx, match in enumerate(matched_indices):
    row, col = match[0], match[1]

    # Skip pairs whose similarity is too low to count as the same object.
    if sim_matrix[row, col] < 0.1:
        continue

    # Same box + label styling for the camera-1 and camera-2 detection.
    for canvas, box in ((im1, preds1[row]), (im2, preds2[col])):
        x1, y1, x2, y2 = np.intp(box)
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(canvas, f"{idx}", (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

In [47]:
# Open one OpenCV window per annotated frame.
for title, frame in (("im1", im1), ("im2", im2)):
    cv2.imshow(title, frame)

# Wait for a key press, then close every window that was opened.
cv2.waitKey()
cv2.destroyAllWindows()


In [48]:
# Print the model names torchreid has available; the output includes the
# 'shufflenet_v2_x2_0' name passed to FeatureExtractor earlier in this notebook.
torchreid.models.show_avai_models()

['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'resnet50_fc512', 'se_resnet50', 'se_resnet50_fc512', 'se_resnet101', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'densenet121_fc512', 'inceptionresnetv2', 'inceptionv4', 'xception', 'resnet50_ibn_a', 'resnet50_ibn_b', 'nasnsetmobile', 'mobilenetv2_x1_0', 'mobilenetv2_x1_4', 'shufflenet', 'squeezenet1_0', 'squeezenet1_0_fc512', 'squeezenet1_1', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0', 'mudeep', 'resnet50mid', 'hacnn', 'pcb_p6', 'pcb_p4', 'mlfn', 'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0', 'osnet_ain_x1_0', 'osnet_ain_x0_75', 'osnet_ain_x0_5', 'osnet_ain_x0_25']
