In [86]:
import cv2
import torch
import torch.nn as nn
import numpy as np
from pathlib import Path
from PIL import Image
from boxmot import BoostTrack, BotSort, StrongSort
from torchvision.models.detection import (
    fasterrcnn_resnet50_fpn_v2,
    FasterRCNN_ResNet50_FPN_V2_Weights as Weights
)
from mc_tracker import sct,mct
from ultralytics import YOLO
from torchvision import transforms
from boxmot.appearance.reid import auto_backend


In [87]:
device = "cuda:0"

In [88]:
class OSNet(nn.Module):
    def __init__(self, model, mode="sc"):
        
        super().__init__()
        self.model=model.cuda()
        self.data_transform=transforms.Compose([
                              transforms.ToPILImage(),
                              transforms.Resize((256, 128)),
                              transforms.ToTensor(),
                              transforms.Normalize(mean=[0.485,
                               0.456, 0.406],std=[0.229, 0.224, 0.225]),
                              ])
    def forward(self,input):
       

        # osnet_emb = torch.cat([self.model(self.data_transform(image).unsqueeze(0).cuda()) for image in input],dim=0).cpu()
        batch = torch.cat([self.data_transform(image).unsqueeze(0).cuda() for image in input],dim=0)
        osnet_emb = self.model(batch.cuda()).cpu()
        return osnet_emb




In [89]:
from qdrant_client import QdrantClient

client = QdrantClient("http://localhost:6333")
client

<qdrant_client.qdrant_client.QdrantClient at 0x1d481abaf50>

In [93]:
client.retrieve(collection_name="stream",ids=range(100))

[Record(id=0, payload={'Pid': '1', 'coords': [648, 478, 714, 596], 'cam_id': 0, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
 Record(id=1, payload={'Pid': '1', 'coords': [648, 478, 714, 596], 'cam_id': 0, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
 Record(id=2, payload={'Pid': '1', 'coords': [648, 478, 714, 596], 'cam_id': 0, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
 Record(id=3, payload={'Pid': '1', 'coords': [648, 478, 714, 596], 'cam_id': 0, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
 Record(id=4, payload={'Pid': '1', 'coords': [648, 478, 714, 596], 'cam_id': 0, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
 Record(id=5, payload={'Pid': '1', 'coords': [648, 478, 714, 596], 'cam_id': 0, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
 Record(id=6, payload={'Pid': '1', 'coords': [648, 478, 714, 596], 'cam_id': 0, 'frame_count': 7}, vector=None, shard_

In [67]:
client.recreate_collection(collection_name="stream",vectors_config={"size":512,"distance":"Cosine"})

  client.recreate_collection(collection_name="stream",vectors_config={"size":512,"distance":"Cosine"})


True

In [51]:
def cosine_distance(a, b, data_is_normalized=False):
    a = np.asarray(a)
    b = np.asarray(b)
    if len(a.shape) == 1 and len(b.shape) == 1:
        if not data_is_normalized:
            a = a / np.linalg.norm(a, axis=0)
            b = b / np.linalg.norm(b, axis=0)
        return 1. - np.dot(a, b)
    else:
        if not data_is_normalized:
            a = a / np.linalg.norm(a, axis=1, keepdims=True)
            b = b / np.linalg.norm(b, axis=1, keepdims=True)
        return 1. - np.dot(a, b.T)


In [8]:
from pathlib import Path
import os
from glob import glob
from qdrant_client.http.models import PointStruct,FieldCondition, MatchValue, Filter
from PIL import Image
# client.create_collection(
#     collection_name="gallery",
#     vectors_config={
#         "size": 512,
#         "distance": "Cosine"
#     }  
# )
reid_weights = Path(r"C:\Users\hthek\Downloads\tracking\osnet_x0_25_msmt17.pt")

osnet = auto_backend.ReidAutoBackend(
                weights=reid_weights, device=device, half=False
            ).model.model.to(device)
reid_model = OSNet(osnet)

path_to_PPL = r"C:\Users\hthek\Downloads\Face Ident Dataset (1)\Face Ident Dataset\PPl"
ppl_paths = glob(path_to_PPL + "/*")
Pid = 0
for path in ppl_paths:
    person_name = Path(path).stem
    for img_path in glob(path + "/*"):
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        emb = reid_model([img]).detach().cpu().numpy()[0]
        points = [
                PointStruct(
                    id = Pid,
                    vector = emb,
                    payload = {"name": person_name,"image" : img_path},
                    
            )
            ]
        client.upsert(
            collection_name="gallery",
            points=points
        )
        Pid+=1


In [26]:
res = client.query_points(collection_name="gallery",query=emb,limit=5, 
                query_filter=Filter(must=[FieldCondition(key="name",match=MatchValue(value="P9"))]))
res.points

[ScoredPoint(id=87, version=176, score=0.9999999, payload={'name': 'P9', 'image': 'C:\\Users\\hthek\\Downloads\\Face Ident Dataset (1)\\Face Ident Dataset\\PPl\\P9\\IMG-20250424-WA0286.jpg'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=86, version=175, score=0.8164107, payload={'name': 'P9', 'image': 'C:\\Users\\hthek\\Downloads\\Face Ident Dataset (1)\\Face Ident Dataset\\PPl\\P9\\IMG-20250424-WA0285.jpg'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=85, version=174, score=0.6452853, payload={'name': 'P9', 'image': 'C:\\Users\\hthek\\Downloads\\Face Ident Dataset (1)\\Face Ident Dataset\\PPl\\P9\\IMG-20250424-WA0284.jpg'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=84, version=173, score=0.585985, payload={'name': 'P9', 'image': 'C:\\Users\\hthek\\Downloads\\Face Ident Dataset (1)\\Face Ident Dataset\\PPl\\P9\\IMG-20250424-WA0283.jpg'}, vector=None, shard_key=None, order_value=None)]

In [69]:
res = client.query_points(collection_name="stream",query=emb,limit=5)
res.points

[ScoredPoint(id=46757, version=2314, score=0.65849066, payload={'Pid': '48', 'coords': [720, 408, 764, 531], 'cam_id': 1, 'frame_count': 115}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=47283, version=2339, score=0.65847707, payload={'Pid': '48', 'coords': [720, 410, 761, 531], 'cam_id': 1, 'frame_count': 116}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=48305, version=2387, score=0.65838283, payload={'Pid': '48', 'coords': [719, 410, 759, 532], 'cam_id': 1, 'frame_count': 118}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=47790, version=2364, score=0.6582293, payload={'Pid': '48', 'coords': [720, 410, 761, 531], 'cam_id': 1, 'frame_count': 117}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=46298, version=2293, score=0.65812886, payload={'Pid': '48', 'coords': [720, 408, 764, 531], 'cam_id': 1, 'frame_count': 114}, vector=None, shard_key=None, order_value=None)]

In [70]:
client.scroll(collection_name="stream",limit=50,scroll_filter=Filter(must=[FieldCondition(key="Pid",match=MatchValue(value='1'))]))

([Record(id=0, payload={'Pid': '1', 'coords': [778, 519, 849, 654], 'cam_id': 1, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
  Record(id=1, payload={'Pid': '1', 'coords': [778, 519, 849, 654], 'cam_id': 1, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
  Record(id=2, payload={'Pid': '1', 'coords': [778, 519, 849, 654], 'cam_id': 1, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
  Record(id=3, payload={'Pid': '1', 'coords': [778, 519, 849, 654], 'cam_id': 1, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
  Record(id=4, payload={'Pid': '1', 'coords': [778, 519, 849, 654], 'cam_id': 1, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
  Record(id=5, payload={'Pid': '1', 'coords': [778, 519, 849, 654], 'cam_id': 1, 'frame_count': 7}, vector=None, shard_key=None, order_value=None),
  Record(id=6, payload={'Pid': '1', 'coords': [778, 519, 849, 654], 'cam_id': 1, 'frame_count': 7}, vector=None,

In [77]:
recs = client.retrieve(collection_name="stream",ids=range(1000),with_vectors=True)

In [85]:
for rec1 in recs:
    for rec2 in recs:
        if rec1.payload['Pid'] == rec2.payload['Pid'] and rec1.payload['cam_id'] != rec2.payload['cam_id']:
            dist = cosine_distance(rec1.vector,rec2.vector)
            if dist < 0.4:
                print(dist)

0.38743483887483965
0.38743483887483965
0.38743483887483965
0.38743483887483965
0.38743483887483965
0.38743483887483965
0.38743483887483965
0.38743483887483965
0.3996905358835875
0.39869373007938824
0.3996905358835875
0.39869373007938824
0.3891546208840384
0.3996905358835875
0.39869373007938824
0.3891546208840384
0.3652182067376337
0.3996905358835875
0.39869373007938824
0.3891546208840384
0.3652182067376337
0.3731618154321632
0.3996905358835875
0.39869373007938824
0.3891546208840384
0.3652182067376337
0.3731618154321632
0.3976842888146831
0.3996905358835875
0.39869373007938824
0.3891546208840384
0.3652182067376337
0.3731618154321632
0.3976842888146831
0.38388819903886295
0.3996905358835875
0.39869373007938824
0.3891546208840384
0.3652182067376337
0.3731618154321632
0.3976842888146831
0.38388819903886295
0.3756193239681328
0.3922233003453901
0.3922233003453901
0.39523345744908167
0.38743483887483965
0.38743483887483965
0.38743483887483965
0.38743483887483965
0.38743483887483965
0.399690

## Tracking

In [8]:
reid_model=OSNet(model=osnet).cuda()
tracker = sct.SingleCameraTracker(cam_id=0,reid_model=reid_model,clust_init_dis_thresh=0.2)

In [9]:
def get_screen_edges(detections, frame_shape, edge_margin=20):
    H,W = frame_shape[:2]
    edge_flages=[]
    for (x1,y1,x2,y2) in detections[:,:4]:
        near_edge=(x1 <= edge_margin or y1 <= edge_margin            
                   or W-x2 <= edge_margin or H-y2 <= edge_margin) 
        edge_flages.append(near_edge)
    return edge_flages


In [None]:
client.create_collection(
    collection_name="stream",
    vectors_config={
        "size": 512,
        "distance": "Cosine"
    }  
)

In [None]:
# Load detector with pretrained weights and preprocessing transforms
from torchvision import transforms
import matplotlib.pyplot as plt
data_transform=transforms.Compose([
                              transforms.ToPILImage(),
                              transforms.Resize((256, 128)),
                              transforms.ToTensor(),
                              transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]),
                              ])

det_across_frames=[]
def crop_frame(frame, box):
    x1,y1,x2,y2=box.astype(np.int32)

    cropped = frame[y1:y2,x1:x2]
    return cropped

def process_crops(crops, transform):
    transformed_crops = torch.stack([transform(crop) for crop in crops])
    final_crops = transformed_crops.unsqueeze(2).repeat(1,1,8,1,1)
    return final_crops
weights = Weights.DEFAULT


detector = YOLO(r"rtdetr-l.pt")
detector.to(device).eval()
transform = weights.transforms()



# Start video capture
video_path = r'C:\Users\hthek\OneDrive\Desktop\Collected Dataset\Compressed_Vids\Vid_1.mp4'
cap = cv2.VideoCapture(video_path)
count=0
point_id = 1
with torch.inference_mode():
    while True:
        
        success, frame = cap.read()
        if not success:
            break

        # Convert frame to RGB and prepare for detector
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
   

        # Run detection
        if count%1==0:
            output = detector(frame)[0].boxes
            labels = output.cls.cpu().numpy()
            only_people = labels == 0
            scores = output.conf.cpu().numpy()
            scores = scores[only_people]
            keep = scores >= 0.25
            filtered_scores = scores[keep]
            

        
        
        # Prepare detections for tracking
            boxes = output.xyxy.cpu().numpy()[only_people][keep]
        
        # Update tracker and draw results
        #   INPUT:  M X (x, y, x, y, conf, cls)
        #   OUTPUT: M X (x, y, x, y, id, conf, cls, ind)
        H, W, _ = frame.shape
        boundary_coord = [0, 0, W, H]
        screen_edges = get_screen_edges(boxes,frame.shape)
        boxes=boxes.astype(np.int32)
        tracker.process(frame,boxes,screen_edges,boundary_coord)
        for obj in tracker.get_tracked_objects_feat():
            if obj.display:
                x1,y1,x2,y2 = map(int,obj.rect)
                Pid = obj.track_id
                int_id = int(Pid)
                color = (max(10*int_id,255),max(100+int_id,255),min(255-5*int_id,0))
                
                cv2.rectangle(frame,(x1,y1),(x2,y2),color=color,thickness=2)
                cv2.putText(frame,"id: "+Pid,(x1,y1-5),fontFace=cv2.FONT_HERSHEY_COMPLEX,fontScale=0.5,color=color)
                points = []
                for i,feat in enumerate(obj.feats):
                    points.append(
                            PointStruct(
                                id = point_id + i,
                                vector = feat,
                                payload = {"Pid": Pid ,"coords" : [x1,y1,x2,y2]},
                                        )
                                )
                    
                point_id+=len(points)
                print(point_id)
                client.upsert(
                    collection_name="stream",
                    points = points
                                )
                
                    
        
        

        # writer.write(frame)
        # if count>5:
        #     break
        # count+=1
        
    

        
        
        # cv2.imshow('BoXMOT + Torchvision', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        

# Clean up
cap.release()
# cv2.destroyAllWindows()