In [None]:
%matplotlib inline

import cv2
import time
import numpy as np
from types import SimpleNamespace

import ultralytics
ultralytics.checks()
from ultralytics import YOLO
from ultralytics.trackers.bot_sort import BOTSORT
from ultralytics.utils.plotting import Annotator
from ultralytics.engine.results import Boxes

import torch
from torch.nn.functional import normalize
import torch.version
import torchvision
# Report the library / CUDA environment up front so GPU set-up problems are
# visible before any model is loaded. (The CUDA version used to be printed
# twice, once without a label; it is now printed once.)
print("Torch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)

#from PIL import Image
import depth_pro

# Load the Depth Pro model and its preprocessing transform.
# device="cuda" / precision=torch.half: the model is requested on the GPU in
# half precision, so this cell needs a CUDA-capable machine.
depth_model, transform = depth_pro.create_model_and_transforms(device="cuda",precision=torch.half)
# NOTE(review): presumably a torch nn.Module, in which case eval() switches it
# to inference behaviour — confirm against the depth_pro package.
depth_model.eval()

from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt

In [None]:
from no_hungarian2 import ObjectTracker

class Detection:
    """Placeholder for a single detection; it currently stores nothing."""

    def __init__(self, detection):
        """Accept ``detection`` and ignore it (not implemented yet)."""

class Group:
    """
    Bookkeeping container for tracked objects and the groups they belong to.

    Only the configuration, the id counter and the empty containers are set up
    here; ``update`` is still a stub.
    """

    def __init__(self,
                 occlusion_distance:float=0.5,
                 remove_distance:float=5.0,
                 velocity_threshold:float=2.0,
                 max_occlusion_frames:int=10
                 ):
        """
        occlusion_distance: max distance to consider an occlusion
        remove_distance: max distance to keep an object before removing
        velocity_threshold: max diff from group vel to join as new member
        max_occlusion_frames: frames to keep an occluded object before removal
        """
        # Local import: the notebook's import cell does not provide defaultdict,
        # so constructing a Group used to fail with a NameError.
        from collections import defaultdict

        self.occlusion_distance = occlusion_distance
        self.remove_distance = remove_distance
        self.velocity_threshold = velocity_threshold
        self.max_occlusion_frames = max_occlusion_frames

        # Next id handed out by _assign_id()
        self.next_id = 0
        # Objects: id -> info dict
        self.objects = {}
        # Groups: cluster_label -> set of ids
        self.groups = defaultdict(set)

    def _assign_id(self):
        """Return the next unused id and advance the counter by one."""
        _id = self.next_id
        self.next_id += 1
        return _id

    def update(self,detections):
        """Not implemented yet: accepts ``detections`` and returns None."""
        pass

In [None]:
def depth_pro_inference(r,f35:int=26):
    """
    Depth pro inference step for the frame carried by one detection result.

    r: detection result; only ``r.orig_img`` (the frame as an image array) is read.
    f35: focal length of the camera, passed to ``depth_pro.utils.fpx_from_f35``
         together with the frame width and height to obtain the focal length in pixels.

    Returns ``(depth, focallength_px)`` exactly as found in the prediction
    dictionary of ``depth_model.infer``.

    Relies on the module-level ``transform`` and ``depth_model`` created when
    the depth model was loaded.
    """
    # Ultralytics hands frames over in OpenCV channel order (BGR) while the
    # Depth Pro preprocessing is meant for RGB images, so swap the channels
    # first. cvtColor returns a new array; r.orig_img itself is left untouched.
    rgb_frame = cv2.cvtColor(r.orig_img,cv2.COLOR_BGR2RGB)

    # Preprocess the frame; the transform also moves it to the model's device.
    d_img = transform(rgb_frame)
    # d_img is indexed as (channel, row, col) here: shape[2] is passed as width, shape[1] as height.
    f_px = depth_pro.utils.fpx_from_f35(d_img.shape[2],d_img.shape[1],f35) # Converts focal length from camera to fpx.

    # Run depth inference on given frame.
    prediction = depth_model.infer(d_img,f_px)
    depth = prediction["depth"]  # Depth in [m].
    focallength_px = prediction["focallength_px"]  # Focal length in pixels.

    return depth,focallength_px

def BoTSORT_tracker_update(r,tracker):
    """
    Advance the BoTSORT tracker by one frame and collect its Kalman means.

    r: detection result; ``r.boxes`` (moved to CPU) and ``r.orig_img`` are
       handed to ``tracker.update``.
    tracker: tracker object exposing ``update`` and ``tracked_stracks``.

    Returns a dict mapping ``track_id`` to a copy of ``track.mean`` for every
    track in ``tracker.tracked_stracks`` whose ``is_activated`` flag is set.
    """
    # Step the tracker; its return value is not needed, only the internal state.
    tracker.update(r.boxes.cpu(),r.orig_img)

    # Snapshot the Kalman mean of each activated track (copied so later tracker
    # updates cannot change what the caller received).
    snapshot = {}
    for strack in tracker.tracked_stracks:
        if not strack.is_activated:
            continue
        snapshot[strack.track_id] = strack.mean.copy()

    return snapshot

def speed_estimation(r,speedestimator):
    """
    Run a speed estimator on one frame and return its speed and angle tables.

    r: detection result; only ``r.orig_img`` is passed on.
    speedestimator: callable applied to the frame (the original note refers to
                    the YOLO solutions speed estimator); its result must expose
                    ``speed_dict`` and ``angle_dict``.

    Returns ``(speed_dict, angle_dict)``; both are also printed.
    """
    estimate = speedestimator(r.orig_img)

    per_track_speed = estimate.speed_dict
    print("Speed:",per_track_speed)

    per_track_angle = estimate.angle_dict
    print("Angle:",per_track_angle)

    return per_track_speed,per_track_angle

def theloop(
        r,
        d,
        f,
        tracker,
        grouptracker,
        save_img:bool=False,
        run:int=6
        ):
    """
    The loop: group the tracked detections of one frame by 3D position and heading.

    Steps:
      1. update the detection tracker and read each activated track's Kalman mean,
      2. look up the depth at every track centroid and scale the centroid by depth / focal length,
      3. cluster the tracks spatially with DBSCAN,
      4. inside every spatial cluster, cluster again on the normalised velocity,
      5. draw one box per resulting group, save the annotated frame and pass
         the group boxes to the group tracker.

    r: detection result of the frame; ``boxes``, ``orig_img`` and ``orig_shape`` are read.
    d: depth map indexed as ``d[row, col]``
       (presumably the same resolution as ``r.orig_img`` — confirm with the depth step).
    f: focal length in pixels.
    tracker: tracker for the individual detections.
    grouptracker: tracker that is fed the group bounding boxes.
    save_img: when true, also save a scatter/quiver plot of centroids and group labels.
    run: run number used in the output folders ``runs/boxes<run>`` and ``runs/dots<run>``.

    Returns a list with one dict per activated track with the keys
    ``x, y, z, vx, vy, cluster`` (cluster is -1 for ungrouped tracks), or an
    empty list when there is no activated track.

    NOTE: the output file names use the module-level frame counter ``frame``.
    """
    from pathlib import Path  # local import: only used to create the output folders

    # Make sure the output folder exists so saving the annotated frame cannot
    # fail on a missing directory.
    Path(f"runs/boxes{run}").mkdir(parents=True,exist_ok=True)
    annotated_path = f"runs/boxes{run}/annotated_frame_{frame}.png"

    # Extract Kalman means
    kalman_states = BoTSORT_tracker_update(r,tracker)
    print("Detection Kalman states:",kalman_states)
    if not kalman_states:
        # No activated track: there is nothing to cluster (the column slicing
        # below needs at least one row). Still save the frame so the saved
        # sequence has no gaps.
        Annotator(r.orig_img).save(annotated_path)
        return []

    # The code below treats the Kalman mean as (cx, cy, w, h, vx, vy, ...):
    # entries 0-1 are used as centroid, 2-3 as box size, 4-5 as velocity.
    states = list(kalman_states.values())
    centres = torch.tensor([list(s)[0:2] for s in states])
    velocities = torch.tensor([list(s)[4:6] for s in states])
    x = centres[:,0] ;print("I am x:",x)
    y = centres[:,1] ;print("I am y:",y)
    u = velocities[:,0] ;print("I am u:",u)
    v = velocities[:,1] ;print("I am v:",v)

    # Parallelised centroid extraction and depth assignment.
    # A Kalman centroid can lie outside the frame, so clamp the pixel indices
    # to the depth map instead of raising / wrapping around on negative values.
    rows = y.long().clamp(0,d.shape[0]-1)
    cols = x.long().clamp(0,d.shape[1]-1)
    z = d[rows,cols].cpu() ;print("\nDepth at each centroid:", z)
    print("Focal length (px):", f)
    xr = x * z/f ;print("Metric x coordinates (m):",xr)
    yr = y * z/f ;print("Metric y coordinates (m):",yr)
    lumps = torch.vstack((xr,yr,z)) ;print("Bounding box x,y,z coordinates (m):",lumps)

    # Clustering 3D points
    xyz_clustering = DBSCAN(eps=2, min_samples=2).fit(lumps.T.cpu().numpy())
    xyz_labs = xyz_clustering.labels_ ;print("\nClustering labels:",xyz_labs)

    # For each spatial cluster, further cluster by velocity.
    vlumps = torch.vstack((u,v,torch.tensor(xyz_labs))) ;print("Vlumps:",vlumps)
    cids = vlumps.T[:,-1]
    vids = torch.full_like(cids,-1).long()

    for cid in cids.unique():
        # Ignore noise outputs from previous clustering
        if cid == -1:
            continue
        # Create mask for processing and reconstruction of velocity cluster labels
        mask = cids == cid
        vels = normalize(vlumps.T[mask,:-1]).tolist() ;print("Normalised velocities:",vels)
        # Velocity Clustering
        vclustering = DBSCAN(eps=0.3, min_samples=2).fit(vels) ;print("Velocity clustering labels:",vclustering.labels_)
        # Offset by 100 * spatial label so sub-clusters of different spatial groups get distinct ids
        vlabs = [( -1 if i==-1 else int(i+(cid*100)) ) for i in vclustering.labels_]
        vids[mask] = torch.tensor(vlabs)
    uvxyz_labs = vids.tolist() ;print("uvxyz_labs:",uvxyz_labs)

    # Initiate annotator
    annotator = Annotator(r.orig_img)

    # Extracting the pixel values from the kalman state
    kal_vals = torch.tensor(list(kalman_states.values()))
    group_boxes = []

    # Produce bounding boxes for each group
    for vid in vids.unique():
        # Ignore noise outputs from previous clustering
        if vid == -1:
            continue
        # Members of this group: rows of (cx, cy, w, h)
        mask = vids == vid
        values = kal_vals[mask,:4]
        # Smallest box enclosing all member boxes (computed once, reused below)
        X1 = (values[:,0]-values[:,2]/2).min()
        Y1 = (values[:,1]-values[:,3]/2).min()
        X2 = (values[:,0]+values[:,2]/2).max()
        Y2 = (values[:,1]+values[:,3]/2).max()
        # Annotate frame with group boxes; the population is the number of
        # member rows (values.dim() would always report 2).
        annotator.box_label((X1,Y1,X2,Y2),f"Group: {int(vid)}, Population: {values.shape[0]}",(0,180,255))
        # Row layout for the ultralytics Boxes call: x1, y1, x2, y2, confidence, class
        raw_box = [X1,Y1,X2,Y2,1.,0] ;print("\nRaw box:",raw_box)
        group_boxes.append(raw_box)

    # Update group tracker (only when at least one group exists)
    if group_boxes:
        # Cook boxes and wrap them into an ultralytics Boxes object
        cooked_boxes = torch.tensor(group_boxes)
        Group_Boxes = Boxes(cooked_boxes.cpu().numpy(),r.orig_shape)
        grouptracker.update(Group_Boxes,r.orig_img)
        # Extract Kalman states of the group tracks
        group_states = {
            track.track_id: track.mean.copy()
            for track in grouptracker.tracked_stracks
            if track.is_activated
        } ;print(group_states)
        # TODO: use group_states for tracking group momentum

    # Save annotated frames
    annotator.save(annotated_path)

    # Save the image
    if save_img:
        Path(f"runs/dots{run}").mkdir(parents=True,exist_ok=True)
        # Saving the images of centroids and group ownership.
        # The frame is in OpenCV channel order; reverse the channels so
        # matplotlib shows the true colours.
        plt.imshow(r.orig_img[...,::-1])
        scatter = plt.scatter(x,y,c=uvxyz_labs,label=uvxyz_labs)
        plt.quiver(x,y,u,v,angles = "xy")
        plt.legend(*scatter.legend_elements(),title="Classes")
        plt.tight_layout()
        plt.savefig(f"runs/dots{run}/frame_{frame}",dpi=100)
        plt.clf()

    # One record per track: metric position, pixel velocity and group label
    clumps = torch.vstack((xr,yr,z,u,v,torch.tensor(uvxyz_labs))) ;print("The clumps:",clumps)
    out_dicts = [dict(zip(["x","y","z","vx","vy","cluster"],row)) for row in clumps.T.tolist()] ;print(out_dicts)

    return out_dicts

## Parallel grouping & depth & distance & velocity

In [None]:
# Run configuration: values a reader is likely to change, kept in one place.
VIDEO_SOURCE = "Homebrew-video/Low-quality/IMG_5354.MP4"
RUN_ID = 8        # suffix of the runs/boxes<RUN_ID> / runs/dots<RUN_ID> output folders
MAX_FRAMES = 6    # testing limit: stop after this many frames
FRAME_RATE = 30   # frame rate handed to both trackers

# Load YOLO11 detector
model = YOLO("yolo11x.pt")

# Create tracker
args = SimpleNamespace(
    track_buffer=30,
    track_high_thresh=0.25,
    track_low_thresh=0.1,
    match_thresh=0.8,
    new_track_thresh=0.25,
    fuse_score=True,
    # BoT-SORT settings
    gmc_method="sparseOptFlow", # method of global motion compensation
    # ReID model related thresh (not supported yet)
    proximity_thresh=0.5,
    appearance_thresh=0.25,
    with_reid=False
)
# Two independent trackers with the same settings: one for the individual
# detections, one for the group boxes produced by theloop().
tracker = BOTSORT(args, frame_rate=FRAME_RATE)
grouptracker = BOTSORT(args, frame_rate=FRAME_RATE)


# Begin online tracking (stream=True yields one result per frame; only class 0 is detected)
results = model.predict(source=VIDEO_SOURCE,
                      stream=True,classes=[0],half=True,imgsz=1280,save=False)
# 1-based frame counter; theloop() reads this module-level name for its output file names.
frame=1

#group_class = ObjectTracker()

for j,r in enumerate(results):
    # Move results onto gpu
    r = r.cuda()

    # Run depth inference
    d,f = depth_pro_inference(r)

    # Run grouping
    out_dicts = theloop(r,d,f,tracker,grouptracker,save_img=False,run=RUN_ID)
    #group_class.update(out_dicts)
    #print(group_class.get_tracked_objects())

    # Testing break
    if j + 1 >= MAX_FRAMES: break

    frame += 1

    print("\nEND OF LOOP\n")