# Visual Recognition Assignment 3 part d
 


In [31]:
import torch 
import torchvision 
import cv2 
from PIL import Image 
from torchvision import transforms as T 
import numpy as np 
from torchvision import transforms
from torch import device
from torch import cuda
import time 
import shutil
import git
import sys

from sort import *
from deep_sort_realtime.deepsort_tracker import DeepSort 

In [32]:
# Fetch the SORT implementation and make it importable as `sort`.
sort_repo_dir = '/kaggle/working/sample1'

# Remove any checkout left behind by an earlier run of this cell: cloning into
# an existing, non-empty directory fails, which previously required removing it by hand.
shutil.rmtree(sort_repo_dir, ignore_errors=True)
git.Repo.clone_from('https://github.com/KalyanRam1234/sort.git', sort_repo_dir)

# Put the checkout first on the import path, without stacking duplicate
# entries when the cell is executed more than once.
if sort_repo_dir not in sys.path:
    sys.path.insert(0, sort_repo_dir)

In [33]:
!pip install filterpy==1.4.5
!pip install lap==0.4.0



##### We will use torch's own pretrained implementation, as training an R-CNN takes a substantial amount of time. This implementation uses a ResNet-50 backbone and is trained on MS COCO.

In [35]:
# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
# The constructor is reached through the `torch` module on purpose: assigning to
# `device` rebinds the name imported by `from torch import device`, so calling
# `device(...)` here would fail as soon as this cell is executed a second time.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

#### YOLO v5 
We use YOLOv5x for object detection. A pretrained model is available through PyTorch Hub.

In [37]:
# Install the packages listed in the YOLOv5 repository's requirements file.
# NOTE(review): the URL points at the `master` branch, so the installed versions can differ between runs.
%pip install  -qr https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt  # install dependencies

Note: you may need to restart the kernel to use updated packages.


In [38]:
# Load the pretrained YOLOv5x detector from PyTorch Hub, then place it on the selected device.
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5x',
    pretrained=True,
)
model = model.to(device)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
YOLOv5 🚀 2024-3-22 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla T4, 15102MiB)

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x.pt to yolov5x.pt...
100%|██████████| 166M/166M [00:00<00:00, 296MB/s] 

Fusing layers... 
YOLOv5x summary: 444 layers, 86705005 parameters, 0 gradients, 205.5 GFLOPs
Adding AutoShape... 


### DeepSort and SORT

In [39]:
!pip install deep-sort-realtime

Collecting deep-sort-realtime
  Using cached deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Using cached deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
Installing collected packages: deep-sort-realtime
Successfully installed deep-sort-realtime-1.3.2


In [42]:
#SORT
# Tracker instance that is handed to TrackCarSort and updated with each frame's detections.
# NOTE(review): max_age / min_hits are presumably "frames a track may go unmatched before it is dropped"
# and "matches required before a track is reported" - confirm against the cloned sort package.
object_tracker = Sort(max_age=600, min_hits=2)

#DeepSORT object tracker, uncomment to use ------------------------------------------
# (its update_tracks API is used by the commented-out DeepSORT cell, not by TrackCarSort)

# object_tracker = DeepSort(max_iou_distance=0.2,max_age=20,nms_max_overlap = 0.5,gating_only_position=True,n_init=2,max_cosine_distance=0.9)

### YOLOv5 + SORT (the DeepSORT variant is commented out below)

In [44]:
def TrackCarSort(model, video, car_index, object_tracker, out_path, conf_threshold=0.5, fps=15):
    """Detect one object class in every frame, track it, and write an annotated video.

    Each frame read from ``video`` is run through ``model``. Boxes whose class is
    ``car_index`` and whose confidence exceeds ``conf_threshold`` are passed to
    ``object_tracker.update``. The returned tracks are drawn on the frame together
    with a running count of distinct track IDs, and the frame is appended to the
    output video.

    Args:
        model: Detector exposing ``eval()`` and returning, when called on an image,
            an object whose ``xyxy[0]`` iterates over rows of
            ``[x_min, y_min, x_max, y_max, confidence, class_id]``.
        video: Opened ``cv2.VideoCapture``-like object. It is read until exhausted
            but NOT released here; the caller owns it.
        car_index: Class id to keep from the detector output.
        object_tracker: Tracker whose ``update(dets)`` accepts an ``(N, 5)`` array
            ``[x1, y1, x2, y2, score]`` and returns rows ``[x1, y1, x2, y2, track_id]``.
        out_path: Destination path of the annotated video (written with 'mp4v').
        conf_threshold: Minimum detection confidence (exclusive). Defaults to 0.5.
        fps: Frame rate of the written video. Defaults to 15.

    Returns:
        int: Number of distinct track IDs seen over the whole video.
    """
    model.eval()
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    seen_ids = set()
    try:
        while video.isOpened():
            ret, frame = video.read()   # frames are read one by one
            if not ret:
                break

            with torch.no_grad():
                # OpenCV decodes frames as BGR, while YOLOv5's AutoShape wrapper documents
                # RGB for array inputs, so the channel axis is reversed for inference only.
                # Drawing below still happens on the original BGR frame.
                pred = model(frame[..., ::-1]).xyxy[0]   # detections for the single image passed in

            detections = []
            for detection in pred:
                class_id = int(detection[5])
                confidence = float(detection[4])
                if confidence > conf_threshold and class_id == car_index:
                    x_min, y_min, x_max, y_max = detection[:4].cpu().detach().numpy().astype('int')
                    # Keep the score so non-empty input has the same 5-column layout as the empty case.
                    detections.append([x_min, y_min, x_max, y_max, confidence])

            if detections:
                detections = np.array(detections, dtype=float)
            else:
                detections = np.empty((0, 5))

            # The tracker is updated on EVERY frame, including frames without detections,
            # so that unmatched tracks age and are eventually dropped.
            tracks = object_tracker.update(detections)
            for track in tracks:
                track_id = int(track[4])   # ids come back as floats; show/count them as ints
                seen_ids.add(track_id)
                top_left = (int(track[0]), int(track[1]))
                bottom_right = (int(track[2]), int(track[3]))
                cv2.rectangle(frame, top_left, bottom_right, (0, 0, 255), 2)
                cv2.putText(frame, "ID: " + str(track_id), (top_left[0], int(track[1] - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            # Running count of unique track IDs, drawn on every frame so the overlay never flickers.
            count = len(seen_ids)
            cv2.putText(frame, f'Count: {int(count)}', (20, 200), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)

            # Write frame to output video
            out.write(frame)
    finally:
        # Release the writer even if inference or tracking raised, so the file is finalized.
        out.release()

    return len(seen_ids)
    

In [46]:
# Run detection + tracking over the input clip; class index 2 is passed as the car class.
video = cv2.VideoCapture('/kaggle/input/car-detection/Brigade_Road_1.mp4')
if not video.isOpened():
    # Fail loudly instead of silently producing an empty output video.
    raise RuntimeError('Could not open input video: /kaggle/input/car-detection/Brigade_Road_1.mp4')
try:
    TrackCarSort(model,video,2,object_tracker,'/kaggle/working/outputBrigade_YOLO+DeepSORT.mp4')
finally:
    # The capture is owned by this cell, so it is released here whether or not tracking succeeded.
    video.release()

In [47]:
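#Uncomment this cell to use DeepSORT; it calls object_tracker.update_tracks, so also enable the DeepSort object_tracker defined earlier ----------------------------------------------------------------------------------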


#car_id = 2 
# confidence_threshold = 0.9 #0.8
# object_type = ['car']

# cap = cv2.VideoCapture('/kaggle/input/car-detection/Brigade_Road_1.mp4')

# width = int(cap.get(3))
# height = int(cap.get(4))

# out = cv2.VideoWriter('outputBrigade_YOLO+DeepSORT.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 15, (width, height))

# model.eval()

# prev_count = 0
# prev_max_id = 0

# setIDs=set()
# while cap.isOpened():
    
#     ret, frame = cap.read()
#     start = time.perf_counter()
#     if not ret:
#         break
    
    
#     with torch.no_grad(): 
#       pred = model(frame)
        
#     detections = [] 
#     for detection in pred.xyxy[0]:
#         class_id = int(detection[5])
#         confidence = detection[4]
#         if confidence> confidence_threshold and class_id ==car_id:
#             x_min,y_min,x_max,y_max = detection[:4].cpu().detach().numpy().astype('int')
#             x,y,w,h = [x_min,y_min,int(x_max-x_min),int(y_max-y_min)]
            
#             cv2.rectangle(frame,(int(x_min), int(y_min)),(int(x_max), int(y_max)),(0,255,0),2)
#             cv2.putText(frame, "ID: " + str(track_id), (int(x_min-5), int(y_min - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            
            
#             detections.append(([x,y,w,h],confidence,'car'))
            
#     #Tracks for DeepSORT and Annotate 
#     tracks = object_tracker.update_tracks(detections, frame=frame)
#     for track in tracks:
#         if not track.is_confirmed():
#             continue
#         track_id = track.track_id
#         setIDs.add(track_id)
#         ltrb = track.to_ltrb()
#         bbox = ltrb

#     count=len(setIDs)
#     cv2.putText(frame, f'Count: {int(count)}', (20,200), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
    
#     out.write(frame)
# out.release()


# print("Count of cars = "+str(count))

