**YOLOV5 IMPORT**

In [9]:
import torch

# Load the small YOLOv5 variant with its pretrained weights through torch.hub.
# The first call downloads the repository archive and the checkpoint (see the
# log below); the returned object is called directly on frames later on.
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5s',
    pretrained=True,
)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip


[31m[1mrequirements:[0m Ultralytics requirements ['gitpython>=3.1.30', 'pillow>=10.3.0', 'requests>=2.32.0'] not found, attempting AutoUpdate...
Collecting gitpython>=3.1.30
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 207.3/207.3 kB 2.1 MB/s eta 0:00:00
Collecting pillow>=10.3.0
  Downloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.5/4.5 MB 32.9 MB/s eta 0:00:00
Collecting requests>=2.32.0
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.9/64.9 kB 145.7 MB/s eta 0:00:00
Collecting gitdb<5,>=4.0.1 (from gitpython>=3.1.30)
  Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.7/62.7 kB 181.9 MB/s eta 0:00:00
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython>=3.1.30)
  Downloading smmap-5.0.1-py3-none-any.whl (24 kB)
Installing collected packages: sm

YOLOv5 🚀 2024-6-23 Python-3.10.12 torch-2.3.0+cu121 CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...

  0%|          | 0.00/14.1M [00:00<?, ?B/s][A
100%|██████████| 14.1M/14.1M [00:00<00:00, 121MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


**DEEPSORT IMPORT**

In [4]:
# Clone only when the checkout is missing, so re-running this cell is a no-op
# instead of failing with "destination path 'deep_sort' already exists".
![ -d deep_sort ] || git clone https://github.com/granthikhalder/deep_sort

Cloning into 'yolov8_deepsort'...
remote: Enumerating objects: 84, done.
remote: Counting objects: 100% (48/48), done.
remote: Compressing objects: 100% (43/43), done.
remote: Total 84 (delta 4), reused 45 (delta 4), pack-reused 36
Receiving objects: 100% (84/84), 51.25 MiB | 30.47 MiB/s, done.
Resolving deltas: 100% (5/5), done.


In [5]:
# DeepSort is imported from the `deep_sort` directory created by the clone
# cell above, so that cell must have been run from the same working directory.
from deep_sort.deep_sort import DeepSort

# Checkpoint path is relative to the working directory and points inside the
# cloned repository.
deep_sort_weights = 'deep_sort/deep/checkpoint/ckpt.t7'
# One tracker instance bound to the notebook namespace; the video-processing
# cells call `deep_sort.update(...)` on it once per frame.
# NOTE(review): the tracker presumably keeps track state between update()
# calls, so reusing it across different videos may carry IDs over — confirm.
deep_sort = DeepSort(model_path=deep_sort_weights)

**OBJECT CLASS NAMES**

In [6]:
# Class-index -> label lookup taken from the loaded YOLOv5 model; the
# processing cells index it with the integer class id of each detection.
class_names = model.names

**YOLOV5 & DEEPSORT IMPLEMENTATION**

In [None]:
import cv2
import numpy as np
from tqdm import tqdm

input_video_path = 'input.mp4'
output_video_path = 'output_yolo_deepsort.mp4'


def _classes_for_tracks(track_xyxy, det_xyxy, det_classes):
    """Pick a class id for every track box from the best-overlapping detection.

    Args:
        track_xyxy: array-like of shape (T, 4) with corner boxes of the tracks.
        det_xyxy: float array of shape (D, >=4) with corner boxes of the detections.
        det_classes: integer array of shape (D,) with the detection class ids.

    Returns:
        A list of length T holding, per track, the class id of the detection
        with the highest IoU, or None when no detection overlaps the track.
    """
    track_xyxy = np.asarray(track_xyxy, dtype=np.float32).reshape(-1, 4)
    if len(track_xyxy) == 0 or len(det_xyxy) == 0:
        return [None] * len(track_xyxy)

    # Pairwise intersection rectangles, shape (T, D, 2) for each corner.
    top_left = np.maximum(track_xyxy[:, None, :2], det_xyxy[None, :, :2])
    bottom_right = np.minimum(track_xyxy[:, None, 2:], det_xyxy[None, :, 2:4])
    overlap_wh = np.clip(bottom_right - top_left, 0, None)
    inter = overlap_wh[..., 0] * overlap_wh[..., 1]

    track_area = np.prod(np.clip(track_xyxy[:, 2:] - track_xyxy[:, :2], 0, None), axis=1)
    det_area = np.prod(np.clip(det_xyxy[:, 2:4] - det_xyxy[:, :2], 0, None), axis=1)
    union = track_area[:, None] + det_area[None, :] - inter
    iou = inter / np.maximum(union, 1e-9)

    best = iou.argmax(axis=1)
    return [int(det_classes[j]) if iou[i, j] > 0 else None for i, j in enumerate(best)]


cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Without this check a bad path silently produces an empty output file.
    raise IOError(f"Could not open input video: {input_video_path}")

# Video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)  # keep fractional rates (e.g. 29.97) to avoid duration drift
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Initialize VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# Start from a fresh tracker so track state left over from a previous run of
# any processing cell does not leak into this video.
deep_sort = DeepSort(model_path=deep_sort_weights)

# Last class id seen for each track id; used when a track has no overlapping
# detection in the current frame.
track_class_ids = {}

# Progress bar (the container may not report a usable frame count)
pbar = tqdm(total=total_frames if total_frames > 0 else None, desc='Processing video')

try:
    if not out.isOpened():
        raise IOError(f"Could not open output video for writing: {output_video_path}")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # YOLO to detect objects. OpenCV frames are BGR, while the hub model
        # expects RGB for ndarray input.
        results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Rows are (x_center, y_center, width, height, confidence, class);
        # converting the whole tensor at once also keeps an (0, 4) shape when
        # nothing was detected.
        detections = results.xywh[0].detach().cpu().numpy()
        det_xyxy = results.xyxy[0].detach().cpu().numpy()[:, :4]
        bbox_xywh = detections[:, :4]
        confidences = detections[:, 4]
        class_ids = detections[:, 5].astype(int)

        # Update the DeepSORT tracker (it receives the original BGR frame, as before)
        outputs = deep_sort.update(bbox_xywh, confidences, frame)

        # Tracker rows are (x1, y1, x2, y2, track_id) and are NOT in detection
        # order, so the class is recovered by box overlap rather than by position.
        tracks = np.asarray(outputs, dtype=int).reshape(-1, 5)
        matched_classes = _classes_for_tracks(tracks[:, :4], det_xyxy, class_ids)

        # Draw the tracking results on the frame
        for (x1, y1, x2, y2, track_id), class_id in zip(tracks.tolist(), matched_classes):
            if class_id is None:
                class_id = track_class_ids.get(track_id)
            else:
                track_class_ids[track_id] = class_id
            class_name = class_names[class_id] if class_id is not None else 'unknown'
            # .tolist() above yields plain Python ints, which every OpenCV build accepts.
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"ID: {track_id} {class_name}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Write Output video
        out.write(frame)

        # Progress bar
        pbar.update(1)
finally:
    # Release the handles even if a frame fails, so the output file is finalized.
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    pbar.close()

**YOLOV5, DEEPSORT & SEMANTIC SEGMENTATION IMPLEMENTATION**

In [12]:
import cv2
import numpy as np
from tqdm import tqdm
import torch
import torchvision.transforms as transforms
from torchvision.models.segmentation import DeepLabV3_ResNet101_Weights, deeplabv3_resnet101

input_video_path = 'input.mp4'
output_video_path = 'output_yolo_deepsort_semantic.mp4'


def _classes_for_tracks(track_xyxy, det_xyxy, det_classes):
    """Pick a class id for every track box from the best-overlapping detection.

    Args:
        track_xyxy: array-like of shape (T, 4) with corner boxes of the tracks.
        det_xyxy: float array of shape (D, >=4) with corner boxes of the detections.
        det_classes: integer array of shape (D,) with the detection class ids.

    Returns:
        A list of length T holding, per track, the class id of the detection
        with the highest IoU, or None when no detection overlaps the track.
    """
    track_xyxy = np.asarray(track_xyxy, dtype=np.float32).reshape(-1, 4)
    if len(track_xyxy) == 0 or len(det_xyxy) == 0:
        return [None] * len(track_xyxy)

    # Pairwise intersection rectangles, shape (T, D, 2) for each corner.
    top_left = np.maximum(track_xyxy[:, None, :2], det_xyxy[None, :, :2])
    bottom_right = np.minimum(track_xyxy[:, None, 2:], det_xyxy[None, :, 2:4])
    overlap_wh = np.clip(bottom_right - top_left, 0, None)
    inter = overlap_wh[..., 0] * overlap_wh[..., 1]

    track_area = np.prod(np.clip(track_xyxy[:, 2:] - track_xyxy[:, :2], 0, None), axis=1)
    det_area = np.prod(np.clip(det_xyxy[:, 2:4] - det_xyxy[:, :2], 0, None), axis=1)
    union = track_area[:, None] + det_area[None, :] - inter
    iou = inter / np.maximum(union, 1e-9)

    best = iou.argmax(axis=1)
    return [int(det_classes[j]) if iou[i, j] > 0 else None for i, j in enumerate(best)]


cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Without this check a bad path silently produces an empty output file.
    raise IOError(f"Could not open input video: {input_video_path}")

# Video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)  # keep fractional rates (e.g. 29.97) to avoid duration drift
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Initialize VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# Load DeepLab model (the `weights=` argument replaces the deprecated
# `pretrained=True`) and run it on the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
segmentation_model = deeplabv3_resnet101(weights=DeepLabV3_ResNet101_Weights.DEFAULT).to(device)
segmentation_model.eval()

# Define transformations for DeepLab: the pretrained weights expect RGB input
# scaled to [0, 1] and normalized with the ImageNet mean/std. The frame keeps
# its native resolution so the mask aligns with it pixel for pixel.
transform_dl = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Start from a fresh tracker so track state left over from a previous run of
# any processing cell does not leak into this video.
deep_sort = DeepSort(model_path=deep_sort_weights)

# Last class id seen for each track id; used when a track has no overlapping
# detection in the current frame.
track_class_ids = {}

# Progress bar, created after the model download so the two outputs do not
# interleave (the container may not report a usable frame count).
pbar = tqdm(total=total_frames if total_frames > 0 else None, desc='Processing video')

try:
    if not out.isOpened():
        raise IOError(f"Could not open output video for writing: {output_video_path}")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV decodes to BGR; both networks below expect RGB.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Semantic segmentation to get masks (per-pixel class index, 0 = background)
        input_tensor = transform_dl(frame_rgb).unsqueeze(0).to(device)
        with torch.no_grad():
            output = segmentation_model(input_tensor)['out'][0]
        masks = torch.argmax(output, dim=0).cpu().numpy()

        # YOLO to detect objects
        results = model(frame_rgb)

        # Rows are (x_center, y_center, width, height, confidence, class);
        # converting the whole tensor at once also keeps an (0, 4) shape when
        # nothing was detected.
        detections = results.xywh[0].detach().cpu().numpy()
        det_xyxy = results.xyxy[0].detach().cpu().numpy()[:, :4]
        bbox_xywh = detections[:, :4]
        confidences = detections[:, 4]
        class_ids = detections[:, 5].astype(int)

        # Update the DeepSORT tracker (it receives the original BGR frame, as before)
        outputs = deep_sort.update(bbox_xywh, confidences, frame)

        # Tracker rows are (x1, y1, x2, y2, track_id) and are NOT in detection
        # order, so the class is recovered by box overlap rather than by position.
        tracks = np.asarray(outputs, dtype=int).reshape(-1, 5)
        matched_classes = _classes_for_tracks(tracks[:, :4], det_xyxy, class_ids)

        # Draw the tracking results on the frame
        for (x1, y1, x2, y2, track_id), class_id in zip(tracks.tolist(), matched_classes):
            if class_id is None:
                class_id = track_class_ids.get(track_id)
            else:
                track_class_ids[track_id] = class_id
            class_name = class_names[class_id] if class_id is not None else 'unknown'
            # .tolist() above yields plain Python ints, which every OpenCV build accepts.
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"ID: {track_id} {class_name}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Apply semantic segmentation masks to the frame; only classes that are
        # actually present in this frame are visited.
        for class_idx in np.unique(masks):
            if class_idx == 0:  # skip background
                continue
            mask = (masks == class_idx).astype(np.uint8) * 255
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            cv2.drawContours(frame, contours, -1, (0, 0, 255), 2)  # Draw contours of each mask

        # Write Output video
        out.write(frame)

        # Progress bar
        pbar.update(1)
finally:
    # Release the handles even if a frame fails, so the output file is finalized.
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    pbar.close()


Processing video:  13%|█▎        | 47/350 [00:32<03:31,  1.43it/s]
Downloading: "https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth" to /root/.cache/torch/hub/checkpoints/deeplabv3_resnet101_coco-586e9e4e.pth
100%|██████████| 233M/233M [00:02<00:00, 107MB/s]

Processing video:   0%|          | 1/350 [00:21<2:06:09, 21.69s/it][A
Processing video:   1%|          | 2/350 [00:38<1:47:43, 18.57s/it][A
Processing video:   1%|          | 3/350 [01:00<1:58:31, 20.50s/it][A
Processing video:   1%|          | 4/350 [01:18<1:51:02, 19.26s/it][A
Processing video:   1%|▏         | 5/350 [01:35<1:46:25, 18.51s/it][A
Processing video:   2%|▏         | 6/350 [01:53<1:45:03, 18.32s/it][A
Processing video:   2%|▏         | 7/350 [02:09<1:41:23, 17.74s/it][A
Processing video:   2%|▏         | 8/350 [02:26<1:39:12, 17.40s/it][A
Processing video:   3%|▎         | 9/350 [02:44<1:39:08, 17.45s/it][A
Processing video:   3%|▎         | 10/350 [03:02<1:39:57, 17.64s/it][A
Proce