In [1]:
!pip install motmetrics
!pip install deep-sort-realtime
!git clone https://github.com/abewley/sort.git
!pip install filterpy


Collecting motmetrics
  Downloading motmetrics-1.4.0-py3-none-any.whl.metadata (20 kB)
Collecting xmltodict>=0.12.0 (from motmetrics)
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading motmetrics-1.4.0-py3-none-any.whl (161 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.5/161.5 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict, motmetrics
Successfully installed motmetrics-1.4.0 xmltodict-0.14.2
Collecting deep-sort-realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-sort-realtime
Successfully installed deep-sort-realtime-1.3.2
Cloning into 'sort'...
remote: Enumerating objects: 208, 

<h3>Imports</h3>

In [2]:
import matplotlib
matplotlib.use('Agg')  # non-interactive backend suitable for headless environments

import os
import zipfile
import gdown
import torch
import torchvision
import numpy as np
import pandas as pd
import motmetrics as mm
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from deep_sort_realtime.deepsort_tracker import DeepSort
from tqdm import tqdm
from PIL import Image
import sys

# Fix sort.py backend issue.
# The cloned SORT script refers to the 'TkAgg' matplotlib backend; this cell
# swaps it for the non-interactive 'Agg' backend selected at the top of the
# notebook. The cell is safe to re-run: the file is only rewritten when the
# replacement changes something and the import path is only added once.
sort_repo_dir = '/content/sort'
sort_script_path = os.path.join(sort_repo_dir, 'sort.py')

with open(sort_script_path, 'r') as file:
    code = file.read()

# Replace TkAgg with Agg
fixed_code = code.replace('TkAgg', 'Agg')

if fixed_code != code:
    with open(sort_script_path, 'w') as file:
        file.write(fixed_code)

print("Backend issue fixed!")

# Make `from sort import Sort` resolvable without stacking duplicate entries
if sort_repo_dir not in sys.path:
    sys.path.append(sort_repo_dir)


Backend issue fixed!


<h3>Configuration & Device Setup</h3>

In [3]:
# CONFIGURATION
# Central settings dictionary read by the download/extract helpers and by the
# training cells further down.
#   dataset_url  - Google Drive link handed to gdown
#   dataset_zip  - where the downloaded archive is stored
#   dataset_path - folder expected to exist once the archive is extracted
#   tracking     - NOTE(review): not read by any cell visible in this notebook
#                  (the tracker hard-codes its own settings) - confirm intent
#   training     - epochs / batch size / learning rate used by the training cells
CONFIG = {
    "dataset_url": "https://drive.google.com/uc?id=1yvOwbPks7dFzMX2z4JoUQlwdEfNYQd7-",
    "dataset_zip": "/content/MOT15.zip",
    "dataset_path": "/content/MOT15",
    "tracking": {"iou_threshold": 0.3, "max_age": 30},
    "training": {"epochs": 5, "batch_size": 8, "learning_rate": 0.0001},
}

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


<h3>Dataset Download & Extraction Functions</h3>

In [4]:
# DOWNLOAD & EXTRACT DATASET
def download_dataset():
    """Download the dataset archive to ``CONFIG["dataset_zip"]`` if it is missing.

    Nothing is downloaded when the archive already exists on disk.

    Raises:
        RuntimeError: if gdown reports no output file or the archive is still
            absent afterwards, so a failed download cannot pass unnoticed and
            surface later as a confusing extraction error.
    """
    zip_path = CONFIG["dataset_zip"]
    if os.path.exists(zip_path):
        print("Dataset already downloaded.")
        return

    print("Downloading MOT15 dataset from Google Drive...")
    downloaded = gdown.download(CONFIG["dataset_url"], zip_path, quiet=False)
    if downloaded is None or not os.path.exists(zip_path):
        raise RuntimeError(
            f"Download failed: {CONFIG['dataset_url']} was not saved to {zip_path}"
        )

def extract_dataset():
    """Unpack ``CONFIG["dataset_zip"]`` so that ``CONFIG["dataset_path"]`` exists.

    The archive is extracted into the parent folder of ``dataset_path`` (the
    same "/content" location as before for the default configuration), which
    keeps the extraction target and the existence check tied to one setting.

    Raises:
        FileNotFoundError: if the expected dataset folder is still missing
            after extraction (e.g. the archive has a different top-level
            folder name).
    """
    dataset_path = CONFIG["dataset_path"]
    if os.path.exists(dataset_path):
        print("Dataset already extracted.")
        return

    print("Extracting dataset...")
    extract_root = os.path.dirname(os.path.normpath(dataset_path)) or "."
    with zipfile.ZipFile(CONFIG["dataset_zip"], 'r') as zip_ref:
        zip_ref.extractall(extract_root)

    # Report success only once the folder the rest of the notebook reads exists
    if not os.path.isdir(dataset_path):
        raise FileNotFoundError(
            f"Extraction finished but {dataset_path} was not created; "
            f"check the top-level folder name inside {CONFIG['dataset_zip']}"
        )
    print(f"Dataset extracted to {CONFIG['dataset_path']}")


<h3>Data Augmentation Function</h3>

In [5]:
# DATA AUGMENTATION
def apply_augmentations(image):
    """Run the training augmentation pipeline on one image and return a tensor.

    Steps, in order: resize to 640x640, random 600x600 crop, random horizontal
    flip, colour jitter, Gaussian blur, tensor conversion, per-channel
    normalisation.

    Only the image is transformed here. Any bounding boxes the caller holds
    for the same image are not resized, cropped or flipped by this function.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    steps = [
        # geometric changes
        transforms.Resize((640, 640)),
        transforms.RandomCrop(600),
        transforms.RandomHorizontalFlip(p=0.5),
        # photometric changes
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 2.0)),
        # tensor conversion and normalisation
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(image)


<h3>MOT15Dataset Class</h3>

In [6]:
# MOT15 DATASET CLASS
class MOT15Dataset(Dataset):
    """Per-frame detection dataset built from MOT-style sequence folders.

    Layout read from disk: ``<root_dir>/<mode>/<sequence>/img1/<frame>.<ext>``
    for images and ``<sequence>/gt/gt.txt`` for annotations. Only the first
    six annotation columns are used (frame, track id, x, y, w, h).

    Each entry of ``self.data`` is ``(image_path, boxes, labels)`` where
    ``boxes`` is an (N, 4) float32 array of ``[x1, y1, x2, y2]`` corners and
    ``labels`` is an array of N ones (a single foreground class).

    Sequences without ``gt/gt.txt`` contribute no samples; a message is
    printed for each one so an unexpectedly empty dataset is easy to diagnose.

    NOTE(review): ``transform`` is applied to the image only. If it changes
    the image geometry the returned boxes no longer line up with the image.
    """

    # Names given to the leading gt.txt columns that this class reads.
    _GT_COLUMNS = ["frame", "track_id", "x", "y", "w", "h"]

    def __init__(self, root_dir, mode="train", transform=None):
        """Index every numbered frame of every annotated sequence.

        Args:
            root_dir: dataset root containing one folder per split.
            mode: split folder name under ``root_dir``.
            transform: optional callable applied to the PIL image in
                ``__getitem__``.
        """
        self.root_dir = os.path.join(root_dir, mode)
        self.transform = transform
        self.data = []
        no_boxes = np.zeros((0, 4), dtype=np.float32)

        # sorted() makes the sample order reproducible across runs and machines
        for seq in sorted(os.listdir(self.root_dir)):
            seq_dir = os.path.join(self.root_dir, seq)
            img_dir = os.path.join(seq_dir, "img1")
            gt_path = os.path.join(seq_dir, "gt/gt.txt")
            if not os.path.exists(gt_path):
                if os.path.isdir(seq_dir):
                    print(f"Skipping sequence '{seq}': no ground truth at {gt_path}")
                continue

            boxes_by_frame = self._load_boxes_by_frame(gt_path)
            for img_name in sorted(os.listdir(img_dir)):
                stem = os.path.splitext(img_name)[0]
                if not stem.isdigit():
                    # Not a numbered frame (hidden file, thumbnail cache, ...)
                    continue
                boxes = boxes_by_frame.get(int(stem), no_boxes)
                labels = np.ones(len(boxes))
                self.data.append((os.path.join(img_dir, img_name), boxes, labels))

    @staticmethod
    def _load_boxes_by_frame(gt_path):
        """Read gt.txt once and return ``{frame_id: (N, 4) xyxy float32 array}``."""
        gt_df = pd.read_csv(gt_path, header=None)
        # Keep only the columns used here so files with extra trailing columns load
        gt_df = gt_df.iloc[:, :len(MOT15Dataset._GT_COLUMNS)]
        gt_df.columns = MOT15Dataset._GT_COLUMNS
        # Drop boxes without positive area; torchvision detection models reject them
        gt_df = gt_df[(gt_df["w"] > 0) & (gt_df["h"] > 0)]
        gt_df = gt_df.assign(x2=gt_df["x"] + gt_df["w"], y2=gt_df["y"] + gt_df["h"])
        return {
            int(frame): rows[["x", "y", "x2", "y2"]].to_numpy(dtype=np.float32)
            for frame, rows in gt_df.groupby("frame", sort=False)
        }

    def __len__(self):
        """Number of indexed frames."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for frame ``idx``.

        ``target`` holds ``"boxes"`` (float32, shape (N, 4)) and ``"labels"``
        (int64, shape (N,)). The image is an RGB PIL image, or whatever
        ``transform`` returns when one was given.
        """
        img_path, boxes, labels = self.data[idx]
        image = Image.open(img_path).convert("RGB")
        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.tensor(labels, dtype=torch.int64),
        }
        if self.transform:
            image = self.transform(image)
        return image, target


<h3>Object Detector Class</h3>

In [7]:
# OBJECT DETECTOR CLASS
class ObjectDetector:
    """Faster R-CNN (ResNet-50 FPN) detector with a replaced box-predictor head.

    The model is created with default pretrained weights, its box predictor is
    swapped for one with ``num_classes`` outputs, and it is moved to the
    notebook-level ``device`` in training mode.
    """

    def __init__(self, num_classes=2):
        """Build the model; ``num_classes`` counts the background class too."""
        self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        self.model.to(device)
        self.model.train()

    @staticmethod
    def _prepare_image(img):
        """Return a tensor for inference without any random augmentation.

        Tensors are assumed to be preprocessed already (e.g. by the dataset
        transform) and are passed through unchanged. Anything else is treated
        as a PIL image and gets the deterministic part of the training
        pipeline: resize, tensor conversion, normalisation.
        """
        if torch.is_tensor(img):
            return img
        preprocess = transforms.Compose([
            transforms.Resize((640, 640)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        return preprocess(img)

    def detect_objects(self, images):
        """Run inference on a sequence of images.

        Args:
            images: iterable of PIL images or image tensors.

        Returns:
            The model's list of per-image prediction dicts; an empty list when
            ``images`` is empty. Box coordinates refer to the tensors fed to
            the model (640x640 for PIL inputs), not to the original image size.

        The model is switched to eval mode for the forward pass, because
        torchvision detection models expect targets and return losses while
        in training mode. The previous mode is restored afterwards.
        """
        if len(images) == 0:
            return []
        img_tensors = [self._prepare_image(img).to(device) for img in images]

        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                predictions = self.model(img_tensors)
        finally:
            self.model.train(was_training)
        return predictions


<h3>Adaptive Tracker Class</h3>

In [8]:
# ADAPTIVE TRACKER CLASS
# Make sure that Sort is imported from your fixed sort.py file.
from sort import Sort

class AdaptiveTracker:
    """Runs SORT and DeepSORT side by side and filters implausible DeepSORT jumps.

    A DeepSORT track is reported as "consistent" when it is new, or when its
    top-left corner moved less than ``max_jump`` pixels since it was last seen.

    Input conversion follows the formats documented by the two libraries
    (SORT: rows of ``[x1, y1, x2, y2, score]``; deep_sort_realtime: tuples of
    ``([left, top, width, height], confidence, class)``).
    """

    def __init__(self, max_age=30, n_init=3, max_cosine_distance=0.2, max_jump=50):
        """Create both trackers.

        Args:
            max_age, n_init, max_cosine_distance: forwarded to ``DeepSort``.
            max_jump: largest allowed top-left displacement (pixels) between
                two consecutive sightings of the same DeepSORT track.
        """
        self.deep_sort = DeepSort(max_age=max_age, n_init=n_init,
                                  max_cosine_distance=max_cosine_distance)
        self.sort_tracker = Sort()
        self.previous_tracks = {}
        self.max_jump = max_jump

    @staticmethod
    def _to_numpy(values):
        """Convert a tensor or array-like to a NumPy array on the host."""
        if torch.is_tensor(values):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    @staticmethod
    def _to_xyxy_scores(detections):
        """Normalise one frame of detections to a float (N, 5) array.

        Accepted inputs: ``None`` or an empty sequence; an array-like of
        ``[x1, y1, x2, y2, score]`` rows; a prediction dict with ``"boxes"``
        and ``"scores"``; or a one-element list/tuple holding such a dict.
        """
        if detections is None:
            return np.empty((0, 5))
        if (isinstance(detections, (list, tuple)) and len(detections) > 0
                and isinstance(detections[0], dict)):
            if len(detections) != 1:
                raise ValueError(
                    "track_objects expects the detections of a single frame, "
                    f"got a batch of {len(detections)} prediction dicts"
                )
            detections = detections[0]
        if isinstance(detections, dict):
            boxes = AdaptiveTracker._to_numpy(detections["boxes"]).reshape(-1, 4)
            scores = AdaptiveTracker._to_numpy(detections["scores"]).reshape(-1, 1)
            return np.hstack([boxes, scores]).astype(float)
        rows = AdaptiveTracker._to_numpy(detections).astype(float)
        if rows.size == 0:
            return np.empty((0, 5))
        return rows.reshape(-1, 5)

    def track_objects(self, detections, frame_num, frame=None, embeds=None):
        """Update both trackers with the detections of one frame.

        Args:
            detections: detections for a single frame (see ``_to_xyxy_scores``).
            frame_num: frame index; kept for interface compatibility, it is
                not forwarded to either tracker.
            frame: image of the current frame, used by DeepSORT to compute
                appearance embeddings when ``embeds`` is not given.
            embeds: optional precomputed embeddings, one per detection.

        Returns:
            ``(sort_tracked, consistent_tracks)``: the SORT update result and
            the list of DeepSORT tracks that passed the jump filter.

        Raises:
            ValueError: if neither ``frame`` nor ``embeds`` is provided, or if
                a multi-image batch of prediction dicts is passed.
        """
        if frame is None and embeds is None:
            raise ValueError(
                "DeepSORT needs either the frame image or precomputed embeddings; "
                "pass frame=... or embeds=... to track_objects"
            )

        dets = self._to_xyxy_scores(detections)
        sort_tracked = self.sort_tracker.update(dets)

        # Corner boxes -> (left, top, width, height) tuples for DeepSORT
        raw_detections = [
            ([float(x1), float(y1), float(x2 - x1), float(y2 - y1)], float(score), None)
            for x1, y1, x2, y2, score in dets
        ]
        deep_sort_tracked = self.deep_sort.update_tracks(raw_detections, embeds=embeds, frame=frame)

        consistent_tracks = []
        for track in deep_sort_tracked:
            track_id = track.track_id
            bbox = track.to_tlbr()
            if track_id in self.previous_tracks:
                prev_bbox = self.previous_tracks[track_id]
                if np.linalg.norm(np.array(bbox[:2]) - np.array(prev_bbox[:2])) < self.max_jump:
                    consistent_tracks.append(track)
            else:
                consistent_tracks.append(track)
            # Remember the latest position even when the track was filtered out
            self.previous_tracks[track_id] = bbox

        return sort_tracked, consistent_tracks


<h3>Training & Evaluation Functions</h3>

In [9]:
# TRAINING FUNCTION
def train_faster_rcnn(model, train_loader, epochs=10, lr=0.0001):
    """Fine-tune the detector wrapped by ``model`` with Adam.

    Args:
        model: wrapper object exposing the network as ``model.model``.
        train_loader: iterable yielding ``(images, targets)`` batches, where
            each target is a dict with ``"boxes"`` and ``"labels"`` tensors.
        epochs: number of passes over ``train_loader``.
        lr: Adam learning rate.

    The value printed per epoch is the sum of the batch losses, not a mean.
    """
    net = model.model
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    net.train()

    for epoch_no in range(1, epochs + 1):
        running_loss = 0
        progress = tqdm(train_loader, desc=f"Epoch {epoch_no}/{epochs}")
        for batch_images, batch_targets in progress:
            # Move the batch to the training device
            device_images = [image.to(device) for image in batch_images]
            device_targets = []
            for tgt in batch_targets:
                device_targets.append({
                    "boxes": tgt["boxes"].to(device),
                    "labels": tgt["labels"].to(device),
                })

            optimizer.zero_grad()
            # In training mode the network returns a dict of loss components
            loss_parts = net(device_images, device_targets)
            total_loss = sum(loss_parts.values())
            total_loss.backward()
            optimizer.step()
            running_loss += total_loss.item()
        print(f"Epoch {epoch_no}, Loss: {running_loss:.4f}")

# PERFORMANCE EVALUATION FUNCTION
def _tlbr_to_tlwh(boxes):
    """Convert ``[x1, y1, x2, y2]`` rows to ``[x, y, w, h]`` as a float (N, 4) array."""
    converted = np.asarray(boxes, dtype=float).reshape(-1, 4).copy()
    converted[:, 2:] -= converted[:, :2]
    return converted


def evaluate_performance(detections, dataset, max_iou=0.3, verbose=True):
    """Accumulate MOT metrics over all frames and print/return the summary.

    Args:
        detections: one dict per frame with ``"track_id"`` (list of ids) and
            ``"bboxes"`` (list of ``[x1, y1, x2, y2]`` boxes).
        dataset: indexable dataset whose ``dataset[idx][1]["boxes"]`` is a
            tensor of ground-truth ``[x1, y1, x2, y2]`` boxes for frame ``idx``.
        max_iou: maximum IoU *distance* (1 - IoU) accepted as a match, passed
            to ``mm.distances.iou_matrix``.
        verbose: print the tracked ids and boxes of every frame.

    Returns:
        The motmetrics summary DataFrame (also printed).

    Both box sets are converted from corner format to (x, y, w, h) before the
    distance computation, which is the rectangle format ``iou_matrix`` expects.

    NOTE(review): ground-truth ids are the row positions within each frame,
    not persistent track ids, so identity metrics (idf1, num_switches) are
    only meaningful if row order is stable across frames - confirm.
    """
    acc = mm.MOTAccumulator(auto_id=True)
    for idx, det in enumerate(detections):
        if verbose:
            print(f"Frame {idx}: Detected track IDs: {det['track_id']}, Bounding boxes: {det['bboxes']}")
        gt_boxes = _tlbr_to_tlwh(dataset[idx][1]["boxes"].numpy())
        gt_ids = np.arange(len(gt_boxes))
        det_boxes = _tlbr_to_tlwh(det["bboxes"])
        det_ids = list(det["track_id"])
        distances = mm.distances.iou_matrix(gt_boxes, det_boxes, max_iou=max_iou)
        acc.update(gt_ids, det_ids, distances)

    if len(detections) == 0:
        # Without any frame every metric is undefined and shows up as NaN
        print("Warning: no frames to evaluate; metrics below are undefined.")

    mh = mm.metrics.create()
    summary = mh.compute(acc, metrics=['mota', 'motp', 'idf1', 'num_switches'], name='Overall')
    print(summary)
    return summary

def collate_fn(batch):
    """Transpose a batch of samples into a tuple of per-field tuples.

    A batch of ``(image, target)`` pairs becomes ``(images, targets)``, which
    keeps variable-sized detection targets as plain Python sequences instead
    of stacking them.
    """
    per_field = zip(*batch)
    return tuple(per_field)


<h3>Main Execution</h3>

In [10]:
!nvidia-smi

Mon Mar 10 05:26:13 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   47C    P8             12W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# ------------------------------
# Training Cell
# ------------------------------
# Downloads the data, fine-tunes the detector, saves a checkpoint, then runs
# detection + tracking over the test loader and pickles the tracked boxes.

# Download and extract dataset
download_dataset()
extract_dataset()

# Create datasets and dataloaders (using training augmentation)
train_dataset = MOT15Dataset(CONFIG["dataset_path"], mode="train", transform=apply_augmentations)
test_dataset = MOT15Dataset(CONFIG["dataset_path"], mode="test", transform=apply_augmentations)
train_loader = DataLoader(train_dataset, batch_size=CONFIG["training"]["batch_size"],
                          shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=CONFIG["training"]["batch_size"],
                         shuffle=False, collate_fn=collate_fn)

# Initialize and train detector
detector = ObjectDetector(num_classes=2)
train_faster_rcnn(detector, train_loader, epochs=CONFIG["training"]["epochs"],
                  lr=CONFIG["training"]["learning_rate"])

# Save the trained model checkpoint
torch.save(detector.model.state_dict(), "/content/fasterrcnn_checkpoint.pth")
print("Model checkpoint saved!")

# Training leaves the network in train mode, where torchvision detection
# models expect targets and return losses; switch to eval before predicting.
detector.model.eval()

# (Optional) Run detection on the test set and save detections for later evaluation
# NOTE(review): each loader item is a batch of images, yet it is tracked as a
# single frame - confirm whether per-image iteration is intended.
tracker = AdaptiveTracker()
all_detections = []
# Wrapping the loader (not enumerate) lets tqdm show the total number of batches
for frame_num, (images, _) in enumerate(tqdm(test_loader, desc="Running Detection")):
    detections = detector.detect_objects(images)
    _, consistent_tracks = tracker.track_objects(detections, frame_num)
    all_detections.append({
        "track_id": [t.track_id for t in consistent_tracks],
        "bboxes": [t.to_tlbr() for t in consistent_tracks]
    })

# Save the detections so that you don't have to run detection again
import pickle
with open("/content/all_detections.pkl", "wb") as f:
    pickle.dump(all_detections, f)
print("Detections saved!")


Downloading MOT15 dataset from Google Drive...


Downloading...
From (original): https://drive.google.com/uc?id=1yvOwbPks7dFzMX2z4JoUQlwdEfNYQd7-
From (redirected): https://drive.google.com/uc?id=1yvOwbPks7dFzMX2z4JoUQlwdEfNYQd7-&confirm=t&uuid=ac148551-656c-4648-bec8-6a426ac01570
To: /content/MOT15.zip
100%|██████████| 1.31G/1.31G [00:13<00:00, 98.9MB/s]


Extracting dataset...
Dataset extracted to /content/MOT15


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 85.5MB/s]
Epoch 1/5: 100%|██████████| 688/688 [26:54<00:00,  2.35s/it]


Epoch 1, Loss: 4227.5952


Epoch 2/5: 100%|██████████| 688/688 [26:37<00:00,  2.32s/it]


Epoch 2, Loss: 2987.5696


Epoch 3/5: 100%|██████████| 688/688 [26:27<00:00,  2.31s/it]


Epoch 3, Loss: 2698.8139


Epoch 4/5:  40%|████      | 276/688 [10:40<16:17,  2.37s/it]

In [None]:
# ------------------------------
# Evaluation Cell
# ------------------------------
# Restores the saved checkpoint and the pickled detections, then scores the
# detections against the test dataset.

# Load the trained model checkpoint for evaluation.
# map_location lets a checkpoint saved on a GPU runtime load on a CPU-only one.
detector = ObjectDetector(num_classes=2)
detector.model.load_state_dict(
    torch.load("/content/fasterrcnn_checkpoint.pth", map_location=device)
)
detector.model.eval()  # Set to evaluation mode
print("Model loaded for evaluation!")

# Load saved detections (or you can run detection again if needed)
# NOTE: pickle can execute arbitrary code on load; only open files written by
# this notebook.
import pickle
with open("/content/all_detections.pkl", "rb") as f:
    all_detections = pickle.load(f)
print("Detections loaded!")

# (Optional) For evaluation, you might want to use a deterministic transform instead of training augmentations.
# For example, you could define and use a simpler transform like:
# def evaluation_transform(image):
#     transform = transforms.Compose([
#         transforms.Resize((640, 640)),
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#     ])
#     return transform(image)
# And then recreate the test_dataset with this transform if needed.

# Re-create the test dataset (using the same transform as during training here)
test_dataset = MOT15Dataset(CONFIG["dataset_path"], mode="test", transform=apply_augmentations)

# Evaluate the performance
evaluate_performance(all_detections, test_dataset)
print("Evaluation completed!")


In [12]:
if __name__ == "__main__":
    # End-to-end run: data preparation, training, tracking and evaluation.

    # Download and extract dataset
    download_dataset()
    extract_dataset()

    # Create dataset and dataloaders
    train_dataset = MOT15Dataset(CONFIG["dataset_path"], mode="train", transform=apply_augmentations)
    test_dataset = MOT15Dataset(CONFIG["dataset_path"], mode="test", transform=apply_augmentations)
    train_loader = DataLoader(train_dataset, batch_size=CONFIG["training"]["batch_size"], shuffle=True, collate_fn=collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=CONFIG["training"]["batch_size"], shuffle=False, collate_fn=collate_fn)

    # An empty test set yields zero tracking iterations and NaN metrics
    if len(test_dataset) == 0:
        print("Warning: test dataset is empty; tracking and evaluation will have nothing to process.")

    # Initialize detector and tracker
    detector = ObjectDetector(num_classes=2)
    train_faster_rcnn(detector, train_loader, epochs=CONFIG["training"]["epochs"], lr=CONFIG["training"]["learning_rate"])

    # Training leaves the network in train mode, where torchvision detection
    # models expect targets and return losses; switch to eval before predicting.
    detector.model.eval()

    tracker = AdaptiveTracker()
    all_detections = []
    # Wrapping the loader (not enumerate) lets tqdm show the total number of batches
    for frame_num, (images, _) in enumerate(tqdm(test_loader, desc="Evaluating")):
        detections = detector.detect_objects(images)
        _, consistent_tracks = tracker.track_objects(detections, frame_num)
        all_detections.append({
            "track_id": [t.track_id for t in consistent_tracks],
            "bboxes": [t.to_tlbr() for t in consistent_tracks]
        })

    evaluate_performance(all_detections, test_dataset)
    print("Training & Tracking Completed!")


Dataset already downloaded.
Dataset already extracted.


Epoch 1/5: 100%|██████████| 688/688 [23:52<00:00,  2.08s/it]


Epoch 1, Loss: 4105.1125


Epoch 2/5: 100%|██████████| 688/688 [23:33<00:00,  2.06s/it]


Epoch 2, Loss: 2903.7199


Epoch 3/5: 100%|██████████| 688/688 [23:31<00:00,  2.05s/it]


Epoch 3, Loss: 2669.8037


Epoch 4/5: 100%|██████████| 688/688 [23:37<00:00,  2.06s/it]


Epoch 4, Loss: 2458.3566


Epoch 5/5: 100%|██████████| 688/688 [23:30<00:00,  2.05s/it]
  self.model.load_state_dict(torch.load(model_wts_path))


Epoch 5, Loss: 2339.7972


Evaluating: 0it [00:00, ?it/s]

         mota  motp  idf1  num_switches
Overall   NaN   NaN   NaN             0
Training & Tracking Completed!



