In [11]:
!pip install motmetrics
!pip install deep-sort-realtime
!git clone https://github.com/abewley/sort.git

fatal: destination path 'sort' already exists and is not an empty directory.


In [15]:
import matplotlib
matplotlib.use('Agg')  # non-interactive backend suitable for headless environments

import os
import zipfile
import gdown
import torch
import torchvision
import numpy as np
import pandas as pd
import motmetrics as mm
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from deep_sort_realtime.deepsort_tracker import DeepSort
from tqdm import tqdm
from PIL import Image


# Fix sort.py backend issue
with open('/content/sort/sort.py', 'r') as file:
    code = file.read()

# Replace TkAgg with Agg
fixed_code = code.replace('TkAgg', 'Agg')

with open('/content/sort/sort.py', 'w') as file:
    file.write(fixed_code)

print("✅ Backend issue fixed!")

import sys
sys.path.append('/content/sort')



# ✅ CONFIGURATION
CONFIG = {
    "dataset_url": "https://drive.google.com/uc?id=1yvOwbPks7dFzMX2z4JoUQlwdEfNYQd7-",
    "dataset_zip": "/content/MOT15.zip",
    "dataset_path": "/content/MOT15",
    "tracking": {"iou_threshold": 0.3, "max_age": 30},
    "training": {"epochs": 10, "batch_size": 8, "learning_rate": 0.0001},
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ DOWNLOAD & EXTRACT DATASET
def download_dataset():
    if not os.path.exists(CONFIG["dataset_zip"]):
        print("📥 Downloading MOT15 dataset from Google Drive...")
        gdown.download(CONFIG["dataset_url"], CONFIG["dataset_zip"], quiet=False)
    else:
        print("✅ Dataset already downloaded.")

def extract_dataset():
    if not os.path.exists(CONFIG["dataset_path"]):
        print("📂 Extracting dataset...")
        with zipfile.ZipFile(CONFIG["dataset_zip"], 'r') as zip_ref:
            zip_ref.extractall("/content/")
        print(f"✅ Dataset extracted to {CONFIG['dataset_path']}")
    else:
        print("✅ Dataset already extracted.")

# ✅ DATA AUGMENTATION
def apply_augmentations(image):
    transform = transforms.Compose([
        transforms.Resize((640, 640)),
        transforms.RandomCrop(600),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return transform(image)

# ✅ MOT15 DATASET CLASS
class MOT15Dataset(Dataset):
    def __init__(self, root_dir, mode="train", transform=None):
        self.root_dir = os.path.join(root_dir, mode)
        self.transform = transform
        self.data = []
        for seq in os.listdir(self.root_dir):
            img_dir = os.path.join(self.root_dir, seq, "img1")
            gt_path = os.path.join(self.root_dir, seq, "gt/gt.txt")
            if os.path.exists(gt_path):
                gt_df = pd.read_csv(gt_path, header=None)
                gt_df.columns = ["frame", "track_id", "x", "y", "w", "h", "conf", "class", "visibility"]
                for img_name in sorted(os.listdir(img_dir)):
                    frame_id = int(img_name.split('.')[0])
                    frame_gt = gt_df[gt_df["frame"] == frame_id]
                    boxes_df = frame_gt[["x", "y", "w", "h"]].copy()
                    boxes_df = pd.DataFrame({
                    'x1': boxes_df['x'],
                    'y1': boxes_df['y'],
                    'x2': boxes_df['x'] + boxes_df['w'],
                    'y2': boxes_df['y'] + boxes_df['h']
                    })
                    boxes = boxes_df[['x1', 'y1', 'x2', 'y2']].values
                    labels = np.ones(len(boxes))
                    self.data.append((os.path.join(img_dir, img_name), boxes, labels))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, boxes, labels = self.data[idx]
        image = Image.open(img_path).convert("RGB")
        target = {"boxes": torch.tensor(boxes, dtype=torch.float32), "labels": torch.tensor(labels, dtype=torch.int64)}
        if self.transform:
            image = self.transform(image)
        return image, target

# ✅ OBJECT DETECTOR CLASS
class ObjectDetector:
    def __init__(self, num_classes=2):
        self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        self.model.to(device)
        self.model.train()

    def detect_objects(self, images):
        img_tensors = [apply_augmentations(img).to(device) for img in images]
        with torch.no_grad():
            predictions = self.model(img_tensors)
        return predictions

# ✅ ADAPTIVE TRACKER CLASS
class AdaptiveTracker:
    def __init__(self):
        self.deep_sort = DeepSort(max_age=30, n_init=3, max_cosine_distance=0.2)
        self.sort_tracker = Sort()
        self.previous_tracks = {}

    def track_objects(self, detections, frame_num):
        sort_tracked = self.sort_tracker.update(np.array(detections) if detections else np.empty((0, 5)))
        deep_sort_tracked = self.deep_sort.update_tracks(detections, frame_num=frame_num)

        consistent_tracks = []
        for track in deep_sort_tracked:
            track_id = track.track_id
            bbox = track.to_tlbr()
            if track_id in self.previous_tracks:
                prev_bbox = self.previous_tracks[track_id]
                if np.linalg.norm(np.array(bbox[:2]) - np.array(prev_bbox[:2])) < 50:
                    consistent_tracks.append(track)
            else:
                consistent_tracks.append(track)
            self.previous_tracks[track_id] = bbox

        return sort_tracked, consistent_tracks

# ✅ TRAINING FUNCTION
def train_faster_rcnn(model, train_loader, epochs=10, lr=0.0001):
    optimizer = torch.optim.Adam(model.model.parameters(), lr=lr)
    model.model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for images, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images = [img.to(device) for img in images]
            targets = [{"boxes": t["boxes"].to(device), "labels": t["labels"].to(device)} for t in targets]
            optimizer.zero_grad()
            loss_dict = model.model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

# ✅ PERFORMANCE EVALUATION FUNCTION
def evaluate_performance(detections, dataset):
    acc = mm.MOTAccumulator(auto_id=True)
    for idx, det in enumerate(detections):
        gt_boxes = dataset[idx][1]["boxes"].numpy()
        gt_ids = np.arange(len(gt_boxes))
        det_boxes = np.array(det["bboxes"])
        det_ids = det["track_id"]
        distances = mm.distances.iou_matrix(gt_boxes, det_boxes, max_iou=0.5)
        acc.update(gt_ids, det_ids, distances)

    mh = mm.metrics.create()
    summary = mh.compute(acc, metrics=['mota', 'motp', 'idf1', 'num_switches'], name='Overall')
    print(summary)

# ✅ MAIN EXECUTION
if __name__ == "__main__":
    download_dataset()
    extract_dataset()

    train_dataset = MOT15Dataset(CONFIG["dataset_path"], mode="train", transform=apply_augmentations)
    test_dataset = MOT15Dataset(CONFIG["dataset_path"], mode="test", transform=apply_augmentations)
    train_loader = DataLoader(train_dataset, batch_size=CONFIG["training"]["batch_size"], shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=CONFIG["training"]["batch_size"], shuffle=False)

    detector = ObjectDetector(num_classes=2)
    train_faster_rcnn(detector, train_loader, epochs=CONFIG["training"]["epochs"], lr=CONFIG["training"]["learning_rate"])

    tracker = AdaptiveTracker()
    all_detections = []
    for frame_num, (images, _) in tqdm(enumerate(test_loader), desc="Evaluating"):
        detections = detector.detect_objects(images)
        _, consistent_tracks = tracker.track_objects(detections, frame_num)
        all_detections.append({"track_id": [t.track_id for t in consistent_tracks],
                               "bboxes": [t.to_tlbr() for t in consistent_tracks]})

    evaluate_performance(all_detections, test_dataset)
    print("✅ Training & Tracking Completed!")

✅ Backend issue fixed!
✅ Dataset already downloaded.
✅ Dataset already extracted.


ValueError: Length mismatch: Expected axis has 10 elements, new values have 9 elements