# Modular Object Detection + Tracking (Accuracy‑First) — Colab Notebook

This Colab builds a **plug‑and‑play video detection & multi‑object tracking** pipeline with an accuracy‑first default stack:

- **Detector:** YOLOv8x (Ultralytics)
- **Tracker:** StrongSORT++ / BoT‑SORT via BoxMOT (with camera‑motion compensation)
- **CMC:** Homography‑based (helps when the camera moves and objects are static)

You can swap detectors/trackers/ReID without touching the rest of the code.


In [4]:
!pip install torch

Collecting torch
  Downloading torch-2.9.1-cp312-none-macosx_11_0_arm64.whl.metadata (30 kB)
Collecting filelock (from torch)
  Using cached filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting typing-extensions>=4.10.0 (from torch)
  Using cached typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)
Collecting setuptools (from torch)
  Using cached setuptools-80.9.0-py3-none-any.whl.metadata (6.6 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx>=2.5.1 (from torch)
  Using cached networkx-3.6.1-py3-none-any.whl.metadata (6.8 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec>=0.8.5 (from torch)
  Using cached fsspec-2025.12.0-py3-none-any.whl.metadata (10 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Collecting MarkupSafe>=2.0 (from jinja2->torch)
  Downlo

In [5]:

import platform
import torch

# Report the PyTorch build, whether CUDA is usable, and the host platform.
cuda_ok = torch.cuda.is_available()
device_name = torch.cuda.get_device_name(0) if cuda_ok else 'CPU'
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {cuda_ok}  |  device: {device_name}")
print(platform.platform())


PyTorch: 2.9.1
CUDA available: False  |  device: CPU
macOS-15.7.2-arm64-arm-64bit-Mach-O


In [7]:

!pip -q install ultralytics boxmot opencv-python onnxruntime
# torch/torchvision come with Colab; if not, uncomment:
!pip -q install torch torchvision --index-url https://download.pytorch.org/whl/cu121


In [8]:

import os, json
from pathlib import Path

# Lay out the package directories that the generated modules below are written into.
root = Path('project')
root.mkdir(exist_ok=True)

package_dirs = (
    root / 'core',
    root / 'models',
    root / 'models' / 'detectors',
    root / 'models' / 'trackers',
    root / 'models' / 'reid',
    root / 'motion',
    root / 'utils',
)
for package_dir in package_dirs:
    package_dir.mkdir(parents=True, exist_ok=True)

readme_path = root / 'README.md'
readme_path.write_text('Colab project auto-generated.')

(Path(root/'app.py')).write_text('''
import argparse, time
from core.registry import build_detector, build_tracker, build_cmc, build_reid
from core.video_io import VideoReader, VideoWriter
from utils.metrics import TrackCSVWriter

def parse_args():
    """Declare the command-line options of the tracking app and parse the process arguments.

    Returns:
        argparse.Namespace with video, output, save_csv, detector, detector_weights,
        tracker, reid, use_cmc, conf, iou and class_filter.
    """
    parser = argparse.ArgumentParser()
    # (flag, keyword arguments) pairs, registered in this order.
    option_table = [
        ('--video', dict(required=True)),
        ('--output', dict(default='output.mp4')),
        ('--save-csv', dict(default='tracks.csv')),
        ('--detector', dict(default='yolov8', choices=['yolov8','rtdetr','dummy'])),
        ('--detector-weights', dict(default='yolov8x.pt')),
        ('--tracker', dict(default='strongsort', choices=['strongsort','botsort','deocsort','simplesort'])),
        ('--reid', dict(default='osnet', choices=['osnet','none'])),
        ('--use-cmc', dict(action='store_true')),
        ('--conf', dict(type=float, default=0.25)),
        ('--iou', dict(type=float, default=0.5)),
        ('--class-filter', dict(nargs='*', default=None)),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser.parse_args()


def main():
    """Run detection and tracking over the input video.

    Builds the detector, ReID model, tracker and camera-motion estimator from the
    parsed arguments, then for every frame: estimates motion against the previous
    frame, hands it to the tracker, detects, updates the tracks, and writes the
    annotated frame plus one CSV row per track.
    """
    # Local import: the module header only imports argparse and time.
    from contextlib import closing

    args = parse_args()
    detector = build_detector(args.detector, args.detector_weights, args.conf, args.iou, args.class_filter)
    reid = build_reid(args.reid)
    tracker = build_tracker(args.tracker, reid=reid, match_iou=args.iou)
    cmc = build_cmc(args.use_cmc)

    # closing() calls .close() on exit, so the capture, the video writer and the
    # CSV handle are released even when a frame raises part-way through the run.
    with closing(VideoReader(args.video)) as vr:
        with closing(VideoWriter(args.output, vr.fps, vr.width, vr.height)) as vw, closing(TrackCSVWriter(args.save_csv)) as csvw:
            prev = None
            t0 = time.time()
            for i, frame in enumerate(vr):
                # No motion estimate exists for the first frame.
                H = cmc.estimate(prev, frame) if prev is not None else None
                tracker.set_cmc(H)
                dets = detector.detect(frame)
                tracks = tracker.update(dets, frame)
                # Draw on a copy so `frame` stays clean for the next motion estimate.
                anno = tracker.draw(frame.copy(), tracks)
                vw.write(anno)
                csvw.write(i, tracks)
                prev = frame

    print(f'Done in {time.time()-t0:.2f}s -> {args.output} / {args.save_csv}')

if __name__ == '__main__':
    main()
''')

(Path(root/'core'/'registry.py')).write_text('''
from models.detectors.base import DummyDetector
from models.detectors.yolov8 import YOLOv8Detector
from models.detectors.rtdetr import RTDETRDetector
from models.trackers.boxmot_wrappers import BoxMOTTracker
from models.trackers.simple_sort import SimpleSORTTracker
from models.reid.base import DummyReID
from models.reid.osnet_boxmot import OSNetBoxMOT
from motion.cmc import HomographyCMC, NoCMC

def build_detector(name='yolov8', weights='yolov8x.pt', conf=0.25, iou=0.5, class_filter=None):
    """Create the detector selected by `name`.

    'yolov8' and 'rtdetr' map to their wrapper classes; any other name yields a
    DummyDetector. If the selected wrapper raises during construction, a warning
    is printed and a DummyDetector is returned instead.
    """
    if name == 'yolov8':
        factory, warning = YOLOv8Detector, '[WARN] YOLOv8 unavailable:'
    elif name == 'rtdetr':
        factory, warning = RTDETRDetector, '[WARN] RT-DETR unavailable:'
    else:
        return DummyDetector()

    try:
        return factory(weights, conf, iou, class_filter)
    except Exception as err:
        print(warning, err)
        return DummyDetector()

def build_reid(name='osnet'):
    """Create the ReID model selected by `name`.

    Only 'osnet' maps to a real wrapper; every other name, and any failure while
    constructing the wrapper (reported with a warning), yields a DummyReID.
    """
    if name != 'osnet':
        return DummyReID()
    try:
        extractor = OSNetBoxMOT()
    except Exception as err:
        print('[WARN] OSNet unavailable:', err)
        return DummyReID()
    return extractor

def build_tracker(name='strongsort', reid=None, match_iou=0.5):
    """Create the tracker selected by `name`.

    'strongsort', 'botsort' and 'deocsort' are served by BoxMOTTracker; every
    other name, and any failure while constructing the BoxMOT wrapper (reported
    with a warning), falls back to SimpleSORTTracker.
    """
    boxmot_names = ('strongsort', 'botsort', 'deocsort')
    if name not in boxmot_names:
        return SimpleSORTTracker(match_iou)
    try:
        return BoxMOTTracker(name, reid=reid, match_iou=match_iou)
    except Exception as err:
        print('[WARN] BoxMOT unavailable:', err)
        return SimpleSORTTracker(match_iou)

def build_cmc(enabled=False):
    """Return a HomographyCMC when `enabled` is truthy, otherwise a NoCMC."""
    if enabled:
        return HomographyCMC()
    return NoCMC()
''')

(Path(root/'core'/'video_io.py')).write_text('''
import cv2
class VideoReader:
    """Iterate over the frames of a video opened with cv2.VideoCapture.

    Attributes:
        fps: frame rate reported by the capture as a float, or 30.0 when the
            reported value is missing, non-positive or not finite.
        width, height: frame size in pixels as reported by the capture.

    Raises:
        RuntimeError: if the capture cannot be opened.
    """
    def __init__(self, path):
        self.cap = cv2.VideoCapture(path)
        if not self.cap.isOpened():
            raise RuntimeError(f'Cannot open {path}')
        reported_fps = self.cap.get(cv2.CAP_PROP_FPS)
        # Keep fractional rates such as 29.97: truncating to int changes the
        # playback speed of the written video. NaN fails both comparisons and
        # therefore takes the fallback as well.
        if reported_fps is not None and 0 < reported_fps < float('inf'):
            self.fps = float(reported_fps)
        else:
            self.fps = 30.0
        self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    def __iter__(self):
        return self

    def __next__(self):
        ok, frame = self.cap.read()
        if not ok:
            raise StopIteration
        return frame

    def close(self):
        """Release the underlying capture."""
        self.cap.release()

class VideoWriter:
    """Write frames to `path` with the mp4v codec through cv2.VideoWriter.

    Raises:
        RuntimeError: if the writer cannot be opened. Without this check every
            write() call would be silently ignored and no video produced.
    """
    def __init__(self, path, fps, w, h):
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        self.wr = cv2.VideoWriter(path, fourcc, fps, (w,h))
        if not self.wr.isOpened():
            raise RuntimeError(f'Cannot open {path} for writing')

    def write(self, frame):
        """Append one frame to the output video."""
        self.wr.write(frame)

    def close(self):
        """Release the underlying writer."""
        self.wr.release()
''')

(Path(root/'models'/'detectors'/'base.py')).write_text('''
import numpy as np
class Detection:
    """One detector output: box corners, confidence, class id and optional class name."""
    def __init__(self, xyxy, conf, cls, cls_name=None):
        # Box is stored as a float array; conf as a float.
        self.xyxy = np.array(xyxy, dtype=float)
        self.conf = float(conf)
        # A missing class id is represented by -1.
        if cls is None:
            self.cls = -1
        else:
            self.cls = int(cls)
        self.cls_name = cls_name
class BaseDetector:
    """Interface for detectors: subclasses implement detect(frame)."""
    def detect(self, frame):
        raise NotImplementedError


class DummyDetector(BaseDetector):
    """Detector that never reports anything."""
    def detect(self, frame):
        no_detections = []
        return no_detections
''')

(Path(root/'models'/'detectors'/'yolov8.py')).write_text('''
from .base import BaseDetector, Detection
class YOLOv8Detector(BaseDetector):
    """Detector backed by an Ultralytics YOLO model.

    Args:
        weights: weights file or model name passed to ultralytics.YOLO.
        conf: confidence threshold forwarded to predict().
        iou: IoU threshold forwarded to predict().
        class_filter: optional iterable of class names and/or class ids to keep.
            Ids may be given as ints or as strings (as they arrive from the
            command line); None or an empty value keeps every class.
    """
    def __init__(self, weights='yolov8x.pt', conf=0.25, iou=0.5, class_filter=None):
        from ultralytics import YOLO
        self.model = YOLO(weights)
        self.conf, self.iou, self.class_filter = conf, iou, class_filter

    def detect(self, frame):
        """Run the model on one frame and return a list of Detection objects."""
        res = self.model.predict(source=frame, conf=self.conf, iou=self.iou, verbose=False)[0]
        boxes = res.boxes
        if boxes is None or len(boxes) == 0:
            return []
        names = res.names
        # Compare as strings so that a filter entry "0" matches class id 0.
        wanted = {str(c) for c in self.class_filter} if self.class_filter else None
        # One device-to-host copy per field instead of three per box.
        all_xyxy = boxes.xyxy.cpu().numpy()
        all_conf = boxes.conf.cpu().numpy()
        all_cls = boxes.cls.cpu().numpy()
        dets = []
        for xyxy, conf, cls in zip(all_xyxy, all_conf, all_cls):
            cls = int(cls)
            name = names.get(cls, str(cls)) if isinstance(names, dict) else str(cls)
            if wanted is not None and name not in wanted and str(cls) not in wanted:
                continue
            dets.append(Detection(xyxy, float(conf), cls, name))
        return dets
''')

(Path(root/'models'/'detectors'/'rtdetr.py')).write_text('''
from .base import BaseDetector
class RTDETRDetector(BaseDetector):
    """Placeholder for an RT-DETR detector; construction always raises RuntimeError."""
    def __init__(self, *args, **kwargs):
        # Any arguments are accepted and ignored; the wrapper has no implementation.
        message = 'RT-DETR wrapper not implemented in this demo'
        raise RuntimeError(message)
''')

(Path(root/'models'/'trackers'/'base.py')).write_text('''
import numpy as np, cv2
class Track:
    """One tracked object: integer id, float box corners, confidence, class id and optional name."""
    def __init__(self, track_id, xyxy, conf, cls, cls_name=None):
        self.id = int(track_id)
        self.xyxy = np.array(xyxy, dtype=float)
        self.conf = float(conf)
        self.cls = int(cls)
        self.cls_name = cls_name
class BaseTracker:
    """Interface for trackers, plus a shared routine that draws tracks on a frame."""
    def update(self, detections, frame):
        raise NotImplementedError

    def set_cmc(self, H):
        # Default: the camera-motion estimate is ignored.
        pass

    def draw(self, frame, tracks):
        """Draw a green box and an 'ID <id> <cls>' label for every track; return the frame."""
        green = (0,255,0)
        for track in tracks:
            left, top, right, bottom = track.xyxy.astype(int)
            label = f'ID {track.id} {track.cls}'
            # Label sits 5 px above the box, clamped to the top edge of the image.
            label_origin = (left, max(0, top-5))
            cv2.rectangle(frame, (left, top), (right, bottom), green, 2)
            cv2.putText(frame, label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 1)
        return frame
''')

(Path(root/'models'/'trackers'/'boxmot_wrappers.py')).write_text('''
from .base import BaseTracker, Track
import numpy as np
class BoxMOTTracker(BaseTracker):
    """Adapter around a BoxMOT tracker (StrongSORT, BoT-SORT or DeepOCSORT).

    Args:
        tracker_type: 'strongsort', 'botsort' or 'deocsort'.
        reid: accepted for interface compatibility; not forwarded to BoxMOT.
        match_iou: accepted for interface compatibility; not forwarded to BoxMOT.
        reid_weights: ReID weights file handed to the BoxMOT tracker.
            NOTE(review): assumes BoxMOT fetches this file by name when it is
            missing locally - confirm for the installed release.

    Raises:
        ValueError: for an unknown tracker_type.
        ImportError: if boxmot is missing or exports none of the expected classes.
    """
    # BoxMOT has exported these classes under different spellings across
    # releases; the candidates are tried in order.
    _IMPL_NAMES = {
        'strongsort': ('StrongSort', 'StrongSORT'),
        'botsort': ('BotSort', 'BoTSORT', 'BotSORT'),
        'deocsort': ('DeepOcSort', 'DeepOCSORT'),
    }

    def __init__(self, tracker_type='strongsort', reid=None, match_iou=0.5, reid_weights='osnet_x0_25_msmt17.pt'):
        if tracker_type not in self._IMPL_NAMES:
            raise ValueError('unknown tracker')
        from pathlib import Path
        import boxmot
        import torch
        candidates = self._IMPL_NAMES[tracker_type]
        Impl = next((getattr(boxmot, n) for n in candidates if hasattr(boxmot, n)), None)
        if Impl is None:
            raise ImportError(f'boxmot exports none of {candidates}')
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        # Positional (weights, device, half): the keyword names of these three
        # parameters have changed between BoxMOT releases.
        # NOTE(review): confirm the order against the installed boxmot version.
        self.impl = Impl(Path(reid_weights), device, False)
        self.H = None

    def set_cmc(self, H):
        # Stored only; this wrapper does not pass H on to the BoxMOT tracker.
        self.H = H

    def update(self, detections, frame):
        """Feed the detections to BoxMOT and convert its output rows to Track objects."""
        dets = []
        for d in detections:
            x1,y1,x2,y2 = d.xyxy
            dets.append([x1,y1,x2,y2,d.conf,d.cls])
        dets = np.array(dets, float) if len(dets)>0 else np.empty((0,6), float)
        out = self.impl.update(dets, frame)
        tracks = []
        for row in out:
            if len(row) >= 7:
                # Documented BoxMOT row layout: x1, y1, x2, y2, id, conf, cls, det_index.
                x1,y1,x2,y2,tid,conf,cls = row[:7]
            else:
                # Shorter rows: keep the previous interpretation (no confidence column).
                x1,y1,x2,y2,tid,cls = row[:6]
                conf = 1.0
            tracks.append(Track(tid,[x1,y1,x2,y2],conf,int(cls)))
        return tracks
''')

(Path(root/'models'/'trackers'/'simple_sort.py')).write_text('''
from .base import BaseTracker, Track
import numpy as np
class SimpleSORTTracker(BaseTracker):
    """Greedy IoU tracker with no motion model.

    Every existing track claims the unassigned detection with the highest IoU
    (if that IoU reaches match_iou); leftover detections start new tracks.

    Args:
        match_iou: minimum IoU for a detection to continue a track.
        max_age: number of consecutive frames a track may go unmatched before it
            is dropped. None keeps unmatched tracks forever.
    """
    def __init__(self, match_iou=0.5, max_age=30):
        self.next_id = 1
        self.tracks = []        # list of (track_id, xyxy, cls)
        self.match_iou = match_iou
        self.max_age = max_age
        self._missed = {}       # track_id -> consecutive frames without a match
        self._conf = {}         # track_id -> confidence of the last matched detection

    def iou(self,a,b):
        """Intersection-over-union of two (x1, y1, x2, y2) boxes."""
        ax1,ay1,ax2,ay2=a; bx1,by1,bx2,by2=b
        inter = max(0,min(ax2,bx2)-max(ax1,bx1))*max(0,min(ay2,by2)-max(ay1,by1))
        area_a=max(0,(ax2-ax1)*(ay2-ay1)); area_b=max(0,(bx2-bx1)*(by2-by1))
        # The epsilon avoids a division by zero for two empty boxes.
        return inter/(area_a+area_b-inter+1e-6)

    def update(self, detections, frame):
        """Associate detections with tracks and return every live track as a Track.

        Unmatched tracks keep their last box and are still returned until they
        have been unmatched for more than max_age consecutive frames.
        """
        det_boxes = [d.xyxy for d in detections]
        assigned = set()
        live = []
        for tid, tbox, tcls in self.tracks:
            best_idx, best_iou = -1, 0
            for idx, dbox in enumerate(det_boxes):
                if idx in assigned:
                    continue
                overlap = self.iou(tbox, dbox)
                if overlap > best_iou:
                    best_iou, best_idx = overlap, idx
            if best_idx != -1 and best_iou >= self.match_iou:
                det = detections[best_idx]
                live.append((tid, det.xyxy, det.cls))
                assigned.add(best_idx)
                self._missed[tid] = 0
                self._conf[tid] = det.conf
                continue
            missed = self._missed.get(tid, 0) + 1
            if self.max_age is not None and missed > self.max_age:
                # Stale track: forget it so it is neither drawn nor matched again.
                self._missed.pop(tid, None)
                self._conf.pop(tid, None)
                continue
            self._missed[tid] = missed
            live.append((tid, tbox, tcls))
        for idx, det in enumerate(detections):
            if idx in assigned:
                continue
            tid = self.next_id
            self.next_id += 1
            live.append((tid, det.xyxy, det.cls))
            self._missed[tid] = 0
            self._conf[tid] = det.conf
        self.tracks = live
        return [Track(tid, tbox, self._conf.get(tid, 1.0), tcls) for tid, tbox, tcls in self.tracks]
''')

(Path(root/'models'/'reid'/'base.py')).write_text('''
class BaseReID:
    """Interface for appearance-embedding models: subclasses implement embed(frame, xyxy)."""
    def embed(self, frame, xyxy):
        raise NotImplementedError


class DummyReID(BaseReID):
    """ReID stand-in whose embed() always returns None."""
    def embed(self, frame, xyxy):
        return None
''')

(Path(root/'models'/'reid'/'osnet_boxmot.py')).write_text('''
from .base import BaseReID
class OSNetBoxMOT(BaseReID):
    """ReID entry registered under the name osnet.

    NOTE(review): this is a stub - no model is loaded and embed() returns None.
    """
    def __init__(self):
        pass

    def embed(self, frame, xyxy):
        return None
''')

(Path(root/'motion'/'cmc.py')).write_text('''
import cv2, numpy as np
class NoCMC:
    """Camera-motion estimator that is switched off: estimate() always returns None."""
    def estimate(self, prev, curr):
        return None
class HomographyCMC:
    """Estimate camera motion between two frames as a homography from ORB matches.

    Args:
        max_features: nfeatures passed to cv2.ORB_create.
        match_thresh: ratio-test threshold; a match is kept when its distance is
            below match_thresh times the distance of the second-best match.
    """
    def __init__(self, max_features=2000, match_thresh=0.75):
        self.max_features = max_features
        self.match_thresh = match_thresh

    def estimate(self, prev, curr):
        """Return the homography mapping points of `prev` onto `curr`, or None.

        None is returned when either frame is None, when a frame yields no
        descriptors, or when fewer than 8 matches survive the ratio test. The
        value returned by cv2.findHomography is passed through unchanged, so it
        may also be None when no model could be fitted.
        """
        if prev is None or curr is None:
            return None
        orb = cv2.ORB_create(nfeatures=self.max_features)
        kp1, des1 = orb.detectAndCompute(prev, None)
        kp2, des2 = orb.detectAndCompute(curr, None)
        if des1 is None or des2 is None:
            return None
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
        matches = bf.knnMatch(des1, des2, k=2)
        # knnMatch can return fewer than two neighbours for a query (for example
        # when `curr` has a single descriptor); skip those instead of failing
        # on the tuple unpacking.
        good = [
            pair[0] for pair in matches
            if len(pair) == 2 and pair[0].distance < self.match_thresh*pair[1].distance
        ]
        if len(good) < 8:
            return None
        src = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1,1,2)
        dst = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1,1,2)
        H, _ = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
        return H
''')

(Path(root/'utils'/'metrics.py')).write_text('''
import csv
class TrackCSVWriter:
    """Write tracks to a CSV file, one row per track per frame.

    The header row (frame, id, x1, y1, x2, y2, cls, conf) is written as soon as
    the file is opened.
    """
    def __init__(self, path):
        self.f = open(path, 'w', newline='')
        self.w = csv.writer(self.f)
        header = ['frame','id','x1','y1','x2','y2','cls','conf']
        self.w.writerow(header)

    def write(self, frame_idx, tracks):
        """Append one row for each track seen in frame `frame_idx`."""
        for track in tracks:
            left, top, right, bottom = track.xyxy
            record = [frame_idx, track.id, left, top, right, bottom, track.cls, track.conf]
            self.w.writerow(record)

    def close(self):
        """Close the underlying file."""
        self.f.close()
''')

print('Project files created under ./project')


Project files created under ./project


## Input video
Either **upload a local video** or **download from a URL**.


In [None]:

try:
    from google.colab import files  # only importable inside Google Colab
except ImportError:
    files = None

if files is None:
    # Outside Colab there is no upload widget; the path has to be set by hand.
    print('google.colab is not available here; set video_path to a local video file instead.')
else:
    print('Upload a video file (e.g., .mp4):')
    up = files.upload()  # choose a file
    if not up:
        # Cancelling the dialog returns an empty mapping.
        raise RuntimeError('No file was uploaded; re-run this cell and choose a video.')
    video_path = next(iter(up))
    print('Using video:', video_path)


In [None]:

# Alternatively, uncomment and set a URL to download a sample video
# url = 'https://github.com/ultralytics/assets/releases/download/v0.0.0/people-walking.mp4'
# import os
# os.system(f'wget -O sample.mp4 {url}')
# video_path = 'sample.mp4'
# print('Downloaded sample to', video_path)


In [None]:

import os
os.chdir('project')
!python app.py --video "$video_path" --output out.mp4   --detector yolov8 --detector-weights yolov8x.pt   --tracker botsort --use-cmc --save-csv tracks.csv


In [None]:

import pandas as pd
from IPython.display import Video, display

print('Annotated video:')
display(Video('out.mp4', embed=True))
# The newline must be an escape sequence; a literal line break inside the quotes is a SyntaxError.
print('\nTracking CSV preview:')
# display() renders the DataFrame as a table instead of plain text.
display(pd.read_csv('tracks.csv').head())
