# COMP-SCI 5567 — Faster R-CNN Pipeline (Project)
Ailing Nan | Jim Huynh | Joseph Marinello | Kenny Phan

In [None]:
# Mount the user's Google Drive so the project folder under /content/drive is reachable.
# This cell only works inside Google Colab (it imports google.colab).
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
# All Imports
import os
import cv2
import time
import math
import glob
import torch
import random
import configparser
#
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as Fnn
import torchvision.transforms.functional as TF
#
from pathlib import Path
from datetime import datetime
from torch.utils.data import DataLoader
from torchvision.io import read_image, write_jpeg
from torchvision.utils import draw_bounding_boxes
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
#
try:
    from sklearn.metrics import average_precision_score
    SKL_OK = True
except Exception:
    SKL_OK = False



In [24]:
# ---------------------------------------------------------------------------
# Paths. Everything is derived from the Drive project folder, so this is the
# only path that has to be adapted to your own Drive layout.
# ---------------------------------------------------------------------------
DRIVE_PROJECT_DIR = "/content/drive/MyDrive/Deep_Learning/Final Project"
PROJECT_DIR = DRIVE_PROJECT_DIR

# The dataset archive must sit directly inside PROJECT_DIR.
ZIP_NAME = "MOT16.zip"
ZIP_PATH = f"{PROJECT_DIR}/{ZIP_NAME}"
DATA_DIR = f"{PROJECT_DIR}/MOT16"

# Output folders for checkpoints and for evaluation/inference artefacts.
MODEL_DIR = f"{PROJECT_DIR}/models_FasterRCNN"
RESULT_DIR = f"{PROJECT_DIR}/results_FasterRCNN"

# ---------------------------------------------------------------------------
# Run options.
# ---------------------------------------------------------------------------
TEST_SEQ = "MOT16-03"          # sequence used when RUN_ALL_TEST_SEQ is False
RUN_ALL_TEST_SEQ = False       # True -> run inference on every test sequence
DATA_SPLIT_RATIO = 0.8         # fraction of training sequences used for training
EPOCHS = 5
BATCH_SIZE = 2
CONF_THRESHOLD = 0.7           # minimum detection score that is kept
IOU_THRESHOLD = 0.5            # minimum IoU for a detection to count as a match
SEED = 42

# ---------------------------------------------------------------------------
# Reproducibility: seed every random number generator the notebook uses.
# ---------------------------------------------------------------------------
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [25]:
# Create the checkpoint and results folders if they do not exist yet
# (exist_ok=True makes this cell safe to re-run).
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(RESULT_DIR, exist_ok=True)


# Extract the dataset archive once. Only the existence of DATA_DIR is checked,
# so a partially extracted folder is treated as complete.
if not os.path.exists(DATA_DIR):
    # Fail early with an actionable message when the archive has not been uploaded.
    assert os.path.exists(ZIP_PATH), f"[ERR] {ZIP_PATH} not found, please upload {ZIP_NAME} to {PROJECT_DIR}/"
    print("[INFO] Unzip MOT16.zip ...")
    # IPython shell escape: {ZIP_PATH} and {PROJECT_DIR} are interpolated from Python.
    # The double quotes are required because the project path contains a space.
    !unzip -q "{ZIP_PATH}" -d "{PROJECT_DIR}"
    print("[OK] File has been Unzipped")
else:
    print("[INFO] Skip unzip：Data directory already exist")

[INFO] Skip unzip：Data directory already exist


In [26]:
# Column indices of a gt.txt row as read by the dataset classes.
(
    FRAME_COL,    # 0: frame number (matched against 1-based frame position)
    ID_COL,       # 1: identity of the annotated person
    LEFT_COL,     # 2: left edge of the box
    TOP_COL,      # 3: top edge of the box
    WIDTH_COL,    # 4: box width
    HEIGHT_COL,   # 5: box height
    PERSON_COL,   # 6: rows are kept only when this column equals 1
) = range(7)

# Number of outputs of the detector's classification head
# (presumably background + person -- confirm against the torchvision convention).
NUM_CLASSES = 2

# Location of the fine-tuned detector checkpoint.
MODEL_PATH = f"{MODEL_DIR}/bbox_detector.pth"

In [27]:
# Data Augmentation Classes
class Compose:
    """Chain several ``(image, target) -> (image, target)`` transforms.

    The transforms are applied in list order; the output pair of one transform
    is the input pair of the next.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for step in self.transforms:
            image, target = step(image, target)
        return image, target

class ToTensor:
    """Pass-through placeholder transform.

    Despite its name it performs no conversion: the pair is returned unchanged.
    """

    def __call__(self, image, target):
        sample = (image, target)
        return sample

class RandomHorizontalFlip:
    """Mirror the image left-right with probability ``p`` and mirror its boxes.

    The flip is skipped when the target has no ``"boxes"`` entry or the entry is
    empty. Boxes are read as ``[x1, y1, x2, y2]`` columns.
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, image, target):
        # The random draw always happens first, exactly one per call.
        if random.random() >= self.p:
            return image, target
        if "boxes" not in target or len(target["boxes"]) == 0:
            return image, target

        _, _height, width = image.shape
        image = TF.hflip(image)

        flipped = target["boxes"].clone()
        # New x1 is (width - old x2) and new x2 is (width - old x1).
        flipped[:, [0, 2]] = width - flipped[:, [2, 0]]
        target["boxes"] = flipped
        return image, target

class RandomScale:
    """Resize the image by a random factor and scale its boxes to match.

    The factor is drawn uniformly from ``scale_range`` on every call. Box
    columns 0/2 are scaled by the width ratio and columns 1/3 by the height ratio.
    """

    def __init__(self, scale_range=(0.9, 1.1)):
        self.scale_range = scale_range

    def __call__(self, image, target):
        _, height, width = image.shape
        factor = random.uniform(*self.scale_range)
        scaled_h = int(height * factor)
        scaled_w = int(width * factor)
        image = TF.resize(image, [scaled_h, scaled_w])

        if "boxes" not in target or len(target["boxes"]) == 0:
            return image, target

        # Use the realised ratios (after int truncation), not the raw factor.
        resized = target["boxes"].clone()
        resized[:, [0, 2]] = resized[:, [0, 2]] * (scaled_w / width)
        resized[:, [1, 3]] = resized[:, [1, 3]] * (scaled_h / height)
        target["boxes"] = resized
        return image, target

class ColorJitterLite:
    """Randomly perturb brightness, contrast and saturation.

    Each adjustment is applied independently with probability 0.8, in that
    order, with a factor drawn uniformly from ``[1 - amount, 1 + amount]``.
    The target is never modified.
    """

    def __init__(self, b=0.2, c=0.2, s=0.2):
        self.b = b
        self.c = c
        self.s = s

    def __call__(self, image, target):
        adjustments = (
            ("adjust_brightness", self.b),
            ("adjust_contrast", self.c),
            ("adjust_saturation", self.s),
        )
        for op_name, amount in adjustments:
            if random.random() < 0.8:
                factor = random.uniform(1 - amount, 1 + amount)
                image = getattr(TF, op_name)(image, factor)
        return image, target

class RandomGaussianBlur:
    """Blur the image with probability ``p``.

    ``sigma`` is a ``(low, high)`` range; the value used is drawn uniformly
    from it on each call. The target is never modified.
    """

    def __init__(self, p=0.25, kernel_size=3, sigma=(0.1, 1.0)):
        self.p = p
        self.kernel_size = kernel_size
        self.sigma = sigma

    def __call__(self, image, target):
        if random.random() >= self.p:
            return image, target
        chosen_sigma = random.uniform(*self.sigma)
        blurred = TF.gaussian_blur(image, kernel_size=self.kernel_size, sigma=chosen_sigma)
        return blurred, target

class CutOut:
    """With probability ``p``, zero out ``n_holes`` square patches of the image.

    Patches have side ``length`` (clipped by the image border) and are placed
    at random positions. The input image is not modified; a copy is returned.
    Boxes in the target are left untouched.
    """

    def __init__(self, n_holes=2, length=24, p=0.25):
        self.n_holes = n_holes
        self.length = length
        self.p = p

    def __call__(self, image, target):
        if random.random() <= self.p:
            _, height, width = image.shape
            side = self.length
            last_top = max(0, height - side)
            last_left = max(0, width - side)

            erased = image.clone()
            for _ in range(self.n_holes):
                # Draw the row first, then the column.
                top = random.randint(0, last_top)
                left = random.randint(0, last_left)
                erased[:, top:top + side, left:left + side] = 0
            return erased, target
        return image, target

def get_train_transforms():
    """Build the augmentation pipeline applied to training samples.

    Order: pass-through, horizontal flip, random scale, colour jitter,
    Gaussian blur, cut-out.
    """
    steps = [
        ToTensor(),
        RandomHorizontalFlip(p=0.5),
        RandomScale(scale_range=(0.9, 1.1)),
        ColorJitterLite(b=0.2, c=0.2, s=0.2),
        RandomGaussianBlur(p=0.25, kernel_size=3, sigma=(0.1, 1.0)),
        CutOut(n_holes=2, length=24, p=0.25),
    ]
    return Compose(steps)

def get_test_transforms():
    """Build the pipeline for evaluation/inference: no augmentation, pass-through only."""
    steps = [ToTensor()]
    return Compose(steps)


In [28]:
# Dataset Handling Classes
class MOT16TrainDataset(torch.utils.data.Dataset):
    """Frames and person boxes of MOT16-style sequences, for training.

    Every sub-folder ``<root>/<seq>`` that has both an ``img1`` directory and a
    ``gt/gt.txt`` file contributes its ``.jpg`` frames. All annotations are
    loaded eagerly in ``__init__``; images are read lazily in ``__getitem__``.

    Args:
        root: Folder that contains the sequence sub-folders.
        seq_list: Names of the sequences to use; ``None`` means every entry of ``root``.
        transforms: Optional callable ``(image, target) -> (image, target)``.

    Each target is a dict with:
        ``boxes``      float32 tensor (N, 4) as ``[left, top, right, bottom]``
        ``labels``     int64 tensor (N,), all ones
        ``person_ids`` int64 tensor (N,), identity column of the annotation
    """

    def __init__(self, root, seq_list=None, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = []
        self.targets = []

        # Use the given sequences, or every entry of root when none are given.
        all_subdirs = sorted(os.listdir(root))
        sequence = seq_list if seq_list is not None else all_subdirs

        for subdir in sequence:
            img_dir = os.path.join(root, subdir, "img1")
            gt_path = os.path.join(root, subdir, "gt", "gt.txt")
            if not (os.path.isdir(img_dir) and os.path.exists(gt_path)):
                continue
            frames = sorted(f for f in os.listdir(img_dir) if f.lower().endswith(".jpg"))
            self.imgs += [os.path.join(subdir, "img1", f) for f in frames]

            # genfromtxt returns a 1-D array for a one-row file; force 2-D so
            # the column indexing below works for any number of rows.
            gt = np.atleast_2d(np.genfromtxt(gt_path, delimiter=",", dtype=float))
            gt = gt[gt[:, PERSON_COL] == 1]

            lefts = gt[:, LEFT_COL]
            tops = gt[:, TOP_COL]
            rights = lefts + gt[:, WIDTH_COL]
            bots = tops + gt[:, HEIGHT_COL]
            boxes = np.column_stack((lefts, tops, rights, bots))
            person_ids = gt[:, ID_COL]
            # Cast the frame column once per sequence instead of once per frame.
            frame_ids = gt[:, FRAME_COL].astype(int)

            # The i-th frame of the sorted listing is matched with the rows
            # whose frame column equals i + 1.
            for i in range(len(frames)):
                mask = frame_ids == (i + 1)
                req_boxes = boxes[mask, :]
                self.targets.append({
                    "boxes": torch.tensor(req_boxes, dtype=torch.float32),
                    "labels": torch.ones(len(req_boxes), dtype=torch.int64),
                    "person_ids": torch.tensor(person_ids[mask], dtype=torch.int64),
                })
        print("[INFO] Train dataset length:", len(self.imgs))

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        # NOTE: an out-of-range index yields (None, None) instead of raising
        # IndexError, so plain `for sample in dataset` iteration never stops.
        # Index with range(len(dataset)) or go through a DataLoader.
        if idx >= len(self.imgs):
            return None, None
        img_path = os.path.join(self.root, self.imgs[idx])
        img = read_image(img_path).float() / 255.0
        # Hand out copies so transforms cannot alter the cached annotations.
        tgt = {k: (v.clone() if torch.is_tensor(v) else v) for k, v in self.targets[idx].items()}
        if self.transforms:
            img, tgt = self.transforms(img, tgt)
        return img, tgt

class MOT16EvalDataset(torch.utils.data.Dataset):
    """Frames and person boxes of MOT16-style sequences, for evaluation.

    Every sub-folder ``<root>/<seq>`` that has both an ``img1`` directory and a
    ``gt/gt.txt`` file contributes its ``.jpg`` frames. All annotations are
    loaded eagerly in ``__init__``; images are read lazily in ``__getitem__``.

    Args:
        root: Folder that contains the sequence sub-folders.
        seq_list: Names of the sequences to use; ``None`` means every entry of ``root``.
        transforms: Optional callable ``(image, target) -> (image, target)``.

    Each target is a dict with:
        ``boxes``  float32 tensor (N, 4) as ``[left, top, right, bottom]``
        ``labels`` int64 tensor (N,), all ones
    """

    def __init__(self, root, seq_list=None, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = []
        self.targets = []

        # Use the given sequences, or every entry of root when none are given.
        all_subdirs = sorted(os.listdir(root))
        sequence = seq_list if seq_list is not None else all_subdirs

        for subdir in sequence:
            img_dir = os.path.join(root, subdir, "img1")
            gt_path = os.path.join(root, subdir, "gt", "gt.txt")
            if not (os.path.isdir(img_dir) and os.path.exists(gt_path)):
                continue
            frames = sorted(f for f in os.listdir(img_dir) if f.lower().endswith(".jpg"))
            self.imgs += [os.path.join(subdir, "img1", f) for f in frames]

            # genfromtxt returns a 1-D array for a one-row file; force 2-D so
            # the column indexing below works for any number of rows.
            gt = np.atleast_2d(np.genfromtxt(gt_path, delimiter=",", dtype=float))
            gt = gt[gt[:, PERSON_COL] == 1]

            lefts = gt[:, LEFT_COL]
            tops = gt[:, TOP_COL]
            rights = lefts + gt[:, WIDTH_COL]
            bots = tops + gt[:, HEIGHT_COL]
            boxes = np.column_stack((lefts, tops, rights, bots))
            # Cast the frame column once per sequence instead of once per frame.
            frame_ids = gt[:, FRAME_COL].astype(int)

            # The i-th frame of the sorted listing is matched with the rows
            # whose frame column equals i + 1.
            for i in range(len(frames)):
                req_boxes = boxes[frame_ids == (i + 1), :]
                self.targets.append({
                    "boxes": torch.tensor(req_boxes, dtype=torch.float32),
                    "labels": torch.ones(len(req_boxes), dtype=torch.int64),
                })
        print("[INFO] Eval dataset length:", len(self.imgs))

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        # NOTE: an out-of-range index yields (None, None) instead of raising
        # IndexError, so plain `for sample in dataset` iteration never stops.
        # Index with range(len(dataset)) or go through a DataLoader.
        if idx >= len(self.imgs):
            return None, None
        img_path = os.path.join(self.root, self.imgs[idx])
        img = read_image(img_path).float() / 255.0
        # Hand out copies so transforms cannot alter the cached annotations.
        tgt = {k: (v.clone() if torch.is_tensor(v) else v) for k, v in self.targets[idx].items()}
        if self.transforms:
            img, tgt = self.transforms(img, tgt)
        return img, tgt

class MOT16TestDataset(torch.utils.data.Dataset):
    """Frames of a single sequence folder, without annotations.

    Args:
        root: Sequence folder; frames are the ``.jpg`` files in ``<root>/img1``.
        transforms: Optional callable ``(image, target) -> (image, target)``;
            it is called with an empty dict as target.

    Items are ``(image, filename)`` pairs, the image being a float tensor
    scaled to ``[0, 1]``.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        frame_dir = os.path.join(root, "img1")
        self.imgs = sorted(
            name for name in os.listdir(frame_dir) if name.lower().endswith(".jpg")
        )

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        filename = self.imgs[idx]
        img = read_image(os.path.join(self.root, "img1", filename)).float() / 255.0
        if self.transforms:
            img, _ = self.transforms(img, {})
        return img, filename

In [29]:
# Model Creation Functions
def get_detector_model(weights_path=None):
    """Create a Faster R-CNN (ResNet-50 FPN) detector with a NUM_CLASSES-way box head.

    The network starts from torchvision's default pretrained weights and its
    box predictor is replaced. When ``weights_path`` points to an existing
    file, that state dict is loaded on top; otherwise a warning is printed.

    Args:
        weights_path: Optional path to a saved ``state_dict``.

    Returns:
        The model (left on whatever device torchvision created it on).
    """
    model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

    # Swap the pretrained classification/regression head for one of our size.
    head_inputs = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(head_inputs, NUM_CLASSES)

    checkpoint_available = bool(weights_path) and os.path.exists(weights_path)
    if not checkpoint_available:
        print("[WARN] Training Weights not loaded（Normal for first time training）")
        return model

    # Load on CPU first; callers move the model to their device afterwards.
    checkpoint = torch.load(weights_path, map_location="cpu")
    model.load_state_dict(checkpoint)
    print(f"[OK] Loaded weights: {weights_path}")
    return model

In [30]:
# Training Functions
def collate_fn(batch):
    """Turn a list of ``(image, target)`` samples into images and targets.

    Samples whose image is ``None`` are dropped. Returns a two-element list
    ``[images_tuple, targets_tuple]``, or ``([], [])`` when nothing is left.
    """
    images, targets = [], []
    for img, tgt in batch:
        if img is not None:
            images.append(img)
            targets.append(tgt)
    if not images:
        return ([], [])
    return [tuple(images), tuple(targets)]

def train(model, data_root, seq_list=None, num_epochs=5, batch_size=2, save_path=MODEL_PATH): # added sequence list
    """Fine-tune the detector on ``<data_root>/train`` with the backbone frozen.

    Only parameters outside ``model.backbone`` are optimised (AdamW, lr 5e-4,
    weight decay 1e-4). The learning rate is stepped per batch: linear warm-up
    over the first 10% of all steps (at least 10 steps), then cosine decay to
    zero. The state dict is written to ``save_path`` after every epoch.

    Args:
        model: Detector returning a dict of losses when called with images and targets.
        data_root: Dataset root; training sequences are read from its ``train`` sub-folder.
        seq_list: Sequence names to train on; ``None`` uses every sequence found.
        num_epochs: Number of passes over the training frames.
        batch_size: Frames per optimisation step.
        save_path: Where the checkpoint is written after each epoch.

    Raises:
        ValueError: If no training frame is found for the requested sequences.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Freeze the backbone; everything else stays trainable.
    for p in model.backbone.parameters():
        p.requires_grad = False
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(params, lr=5e-4, weight_decay=1e-4)

    train_root = os.path.join(data_root, "train")
    ds = MOT16TrainDataset(train_root, seq_list=seq_list, transforms=get_train_transforms()) # added sequence list
    if len(ds) == 0:
        # Fail with an actionable message rather than an opaque sampler error.
        raise ValueError(f"[ERR] No training frames found in {train_root} for sequences {seq_list}")
    dl = DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=2,
        # Pinned host memory only helps when batches are copied to a GPU.
        pin_memory=(device.type == "cuda"),
    )

    total_steps = max(1, len(dl) * num_epochs)
    warmup = max(10, int(0.1 * total_steps))

    def lr_lambda(step):
        """Multiplier on the base LR: linear warm-up, then cosine decay."""
        if step < warmup:
            return float(step) / float(max(1, warmup))
        prog = (step - warmup) / float(max(1, total_steps - warmup))
        return 0.5 * (1.0 + math.cos(math.pi * prog))

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        t0 = time.time()
        for imgs, tgts in dl:
            imgs = [img.to(device) for img in imgs]
            tgts = [{k: (v.to(device) if torch.is_tensor(v) else v) for k, v in t.items()} for t in tgts]
            # In training mode the model returns a dict of loss terms.
            loss_dict = model(imgs, tgts)
            loss = sum(loss_dict.values())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()  # the schedule advances once per batch, not per epoch
            epoch_loss += loss.item()
        print(f"[Epoch {epoch+1}/{num_epochs}] loss={epoch_loss/len(dl):.4f}  time={time.time()-t0:.1f}s")
        # Checkpoint every epoch so an interrupted run keeps its latest weights.
        torch.save(model.state_dict(), save_path)
    print(f"[OK] Training Complete → {save_path}")

In [31]:
# Metric and Evaluation Functions
def box_iou(boxes1, boxes2):
    """Pairwise intersection-over-union of two sets of ``[x1, y1, x2, y2]`` boxes.

    Args:
        boxes1: Tensor of shape (N, 4).
        boxes2: Tensor of shape (M, 4).

    Returns:
        Tensor of shape (N, M); entry ``[i, j]`` is the IoU of ``boxes1[i]``
        and ``boxes2[j]``. The union is clamped to 1e-6 to avoid division by zero.
    """
    def _area(boxes):
        width = (boxes[:, 2] - boxes[:, 0]).clamp(min=0)
        height = (boxes[:, 3] - boxes[:, 1]).clamp(min=0)
        return width * height

    # Broadcast boxes1 against boxes2 to get the corners of every overlap rectangle.
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
    overlap = (bottom_right - top_left).clamp(min=0)
    inter = overlap[:, :, 0] * overlap[:, :, 1]

    union = _area(boxes1)[:, None] + _area(boxes2) - inter
    return inter / union.clamp(min=1e-6)

def compute_metrics(all_scores, all_tp, total_gt, out_dir):
    """Compute AP and summary counts, then write ``metrics.txt`` and a PR curve.

    AP is the 11-point interpolated average precision over the
    precision/recall curve, where recall is measured against ``total_gt``.
    It is deliberately not computed with scikit-learn's
    ``average_precision_score``: that function derives recall from the
    positives present in ``all_tp`` only, so ground-truth boxes that were
    never detected would not lower the score.

    Args:
        all_scores: Confidence of every kept detection.
        all_tp: 1/0 flag per detection (same order as ``all_scores``); 1 = true positive.
        total_gt: Number of ground-truth boxes over all evaluated frames.
        out_dir: Folder for ``metrics.txt`` and ``precision_recall_curve.png`` (created if missing).

    Returns:
        dict with keys ``mAP``, ``Total GT boxes``, ``Total detections``,
        ``True positives``, ``False positives``, ``Precision_at_conf`` and
        ``Recall_at_conf``.
    """
    os.makedirs(out_dir, exist_ok=True)
    metrics_path = os.path.join(out_dir, "metrics.txt")

    def _write_metrics(results):
        """Write one ``key: value`` line per metric."""
        with open(metrics_path, "w") as f:
            for k, v in results.items():
                f.write(f"{k}: {v}\n")

    # No detections at all: everything is zero and there is no curve to plot.
    if len(all_scores) == 0:
        results = {"mAP": 0.0, "Total GT boxes": total_gt, "Total detections": 0,
                   "True positives": 0, "False positives": 0,
                   "Precision_at_conf": 0.0, "Recall_at_conf": 0.0}
        _write_metrics(results)
        return results

    scores_np = np.array(all_scores)
    tps_np = np.array(all_tp).astype(np.int32)

    # Rank detections from most to least confident.
    order = np.argsort(-scores_np)
    tps_np = tps_np[order]

    cum_tp = np.cumsum(tps_np)
    cum_fp = np.cumsum(1 - tps_np)

    precision = cum_tp / np.maximum(cum_tp + cum_fp, 1)
    recall = cum_tp / max(total_gt, 1)

    # 11-point interpolation: best precision reachable at recall >= r,
    # averaged over r = 0.0, 0.1, ..., 1.0.
    ap = 0.0
    for r in np.linspace(0, 1, 11):
        reachable = recall >= r
        if reachable.any():
            ap += float(precision[reachable].max()) / 11.0

    plt.figure(figsize=(6, 5))
    plt.plot(recall, precision)
    plt.xlabel("Recall"); plt.ylabel("Precision"); plt.title(f"PR (AP={ap:.4f})"); plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, "precision_recall_curve.png")); plt.close()

    results = {
        "mAP": ap,
        "Total GT boxes": int(total_gt),
        "Total detections": int(len(all_scores)),
        "True positives": int(cum_tp[-1]),
        "False positives": int(cum_fp[-1]),
        # The last curve point covers every kept detection.
        "Precision_at_conf": float(precision[-1]),
        "Recall_at_conf": float(recall[-1]),
    }
    _write_metrics(results)
    print("[OK] Writing metrics to:", metrics_path)
    return results

def evaluate_on_folder(model, eval_root, seq_list, out_dir, conf_thr=0.7, iou_thr=0.5, device=None): # Added sequence list
    """Run the detector over annotated sequences and score its detections.

    For each frame, detections scoring at least ``conf_thr`` are kept and
    matched one by one to their highest-IoU ground-truth box. A detection is a
    true positive when that IoU is at least ``iou_thr`` and the box has not
    been claimed by an earlier detection of the same frame. Every 50th frame
    is saved with its detections drawn, under ``<out_dir>/visualizations``.

    Args:
        model: Detector; it is moved to ``device`` and put in eval mode.
        eval_root: Folder containing the sequence sub-folders.
        seq_list: Sequence names to evaluate.
        out_dir: Output folder for metrics and visualisations.
        conf_thr: Minimum detection score.
        iou_thr: Minimum IoU for a match.
        device: Torch device; defaults to CUDA when available, else CPU.

    Returns:
        The metrics dict produced by ``compute_metrics``.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device).eval()

    ds = MOT16EvalDataset(eval_root, seq_list=seq_list, transforms=get_test_transforms())

    os.makedirs(out_dir, exist_ok=True)
    vis_dir = os.path.join(out_dir, "visualizations")
    os.makedirs(vis_dir, exist_ok=True)

    def _flag_detections(det_boxes, gt_boxes):
        """Return a 1/0 true-positive flag for each detection of one frame."""
        if len(det_boxes) == 0:
            return []
        if len(gt_boxes) == 0:
            return [0] * len(det_boxes)
        overlaps = box_iou(det_boxes, gt_boxes)
        claimed = set()
        flags = []
        for det_idx in range(len(det_boxes)):
            best_gt = torch.argmax(overlaps[det_idx]).item()
            best_iou = overlaps[det_idx, best_gt].item()
            is_hit = best_iou >= iou_thr and best_gt not in claimed
            if is_hit:
                claimed.add(best_gt)
            flags.append(1 if is_hit else 0)
        return flags

    all_scores, all_tp = [], []
    total_gt = 0

    # Index explicitly: the dataset does not raise IndexError past its end.
    for idx in range(len(ds)):
        img, tgt = ds[idx]
        gt_boxes = tgt["boxes"]
        total_gt += len(gt_boxes)

        with torch.no_grad():
            pred = model([img.to(device)])[0]
        scores = pred["scores"].detach().cpu()
        boxes = pred["boxes"].detach().cpu()

        confident = scores >= conf_thr
        scores = scores[confident]
        boxes = boxes[confident]

        all_scores += scores.tolist()
        all_tp += _flag_detections(boxes, gt_boxes)

        # Save a visual spot check for every 50th frame.
        if idx % 50 == 0:
            captions = [f"{s:.2f}" for s in scores.tolist()]
            canvas = draw_bounding_boxes((img * 255).to(torch.uint8), boxes, labels=captions, width=2)
            write_jpeg(canvas, os.path.join(vis_dir, f"det_{idx:06d}.jpg"))

    return compute_metrics(all_scores, all_tp, total_gt, out_dir)


In [32]:
# Metric Extraction
# CSV that accumulates one row per evaluation run.
METRICS_TABLE = f"{RESULT_DIR}/metrics_summary.csv"

def save_metrics_row(results:dict, seq_name:str, model_tag:str, conf_thr:float, notes:str=""):
    """Append one evaluation result to the metrics summary CSV.

    The file is created with a header on first use; later calls append a row
    without repeating the header.

    Args:
        results: Metrics dict as returned by ``compute_metrics``.
        seq_name: Label of the evaluated sequence(s).
        model_tag: Label of the evaluated model.
        conf_thr: Confidence threshold used for the evaluation.
        notes: Free-text remark stored with the row.
    """
    row = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "seq": seq_name,
        "model": model_tag,
        "conf_threshold": conf_thr,
    }
    # CSV column name -> key in the results dict (missing keys become empty cells).
    column_sources = (
        ("mAP", "mAP"),
        ("total_gt", "Total GT boxes"),
        ("total_det", "Total detections"),
        ("tp", "True positives"),
        ("fp", "False positives"),
        ("precision_at_conf", "Precision_at_conf"),
        ("recall_at_conf", "Recall_at_conf"),
    )
    for column, source_key in column_sources:
        row[column] = results.get(source_key)
    row["notes"] = notes

    table_row = pd.DataFrame([row])
    os.makedirs(os.path.dirname(METRICS_TABLE), exist_ok=True)

    appending = os.path.exists(METRICS_TABLE)
    table_row.to_csv(METRICS_TABLE, mode="a" if appending else "w", header=not appending, index=False)
    print(f"[OK] Add metrics to {METRICS_TABLE}")

In [33]:
# Inference and Video Generation Functions
def fasterrcnn_detections_to_txt(model, sequence_dir, out_dir, txt_output, threshold=0.7):
    """Run the detector on every frame of a sequence and write the detections to a text file.

    One line is written per detection scoring at least ``threshold``:
    ``frame,id,x,y,w,h,conf,-1,-1,-1``. ``frame`` is the 1-based position of
    the frame in the sorted listing, ``id`` is always 0 (detection only, no
    tracking) and the box values are truncated to integers.

    Args:
        model: Detector; it is moved to the available device and put in eval mode.
        sequence_dir: Sequence folder with an ``img1`` sub-folder of frames.
        out_dir: Folder that is created if missing.
        txt_output: Path of the text file to write.
        threshold: Minimum detection score.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device).eval()

    ds = MOT16TestDataset(sequence_dir, transforms=get_test_transforms())
    os.makedirs(out_dir, exist_ok=True)

    n_frames = len(ds)
    if n_frames == 0:
        print(f"[ERR] {sequence_dir}/img1 is empty")
        return
    print(f"[INFO] Running inference on frames：{len(ds)} frames: {sequence_dir}")

    track_id = 0 # placeholder ids because detection only
    with open(txt_output, "w") as f_out:
        for frame_idx in range(n_frames):
            img, _ = ds[frame_idx]
            with torch.no_grad():
                pred = model([img.to(device)])[0]

            boxes = pred["boxes"].detach().cpu()
            scores = pred["scores"].detach().cpu()

            # Filter and compute sizes on tensors, then convert to Python numbers once.
            confident = scores >= threshold
            kept_boxes = boxes[confident]
            corners = kept_boxes[:, :2].tolist()
            sizes = (kept_boxes[:, 2:] - kept_boxes[:, :2]).tolist()
            confs = scores[confident].tolist()

            for (x1, y1), (w, h), conf in zip(corners, sizes, confs):
                f_out.write(f"{frame_idx+1},{track_id},{int(x1)},{int(y1)},{int(w)},{int(h)},{conf},-1,-1,-1\n")
    print(f"[OK] Detections saved to {out_dir}")

def get_sequence_info(sequence_dir):
    """Read frame rate and image size from ``<sequence_dir>/seqinfo.ini``.

    Returns:
        ``(fps, (width, height))`` taken from the ``frameRate``, ``imWidth``
        and ``imHeight`` options of the ``[Sequence]`` section. A value is
        ``None`` when the file, the section or the option is missing.
    """
    parser = configparser.ConfigParser()
    # read() silently ignores a missing file; the fallbacks below then apply.
    parser.read(os.path.join(sequence_dir, "seqinfo.ini"))

    def _int_option(option):
        return parser.getint("Sequence", option, fallback=None)

    fps = _int_option("frameRate")
    width = _int_option("imWidth")
    height = _int_option("imHeight")
    return fps, (width, height)

def visualize_sequence(sequence_dir, results_file, output_video_path):
    """Draw the detections of a results file onto the sequence frames and save an MP4.

    Args:
        sequence_dir: Sequence folder with ``img1/*.jpg`` frames (file names are
            the frame numbers) and, optionally, a ``seqinfo.ini``.
        results_file: Headerless CSV with ten columns per row:
            frame, id, x, y, w, h, conf, x3, y3, z3. An empty file is allowed
            and produces a video without boxes.
        output_video_path: Path of the video to write.

    Raises:
        RuntimeError: If the first frame cannot be decoded.
    """
    columns = ["frame","id","x","y","w","h","conf","x3","y3","z3"]

    # Load results. A run in which no detection passed the threshold leaves an
    # empty file, which read_csv rejects; treat that as "no boxes".
    try:
        results_df = pd.read_csv(results_file, header=None)
    except pd.errors.EmptyDataError:
        results_df = pd.DataFrame(columns=columns)
    else:
        results_df.columns = columns

    # Load frames
    image_paths = sorted(glob.glob(os.path.join(sequence_dir, "img1", "*.jpg")))
    if not image_paths:
        print(f"[ERR] {sequence_dir}/img1 is empty")
        return

    # The frame rate comes from seqinfo.ini; the frame size is taken from the
    # first image rather than from the ini file.
    fps, _ = get_sequence_info(sequence_dir)
    if not fps:
        # No usable frameRate: fall back to an assumed default so a video can still be written.
        fps = 30
        print(f"[WARN] frameRate not found for {sequence_dir}, using {fps} fps")
    first_img = cv2.imread(image_paths[0])
    if first_img is None:
        raise RuntimeError(f"Could not read first frame: {image_paths[0]}")
    frame_size = (first_img.shape[1], first_img.shape[0])

    # One colour per id. A private generator gives the same colours as seeding
    # with 42 without resetting the notebook-wide NumPy random state.
    max_id = int(results_df["id"].max()) + 1 if len(results_df) else 1
    colors = np.random.RandomState(42).randint(0, 255, size=(max_id, 3), dtype=np.uint8)

    # Group once so each frame is a dictionary lookup instead of a table scan.
    detections_by_frame = {int(frame): rows for frame, rows in results_df.groupby("frame")}

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)
    try:
        for img_path in image_paths:
            frame_num = int(os.path.splitext(os.path.basename(img_path))[0])
            img = cv2.imread(img_path)
            if img is None:
                continue  # skip frames that cannot be decoded

            frame_data = detections_by_frame.get(frame_num)
            if frame_data is not None:
                for det in frame_data.itertuples(index=False):
                    track_id = int(det.id)
                    color = tuple(map(int, colors[track_id]))
                    x, y, w_box, h_box = int(det.x), int(det.y), int(det.w), int(det.h)
                    cv2.rectangle(img, (x, y), (x + w_box, y + h_box), color, 2)
                    cv2.putText(img, str(track_id), (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            video_writer.write(img)
    finally:
        # Always finalise the file, even if a frame fails halfway through.
        video_writer.release()
    print(f"Video saved to {output_video_path}")

In [34]:
# Split Data Functions
def train_eval_split(folders_dir, split_ratio, seed):
    """Split the sequence folders of ``folders_dir`` into training and evaluation sets.

    Sub-directory names are sorted, shuffled with the global ``random`` module
    reseeded to ``seed``, and cut at ``int(len * split_ratio)``.

    Args:
        folders_dir: Folder whose sub-directories are the sequences.
        split_ratio: Fraction of sequences assigned to training.
        seed: Seed for the shuffle (this reseeds the global ``random`` state).

    Returns:
        ``(train_sequence, eval_sequence)`` as two lists of names.
    """
    sequences = sorted(
        name for name in os.listdir(folders_dir)
        if os.path.isdir(os.path.join(folders_dir, name))
    )
    print("All sequences:", sequences)

    # Sorting before the seeded shuffle makes the split independent of listing order.
    random.seed(seed)
    random.shuffle(sequences)

    cut = int(len(sequences) * split_ratio)
    train_sequence, eval_sequence = sequences[:cut], sequences[cut:]

    print("Training sequences:", train_sequence)
    print("Evaluation sequences:", eval_sequence)

    return train_sequence, eval_sequence

## MAIN CODE EXECUTION

In [35]:
# TRAINING
# Pick the GPU when one is available; `device` is reused by the evaluation cell below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_sequence, eval_sequence = train_eval_split(folders_dir=f"{DATA_DIR}/train", split_ratio=DATA_SPLIT_RATIO, seed=SEED) # Split sequences into training / held-out evaluation

# a) Training (if you already have weights and don't want to retrain, you can comment out the next 2 lines)
# weights_path=None starts from the default pretrained detector rather than a saved checkpoint.
model = get_detector_model(weights_path=None)
train(model, DATA_DIR, seq_list=train_sequence, num_epochs=EPOCHS, batch_size=BATCH_SIZE, save_path=MODEL_PATH)

All sequences: ['MOT16-02', 'MOT16-04', 'MOT16-05', 'MOT16-09', 'MOT16-10', 'MOT16-11', 'MOT16-13']
Training sequences: ['MOT16-04', 'MOT16-09', 'MOT16-10', 'MOT16-05', 'MOT16-13']
Evaluation sequences: ['MOT16-02', 'MOT16-11']
Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


100%|██████████| 160M/160M [00:00<00:00, 220MB/s]


[WARN] Training Weights not loaded（Normal for first time training）
[INFO] Train dataset length: 3816
[Epoch 1/5] loss=0.6020  time=380.4s
[Epoch 2/5] loss=0.4829  time=377.6s
[Epoch 3/5] loss=0.4279  time=385.3s
[Epoch 4/5] loss=0.3837  time=401.3s
[Epoch 5/5] loss=0.3549  time=383.6s
[OK] Training Complete → /content/drive/MyDrive/Deep_Learning/Final Project/models_FasterRCNN/bbox_detector.pth


In [36]:
# EVALUATION
# b) Reload weights, perform a clean evaluation
model = get_detector_model(weights_path=MODEL_PATH)

# c) Evaluation on the held-out sequences (they live in the train folder because only it has ground truth)
eval_root = os.path.join(DATA_DIR, "train")
eval_out  = os.path.join(RESULT_DIR, f"eval_train_conf{CONF_THRESHOLD}")
res = evaluate_on_folder(model, eval_root, seq_list=eval_sequence, out_dir=eval_out, conf_thr=CONF_THRESHOLD, iou_thr=IOU_THRESHOLD, device=device)
# Record which sequences were actually scored: only eval_sequence is evaluated, not all training data.
save_metrics_row(res, seq_name="+".join(eval_sequence), model_tag="fasterrcnn_head_finetune", conf_thr=CONF_THRESHOLD, notes="aug+AdamW+warmup_cos")

[OK] Loaded weights: /content/drive/MyDrive/Deep_Learning/Final Project/models_FasterRCNN/bbox_detector.pth
[INFO] Eval dataset length: 1500
[OK] Writing metrics to: /content/drive/MyDrive/Deep_Learning/Final Project/results_FasterRCNN/eval_train_conf0.7/metrics.txt
[OK] Add metrics to /content/drive/MyDrive/Deep_Learning/Final Project/results_FasterRCNN/metrics_summary.csv


In [37]:
# TEST VIDEO CREATION
# d) Reload weights for inference
model = get_detector_model(weights_path=MODEL_PATH)

# e) Single sequence or complete sequence (testing = no ground truth)
test_seqs = ["MOT16-01","MOT16-03","MOT16-06","MOT16-07","MOT16-08","MOT16-12","MOT16-14"]
# Run the same pipeline on every test sequence, or only on TEST_SEQ.
selected_seqs = test_seqs if RUN_ALL_TEST_SEQ else [TEST_SEQ]
for seq in selected_seqs:
    sequence_dir = os.path.join(DATA_DIR, "test", seq)
    out_dir = os.path.join(RESULT_DIR, f"test_{seq}")
    text_output = os.path.join(out_dir, f"test_{seq}.txt")
    fasterrcnn_detections_to_txt(model, sequence_dir, out_dir, text_output, threshold=CONF_THRESHOLD) # create txt with detections
    video_path = os.path.join(out_dir, f"test_{seq}.mp4")
    visualize_sequence(sequence_dir, results_file=text_output, output_video_path=video_path) # save video with detections

print("\n[DONE]")

[OK] Loaded weights: /content/drive/MyDrive/Deep_Learning/Final Project/models_FasterRCNN/bbox_detector.pth
[INFO] Running inference on frames：1500 frames: /content/drive/MyDrive/Deep_Learning/Final Project/MOT16/test/MOT16-03
[OK] Detections saved to /content/drive/MyDrive/Deep_Learning/Final Project/results_FasterRCNN/test_MOT16-03
Video saved to /content/drive/MyDrive/Deep_Learning/Final Project/results_FasterRCNN/test_MOT16-03/test_MOT16-03.mp4

[DONE]


In [38]:
# TRAINING VIDEO CREATION
# Same detection-to-video pipeline as the test cell, applied to one sequence of the train folder.
# d) Reload weights for inference
model = get_detector_model(weights_path=MODEL_PATH)

# Make video
seq = "MOT16-02" # sequence from the train folder (in the recorded run above it is one of the evaluation sequences)
sequence_dir = os.path.join(DATA_DIR, "train", seq)
out_dir = os.path.join(RESULT_DIR, f"train_{seq}")
text_output = os.path.join(out_dir, f"train_{seq}.txt")
fasterrcnn_detections_to_txt(model, sequence_dir, out_dir, text_output, threshold=CONF_THRESHOLD) # create txt with detections
video_path = os.path.join(out_dir, f"train_{seq}.mp4")
visualize_sequence(sequence_dir, results_file=text_output, output_video_path=video_path) # save video with detections

print("\n[DONE]")

[OK] Loaded weights: /content/drive/MyDrive/Deep_Learning/Final Project/models_FasterRCNN/bbox_detector.pth
[INFO] Running inference on frames：600 frames: /content/drive/MyDrive/Deep_Learning/Final Project/MOT16/train/MOT16-02
[OK] Detections saved to /content/drive/MyDrive/Deep_Learning/Final Project/results_FasterRCNN/train_MOT16-02
Video saved to /content/drive/MyDrive/Deep_Learning/Final Project/results_FasterRCNN/train_MOT16-02/train_MOT16-02.mp4

[DONE]
