# Jupyter notebook for debugging

In [108]:
%load_ext autoreload
%autoreload 2

# Copied from `train` function in train_simple.py:L78
import yaml

device = 'cpu'
hyp = 'data/hyps/hyp.scratch-low.yaml'

with open(hyp, errors="ignore") as f:
    hyp = yaml.safe_load(f)  # load hyps dict

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [109]:
from models.yolo import Model
from utils.general import check_dataset

cfg = 'models/yolov5n_nuscenes.yaml'
data = 'data/nuscenes.yaml'
data_dict = check_dataset(data)

nc = int(data_dict["nc"])  # number of classes
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create

Overriding model.yaml nc=4 with nc=15
Overriding model.yaml anchors with anchors=[[8.700976371765137, 18.705415725708008], [17.866653442382812, 14.511322975158691], [18.120155334472656, 34.65727996826172], [33.7895393371582, 20.528886795043945], [41.03347396850586, 31.010244369506836], [40.75947952270508, 67.85029602050781], [87.73966979980469, 33.539337158203125], [115.51000213623047, 81.31227111816406], [148.4618682861328, 222.72320556640625]]


TypeError: type list doesn't define __round__ method

In [110]:
anchors = model.model[-1].anchors

# [TODO] Draw anchors
anchors = model.model[-1].anchors

# [TODO] Draw anchors
# Plotting the anchors
fig, ax = plt.subplots()

for i, (w, h) in enumerate(anchors):
    rect = plt.Rectangle((0, 0), w, h, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    plt.text(w/2, h/2, f'Anchor {i+1}', color='blue', ha='center')

plt.xlim(0, 5)
plt.ylim(0, 7)
plt.xlabel('Width')
plt.ylabel('Height')
plt.title('YOLO Anchors')

# Save the plot to a file
plt.savefig('/mnt/data/yolo_anchors_plot.png')

plt.show()

In [4]:
from utils.dataloaders import create_dataloader
from utils.general import check_img_size, colorstr

imgsz = 416
batch_size = 1
single_cls = False
seed = 0

train_path = data_dict["train"]
gs = max(int(model.stride.max()), 32)  # grid size (max stride)
imgsz = check_img_size(imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

train_loader, dataset = create_dataloader(
    train_path,
    imgsz,
    batch_size,
    gs,
    single_cls,
    hyp=hyp,
    augment=True,
    cache=None,
    rect=False,
    rank=-1,
    workers=8,
    image_weights=False,
    quad=False,
    prefix=colorstr("train: "),
    shuffle=True,
    seed=seed,
)

[34m[1mtrain: [0mScanning /home/ubuntu/datasets/nuscenes_det2d/train... 28130 images, 1425 backgrounds, 0 corrupt: 100%|██████████| 28130/28130 [00:01<00:00, 20217.09it/s]
[34m[1mtrain: [0mNew cache created: /home/ubuntu/datasets/nuscenes_det2d/train.cache


In [12]:
for imgs, targets, paths, _ in train_loader:
    imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0
    break

In [14]:
import torch
from models.common import DetectMultiBackend
from utils.torch_utils import select_device

weights = 'yolov5n.pt'
# data = 'data/nuscenes.yaml'
data = 'data/coco128.yaml'
half = False  # use FP16 half-precision inference
dnn = False  # use OpenCV DNN for ONNX inference
device = select_device('cpu')

model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)

# inference
model.eval()
with torch.no_grad():
    pred = model(imgs)  # forward

YOLOv5 🚀 v7.0-320-g77b4eb3a Python-3.10.12 torch-2.0.1 CPU

Fusing layers... 


YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients


In [15]:
from utils.general import non_max_suppression

conf_thres = 0.25  # confidence threshold
iou_thres = 0.45  # NMS IOU threshold
max_det = 1000  # maximum detections per image
classes = None
agnostic_nms = False  # class-agnostic NMS

pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

# [TODO] draw predictions (see detect.py:L178)
from utils.general import non_max_suppression

conf_thres = 0.25  # confidence threshold
iou_thres = 0.45  # NMS IOU threshold
max_det = 1000  # maximum detections per image
classes = None
agnostic_nms = False  # class-agnostic NMS

pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

# [TODO] draw predictions (see detect.py:L178)
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def draw_predictions(imgs, pred, class_names):
    fig, ax = plt.subplots(1, figsize=(12, 12))
    ax.imshow(imgs[0].permute(1, 2, 0).cpu().numpy())

    for *xyxy, conf, cls in pred[0]:
        label = f'{class_names[int(cls)]} {conf:.2f}'
        plot_one_box(xyxy, ax, label=label, color=(1, 0, 0), line_thickness=2)

    plt.show()

def plot_one_box(x, ax, label=None, color=(1, 0, 0), line_thickness=2):
    # Plots one bounding box on the image
    tl = line_thickness or round(0.002 * (x[2] - x[0] + x[3] - x[1]) / 2) + 1  # line thickness
    x = [float(i) for i in x]
    xyxy = [x[0], x[1], x[2] - x[0], x[3] - x[1]]
    rect = patches.Rectangle(xyxy[:2], *xyxy[2:], linewidth=tl, edgecolor=color, facecolor='none')
    ax.add_patch(rect)
    if label:
        ax.text(x[0], x[1], label, color=color, fontsize=12, bbox=dict(facecolor='yellow', alpha=0.5))

# [TODO] Draw predictions
# Example usage (assuming predictions are made and `imgs` and `pred` are available):
draw_predictions(imgs, pred, model.names)

# Forward
with torch.cuda.amp.autocast(amp):
    pred = model(imgs)  # forward
    loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
    if opt.quad:
        loss *= 4.0

# [TODO] Draw predictions
draw_predictions(imgs, pred, model.names)

import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def draw_predictions(img, pred, class_names):
    img = img.permute(1, 2, 0).cpu().numpy()  # Convert image tensor to numpy array
    img = (img * 255).astype(np.uint8)  # Convert from [0, 1] to [0, 255]
    for *xyxy, conf, cls in pred:
        label = f'{class_names[int(cls)]} {conf:.2f}'
        plot_one_box(xyxy, img, label=label, color=(0, 0, 255), line_thickness=2)
    return img

def plot_one_box(x, img, label=None, color=(1, 0, 0), line_thickness=2):
    # Plots one bounding box on the image
    tl = line_thickness or round(0.002 * (x[2] - x[0] + x[3] - x[1]) / 2) + 1  # line thickness
    x = [int(i) for i in x]
    cv2.rectangle(img, (x[0], x[1]), (x[2], x[3]), color, thickness=tl)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = x[0] + t_size[0], x[1] - t_size[1] - 3
        cv2.rectangle(img, (x[0], x[1]), c2, color, -1)  # filled
        cv2.putText(img, label, (x[0], x[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

def save_video_with_predictions(video_path, output_path, model, device):
    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        img = torch.from_numpy(frame).to(device).float() / 255.0
        img = img.permute(2, 0, 1).unsqueeze(0)  # Convert to batch format
        pred = model(img)[0]  # Get prediction

        # Draw predictions on the frame
        frame = draw_predictions(img[0], pred, model.names)
        out.write(frame)

    cap.release()
    out.release()

# Example usage
video_path = 'input_video.mp4'  # Path to input video
output_path = 'output_video.mp4'  # Path to save output video
save_video_with_predictions(video_path, output_path, model, device)

