# Object Detection

In [1]:
!git clone https://github.com/ultralytics/yolov5

!pip install -r https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt

fatal: destination path 'yolov5' already exists and is not an empty directory.


Defaulting to user installation because normal site-packages is not writeable




In [3]:
import cv2 as cv
import os
import random
import torch
import torch.nn as nn
import torchvision
from torchvision import models
from torch.utils.data import Dataset, DataLoader
import albumentations as A
import torch.nn.functional as F
from albumentations.pytorch import ToTensorV2
import xml.etree.ElementTree as ET
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


In [4]:
# Get train and validation datasets
images_directory = "../dataset/images"
annotations_directory = "../dataset/annotations"

train_split = 0.8

with open("train.txt") as train:
    train_images_filenames_total = train.read().splitlines()

    split_idx = int(train_split * len(train_images_filenames_total))
    train_images_filenames = train_images_filenames_total[:split_idx]
    val_images_filenames = train_images_filenames_total[split_idx:]


with open("test.txt") as test:
    test_images_filenames = test.read().splitlines()

# Filter out images that can not be loaded properly
train_images_filenames = [i for i in train_images_filenames if cv.imread(os.path.join(images_directory, i + ".png")) is not None]
val_images_filenames = [i for i in val_images_filenames if cv.imread(os.path.join(images_directory, i + ".png")) is not None]
test_images_filenames = [i for i in test_images_filenames if cv.imread(os.path.join(images_directory, i + ".png")) is not None]

random.seed(42)
random.shuffle(train_images_filenames)
random.shuffle(val_images_filenames)
random.shuffle(test_images_filenames)

print(len(train_images_filenames), len(val_images_filenames), len(test_images_filenames))

490 123 264


## Convert to YOLO annotations

In [5]:
import shutil
classes = {
    "trafficlight": 0,
    "stop": 1,
    "speedlimit": 2,
    "crosswalk": 3,
}

os.makedirs("../dataset/labels", exist_ok=True)

os.makedirs("dataset/images/train", exist_ok=True)
os.makedirs("dataset/images/val", exist_ok=True)
os.makedirs("dataset/images/test", exist_ok=True)
os.makedirs("dataset/labels/train", exist_ok=True)
os.makedirs("dataset/labels/val", exist_ok=True)
os.makedirs("dataset/labels/test", exist_ok=True)


for img in train_images_filenames:
    shutil.copy(
        f"../dataset/images/{img}.png", f"dataset/images/train/{img}.png")

for img in val_images_filenames:
    shutil.copy(
        f"../dataset/images/{img}.png", f"dataset/images/val/{img}.png")

for img in test_images_filenames:
    shutil.copy(
        f"../dataset/images/{img}.png", f"dataset/images/test/{img}.png")


for path in os.listdir(annotations_directory):
    with open(os.path.join(annotations_directory, path), "r") as xml:
        folder = ""
        if path.strip(".xml") in train_images_filenames:
            folder = "train"
        elif path.strip(".xml") in val_images_filenames:
            folder = "val"
        else:
            folder = "test"


        with open(f"dataset/labels/{folder}/{path.replace('xml', 'txt')}", "w") as txt:
            tree = ET.parse(xml)
            root = tree.getroot()

            size = root.find("size")
            height = int(size.find("height").text)
            width = int(size.find("width").text)

            objects = root.findall("object")

            lines = []
            for object in objects:
                class_index = classes[object.find("name").text]

                xmin = int(object.find("bndbox/xmin").text)
                ymin = int(object.find("bndbox/ymin").text)
                xmax = int(object.find("bndbox/xmax").text)
                ymax = int(object.find("bndbox/ymax").text)

                # middle of bbox
                bbox_x = ((xmax + xmin) / 2) / width
                bbox_y = ((ymax + ymin) / 2) / height
                bbox_width = (xmax - xmin) / width
                bbox_height = (ymax - ymin) / height

                lines.append(
                    f"{class_index} {bbox_x} {bbox_y} {bbox_width} {bbox_height}")

            txt.write("\n".join(lines))


## Dataset

In [6]:
classes = {
    "trafficlight": 0,
    "stop": 1,
    "speedlimit": 2,
    "crosswalk": 3,
}


class TrafficSignDataset(Dataset):
    def __init__(self, annotations_directory, images_filenames, images_directory, transform=None):
        self.annotations_directory = annotations_directory
        self.images_filenames = images_filenames
        self.images_directory = images_directory
        self.transform = transform

    def __len__(self):
        return len(self.images_filenames)

    def __getitem__(self, idx):
        image_filename = self.images_filenames[idx]
        image = cv.imread(os.path.join(
            self.images_directory, image_filename + ".png"))
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

        image = image / 255.

        boxes, labels = self._get_boxes_and_labels(image_filename)

        image_id = torch.tensor([idx])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id

        if self.transform is not None:
            transformed = self.transform(
                image=image, bboxes=target["boxes"], labels=target["labels"])
            image = transformed["image"]
            target["boxes"] = transformed["bboxes"]
            target["labels"] = transformed["labels"]
            target["boxes"] = torch.as_tensor(target["boxes"], dtype=torch.float32)
            target["labels"] = torch.as_tensor(target["labels"], dtype=torch.int64)

        return image.float(), target

    # https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
    def _get_boxes_and_labels(self, filename):
        boxes = []
        labels = []
        with open(os.path.join(self.annotations_directory, filename + ".xml")) as xml:
            tree = ET.parse(xml)
            root = tree.getroot()

            objects = root.findall("object")
            for object in objects:
                class_index = classes[object.find("name").text]

                xmin = int(object.find("bndbox/xmin").text)
                ymin = int(object.find("bndbox/ymin").text)
                xmax = int(object.find("bndbox/xmax").text)
                ymax = int(object.find("bndbox/ymax").text)

                boxes.append([xmin, ymin, xmax, ymax])
                labels.append(class_index)

        return boxes, labels


In [7]:
num_classes = len(classes)

# Get CPU or GPU device for training
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

batch_size = 32
num_workers = 0  # how many processes are used to load the data

train_transform = A.Compose(
    [
        A.Resize(256, 256),
        A.RandomCrop(224, 224),
        A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2,
                           rotate_limit=30, p=0.5),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="pascal_voc", label_fields=['labels'])
)

val_transform = A.Compose(
    [A.Resize(256, 256), A.CenterCrop(224, 224), ToTensorV2()],
    bbox_params=A.BboxParams(format="pascal_voc", label_fields=['labels'])
)

train = TrafficSignDataset(annotations_directory,
                           train_images_filenames, images_directory, train_transform)
val = TrafficSignDataset(annotations_directory,
                         val_images_filenames, images_directory, val_transform)
test = TrafficSignDataset(annotations_directory,
                          test_images_filenames, images_directory)

train_dataloader = DataLoader(
    train, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True, collate_fn=lambda batch: tuple(zip(*batch)))
val_dataloader = DataLoader(val, batch_size=batch_size,
                            shuffle=False, num_workers=num_workers, drop_last=False, collate_fn=lambda batch: tuple(zip(*batch)))
test_dataloader = DataLoader(
    test, batch_size=1, shuffle=False, num_workers=num_workers, drop_last=False)


Using cuda device


## Two-Stage

In [8]:
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)

in_features = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

# from torchmetrics import JaccardIndex
# metric = JaccardIndex(num_classes=num_classes)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (0): ConvNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): FrozenBatchNorm2d(16, eps=1e-05)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
            (2): ReLU(inplace=True)
          )
          (1): ConvNormActivation(
            (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
          )
        )
      )
      (2): InvertedResidual(
        (block): Seque

In [9]:
def train_one_epoch(dataloader, model, epoch, optimizer=None, is_train=True):
    if is_train:
        assert optimizer is not None, "When training, please provide an optimizer."

    num_batches = len(dataloader)

    if is_train:
        model.train()
    else:
        model.eval()

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(dataloader) - 1)

        lr_scheduler = torch.optim.lr_scheduler.LinearLR(
            optimizer, start_factor=warmup_factor, total_iters=warmup_iters
        )

    total_loss = 0.0

    with torch.set_grad_enabled(is_train):
        for images, targets in tqdm(dataloader):

            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            # loss_value = losses_reduced.item()
            # losses_reduced = sum(loss for loss in loss_dict.values())

            if is_train:
                optimizer.zero_grad()
                losses.backward()
                optimizer.step()

                if lr_scheduler is not None:
                    lr_scheduler.step()

            # IMPORTANT: call .item() to obtain the value of the loss WITHOUT the computational graph attached
            total_loss += losses.item()
            # total_jaccard += metric(final_pred.cpu(), target.cpu())

        return total_loss / num_batches # , total_jaccard / num_batches


def evaluate(dataloader, model, metric):
    model.eval()
    
    with torch.set_grad_enabled(False):
        for images, targets in tqdm(dataloader):
            images = list(image.to(device) for image in images)

            outputs = model(images)
            outputs = [{k: v.to("cpu") for k, v in t.items()} for t in outputs]

            # print(outputs)
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            # print(res)

            metric.update(outputs, targets)
        
    return metric

In [10]:
import numpy as np
from tqdm import tqdm
from torchmetrics.detection.mean_ap import MeanAveragePrecision

# torch.cuda.empty_cache()

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)

metric = MeanAveragePrecision(box_format="xyxy", iou_type="bboxes", class_metrics=True)

# let's train it for 10 epochs
num_epochs = 10


train_history = {'loss': []}
val_history = {'meanap': []}
best_val_loss = np.inf

# for epoch in range(num_epochs):
#     # train for one epoch, printing every 10 iterations
#     train_one_epoch()
#     train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
#     # evaluate on the test dataset
#     evaluate(model, data_loader_test, device=device)



print("Start training...")
for t in range(num_epochs):
  print(f"\nEpoch {t+1}")
  train_loss = train_one_epoch(train_dataloader, model, t, optimizer)
  print(f"Train loss: {train_loss:.3f}")
  # val_loss = train_one_epoch(val_dataloader, model, t, optimizer, is_train=False)
  meanap = evaluate(val_dataloader, model, metric)
  # print(f"Val loss: {val_loss:.3f}")
  print(meanap.compute())

  # save model when val loss improves
  # if val_loss < best_val_loss:
  #   best_val_loss = val_loss
  #   save_dict = {'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': t}
  #   torch.save(save_dict, 'best_model.pth')

  # # save latest model
  # save_dict = {'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': t}
  # torch.save(save_dict, 'latest_model.pth')

  # save training history for plotting purposes
  train_history["loss"].append(train_loss)
  # train_history["jaccard"].append(train_jaccard)

  val_history["meanap"].append(meanap)
  # val_history["jaccard"].append(val_jaccard)

Start training...

Epoch 1


100%|██████████| 15/15 [00:27<00:00,  1.83s/it]


Train loss: 0.967


 25%|██▌       | 1/4 [00:00<00:02,  1.13it/s]

[{'boxes': tensor([[1.2084e+02, 5.7886e+00, 1.8824e+02, 6.9866e+01],
        [3.0391e+01, 6.0489e-01, 1.1772e+02, 6.1355e+01],
        [1.2703e+02, 1.4535e+01, 1.9007e+02, 7.1041e+01],
        [1.4467e+02, 1.3775e+01, 1.9795e+02, 6.1324e+01],
        [1.2838e+02, 2.6156e+01, 2.0394e+02, 7.6154e+01],
        [3.0502e+01, 8.0885e-01, 1.1031e+02, 6.4429e+01],
        [1.1922e+02, 1.8288e+01, 1.9274e+02, 7.0183e+01],
        [1.1301e+02, 1.4495e+01, 1.7341e+02, 5.8404e+01],
        [1.1503e+02, 1.9160e+01, 1.6998e+02, 6.3331e+01],
        [4.0930e+01, 0.0000e+00, 1.1595e+02, 6.0292e+01],
        [5.9135e+01, 7.4180e+00, 1.1239e+02, 5.5002e+01],
        [1.5129e+02, 2.0203e+01, 1.8916e+02, 6.2672e+01],
        [1.0087e+02, 6.1126e+00, 1.8362e+02, 5.7246e+01],
        [1.5044e+02, 1.2847e+01, 1.9852e+02, 6.3205e+01],
        [7.3520e+01, 3.7870e-01, 1.3202e+02, 5.9090e+01],
        [1.3516e+02, 0.0000e+00, 1.9467e+02, 5.5940e+01],
        [9.9841e+01, 7.2039e+00, 1.7962e+02, 5.5257e+01],
   

 50%|█████     | 2/4 [00:01<00:01,  1.14it/s]

[{'boxes': tensor([[ 63.1070,  37.7911, 127.0065,  82.6882],
        [ 63.2043, 102.3269, 126.7491, 142.2413],
        [ 69.9650,  63.7450, 122.7064, 149.3085],
        [ 68.2629,  85.1101, 126.5778, 110.1213],
        [ 75.1267,  43.9812, 119.4331,  85.5247],
        [ 73.1980,  98.5949, 144.0258, 155.2216],
        [ 79.5758,  96.7467, 121.7333, 144.1613],
        [ 64.6887,  35.6690, 122.5575, 121.3930],
        [ 71.4333,  85.5037, 120.2066, 107.3942],
        [ 73.8898,  60.1352, 121.7703, 148.5225],
        [ 71.7891, 104.3621, 124.5033, 143.7283],
        [ 64.5486, 104.8464, 112.0620, 142.7603],
        [ 62.9272,  89.0628, 115.9072, 167.2200],
        [ 61.4384,  77.5833, 132.9722, 121.3741],
        [ 70.7009, 115.7647, 127.7503, 148.5938],
        [ 90.6482, 105.5174, 131.6403, 143.1705],
        [ 72.6496,  61.1763, 122.3545, 149.5468],
        [ 65.3074,  38.4459, 107.3086,  76.6892],
        [ 65.9496,  43.3327, 120.8026,  83.5909],
        [ 70.7518,  84.6028, 124.0452, 

 75%|███████▌  | 3/4 [00:02<00:00,  1.14it/s]

[{'boxes': tensor([[7.3540e+01, 4.8611e+01, 1.3510e+02, 9.7627e+01],
        [5.2873e+01, 1.7247e+02, 6.9070e+01, 1.8766e+02],
        [6.8411e+01, 5.1319e+00, 1.4530e+02, 6.3198e+01],
        [8.3509e+01, 5.3346e+01, 1.5177e+02, 1.1023e+02],
        [8.5114e+01, 5.5466e+01, 1.3283e+02, 1.0309e+02],
        [7.9673e+01, 5.8236e+01, 1.3393e+02, 9.9743e+01],
        [7.7030e+01, 1.4972e-01, 1.3799e+02, 5.5242e+01],
        [8.8171e+01, 3.5147e+01, 1.4405e+02, 9.8926e+01],
        [9.8645e+01, 5.9773e+01, 1.3438e+02, 9.9869e+01],
        [7.6296e+01, 5.1468e+01, 1.3626e+02, 8.0969e+01],
        [6.4283e+01, 5.1969e+01, 1.8704e+02, 9.8166e+01],
        [6.9647e+01, 3.8759e+01, 1.2802e+02, 1.3374e+02],
        [7.6096e+01, 1.7991e+01, 1.4041e+02, 5.1640e+01],
        [9.8345e+01, 7.3292e+00, 1.3638e+02, 6.0120e+01],
        [6.6835e+01, 4.6812e+01, 1.4521e+02, 1.0638e+02],
        [6.9771e+01, 4.6847e+01, 1.1878e+02, 9.2151e+01],
        [7.2477e+01, 1.1087e+01, 1.3201e+02, 4.8268e+01],
   

100%|██████████| 4/4 [00:03<00:00,  1.20it/s]

[{'boxes': tensor([[ 57.9055,  49.8188, 101.2020,  82.5544],
        [ 29.2591,  83.7325, 102.7716, 117.6585],
        [ 53.0593,  38.1622, 109.5208,  92.7457],
        [ 49.5264,  53.3285,  94.6151,  85.3993],
        [ 48.2401,  53.8614,  90.1943,  87.0870],
        [ 32.5155,  54.6302,  92.8224, 144.2475],
        [ 63.0653,  58.4059,  93.9594,  81.4502],
        [ 33.0971,  84.7362,  95.8438, 114.9481],
        [ 63.3460,  45.0805, 102.8838,  85.2089],
        [ 52.2065,  79.5583,  83.6462, 124.4172],
        [ 33.4248,  85.4029,  93.6181, 113.7252],
        [ 56.2311,  46.8694,  95.3930,  83.2898],
        [ 47.6632,  41.1821,  99.2646,  73.9806],
        [ 47.8165,  53.7440,  86.6030,  88.4052],
        [ 73.1492,  45.3196, 102.0222,  84.1886],
        [ 29.3843,  67.8360,  90.5841, 121.6678],
        [ 24.8406,  68.0725,  89.0204, 122.4728],
        [ 58.0802,  31.0528, 109.3747,  93.5363],
        [ 40.2263,  31.8408,  95.4991,  85.3131],
        [ 75.2074,  59.7873,  97.9306, 




{'map': tensor(0.0664), 'map_50': tensor(0.1911), 'map_75': tensor(0.0100), 'map_small': tensor(0.0636), 'map_medium': tensor(0.1010), 'map_large': tensor(0.2188), 'mar_1': tensor(0.1307), 'mar_10': tensor(0.2176), 'mar_100': tensor(0.2201), 'mar_small': tensor(0.1839), 'mar_medium': tensor(0.3373), 'mar_large': tensor(0.4667), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}

Epoch 2


100%|██████████| 15/15 [01:17<00:00,  5.17s/it]


Train loss: 0.618


 25%|██▌       | 1/4 [00:02<00:07,  2.58s/it]

[{'boxes': tensor([[119.7382,   9.0003, 194.2442,  71.6912],
        [ 27.4344,   0.6261, 119.9475,  60.1930],
        [118.1779,  13.0303, 185.3979,  64.2344],
        [121.9613,  21.8462, 189.8425,  63.7099],
        [ 43.0590,   2.3654, 115.0576,  56.3102]]), 'labels': tensor([2, 2, 1, 3, 3]), 'scores': tensor([0.7858, 0.1652, 0.1598, 0.0932, 0.0563])}, {'boxes': tensor([[ 34.9348, 117.5936,  83.5529, 162.0356],
        [ 31.9492,  21.6176,  83.9716,  60.7709],
        [ 38.5886, 120.2744,  80.4910, 159.6841],
        [ 36.1251,  22.2829,  83.3105,  59.6790],
        [ 35.2789,  21.6845,  81.0390,  59.5712],
        [ 29.2383,   4.9782,  82.5111,  82.1525],
        [ 33.5445,  78.8715,  85.6814, 121.2623]]), 'labels': tensor([2, 2, 1, 3, 1, 2, 2]), 'scores': tensor([0.8597, 0.2121, 0.0876, 0.0789, 0.0691, 0.0662, 0.0502])}, {'boxes': tensor([[ 65.7411,  26.2159, 123.5047,  78.4646],
        [ 66.7520,  26.6020, 119.5863,  71.1518]]), 'labels': tensor([2, 1]), 'scores': tensor([0.938

 50%|█████     | 2/4 [00:04<00:04,  2.34s/it]

[{'boxes': tensor([[ 68.2325,  41.9164, 123.5340,  85.8339],
        [ 69.0391,  99.6276, 125.3406, 151.2111],
        [ 71.3050, 104.5629, 123.7585, 144.1860],
        [ 65.6422,  44.6927, 123.4265,  77.4569],
        [ 73.8433,  87.3626, 123.9244, 105.1437],
        [ 71.4671, 107.2297, 123.1120, 141.2683],
        [ 69.5894,  44.9258, 118.6642,  83.6055],
        [ 65.6117,  79.9686, 128.5051, 148.3123],
        [ 68.0719,  79.1405, 132.8284, 148.2728]]), 'labels': tensor([2, 2, 1, 1, 2, 3, 3, 1, 3]), 'scores': tensor([0.8182, 0.8121, 0.1192, 0.1040, 0.0892, 0.0839, 0.0667, 0.0504, 0.0501])}, {'boxes': tensor([[ 98.1119,  80.7974, 171.8329, 146.3749],
        [101.2528,  18.8477, 179.8624,  81.9350],
        [144.5663, 188.8035, 160.5348, 202.0574],
        [103.5782,  23.6836, 166.5419,  80.4187],
        [101.3667,  79.3256, 165.2413, 137.1272],
        [101.6045,  19.9425, 174.8434,  80.7932],
        [ 92.1913,  33.0358, 176.4878, 161.0912],
        [100.7866,  83.5006, 169.5162

 75%|███████▌  | 3/4 [00:08<00:02,  2.77s/it]

[{'boxes': tensor([[ 76.4818,  50.8997, 137.4452, 107.7163],
        [ 53.2901, 173.1220,  67.8791, 187.0848],
        [ 81.1562,  54.2001, 134.3286, 100.7406],
        [ 73.7627,   1.8397, 136.6783,  54.7027],
        [ 71.2067,  55.0093, 145.4141, 102.0804],
        [ 53.4308, 173.3190,  68.1351, 187.0508]]), 'labels': tensor([2, 2, 1, 2, 3, 1]), 'scores': tensor([0.8000, 0.7050, 0.1325, 0.0863, 0.0775, 0.0594])}, {'boxes': tensor([[ 79.1138, 102.6196, 111.6442, 131.3548],
        [ 82.1317,  78.1401, 113.4586, 105.3971],
        [116.6759, 142.3355, 129.5679, 152.6234],
        [ 76.7242,  66.7729, 115.9805, 148.6560],
        [ 85.0332,  80.7260, 110.6551, 102.2896]]), 'labels': tensor([2, 2, 2, 2, 1]), 'scores': tensor([0.8474, 0.7775, 0.0894, 0.0636, 0.0605])}, {'boxes': tensor([[ 68.7755,  95.2544, 116.0602, 138.8196],
        [ 70.4300, 100.1002, 113.9397, 135.8154],
        [ 71.0124, 103.0324, 114.8572, 132.0313]]), 'labels': tensor([2, 1, 3]), 'scores': tensor([0.8463, 0.128

100%|██████████| 4/4 [00:10<00:00,  2.68s/it]

[{'boxes': tensor([[ 56.3180,  50.2136,  96.9569,  86.4977],
        [ 35.0709,  83.9750,  96.9221, 116.9261],
        [ 56.8739,  49.5640,  96.7234,  85.5962],
        [ 56.2669,  48.8053,  96.3284,  83.7980],
        [ 35.3390,  85.0150,  93.8242, 113.3815],
        [ 37.3663,  84.1211,  92.9361, 114.8054]]), 'labels': tensor([2, 2, 1, 3, 3, 1]), 'scores': tensor([0.7569, 0.1457, 0.1029, 0.0820, 0.0638, 0.0502])}, {'boxes': tensor([[ 37.6031,  39.1240, 132.0833, 112.1698],
        [ 44.2056, 107.0210, 122.3886, 192.9853],
        [ 39.7021, 111.6736, 126.2135, 188.0807],
        [ 41.4422, 109.7929, 122.0999, 181.8823],
        [ 40.3905,  39.3446, 123.7822, 111.9019],
        [ 17.1423,  88.5281, 142.2876, 210.7307],
        [ 40.9525,  39.7005, 132.9533, 100.7734],
        [ 50.2044, 126.0297, 113.1285, 177.7313],
        [ 56.6689, 123.7082, 111.3975, 175.9556]]), 'labels': tensor([2, 2, 3, 1, 1, 2, 3, 2, 1]), 'scores': tensor([0.7371, 0.4375, 0.1739, 0.1672, 0.1397, 0.1005, 0.096




{'map': tensor(0.0924), 'map_50': tensor(0.2080), 'map_75': tensor(0.0421), 'map_small': tensor(0.0872), 'map_medium': tensor(0.1262), 'map_large': tensor(0.2169), 'mar_1': tensor(0.1857), 'mar_10': tensor(0.2597), 'mar_100': tensor(0.2609), 'mar_small': tensor(0.2286), 'mar_medium': tensor(0.3532), 'mar_large': tensor(0.4722), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}

Epoch 3


100%|██████████| 15/15 [01:24<00:00,  5.63s/it]


Train loss: 0.564


 25%|██▌       | 1/4 [01:02<03:08, 62.80s/it]

[{'boxes': tensor([[117.4489,  15.5738, 194.8783,  67.4163],
        [117.9576,  14.4394, 187.6552,  66.7890],
        [117.9693,  14.4276, 190.5415,  68.1529]]), 'labels': tensor([2, 1, 3]), 'scores': tensor([0.8081, 0.1261, 0.0760])}, {'boxes': tensor([[ 36.6328, 120.6451,  82.3830, 160.8942],
        [ 29.0099, 120.0821,  87.7598, 167.5217]]), 'labels': tensor([2, 1]), 'scores': tensor([0.8721, 0.0514])}, {'boxes': tensor([[ 62.4380,  30.4798, 124.2950,  71.6969],
        [ 58.1270,  29.1619, 122.1848,  78.0899]]), 'labels': tensor([2, 1]), 'scores': tensor([0.9544, 0.0501])}, {'boxes': tensor([[ 95.1732, 113.9960, 142.1155, 152.0798],
        [ 69.0276, 157.7198,  89.0517, 176.3252],
        [ 93.3583, 107.2108, 146.1511, 153.0900],
        [ 93.6603, 111.1301, 136.2751, 151.5377]]), 'labels': tensor([3, 2, 2, 1]), 'scores': tensor([0.2823, 0.2409, 0.1797, 0.1514])}, {'boxes': tensor([[103.1873,  64.7886, 157.8079, 109.8693],
        [102.2364, 109.2043, 155.5186, 152.0775],
      

 50%|█████     | 2/4 [01:46<01:43, 51.51s/it]

[{'boxes': tensor([[ 71.5211, 102.8294, 124.2304, 146.3004],
        [ 66.8549,  45.0655, 122.4076,  85.4283],
        [ 69.0748, 104.1112, 121.6414, 143.2608],
        [ 68.9270,  45.5031, 120.1625,  84.6627],
        [ 71.1585, 105.0290, 124.4092, 142.5744],
        [ 65.4930,  46.0360, 123.1241,  78.3038]]), 'labels': tensor([2, 2, 1, 3, 3, 1]), 'scores': tensor([0.8438, 0.7554, 0.0852, 0.0809, 0.0741, 0.0727])}, {'boxes': tensor([[ 98.9211,  82.3579, 169.2017, 138.0105],
        [102.4492,  25.3632, 172.1736,  80.3840],
        [144.6516, 189.0860, 160.0491, 201.9292],
        [100.2967,  82.5915, 170.0312, 134.7240],
        [102.5279,  25.7261, 168.0107,  79.6585],
        [102.4884,  23.9226, 173.9432,  78.5877],
        [104.2455,  86.9808, 172.3434, 134.2943],
        [144.3289, 188.6392, 160.7271, 202.1149],
        [144.3035, 189.2633, 160.4054, 201.9285]]), 'labels': tensor([2, 2, 2, 1, 1, 3, 3, 3, 1]), 'scores': tensor([0.8026, 0.5318, 0.3582, 0.1797, 0.1559, 0.1523, 0.106

 75%|███████▌  | 3/4 [01:55<00:32, 32.32s/it]

[{'boxes': tensor([[ 75.8293,  55.6901, 135.7912, 100.9667],
        [ 53.3596, 173.1680,  67.3543, 187.0435],
        [ 75.9458,  53.5332, 134.6452,  98.3761],
        [ 75.9107,  55.7821, 135.8678, 100.3620],
        [ 53.0001, 173.5527,  68.4938, 186.7958],
        [ 52.0939, 173.3099,  66.3393, 187.5287]]), 'labels': tensor([2, 2, 1, 3, 3, 1]), 'scores': tensor([0.8322, 0.5425, 0.1231, 0.1060, 0.0732, 0.0653])}, {'boxes': tensor([[ 81.5297, 103.5665, 110.6192, 130.2337],
        [ 83.0161,  78.7584, 113.1007, 104.5540],
        [116.7298, 142.0748, 129.2275, 152.5387]]), 'labels': tensor([2, 2, 2]), 'scores': tensor([0.6895, 0.3987, 0.1300])}, {'boxes': tensor([[ 69.3601,  98.4286, 113.8259, 137.3248],
        [ 67.7224,  99.2146, 115.5463, 136.7607],
        [ 67.1983, 100.6716, 114.5547, 136.9014]]), 'labels': tensor([2, 3, 1]), 'scores': tensor([0.7663, 0.0967, 0.0847])}, {'boxes': tensor([[ 41.1358,  58.0124, 136.0508, 164.0060],
        [ 47.4868,  77.5061, 139.4947, 165.9083]

100%|██████████| 4/4 [02:15<00:00, 33.94s/it]

[{'boxes': tensor([[ 55.9073,  50.4160,  96.0456,  86.4144],
        [ 55.6023,  55.0621,  97.3359,  83.3473],
        [ 56.8281,  55.2020,  94.9388,  83.4589],
        [ 51.6270,  43.6359, 103.2116,  90.0712]]), 'labels': tensor([2, 3, 1, 3]), 'scores': tensor([0.6582, 0.1490, 0.0924, 0.0647])}, {'boxes': tensor([[ 39.9795,  39.9993, 123.2418, 109.5949],
        [ 39.8414, 113.3260, 125.5741, 179.3071],
        [ 45.7382, 108.2910, 124.0969, 183.4011],
        [ 41.7740, 122.5984, 123.7413, 187.1884],
        [ 36.0789,  43.8187, 135.4319, 108.6702],
        [ 35.5995,  41.5117, 148.7642, 107.3231],
        [ 33.4778,  89.0732, 132.4067, 207.7948]]), 'labels': tensor([2, 3, 2, 1, 1, 3, 3]), 'scores': tensor([0.7587, 0.3037, 0.2950, 0.2251, 0.1606, 0.0996, 0.0733])}, {'boxes': tensor([[ 90.3332,  56.1623, 106.0255,  70.1477]]), 'labels': tensor([2]), 'scores': tensor([0.7980])}, {'boxes': tensor([[ 70.2082,  61.9220, 152.7478, 134.5577],
        [ 68.0554, 166.6559, 157.3066, 224.0000]




{'map': tensor(0.1419), 'map_50': tensor(0.2802), 'map_75': tensor(0.1096), 'map_small': tensor(0.1270), 'map_medium': tensor(0.2064), 'map_large': tensor(0.2269), 'mar_1': tensor(0.2012), 'mar_10': tensor(0.2863), 'mar_100': tensor(0.2872), 'mar_small': tensor(0.2513), 'mar_medium': tensor(0.3907), 'mar_large': tensor(0.4667), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}

Epoch 4


  0%|          | 0/15 [00:00<?, ?it/s]

## One-Stage

In [17]:
# Might need to be run on linux/WSL
!python3 yolov5/train.py --batch 10 --epochs 30 --data trafficsigns.yaml
# python yolov5/train.py --batch -1 --epochs 3 --data trafficsigns.yaml --workers 0

SyntaxError: invalid syntax (3741266108.py, line 1)

In [None]:
# Detection
!python3 yolov5/detect.py --weights yolov5/runs/train/exp22/weights/best.pt --img 640 --conf 0.25 --source dataset/images/train/road2.png

In [8]:
# yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s', channels=3, classes=4, autoshape=False)
# yolo_model.to(device)

# yolo = {
#     "model": yolo_model,
#     "name": 'yolov5s',
#     "num_epochs": 10,
#     "loss": nn.CrossEntropyLoss(),  # already includes the Softmax activation
#     "optimizer": torch.optim.SGD(yolo_model.parameters(), lr=1e-3)
# }

# print(yolo_model)

    exitcode = _main(fd, parent_sentinel)
  File "c:\Program Files\Python310\lib\multiprocessing\spawn.py", line 125, in _main
    prepare(preparation_data)
  File "c:\Program Files\Python310\lib\multiprocessing\spawn.py", line 236, in prepare
    _fixup_main_from_path(data['init_main_from_path'])
  File "c:\Program Files\Python310\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
    main_content = runpy.run_path(main_path,
  File "c:\Program Files\Python310\lib\runpy.py", line 269, in run_path
    return _run_module_code(code, init_globals, run_name,
  File "c:\Program Files\Python310\lib\runpy.py", line 96, in _run_module_code
    _run_code(code, mod_globals, init_globals,
  File "c:\Program Files\Python310\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "c:\Users\Ricardo\Desktop\VC2022\Part2\yolov5\train.py", line 26, in <module>
    import torch
  File "C:\Users\Ricardo\AppData\Roaming\Python\Python310\site-packages\torch\__init__.py", line 