

This notebook shows the recipe for training YOLOv3 with a MiDaS backbone.



## Mounting Google Drive

In [None]:
# Mount Google Drive at /content/drive (Colab only); later cells copy files
# from /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Cloning the MiDaS repository

In [None]:
! git clone https://github.com/hs0805/MiDaS.git

Cloning into 'MiDaS'...
remote: Enumerating objects: 250, done.[K
remote: Counting objects: 100% (250/250), done.[K
remote: Compressing objects: 100% (173/173), done.[K
remote: Total 394 (delta 58), reused 201 (delta 37), pack-reused 144[K
Receiving objects: 100% (394/394), 231.02 KiB | 460.00 KiB/s, done.
Resolving deltas: 100% (139/139), done.


In [None]:
cd /content/MiDaS/

/content/MiDaS


In [None]:
# Downloading the pretrained weights for midas
!wget https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-f6b98070.pt

--2020-12-06 16:56:20--  https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-f6b98070.pt
Resolving github.com (github.com)... 13.114.40.48
Connecting to github.com (github.com)|13.114.40.48|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github-production-release-asset-2e65be.s3.amazonaws.com/193518067/f6db3a00-236a-11eb-9db9-6689df01a8ba?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20201206%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20201206T165621Z&X-Amz-Expires=300&X-Amz-Signature=75e013143c884360055577d437e1253cf9d62e23b4016e8bb8ac2889cb070a90&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=193518067&response-content-disposition=attachment%3B%20filename%3Dmodel-f6b98070.pt&response-content-type=application%2Foctet-stream [following]
--2020-12-06 16:56:21--  https://github-production-release-asset-2e65be.s3.amazonaws.com/193518067/f6db3a00-236a-11eb-9db9-6689df01a8ba?X-Amz-Algorithm=AWS4-HMAC-SHA256&

In [None]:
import os
import glob
import torch
import utils
import cv2

from torchvision.transforms import Compose
import torchvision.transforms as transforms
from midas.midas_net import MidasNet
from midas.transforms import Resize, NormalizeImage, PrepareForNet


In [None]:
from torchsummary import summary

In [None]:
# input_path / output_path are not referenced by any other cell visible in this notebook.
input_path = "ConstructionPPE/input_images"
output_path = "output"
# MODEL_PATH = "model.pt"
# NOTE(review): model_path is None, so the model-f6b98070.pt checkpoint fetched with wget
# earlier is not handed to MidasNet — confirm this is intended.
model_path = None

# Loading Midas model from the midas directory as backbone


In [None]:
# Build MidasNet on the GPU when one is available, otherwise on the CPU.
# model_path is None here; the recorded cell output reports "Loading weights:  None".
model = MidasNet(model_path, non_negative=True).to('cuda' if torch.cuda.is_available() else 'cpu')

Loading weights:  None


Downloading: "https://github.com/facebookresearch/WSL-Images/archive/master.zip" to /root/.cache/torch/hub/master.zip
Downloading: "https://download.pytorch.org/models/ig_resnext101_32x8-c38310e5.pth" to /root/.cache/torch/hub/checkpoints/ig_resnext101_32x8-c38310e5.pth


HBox(children=(FloatProgress(value=0.0, max=356056638.0), HTML(value='')))




In [None]:
# print(model.pretrained.layer1, (3, 416, 416))

In [None]:
# print(summary(model.pretrained.layer1, (3, 416, 416)))

In [None]:
# print(summary(model.pretrained.layer2, (256, 104, 104)))

In [None]:
# print(summary(model.pretrained.layer3, (512, 52, 52)))

In [None]:
# print(summary(model.pretrained.layer4, (1024, 26, 26)))

In [None]:
# print(model.scratch.layer1_rn)

In [None]:
# print(model.scratch.layer2_rn)

In [None]:
# print(model.scratch.layer3_rn)

In [None]:
# print(model.scratch.layer4_rn)

# YOLO layers on top of MidasNet

In [None]:
import torch
import torch.nn as nn
from collections import OrderedDict

class YOLO(nn.Module):
    """YOLOv3 detection heads stacked on a four-stage backbone.

    The backbone must expose ``layer1`` .. ``layer4``; they are applied in
    sequence and the outputs of ``layer4``, ``layer3`` and ``layer2`` feed the
    three detection branches. The heads are sized for 2048, 1024 and 512
    input channels respectively.

    Args:
        config: dict with ``config["yolo"]["anchors"]`` (one anchor list per
            branch) and ``config["yolo"]["classes"]``.
        backbone: module providing ``layer1`` .. ``layer4``.
        neck: accepted for interface compatibility; not stored or used.
        is_training: value assigned to ``self.training``. This sets the flag
            on this module only; submodules keep whatever mode they already
            have (use ``train()`` / ``eval()`` to switch them).
    """

    def __init__(self, config, backbone, neck, is_training=True):
        super(YOLO, self).__init__()
        self.config = config
        self.training = is_training
        self.backbone = backbone

        yolo_cfg = config["yolo"]
        # Channel counts the heads expect from backbone layer1..layer4.
        stage_channels = [256, 512, 1024, 2048]

        def head_width(branch):
            # One (x, y, w, h, conf) + class-score vector per anchor.
            return len(yolo_cfg["anchors"][branch]) * (5 + yolo_cfg["classes"])

        # Branch 0: fed directly by the last backbone stage.
        self.embedding0 = self._make_embedding([512, 2048], stage_channels[3], head_width(0))

        # Branch 1: branch-0 features (reduced + upsampled) concatenated with layer3.
        self.embedding1_cbl = self._make_cbl(512, 256, 1)
        self.embedding1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.embedding1 = self._make_embedding([256, 1024], stage_channels[2] + 256, head_width(1))

        # Branch 2: branch-1 features (reduced + upsampled) concatenated with layer2.
        self.embedding2_cbl = self._make_cbl(256, 128, 1)
        self.embedding2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.embedding2 = self._make_embedding([128, 512], stage_channels[1] + 128, head_width(2))

    def _make_cbl(self, _in, _out, ks):
        """Return a Conv2d -> BatchNorm2d -> LeakyReLU(0.1) block with 'same' padding."""
        padding = (ks - 1) // 2 if ks else 0
        layers = OrderedDict()
        layers["conv"] = nn.Conv2d(_in, _out, kernel_size=ks, stride=1, padding=padding, bias=False)
        layers["bn"] = nn.BatchNorm2d(_out)
        layers["relu"] = nn.LeakyReLU(0.1)
        return nn.Sequential(layers)

    def _make_embedding(self, filters_list, in_filters, out_filter):
        """Return six alternating 1x1 / 3x3 conv blocks followed by a 1x1 ``conv_out``."""
        narrow, wide = filters_list[0], filters_list[1]
        plan = [
            (in_filters, narrow, 1),
            (narrow, wide, 3),
            (wide, narrow, 1),
            (narrow, wide, 3),
            (wide, narrow, 1),
            (narrow, wide, 3),
        ]
        stack = nn.ModuleList([self._make_cbl(c_in, c_out, k) for c_in, c_out, k in plan])
        stack.add_module("conv_out", nn.Conv2d(wide, out_filter, kernel_size=1,
                                               stride=1, padding=0, bias=True))
        return stack

    @staticmethod
    def _run_branch(stack, features):
        """Run ``stack`` and also return the activation after its fifth module."""
        modules = list(stack)
        for module in modules[:5]:
            features = module(features)
        tap = features
        for module in modules[5:]:
            features = module(features)
        return features, tap

    def forward(self, x):
        """Return the raw outputs ``(out0, out1, out2)`` of the three detection branches."""
        stem = self.backbone.layer1(x)
        fine = self.backbone.layer2(stem)
        mid = self.backbone.layer3(fine)
        coarse = self.backbone.layer4(mid)

        # Branch 0
        out0, tap0 = self._run_branch(self.embedding0, coarse)

        # Branch 1
        merged1 = self.embedding1_upsample(self.embedding1_cbl(tap0))
        merged1 = torch.cat([merged1, mid], 1)
        out1, tap1 = self._run_branch(self.embedding1, merged1)

        # Branch 2
        merged2 = self.embedding2_upsample(self.embedding2_cbl(tap1))
        merged2 = torch.cat([merged2, fine], 1)
        out2, _ = self._run_branch(self.embedding2, merged2)

        return out0, out1, out2

In [None]:
# _out_filters = [256, 512, 1024, 2048]

# Configuration dictionary for hyperparameters

In [None]:
# Single dictionary holding the model, optimisation and run settings read by later cells.
config = {
    "yolo": {
        # One list of three (width, height) anchors per detection branch.
        "anchors": [
            [[116, 90], [156, 198], [373, 326]],
            [[30, 61], [62, 45], [59, 119]],
            [[10, 13], [16, 30], [33, 23]],
        ],
        "classes": 4,
    },
    "lr": {
        "backbone_lr": 1e-06,
        "other_lr": 1e-05,
        "freeze_backbone": True,  # freeze backbone weights to fine-tune
        "decay_gamma": 0.1,
        "decay_step": 20,  # decay lr every `decay_step` epochs
    },
    "optimizer": {
        "type": "sgd",
        "weight_decay": 4e-05,
    },
    "batch_size": 4,
    "train_path": "../data/coco/trainvalno5k.txt",
    "epochs": 100,
    "img_h": 416,
    "img_w": 416,
    "parallels": [0],  # config GPU device
    "working_dir": "YOUR_WORKING_DIR",  # replace with your working dir
    "pretrain_snapshot": "",  # load checkpoint
    "evaluate_type": "",
    "try": 0,
    "export_onnx": False,
}

# Loading the YOLO detector that sits on top of the MiDaS backbone

In [None]:
# Wrap the MiDaS encoder (model.pretrained) with the YOLO heads and move the result to the
# GPU when available. model.scratch is passed as `neck`, which YOLO.__init__ accepts but
# does not store or use. is_training is passed as False, which only sets the flag on the
# top-level module.
detector = YOLO(config, model.pretrained, model.scratch, False).to('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# print(detector.modules)
! pwd

/content/MiDaS


In [None]:
# print(summary(detector, (3, 416, 416)))

In [None]:
! gdown --id 1SnFAlSvsx37J7MDNs3WWLgeKY0iknikP

Downloading...
From: https://drive.google.com/uc?id=1SnFAlSvsx37J7MDNs3WWLgeKY0iknikP
To: /content/MiDaS/official_yolov3_weights_pytorch.pth
248MB [00:02, 94.4MB/s]


In [None]:
# print(detector)

In [None]:
# for name, param in detector.embedding0.named_parameters():
#     if param.requires_grad:
#         print (name, param.data)

In [None]:
# detector = nn.DataParallel(detector)

# Copying the models and files needed to train the YOLO layers

In [None]:
# Copy the previously saved checkpoint, the class-name list and the training file list
# from Google Drive onto the Colab VM.
!cp /content/drive/MyDrive/TrainingYolo/saved_model.pth /content/MiDaS
!cp /content/drive/MyDrive/TrainingYolo/classes.txt /content/
!cp /content/drive/MyDrive/TrainingYolo/train_file.txt /content/
# Read the checkpoint, mapping its tensors to the GPU when available and to the CPU otherwise.
state_dict = torch.load('/content/MiDaS/saved_model.pth', map_location = torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

In [None]:
# Load the previously saved weights into the detector.
# strict=False tolerates missing/unexpected keys instead of raising; the returned value
# lists them (the recorded output for this cell shows every key matched).
detector.load_state_dict(state_dict, strict = False)

<All keys matched successfully>

In [None]:
# ! pip3 install torchviz

In [None]:
% cd /content/MiDaS/

/content/MiDaS


In [None]:
! git clone https://github.com/BobLiu20/YOLOv3_PyTorch.git

fatal: destination path 'YOLOv3_PyTorch' already exists and is not an empty directory.


In [None]:
% cd YOLOv3_PyTorch/

/content/MiDaS/YOLOv3_PyTorch


In [None]:
from nets.yolo_loss import YOLOLoss
from common.coco_dataset import COCODataset

In [None]:
# One YOLOLoss per detection branch, each built from that branch's anchors,
# the class count and a fixed (416, 416) image size.
yolo_losses = [
    YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"], (416, 416))
    for i in range(3)
]

In [None]:
% cd /content/
! git clone https://github.com/theschoolofai/YoloV3.git
% cd /content/MiDaS/YOLOv3_PyTorch/

/content
fatal: destination path 'YoloV3' already exists and is not an empty directory.
/content/MiDaS/YOLOv3_PyTorch


In [None]:
! cp -r /content/YoloV3/data/smalcoco/ /content/MiDaS/YOLOv3_PyTorch/data/

In [None]:
% cd /content/MiDaS/YOLOv3_PyTorch/

/content/MiDaS/YOLOv3_PyTorch


In [None]:
# DataLoader
# MiDaS/YOLOv3_PyTorch/data/custom/
# Batch size and input resolution are read from `config` so they cannot drift from the
# values the training loop and the inference cell use. The worker count is capped at the
# number of CPUs the runtime reports, since asking for more workers than cores makes
# DataLoader emit a warning and can slow loading down.
dataloader = torch.utils.data.DataLoader(
    COCODataset("/content/train_file.txt",
                (config["img_w"], config["img_h"]),
                is_training=True),
    batch_size=config["batch_size"],
    shuffle=True,
    num_workers=min(16, os.cpu_count() or 1),
    pin_memory=True,
)

In [None]:
# ! cp -r /content/drive/MyDrive/yolo_data /content/MiDaS/YOLOv3_PyTorch/data/

In [None]:
# dataloader = torch.utils.data.DataLoader(LoadImagesAndLabels("./data/yolo_data/customdata/train.txt"))

In [None]:
sample = next(iter(dataloader))

In [None]:
 #print(sample[1].shape)
# print(sample[1].shape)

In [None]:
import logging
import torch.optim as optim

In [None]:
import time
def _get_optimizer(config, net):
    """Build the optimizer for ``net`` with separate learning rates for backbone and heads.

    Args:
        config: dict providing ``config["lr"]["other_lr"]``,
            ``config["lr"]["backbone_lr"]``, ``config["lr"]["freeze_backbone"]``,
            ``config["optimizer"]["type"]`` and ``config["optimizer"]["weight_decay"]``.
        net: module with a ``backbone`` attribute; every parameter that does not belong
            to ``net.backbone`` is treated as a head parameter.

    Returns:
        ``optim.Adam`` for type ``"adam"`` / ``"amsgrad"``, ``optim.RMSprop`` for
        ``"rmsprop"``, otherwise ``optim.SGD`` with momentum 0.9 (Nesterov when the
        type is ``"nesterov"``).

    Side effects:
        When ``freeze_backbone`` is true, ``requires_grad`` is set to False on every
        backbone parameter. Backbone parameters are left out of the optimizer in that
        mode, so they are never updated; disabling their gradients as well avoids
        computing gradients that would only pile up, because ``optimizer.zero_grad()``
        does not reset parameters the optimizer does not own.
    """
    # A set makes the membership test O(1) per parameter.
    backbone_ids = {id(p) for p in net.backbone.parameters()}
    head_params = [p for p in net.parameters() if id(p) not in backbone_ids]

    if not config["lr"]["freeze_backbone"]:
        # Assign a different lr to the heads and to the backbone.
        params = [
            {"params": head_params, "lr": config["lr"]["other_lr"]},
            {"params": list(net.backbone.parameters()), "lr": config["lr"]["backbone_lr"]},
        ]
    else:
        logging.info("freeze backbone's parameters.")
        for p in net.backbone.parameters():
            p.requires_grad = False
        params = [
            {"params": head_params, "lr": config["lr"]["other_lr"]},
        ]

    # Initialize optimizer class
    optimizer_type = config["optimizer"]["type"]
    weight_decay = config["optimizer"]["weight_decay"]
    if optimizer_type == "adam":
        optimizer = optim.Adam(params, weight_decay=weight_decay)
    elif optimizer_type == "amsgrad":
        optimizer = optim.Adam(params, weight_decay=weight_decay, amsgrad=True)
    elif optimizer_type == "rmsprop":
        optimizer = optim.RMSprop(params, weight_decay=weight_decay)
    else:
        # Default to sgd
        logging.info("Using SGD optimizer.")
        optimizer = optim.SGD(params, momentum=0.9,
                              weight_decay=weight_decay,
                              nesterov=(optimizer_type == "nesterov"))

    return optimizer


In [None]:
# Build the optimizer for the detector, then attach a StepLR schedule that multiplies the
# learning rate by config["lr"]["decay_gamma"] every config["lr"]["decay_step"] calls to
# lr_scheduler.step().
optimizer = _get_optimizer(config, detector)
lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=config["lr"]["decay_step"],
    gamma=config["lr"]["decay_gamma"])

In [None]:
def _save_checkpoint(state_dict, checkpoint_path="/content/MiDaS/saved_model.pth"):
    """Serialize ``state_dict`` to ``checkpoint_path`` with ``torch.save``.

    Args:
        state_dict: object to save (the notebook passes ``detector.state_dict()``).
        checkpoint_path: destination file. Defaults to the path the notebook has always
            used, so existing one-argument calls behave as before.
    """
    parent = os.path.dirname(checkpoint_path)
    if parent:
        # Create the target folder first so a fresh runtime does not fail on the save.
        os.makedirs(parent, exist_ok=True)
    torch.save(state_dict, checkpoint_path)

In [None]:
# Training loop.
# NOTE(review): the epoch count is hard-coded to 80 while config["epochs"] is 100 — confirm
# which one is intended.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Put every submodule in training mode explicitly instead of relying on the state the
# modules happen to be in when this cell runs.
detector.train()

for epoch in range(80):
    print('epoch', epoch)
    total_loss = 0.0
    num_steps = 0
    for step, samples in enumerate(dataloader):
        images, labels = samples['image'], samples['label']
        # Only the images are moved to the device. `labels` is handed to the loss exactly
        # as the DataLoader produced it; that is what this loop has always done, because
        # the former device copy was bound to a misspelled name and never used.
        images = images.to(device, dtype=torch.float)
        start_time = time.time()

        # Forward and backward
        optimizer.zero_grad()
        outputs = detector(images)
        # The first value returned by each YOLOLoss call is the total loss of that branch;
        # the training objective is the sum over the three branches.
        loss = sum(yolo_losses[i](outputs[i], labels)[0] for i in range(3))
        loss.backward()
        optimizer.step()

        # Accumulate a plain float: adding the loss tensor itself would keep its autograd
        # history alive for the whole epoch.
        total_loss += loss.item()
        num_steps += 1

        if step > 0 and step % 10 == 0:
            _loss = loss.item()
            duration = float(time.time() - start_time)
            example_per_second = config["batch_size"] / duration
            lr = optimizer.param_groups[0]['lr']
            print(epoch, step, _loss, example_per_second, lr)
            logging.info(
                "epoch [%.3d] iter = %d loss = %.2f example/sec = %.3f lr = %.5f "%
                (epoch, step, _loss, example_per_second, lr)
            )

        # Periodic checkpoint; state_dict() does not depend on train/eval mode.
        if step > 0 and step % 860 == 0:
            _save_checkpoint(detector.state_dict())

    lr_scheduler.step()
    # Mean loss per batch over this epoch.
    print(total_loss / max(num_steps, 1))

_save_checkpoint(detector.state_dict())
logging.info("Bye~")

epoch 0
0 10 0.05949438735842705 8.996212169959033 1e-05
0 20 0.05867671221494675 9.503011957769719 1e-05
0 30 0.10129957646131516 9.1686491650117 1e-05
0 40 0.08739197999238968 8.671184562523516 1e-05
0 50 0.21570946276187897 5.783703246099145 1e-05
0 60 0.060023631900548935 8.0609195297369 1e-05
0 70 0.1729905605316162 8.983663404773697 1e-05
0 80 0.1837799847126007 9.052397691741149 1e-05
0 90 0.050009604543447495 8.717162201973482 1e-05
0 100 0.17276796698570251 8.607421567375024 1e-05
0 110 0.08713516592979431 7.64251858674673 1e-05
0 120 0.16882069408893585 8.922518099284005 1e-05
0 130 0.0669143795967102 9.400469877364586 1e-05
0 140 0.08970056474208832 8.787936215605209 1e-05
0 150 0.0590701587498188 8.002667362445044 1e-05
0 160 0.33522292971611023 9.58762705142404 1e-05
0 170 0.04849128797650337 9.706071293030833 1e-05
0 180 0.04558797553181648 9.274985944240234 1e-05
0 190 0.045447058975696564 9.505967996174318 1e-05
0 200 0.055329836905002594 9.101308733661172 1e-05
0 210 0

In [None]:
['./data/custom/images/image_3027.jpg', './data/custom/images/image_0522.jpg', './data/custom/images/image_1944.jpg', './data/custom/images/image_2820.jpg']

In [None]:
import numpy as np
import random

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.ticker import NullLocator

In [None]:
# Five evenly spaced colours from the 'tab20b' colormap; the inference cell samples
# from this list to colour the bounding boxes.
cmap = plt.get_cmap('tab20b')
colors = list(map(cmap, np.linspace(0, 1, 5)))
print(len(colors))

In [None]:
def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
    """Filter predictions by objectness and apply per-class non-maximum suppression.

    Args:
        prediction: tensor indexed as ``[image, box, field]`` whose fields are
            ``(center x, center y, width, height, object_conf, class scores...)``.
            The tensor is not modified.
        num_classes: number of class-score fields following ``object_conf``.
        conf_thres: boxes whose ``object_conf`` is below this value are dropped.
        nms_thres: within one class, a box is dropped when its IoU with an
            already-kept box is at or above this value.

    Returns:
        A list with one entry per image: ``None`` when no box survives the confidence
        filter, otherwise a tensor whose rows are
        ``(x1, y1, x2, y2, object_conf, class_score, class_pred)``, grouped by class in
        ascending class order and sorted by ``object_conf`` within each class.
    """
    # From (center x, center y, width, height) to (x1, y1, x2, y2), on a copy so the
    # caller's tensor keeps its original layout.
    boxes = prediction.clone()
    boxes[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    boxes[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    boxes[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    boxes[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2

    output = [None for _ in range(len(boxes))]
    for image_i, image_pred in enumerate(boxes):
        # Filter out confidence scores below threshold. The mask is already 1-D; squeezing
        # it would turn a single-row mask into a scalar and break the row selection.
        image_pred = image_pred[image_pred[:, 4] >= conf_thres]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Get score and class with highest confidence
        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)
        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
        # Iterate through all predicted classes (unique() returns them sorted)
        for c in detections[:, -1].unique():
            # Get the detections with the particular class
            detections_class = detections[detections[:, -1] == c]
            # Sort the detections by maximum objectness confidence
            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Perform non-maximum suppression
            max_detections = []
            while detections_class.size(0):
                # Get detection with highest confidence and save as max detection
                max_detections.append(detections_class[0].unsqueeze(0))
                # Stop if we're at the last detection
                if len(detections_class) == 1:
                    break
                # Get the IOUs for all boxes with lower confidence
                ious = bbox_iou(max_detections[-1], detections_class[1:])
                # Remove detections with IoU >= NMS threshold
                detections_class = detections_class[1:][ious < nms_thres]

            max_detections = torch.cat(max_detections).detach()
            # Add max detections to outputs
            if output[image_i] is None:
                output[image_i] = max_detections
            else:
                output[image_i] = torch.cat((output[image_i], max_detections))

    return output

def bbox_iou(box1, box2, x1y1x2y2=True):
    """Return the IoU of the boxes in ``box1`` with the boxes in ``box2``.

    Args:
        box1, box2: 2-D tensors with one box per row; only the first four columns are
            read. The rows must be broadcastable against each other.
        x1y1x2y2: when True the columns are corner coordinates ``(x1, y1, x2, y2)``;
            when False they are ``(center x, center y, width, height)``.

    Returns:
        Tensor of IoU values. Widths and heights are computed with a ``+ 1`` term
        (inclusive pixel-coordinate convention).
    """
    if not x1y1x2y2:
        # Transform from center and width to exact coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)
    # Intersection area (clamped so disjoint boxes contribute zero)
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * \
                 torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)
    # Union Area
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    # The small epsilon guards against division by zero.
    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)

    return iou

In [None]:
# YOLO loss with 3 scales. Below, each loss object is called without targets and its
# return value is treated as the decoded predictions of that branch.
yolo_losses = []
for i in range(3):
    yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i],
                                config["yolo"]["classes"], (config["img_w"], config["img_h"])))

# prepare images path

#images_name = os.listdir('/content/MiDaS/YOLOv3_PyTorch/data/yolo_data/customdata/images/')
images_dir = '/content/drive/MyDrive/Datasets/Construction PPE Kit/Annotated Images/images/'
images_name = os.listdir(images_dir)
images_path = [os.path.join(images_dir, name) for name in images_name]
if len(images_path) == 0:
    # Report the directory that was actually listed (config has no "images_path" key).
    raise Exception("no image found in {}".format(images_dir))

# Read the class names once. splitlines() keeps the last name even when the file does not
# end with a newline.
with open('/content/classes.txt', "r") as classes_file:
    classes = classes_file.read().splitlines()
os.makedirs("./output/", exist_ok=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Switch BatchNorm (and any dropout) to inference behaviour. The model stays in eval mode
# afterwards; call detector.train() before resuming training.
detector.eval()

# Start inference
batch_size = config["batch_size"]
for step in range(0, len(images_path), batch_size):
    # preprocess
    images = []
    images_origin = []
    # `step` already advances by batch_size, so it is the start index of this batch.
    for path in images_path[step:step + batch_size]:
        logging.info("processing: {}".format(path))
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            logging.error("read path error: {}. skip it.".format(path))
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        images_origin.append(image)  # keep for save result
        image = cv2.resize(image, (config["img_w"], config["img_h"]),
                           interpolation=cv2.INTER_LINEAR)
        image = image.astype(np.float32)
        image /= 255.0
        image = np.transpose(image, (2, 0, 1))
        images.append(image)
    # Nothing readable in this batch => go to the next one.
    if not images:
        continue
    images = torch.from_numpy(np.asarray(images)).to(device)

    # inference
    with torch.no_grad():
        outputs = detector(images)
        output = torch.cat([yolo_losses[i](outputs[i]) for i in range(3)], 1)
        batch_detections = non_max_suppression(output, config["yolo"]["classes"],
                                               conf_thres=0.45,
                                               nms_thres=0.5)

    # write result images. Draw bounding boxes and labels of detections
    for idx, detections in enumerate(batch_detections):
        fig, ax = plt.subplots(1)
        ax.imshow(images_origin[idx])
        if detections is not None:
            # Plain Python floats are easier for matplotlib to handle than (GPU) tensors.
            rows = detections.cpu().tolist()
            unique_labels = sorted({int(row[-1]) for row in rows})
            bbox_colors = dict(zip(unique_labels, random.sample(colors, len(unique_labels))))
            # Rescale coordinates from the network input size to the original image size.
            ori_h, ori_w = images_origin[idx].shape[:2]
            pre_h, pre_w = config["img_h"], config["img_w"]
            for x1, y1, x2, y2, _conf, _cls_conf, cls_pred in rows:
                cls_idx = int(cls_pred)
                color = bbox_colors[cls_idx]
                box_h = ((y2 - y1) / pre_h) * ori_h
                box_w = ((x2 - x1) / pre_w) * ori_w
                y1 = (y1 / pre_h) * ori_h
                x1 = (x1 / pre_w) * ori_w
                # Create a Rectangle patch
                bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2,
                                         edgecolor=color,
                                         facecolor='none')
                # Add the bbox to the plot
                ax.add_patch(bbox)
                # Add label
                ax.text(x1, y1, s=classes[cls_idx], color='white',
                        verticalalignment='top',
                        bbox={'color': color, 'pad': 0})
        # Save generated image with detections
        ax.axis('off')
        ax.xaxis.set_major_locator(NullLocator())
        ax.yaxis.set_major_locator(NullLocator())
        fig.savefig('output/{}_{}.jpg'.format(step, idx), bbox_inches='tight', pad_inches=0.0)
        plt.close(fig)
logging.info("Save all results to ./output/")


In [None]:
# ! rm -rf /content/MiDaS/YOLOv3_PyTorch/common/data/