# 物件偵測專班

# YOLOv4 預測教學

## 匯入所需套件

In [1]:
import argparse
import os
import platform
import shutil
import numpy as np
import pandas as pd
import time
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import (
    check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, plot_one_box, strip_optimizer)
from utils.torch_utils import select_device, load_classifier, time_synchronized

In [2]:
# Expose only GPU 0 to CUDA; this is set before the availability query below.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## 參數設置

In [3]:
# Detector checkpoint; handed to attempt_load() when the model is created
weights = 'runs/exp11_yolov4-csp/weights/best_yolov4-csp.pt'
# Location of the test images; handed to LoadImages()
source = '../person_reid_datasets/test/'
# Sample submission CSV; read with pandas and used as the template for the result table
sub = '../person_reid_datasets/sample_submission.csv'
# Output folder; it is deleted and recreated in the next cell
out = 'inference/output'
# Requested inference image size; later passed through check_img_size()
imgsz = 640
# Second and third positional arguments of non_max_suppression()
# (by name: confidence threshold and IoU threshold)
conf_thres = 0.25
iou_thres = 0.5

In [4]:
# Create the output folder, discarding anything left over from a previous run
output_dir = Path(out)
if output_dir.exists():
    shutil.rmtree(output_dir)  # remove the stale output folder and its contents
output_dir.mkdir(parents=True)  # recreate it (including missing parents), empty

In [5]:
# Load the detector checkpoint onto `device` (FP32 model)
model = attempt_load(weights, map_location=device)
# Check the requested image size against the model's largest stride value
max_stride = model.stride.max()
imgsz = check_img_size(imgsz, s=max_stride)

Fusing layers... Model Summary: 235 layers, 5.24706e+07 parameters, 5.04494e+07 gradients


In [6]:
import torch.nn as nn
from torchvision import models

# Defines the new fc layer and classification layer
# |--Linear--|--bn--|--relu--|--dropout--|--Linear--|
class ClassBlock(nn.Module):
    """Feature bottleneck followed by a linear classifier.

    ``add_block`` is assembled, in this order, from the optional stages
    ``nn.Linear(input_dim, linear)`` (only when ``linear > 0``),
    ``nn.BatchNorm1d``, ``nn.LeakyReLU(0.1)`` and ``nn.Dropout``.
    ``classifier`` is a single ``nn.Linear`` that maps the bottleneck
    features to ``class_num`` outputs.

    Args:
        input_dim: Size of the incoming feature vector.
        class_num: Number of outputs of the classifier layer.
        droprate: Dropout probability; dropout is added only when > 0.
        relu: Add a ``LeakyReLU(0.1)`` stage when true.
        bnorm: Add a ``BatchNorm1d`` stage when true.
        linear: Bottleneck width; when not > 0 the first Linear is skipped
            and the bottleneck width equals ``input_dim``.
        return_f: When true, ``forward`` also returns the bottleneck features.
    """

    def __init__(self, input_dim, class_num, droprate, relu=False, bnorm=True, linear=512, return_f=False):
        super().__init__()
        self.return_f = return_f

        # Width of the features that reach the classifier
        feat_dim = linear if linear > 0 else input_dim

        stages = [nn.Linear(input_dim, feat_dim)] if linear > 0 else []
        if bnorm:
            stages.append(nn.BatchNorm1d(feat_dim))
        if relu:
            stages.append(nn.LeakyReLU(0.1))
        if droprate > 0:
            stages.append(nn.Dropout(p=droprate))

        # Attribute names and registration order are kept as-is so existing
        # checkpoints / pickled models keep matching this class.
        self.add_block = nn.Sequential(*stages)
        self.classifier = nn.Sequential(nn.Linear(feat_dim, class_num))

    def forward(self, x):
        """Return the class scores, or ``[scores, features]`` when ``return_f`` is set."""
        features = self.add_block(x)
        scores = self.classifier(features)
        return [scores, features] if self.return_f else scores
        
class ft_net(nn.Module):
    """Pretrained ResNet-50 trunk with a ClassBlock head on top.

    Args:
        class_num: Number of outputs of the ClassBlock classifier.
        droprate: Dropout probability forwarded to ClassBlock.
        circle: Forwarded to ClassBlock as ``return_f``; when true the
            forward pass returns ``[scores, features]`` instead of scores only.
        linear_num: Bottleneck width forwarded to ClassBlock as ``linear``.
    """

    def __init__(self, class_num=100, droprate=0.5, circle=False, linear_num=512):
        super().__init__()
        resnet = models.resnet50(pretrained=True)
        # Keep every top-level child of the ResNet except the last one
        # (the final `fc` layer in torchvision's ResNet).
        trunk_layers = list(resnet.children())[:-1]
        self.model = nn.Sequential(*trunk_layers)
        self.classifier = ClassBlock(2048, class_num, droprate, linear=linear_num, return_f=circle)

    def forward(self, x):
        """Run the trunk, reshape its output to (batch, channels), then apply the head."""
        pooled = self.model(x)
        batch, channels = pooled.size(0), pooled.size(1)
        flat = pooled.view(batch, channels)
        return self.classifier(flat)
    
# Load the fully pickled re-ID model. The ClassBlock / ft_net definitions above
# must already be in scope so the pickle can resolve its classes.
# map_location puts the weights on the same `device` the inference inputs are
# moved to; without it the model stays on whatever device it was saved from,
# which fails on CPU-only hosts or when the devices differ.
# NOTE(security): torch.load unpickles arbitrary objects - only load trusted files.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects fully pickled modules; pass weights_only=False there if loading fails.
reid_model = torch.load('../reid_model.pth', map_location=device)

## 預測影像

In [7]:
# Image loader over `source`, built with the checked image size; the inference
# loop unpacks each item it yields as (path, img, im0s, vid_cap).
dataset = LoadImages(source, img_size=imgsz)

In [8]:
# Identity label for each re-ID output index (index i -> class_names[i]).
# The order is significant and must not be changed: the predicted index is
# looked up directly in this list. (The labels are in string-sorted order.)
class_names = [
    '1', '111', '112', '116', '117', '122', '123', '124', '125', '126',
    '132', '133', '135', '148', '15', '155', '162', '163', '166', '173',
    '174', '175', '181', '182', '183', '184', '185', '188', '192', '193',
    '197', '205', '216', '221', '222', '225', '228', '230', '231', '232',
    '233', '235', '265', '273', '285', '296', '3', '302', '338', '339',
    '341', '344', '359', '516', '517', '543', '544', '545', '55', '607',
    '61', '618', '619', '621', '622', '623', '624', '626', '63', '652',
    '653', '655', '656', '659', '660', '662', '663', '664', '705', '712',
    '713', '714', '734', '735', '736', '74', '75', '754', '835', '84',
    '857', '87', '88', '880', '881', '882', '891', '894', '90', '98',
]
# One random 3-component colour per identity (for optional box drawing)
colors = [[random.randint(0, 255) for _channel in range(3)] for _name in class_names]

In [9]:
def _prepare_reid_input(crop):
    """Convert one image crop into the (1, 3, 224, 224) float32 tensor fed to reid_model.

    Assumes `crop` is an HxWx3 uint8 array in BGR channel order, as produced by
    OpenCV - TODO confirm against LoadImages.
    """
    # Reverse the channel axis and resize to the 224x224 re-ID input size
    resized = cv2.resize(crop[:, :, ::-1], (224, 224))
    # 0 - 255 to 0.0 - 1.0, then per-channel mean/std normalisation
    # (the constants match the widely used ImageNet statistics)
    normalised = ((resized / 255 - (0.485, 0.456, 0.406)) / (0.229, 0.224, 0.225)).astype('float32')
    # HWC -> CHW, add the batch axis, move to the inference device
    batch = np.expand_dims(normalised.transpose((2, 0, 1)), 0)
    return torch.from_numpy(batch).to(device)


submission = pd.read_csv(sub)
# Discard the first row of the sample file (presumably a placeholder entry).
# iloc is used so an empty sample file does not raise KeyError.
submission = submission.iloc[1:]

# Rows are collected here and turned into a DataFrame once at the end.
# Appending with `submission.loc[len(submission)] = ...` re-allocates the frame
# for every row and, whenever rows of the sample file remain, keeps overwriting
# the last surviving row because its label equals len(submission).
detection_rows = []

reid_model.eval()  # inference mode for the re-ID network; set once, not per box

# Run inference
with torch.no_grad():
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img)  # run once
    for path, img, im0s, vid_cap in dataset:
        basename_no_ext = os.path.splitext(os.path.basename(path))[0]

        h, w = im0s.shape[:2]  # original image size, used to normalise the boxes
        img = torch.from_numpy(img).to(device).float()  # uint8 to fp32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference, then NMS
        pred = model(img)[0]
        pred = non_max_suppression(pred, conf_thres, iou_thres)

        # Process detections
        for det in pred:  # detections per image
            im0 = im0s
            # Only needed by the optional cv2.imwrite call at the end of this loop body
            save_path = str(Path(out) / Path(path).name)
            s = '%gx%g ' % img.shape[2:]  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g' % n  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    x1, y1, x2, y2 = (int(v) for v in xyxy)
                    crop_img = im0[y1:y2, x1:x2]
                    if crop_img.size == 0:
                        # A zero-area box (possible after rounding) cannot be
                        # resized by cv2.resize; skip it instead of crashing.
                        continue
                    # reid_model is expected to return a pair whose first item
                    # holds the class scores; the second item is unused here.
                    outputs, _ = reid_model(_prepare_reid_input(crop_img))
                    _, preds = torch.max(outputs, 1)
                    reid = class_names[int(preds[0])]
                    # Box corners are stored relative to the original image size
                    detection_rows.append([basename_no_ext,
                                           reid,
                                           float(conf),
                                           x1 / w,
                                           y1 / h,
                                           x2 / w,
                                           y2 / h])
                    # Optional visualisation (disabled):
                    # plot_one_box(xyxy, im0, label=reid, color=colors[class_names.index(reid)], line_thickness=1)
            print(s)
            # Optional: save the annotated image (disabled, so nothing is
            # written to `out` at the moment):
            # cv2.imwrite(save_path, im0)
    print('Results saved to %s' % Path(out))
    print('Done. (%.3fs)' % (time.time() - t0))

# Assemble the result table in one step, keeping the sample file's columns
new_rows = pd.DataFrame(detection_rows, columns=submission.columns)
if len(submission):
    submission = pd.concat([submission, new_rows], ignore_index=True)
else:
    submission = new_rows

image 1/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_01.jpg: 384x640 5
image 2/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_02.jpg: 384x640 3
image 3/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_03.jpg: 384x640 4
image 4/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_04.jpg: 384x640 4
image 5/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_05.jpg: 384x640 2
image 6/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_06.jpg: 384x640 4
image 7/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_07.jpg: 384x640 5
image 8/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_08.jpg: 384x640 4
image 9/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_09.jpg: 384x640 8
image 10/153 /home/jovyan/object_detection_project/person_reid_datasets/test/c1_10.jpg: 384x640 3
image 11/153 /home/jovyan/obj

In [10]:
# Write the collected predictions to submission.csv, without the DataFrame index column
submission.to_csv('submission.csv', index=False)