# YOLOR + ReID 預測

In [1]:
import sys
# Add the local 'yolor' directory to the module search path; the
# `utils.*` / `models.*` imports in the next cell presumably resolve against it.
sys.path.append('yolor')

## 匯入所需套件

In [2]:
import os
import shutil
import time
import yaml
import pandas as pd
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
import matplotlib.pyplot as plt

from utils.google_utils import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import (
    check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, strip_optimizer)
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized

from models.models import *
from utils.datasets import *
from utils.general import *
%matplotlib inline  

In [3]:
# Restrict CUDA to the device with index 1.
# NOTE(review): this runs after `import torch`; it presumably only takes effect
# if CUDA has not been initialised yet, and on a single-GPU host it may hide
# every GPU and silently fall through to the CPU branch below — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# Use the GPU when one is available, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## 參數設置

In [4]:
# Detector checkpoint; its 'model' entry is loaded into the Darknet network.
weights = 'weights/yolor_csp.pt'
# Directory of test images handed to LoadImages.
source = 'person_reid_datasets/test/'
# Sample submission CSV; read with pandas to obtain the submission table.
sub = 'person_reid_datasets/sample_submission.csv'
# Network definition passed to Darknet.
cfg = 'cfg/yolor_csp_pedestrian.cfg'
# Dataset description file (not referenced by the code visible in this notebook).
data = 'data/pedestrian.yaml'
# Image size passed to Darknet, LoadImages and the warm-up tensor.
imgsz = 640
# Confidence threshold passed to non_max_suppression.
conf_thres = 0.25
# IoU threshold passed to non_max_suppression.
iou_thres = 0.5

## 將訓練好的偵測模型權重和 ReID 模型權重讀入到各自的模型中

In [5]:
# Build the detector from its cfg file, move it to the target device and
# switch it to inference mode.
model = Darknet(cfg, imgsz)
model = model.to(device)
model = model.eval()

# Restore the trained weights stored under the checkpoint's 'model' key.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
detector_checkpoint = torch.load(weights, map_location=device)
model.load_state_dict(detector_checkpoint['model'])

<All keys matched successfully>

In [7]:
import torch.nn as nn
from torchvision import models

# Defines the new fc layer and classification layer
# |--Linear--|--bn--|--relu--|--Linear--|
class ClassBlock(nn.Module):
    """Optional embedding head followed by a linear classifier.

    ``add_block`` is assembled, in this order, from an optional
    ``Linear(input_dim, linear)`` (when ``linear > 0``), an optional
    ``BatchNorm1d``, an optional ``LeakyReLU(0.1)`` and an optional
    ``Dropout(droprate)`` (when ``droprate > 0``).  ``classifier`` is a single
    ``Linear`` mapping the resulting feature width to ``class_num`` outputs.

    Args:
        input_dim: Width of the incoming feature vector.
        class_num: Number of classifier outputs.
        droprate: Dropout probability; no dropout layer is added when <= 0.
        relu: Add the LeakyReLU activation when true.
        bnorm: Add the batch-norm layer when true.
        linear: Width of the embedding layer; when <= 0 the embedding layer is
            skipped and the feature width stays ``input_dim``.
        return_f: When true, ``forward`` also returns the ``add_block`` output.
    """

    def __init__(self, input_dim, class_num, droprate, relu=False, bnorm=True, linear=512, return_f=False):
        super().__init__()
        self.return_f = return_f

        # Width of the tensor leaving add_block (and entering the classifier).
        feature_dim = linear if linear > 0 else input_dim

        layers = []
        if linear > 0:
            layers.append(nn.Linear(input_dim, linear))
        if bnorm:
            layers.append(nn.BatchNorm1d(feature_dim))
        if relu:
            layers.append(nn.LeakyReLU(0.1))
        if droprate > 0:
            layers.append(nn.Dropout(p=droprate))

        # Attribute names and registration order are kept so that saved
        # checkpoints keep the same parameter keys.
        self.add_block = nn.Sequential(*layers)
        self.classifier = nn.Sequential(nn.Linear(feature_dim, class_num))

    def forward(self, x):
        """Return the class scores, or ``[scores, features]`` when ``return_f`` is set."""
        features = self.add_block(x)
        scores = self.classifier(features)
        if not self.return_f:
            return scores
        return [scores, features]
        
class ft_net(nn.Module):
    """ResNet-50 backbone followed by a ``ClassBlock`` head.

    Args:
        class_num: Number of classifier outputs.
        droprate: Dropout probability forwarded to the head.
        circle: Forwarded to the head as ``return_f``; when true the forward
            pass returns ``[scores, features]`` instead of only the scores.
        linear_num: Embedding width forwarded to the head as ``linear``.
    """

    def __init__(self, class_num=100, droprate=0.5, circle=False, linear_num=512):
        super().__init__()
        backbone = models.resnet50(pretrained=True)
        # Keep every child module except the last one (presumably the
        # original fully-connected classification layer).
        trunk_layers = list(backbone.children())
        trunk_layers.pop()
        self.model = nn.Sequential(*trunk_layers)
        # The head expects 2048 input features from the truncated backbone.
        self.classifier = ClassBlock(2048, class_num, droprate, linear=linear_num, return_f=circle)

    def forward(self, x):
        """Run the backbone, collapse its output to (batch, channels), apply the head."""
        pooled = self.model(x)
        batch_size, channels = pooled.size(0), pooled.size(1)
        embedding = pooled.view(batch_size, channels)
        return self.classifier(embedding)
    
# Load the trained ReID network onto `device`.  The inference cell calls the
# loaded object directly (`reid_model.eval()`, `reid_model(...)`), so the file
# presumably stores a whole pickled module that needs ClassBlock / ft_net to be
# defined before loading — TODO confirm.
# NOTE(review): torch.load unpickles arbitrary objects; only load checkpoints
# from a trusted source.
reid_model = torch.load('reid_model_cosine.pth', map_location=device)

## 預測影像

In [8]:
# Iterable over the images in `source`; the inference loop unpacks each item
# as (path, img, im0s, vid_cap).
dataset = LoadImages(source, img_size=imgsz, auto_size=32)

In [9]:
# Identity labels as strings, in lexicographic order.  The inference loop
# indexes this list with the ReID model's arg-max output, so the order
# presumably has to match the class order used during ReID training — do not
# re-sort.
class_names = [
    '1', '111', '112', '116', '117', '122', '123', '124', '125', '126',
    '132', '133', '135', '148', '15', '155', '162', '163', '166', '173',
    '174', '175', '181', '182', '183', '184', '185', '188', '192', '193',
    '197', '205', '216', '221', '222', '225', '228', '230', '231', '232',
    '233', '235', '265', '273', '285', '296', '3', '302', '338', '339',
    '341', '344', '359', '516', '517', '543', '544', '545', '55', '607',
    '61', '618', '619', '621', '622', '623', '624', '626', '63', '652',
    '653', '655', '656', '659', '660', '662', '663', '664', '705', '712',
    '713', '714', '734', '735', '736', '74', '75', '754', '835', '84',
    '857', '87', '88', '880', '881', '882', '891', '894', '90', '98',
]

# One random 3-component colour per identity (used by the optional box plotting).
colors = [
    [random.randint(0, 255) for _channel in range(3)]
    for _identity in range(len(class_names))
]

In [11]:
# Detect pedestrians in every test image, classify each crop with the ReID
# model and collect one submission row per detection.
submission = pd.read_csv(sub)
# Drop the row labelled 0 of the sample file; its columns are reused below.
submission = submission.drop([0])

# Per-channel normalisation applied to the RGB crop before the ReID model.
reid_mean = (0.485, 0.456, 0.406)
reid_std = (0.229, 0.224, 0.225)
# Size handed to cv2.resize, i.e. (width, height).
reid_input_size = (192, 384)

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end.  Writing `submission.loc[len(submission)]` per detection is quadratic
# and, if the sample file had more than one row, would overwrite an existing
# row instead of appending.
detection_rows = []

# Loop-invariant: put the ReID model in inference mode once.
reid_model.eval()

# Run inference
with torch.no_grad():
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img)  # run once
    for path, img, im0s, vid_cap in dataset:
        basename_no_ext = os.path.splitext(os.path.basename(path))[0]

        h, w = im0s.shape[:2]
        img = torch.from_numpy(img).to(device)
        img = img.float()  # uint8 to fp32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference followed by NMS
        pred = model(img)[0]
        pred = non_max_suppression(pred, conf_thres, iou_thres)

        # Process detections
        for det in pred:  # detections per image
            s = '%gx%g ' % img.shape[2:]  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0s.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g' % n  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    x1, y1, x2, y2 = (int(v) for v in xyxy)
                    crop_img = im0s[y1:y2, x1:x2]
                    if crop_img.size == 0:
                        # A zero-area box cannot be resized; skip it rather
                        # than abort the whole run.
                        continue

                    # Reverse the channel axis (BGR -> RGB, assuming im0s comes
                    # from OpenCV), resize, scale to 0-1 and normalise.
                    crop_rgb = cv2.resize(crop_img[:, :, ::-1], reid_input_size)
                    crop_norm = ((crop_rgb / 255 - reid_mean) / reid_std).astype('float32')
                    # HWC -> CHW, then add the batch axis on the tensor so the
                    # cell does not depend on `np` being in scope.
                    reid_input = torch.from_numpy(crop_norm.transpose((2, 0, 1))).unsqueeze(0).to(device)

                    # The ReID model is expected to return (scores, features).
                    outputs, _ = reid_model(reid_input)
                    _, preds = torch.max(outputs, 1)
                    reid = class_names[int(preds[0])]

                    # Box corners are stored relative to the image size.
                    detection_rows.append([basename_no_ext,
                                           reid,
                                           float(conf),
                                           x1 / w,
                                           y1 / h,
                                           x2 / w,
                                           y2 / h])
                    # Optional visualisation:
                    # plot_one_box(xyxy, im0s, label=reid, color=colors[class_names.index(reid)], line_thickness=1)
            print(s)

    if detection_rows:
        new_rows = pd.DataFrame(detection_rows, columns=submission.columns)
        if submission.empty:
            submission = new_rows
        else:
            submission = pd.concat([submission, new_rows], ignore_index=True)
    print('Done. (%.3fs)' % (time.time() - t0))

image 1/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_01.jpg: 384x640 4
image 2/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_02.jpg: 384x640 4
image 3/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_03.jpg: 384x640 4
image 4/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_04.jpg: 384x640 4
image 5/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_05.jpg: 384x640 2
image 6/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_06.jpg: 384x640 4
image 7/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_07.jpg: 384x640 4
image 8/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_08.jpg: 384x640 3
image 9/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_09.jpg: 384x640 8
image 10/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_10.jpg: 384x640 3
image 11/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_11.jpg: 384x640 4
image 12/153 /home/jovyan/Person_ReID/person_reid_datasets/test/c1_12.jpg:

In [12]:
# Write the collected detections to disk without the DataFrame index column;
# the evaluation cell reads this file back.
submission.to_csv('submission_yolor_csp_cosine.csv', index=False)

In [13]:
import numpy as np
import pandas as pd
from mean_average_precision.detection_map import DetectionMAP
# Ground truth and the predictions written by the inference cell.
answer = pd.read_csv('answer.csv')
submission = pd.read_csv('submission_yolor_csp_cosine.csv')

# Unique image names present in the ground truth.
filenames = np.unique(answer['filename'])

# Identity labels as integers in ascending numeric order.
# NOTE(review): this rebinds `class_names`, which the inference cell uses as a
# list of strings in a different (lexicographic) order.  Re-running the
# inference cell after this one would index the wrong list.
class_names = [
    1, 3, 15, 55, 61, 63, 74, 75, 84, 87,
    88, 90, 98, 111, 112, 116, 117, 122, 123, 124,
    125, 126, 132, 133, 135, 148, 155, 162, 163, 166,
    173, 174, 175, 181, 182, 183, 184, 185, 188, 192,
    193, 197, 205, 216, 221, 222, 225, 228, 230, 231,
    232, 233, 235, 265, 273, 285, 296, 302, 338, 339,
    341, 344, 359, 516, 517, 543, 544, 545, 607, 618,
    619, 621, 622, 623, 624, 626, 652, 653, 655, 656,
    659, 660, 662, 663, 664, 705, 712, 713, 714, 734,
    735, 736, 754, 835, 857, 880, 881, 882, 891, 894,
]

# Identity label -> position in `class_names`.
cls_name_dict = dict(zip(class_names, range(len(class_names))))

# Replace the identity labels in both tables by their class index.
submission['reid'] = submission['reid'].map(cls_name_dict)
answer['reid'] = answer['reid'].map(cls_name_dict)

In [14]:
# Build one evaluation tuple per ground-truth image:
# (pred_bb, pred_cls, pred_conf, gt_bb, gt_cls), later unpacked into
# `mAP.evaluate(*frame)`.
box_columns = ['left', 'top', 'right', 'bottom']

frames = []
for file in filenames:
    # Local names deliberately avoid `sub`, which holds the sample-submission
    # path read by the inference cell.
    file_preds = submission[submission['filename'] == file]
    file_truth = answer[answer['filename'] == file]
    frames.append((
        file_preds[box_columns].values,   # predicted boxes
        file_preds['reid'].values,        # predicted class indices
        file_preds['confidence'].values,  # prediction confidences
        file_truth[box_columns].values,   # ground-truth boxes
        file_truth['reid'].values,        # ground-truth class indices
    ))

In [15]:
# Evaluate mAP at each overlap threshold from 0.5 to 0.95 and report the average.
n_class = len(class_names)

thresh = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
mAP_all = []
for thre in thresh:
    mAP = DetectionMAP(n_class, overlap_threshold=thre)
    for frame in frames:
        mAP.evaluate(*frame)
    # Compute the score once and reuse it for both the report and the average.
    score = mAP.compute_mAP()
    # str(thre)[1:] drops the leading '0', e.g. 0.55 -> '.55'.
    print('mAP@' + str(thre)[1:] + f': {score}')
    mAP_all.append(score)

print(f'mAP@.5:.95: {np.average(mAP_all)}')
# print('mAP:', mAP.compute_mAP())
# mAP.plot(class_names=class_names, figsize=30)
# plt.show()
#plt.savefig("pr_curve_example.png")

mAP@.5: 0.7247240183175458
mAP@.55: 0.7247240183175458
mAP@.6: 0.7227170253105527
mAP@.65: 0.7187790633151195
mAP@.7: 0.7109825166705125
mAP@.75: 0.6945085880707758
mAP@.8: 0.6373163321788664
mAP@.85: 0.5254621956458038
mAP@.9: 0.4026095139332551
mAP@.95: 0.34186887387974346
mAP@.5:.95: 0.6203692145639721
