In [2]:
from mmdet.apis import init_detector, inference_detector, show_result_pyplot
import mmcv
import json
import time
from tqdm import tqdm
import numpy as np
import os
import sys
sys.path.insert(1, '/home/user/mmdetection')
from mean_average_precision import MetricBuilder
import warnings
warnings.filterwarnings('ignore')
from efficientnet_pytorch import EfficientNet
import math
import xml.etree.ElementTree as ET
import pandas as pd
import torch

# Validation: score the detector against the annotated training frames

In [3]:
# Training set: full path of every entry found in the images folder.
root_train_images = '/home/user/Desktop/hack/data_task2/train/images/'
dataset_train_imgs = [
    root_train_images + entry for entry in os.listdir(root_train_images)
]

# Number of entries in the training images folder.
num_frames = len(dataset_train_imgs)

# Folder holding the training annotations (PASCAL VOC XML files).
gt_xmls_folder = '/home/user/Desktop/hack/data_task2/train/annotations/PASCAL_VOC_xml/'
# File extensions that are accepted as images.
img_formats = ('.png', '.jpeg', '.jpg')

In [4]:
def get_gt_bboxes(gt_xmls_folder):
    """
    Parse the annotation XML files stored in ``gt_xmls_folder``.

    Only regular files whose name ends in ``.xml`` are read, so stray entries
    (e.g. ``.ipynb_checkpoints`` or OS metadata files) no longer break the
    parser. Files are visited in sorted name order so the result does not
    depend on the filesystem's listing order.

    gt_xmls_folder: folder with the XML annotations (with or without a
        trailing path separator).

    Returns:
    bboxes: one list per annotation file; every ``bndbox`` element becomes
        ``[xmin, ymin, xmax, ymax, 0, 0, 0]`` with coordinates floored to int.
        The three trailing zeros are constant filler columns
        (presumably class_id / difficult / crowd for the mAP metric - confirm);
    xml_paths: annotation file names (not full paths), same order as ``bboxes``.
    """

    xml_paths, bboxes = [], []
    for xml_name in sorted(os.listdir(gt_xmls_folder)):
        fp_xml = os.path.join(gt_xmls_folder, xml_name)
        # Skip sub-folders and anything that is not an XML annotation.
        if not (xml_name.lower().endswith('.xml') and os.path.isfile(fp_xml)):
            continue
        root = ET.parse(fp_xml).getroot()
        bboxes_curr = []
        for bndbox in root.iter('bndbox'):
            xmin, ymin, xmax, ymax = (
                math.floor(float(bndbox.find(tag).text))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            bboxes_curr.append([xmin, ymin, xmax, ymax, 0, 0, 0])
        xml_paths.append(xml_name)
        bboxes.append(bboxes_curr)
    return bboxes, xml_paths
        
    
# Load the ground-truth boxes and the matching annotation file names
# for the training set.
gt_bboxes, gt_im_paths = get_gt_bboxes(gt_xmls_folder)
#print(len(gt_bboxes), len(gt_im_paths))

In [7]:
def get_preds(dataset_train_imgs, config, checkpoint, img_formats=img_formats, thr=0.5, class_id=0):
    """
    Run an mmdetection model over the given images and collect its boxes.

    dataset_train_imgs: paths of the images to process;
    config: mmdetection config file of the architecture;
    checkpoint: mmdetection checkpoint (weights) file;
    img_formats: tuple of accepted image file extensions;
    thr: confidence threshold; detections scoring below it are discarded;
    class_id: index of the category to keep from the model output.

    Returns:
    bboxes: per processed image, a list of
        ``[x1, y1, x2, y2, class_id, conf]`` (corner coordinates cast to int);
    im_paths: paths of the processed images, same order as ``bboxes``.
    """

    im_paths, bboxes = [], []
    model = init_detector(config, checkpoint, device='cuda:0')
    for path_im in tqdm(dataset_train_imgs):
        # The hidden-file check has to look at the file name: the entries are
        # full paths, so testing the whole string for a leading '.' never fires.
        if not path_im.endswith(img_formats) or os.path.basename(path_im).startswith('.'):
            continue
        im_paths.append(path_im)
        result = inference_detector(model, path_im)
        # Models with a mask head return (bbox_results, segm_results);
        # only the per-class box arrays are needed here.
        if isinstance(result, tuple):
            result = result[0]
        preds = []
        for det in result[class_id]:
            conf = det[4]
            if conf >= thr:
                x1, y1, x2, y2 = (int(v) for v in det[:4])
                preds.append([x1, y1, x2, y2, class_id, conf])
        bboxes.append(preds)
    return bboxes, im_paths


def get_preds_yolov5(dataset_train_imgs, img_formats=img_formats, thr=0.5, class_id=0):
    """
    Run the ultralytics YOLOv5 ``yolov5x6`` hub model over the given images.

    dataset_train_imgs: paths of the images to process;
    img_formats: tuple of accepted image file extensions;
    thr: confidence threshold; detections scoring below it are discarded;
    class_id: index of the category to keep from the model output.

    Returns:
    bboxes: per processed image, a list of
        ``[x1, y1, x2, y2, class_id, conf]`` (corner coordinates cast to int);
    im_paths: paths of the processed images, same order as ``bboxes``.
    """

    im_paths, bboxes = [], []
    model = torch.hub.load('ultralytics/yolov5', 'yolov5x6')
    for path_im in tqdm(dataset_train_imgs):
        # The hidden-file check has to look at the file name: the entries are
        # full paths, so testing the whole string for a leading '.' never fires.
        if not path_im.endswith(img_formats) or os.path.basename(path_im).startswith('.'):
            continue
        im_paths.append(path_im)
        detections = model(path_im).xyxy[0].cpu().numpy()
        preds = []
        for det in detections:
            conf = det[4]
            # The last column holds the predicted class; filter on the requested
            # class_id instead of a hard-coded 0 so the parameter is honoured.
            if det[-1] == class_id and conf >= thr:
                x1, y1, x2, y2 = (int(v) for v in det[:4])
                preds.append([x1, y1, x2, y2, class_id, conf])
        bboxes.append(preds)
    return bboxes, im_paths
            
    

# Alternative detector (mmdetection QueryInst), kept for reference only:
#config = '../configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py'
#checkpoint = '../checkpoints/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048-91f9995b.pth'
#pred_bboxes, im_paths = get_preds(dataset_train_imgs, config, checkpoint)
# Predictions actually used below: YOLOv5 on the training images.
pred_bboxes, im_paths = get_preds_yolov5(dataset_train_imgs)

Using cache found in /home/user/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2021-10-30 torch 1.9.0 CUDA:0 (GeForce GTX 1080 Ti, 11177.1875MB)

Fusing layers... 
Model Summary: 574 layers, 140730220 parameters, 0 gradients
Adding AutoShape... 
100%|█████████████████████████████████████████| 500/500 [00:29<00:00, 17.15it/s]


In [8]:
# Frame names (folder and extension stripped) of the images that were predicted on.
fnames_preds = [os.path.splitext(os.path.basename(im))[0] for im in im_paths]
print('len(fnames_preds) =', len(fnames_preds))


# Frame names (extension stripped) of the annotation files.
fnames_gts = [os.path.splitext(im)[0] for im in gt_im_paths]
print('len(fnames_gts) =', len(fnames_gts))
assert len(fnames_preds) == len(fnames_gts), 'inappropriate sizes between gts and preds'
# Equal lengths alone do not guarantee alignment: a frame present on one side
# only would be dropped silently by the inner merge performed later.
unmatched = set(fnames_preds) ^ set(fnames_gts)
assert not unmatched, 'frames without a gt/pred counterpart: %s' % sorted(unmatched)[:10]

len(fnames_preds) = 500
len(fnames_gts) = 500


In [9]:
# Ground-truth table: one row per annotation file, ordered by frame name.
df_gts = (
    pd.DataFrame(data={'fnames_gts': fnames_gts, 'gt_bboxes': gt_bboxes})
    .sort_values(by=['fnames_gts'])
    .rename(columns={"fnames_gts": "fnames"})
)
df_gts

Unnamed: 0,fnames,gt_bboxes
140,am3_1_frame004,"[[747, 507, 982, 1080, 0, 0, 0]]"
466,am3_1_frame006,"[[759, 470, 960, 1006, 0, 0, 0]]"
375,am3_1_frame007,"[[779, 458, 980, 980, 0, 0, 0]]"
173,am3_1_frame015,"[[789, 417, 948, 887, 0, 0, 0]]"
4,am3_1_frame021,"[[787, 401, 957, 839, 0, 0, 0]]"
...,...,...
485,oz_violation_frame545,"[[840, 262, 905, 434, 0, 0, 0], [947, 237, 103..."
14,oz_violation_frame560,"[[970, 290, 1077, 499, 0, 0, 0], [860, 274, 94..."
354,oz_violation_frame561,"[[1001, 288, 1071, 494, 0, 0, 0], [870, 282, 9..."
100,oz_violation_frame562,"[[885, 278, 951, 521, 0, 0, 0], [998, 292, 106..."


In [10]:
# Prediction table: one row per processed image, ordered by frame name.
df_preds = (
    pd.DataFrame(data={'fnames_preds': fnames_preds, 'pred_bboxes': pred_bboxes})
    .sort_values(by=['fnames_preds'])
    .rename(columns={"fnames_preds": "fnames"})
)
df_preds

Unnamed: 0,fnames,pred_bboxes
220,am3_1_frame004,"[[763, 515, 975, 1077, 0, 0.9316406]]"
140,am3_1_frame006,"[[766, 474, 951, 1004, 0, 0.92285156]]"
344,am3_1_frame007,"[[789, 462, 972, 966, 0, 0.9355469]]"
375,am3_1_frame015,"[[794, 423, 941, 882, 0, 0.9165039]]"
175,am3_1_frame021,"[[794, 409, 942, 837, 0, 0.9165039]]"
...,...,...
20,oz_violation_frame545,"[[849, 268, 900, 428, 0, 0.63427734], [964, 24..."
435,oz_violation_frame560,"[[867, 282, 931, 507, 0, 0.8725586], [976, 295..."
210,oz_violation_frame561,"[[872, 285, 942, 509, 0, 0.8461914], [1000, 29..."
301,oz_violation_frame562,"[[886, 283, 946, 516, 0, 0.84375], [1005, 292,..."


In [11]:
# Pair ground truth with predictions frame by frame (default inner join on the frame name).
df = df_gts.merge(df_preds, on="fnames")
df

Unnamed: 0,fnames,gt_bboxes,pred_bboxes
0,am3_1_frame004,"[[747, 507, 982, 1080, 0, 0, 0]]","[[763, 515, 975, 1077, 0, 0.9316406]]"
1,am3_1_frame006,"[[759, 470, 960, 1006, 0, 0, 0]]","[[766, 474, 951, 1004, 0, 0.92285156]]"
2,am3_1_frame007,"[[779, 458, 980, 980, 0, 0, 0]]","[[789, 462, 972, 966, 0, 0.9355469]]"
3,am3_1_frame015,"[[789, 417, 948, 887, 0, 0, 0]]","[[794, 423, 941, 882, 0, 0.9165039]]"
4,am3_1_frame021,"[[787, 401, 957, 839, 0, 0, 0]]","[[794, 409, 942, 837, 0, 0.9165039]]"
...,...,...,...
495,oz_violation_frame545,"[[840, 262, 905, 434, 0, 0, 0], [947, 237, 103...","[[849, 268, 900, 428, 0, 0.63427734], [964, 24..."
496,oz_violation_frame560,"[[970, 290, 1077, 499, 0, 0, 0], [860, 274, 94...","[[867, 282, 931, 507, 0, 0.8725586], [976, 295..."
497,oz_violation_frame561,"[[1001, 288, 1071, 494, 0, 0, 0], [870, 282, 9...","[[872, 285, 942, 509, 0, 0.8461914], [1000, 29..."
498,oz_violation_frame562,"[[885, 278, 951, 521, 0, 0, 0], [998, 292, 106...","[[886, 283, 946, 516, 0, 0.84375], [1005, 292,..."


In [12]:
# Take the aligned columns back out of the merged table.
# Note: this rebinds gt_bboxes / pred_bboxes, which earlier cells defined in
# directory-listing order.
gt_bboxes, pred_bboxes = df['gt_bboxes'].values, df['pred_bboxes'].values

In [13]:
# Mean average precision over IoU thresholds 0.50, 0.55, ..., 0.95 for a single class.
metric_fn = MetricBuilder.build_evaluation_metric("map_2d", num_classes=1)
# Walk the aligned arrays themselves rather than range(num_frames): num_frames
# counts directory entries and need not equal the number of merged rows.
for frame_preds, frame_gts in zip(pred_bboxes, gt_bboxes):
    # reshape keeps the arrays two-dimensional even when a frame has no boxes
    # (np.array([]) alone would be one-dimensional with shape (0,)).
    metric_fn.add(
        np.array(frame_preds).reshape(-1, 6),
        np.array(frame_gts).reshape(-1, 7),
    )
mAP = metric_fn.value(iou_thresholds=np.arange(0.5, 1.0, 0.05))
print('mAP: ', mAP['mAP'])

mAP:  0.528363


# Test: run the detector on the test frames and build the submission file

In [14]:
def calc_area(bbox):
    """
    Compute the area of a predicted bounding box.

    bbox: sequence whose first four items are the corner coordinates
        ``x1, y1, x2, y2`` (further items such as class id and confidence
        are ignored); may be empty or None.

    Returns ``(x2 - x1) * (y2 - y1)``, or 0 for an empty/None box.
    """

    if not bbox:
        return 0
    # Predictions hold two corners, not a width/height pair, so the sides
    # must be derived before multiplying.
    x1, y1, x2, y2 = bbox[:4]
    return (x2 - x1) * (y2 - y1)

In [15]:
def calc_bbox(bbox):
    """
    Convert a prediction to the box layout used in the submission JSON:
    (x1, y1, x2, y2, class_id, conf) => (x, y, w, h); class id and
    confidence are dropped.

    An empty (or otherwise falsy) input yields an empty list.
    """

    if bbox:
        left, top, right, bottom, _class_id, _conf = bbox
        return (left, top, right - left, bottom - top)
    return []

In [16]:
def get_image_id(file_name, submission_path='./../../submission_example.json', ext='.jpg'):
    """
    Look up an ``image_id`` in an example submission by frame name.

    The image entry whose ``file_name`` equals ``file_name + ext`` supplies an
    ``id``; the ``image_id`` of the annotation whose ``id`` equals that value
    is returned. If several entries match, the last one wins.
    NOTE(review): matching an annotation ``id`` against an image ``id`` is
    unusual - confirm this is what the example submission requires.

    file_name: frame name without extension;
    submission_path: path of the example submission JSON;
    ext: extension appended to ``file_name`` before matching.

    Raises ValueError when no image or no annotation matches, instead of
    failing with an unbound local variable.
    """

    with open(submission_path) as f:
        data = json.load(f)

    target = file_name + ext
    image_found, id_ = False, None
    for im_info in data['images']:
        if im_info.get('file_name') == target:
            image_found, id_ = True, im_info.get('id')
    if not image_found:
        raise ValueError('no image named %r in %s' % (target, submission_path))

    annot_found, image_id = False, None
    for annot_info in data['annotations']:
        if annot_info.get('id') == id_:
            annot_found, image_id = True, annot_info.get('image_id')
    if not annot_found:
        raise ValueError('no annotation with id %r in %s' % (id_, submission_path))
    return image_id

In [17]:
# Test set: full path of every entry found in the images folder.
root_test_images = '/home/user/Desktop/hack/data_task2/test/images/'
dataset_test_imgs = [
    root_test_images + entry for entry in os.listdir(root_test_images)
]
test_num_frames = len(dataset_test_imgs)
img_formats = ('.png', '.jpeg', '.jpg')

# mmdetection YOLOF config/checkpoint; only the commented-out get_preds call uses them.
config = '../configs/yolof/yolof_r50_c5_8x8_1x_coco.py'
checkpoint = '../checkpoints/yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth'
#test_pred_bboxes, test_im_paths = get_preds(dataset_test_imgs, config, checkpoint)
test_pred_bboxes, test_im_paths = get_preds_yolov5(dataset_test_imgs)

# Frame names: extension and folder stripped from each processed image path.
test_fnames_preds = [
    os.path.splitext(path)[0].split('/')[-1] for path in test_im_paths
]
print('len(test_fnames_preds) =', len(test_fnames_preds))

# Prediction table for the test set, ordered by frame name.
test_df_preds = (
    pd.DataFrame(data={'test_fnames_preds': test_fnames_preds,
                       'test_pred_bboxes': test_pred_bboxes})
    .sort_values(by=['test_fnames_preds'])
    .rename(columns={"test_fnames_preds": "test_fnames"})
)
test_df_preds

Using cache found in /home/user/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2021-10-30 torch 1.9.0 CUDA:0 (GeForce GTX 1080 Ti, 11177.1875MB)

Fusing layers... 
Model Summary: 574 layers, 140730220 parameters, 0 gradients
Adding AutoShape... 
100%|█████████████████████████████████████████| 150/150 [00:08<00:00, 17.15it/s]

len(test_fnames_preds) = 150





Unnamed: 0,test_fnames,test_pred_bboxes
146,am3_1_frame011,"[[806, 424, 929, 882, 0, 0.91308594]]"
90,am3_1_frame025,"[[831, 358, 947, 719, 0, 0.88964844]]"
30,am3_1_frame030,"[[904, 296, 991, 548, 0, 0.8730469]]"
74,am3_1_frame043,[]
81,am3_3_frame019,"[[861, 633, 1015, 1063, 0, 0.88964844]]"
...,...,...
52,oz_violation_frame451,"[[888, 325, 964, 613, 0, 0.9042969], [1040, 32..."
88,oz_violation_frame453,"[[892, 329, 1012, 618, 0, 0.89746094], [980, 3..."
22,oz_violation_frame522,"[[882, 315, 957, 519, 0, 0.85498047], [960, 28..."
28,oz_violation_frame542,"[[849, 268, 901, 432, 0, 0.5805664]]"


In [19]:
def make_images_annotation_test(df, out_path='30_2204.json', width=1920, height=1080):
    """
    Build the submission (annotation of the test set) and write it as JSON.

    df: table with the columns test_fnames (frame names without extension)
        and test_pred_bboxes (list of predicted boxes for that frame);
    out_path: file the JSON is written to;
    width, height: frame size recorded for every image entry.

    Images are numbered 1, 2, ... in row order and every annotation carries the
    number of its image as ``image_id``; annotation ids are a running counter
    starting at 1. Returns None.
    """

    images, annotations = [], []
    annotation_id = 0
    # Read both columns side by side instead of re-filtering the whole table
    # by file name for every row.
    frames = zip(df['test_fnames'].values, df['test_pred_bboxes'].values)
    for image_id, (file_name, bboxes) in enumerate(frames, 1):
        images.append({
            # Same value the annotations use as image_id. The previous running
            # annotation counter never matched it and repeated after a frame
            # without detections.
            'id': image_id,
            'width': width,
            'height': height,
            'file_name': file_name+'.jpg',
            'license': 0,
            'flickr_url': '',
            'coco_url': '',
            # NOTE(review): COCO spells this key 'date_captured'; left as in the
            # original - confirm against the expected submission schema.
            'data_captured': 0
        })

        for bbox in bboxes:
            annotation_id += 1
            annotations.append({
                'id': annotation_id,
                'image_id': image_id,
                'category_id': 1,
                'segmentation': [],
                'area': calc_area(bbox),
                'bbox': calc_bbox(bbox),
                'iscrowd': 0,
                'attributes': {'occluded': False},
            })

    info = {
        'contributor': '',
        'date_created': '',
        'description': '',
        'url': '',
        'version': '',
        'year': ''
    }
    licenses = [{'name': '', 'id': 0, 'url': ''}]
    categories = [{'id': 1, 'name': 'person', 'supercategory': ''}]
    sub = {
        'licenses': licenses,
        'info': info,
        'categories': categories,
        'images': images,
        'annotations': annotations
    }
    with open(out_path, 'w') as outfile:
        json.dump(sub, outfile)
    return None

In [20]:
# Write the submission JSON for the test-set predictions.
make_images_annotation_test(test_df_preds)