In [None]:
!pip install globox

In [None]:
from __future__ import annotations

import os
from pathlib import Path
from enum import Enum
import json
import shutil

import numpy as np
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

from kaggle_secrets import UserSecretsClient
import wandb

from IPython.display import FileLink
from IPython.display import display, HTML

from tqdm.notebook import tqdm

In [None]:
# Read the Weights & Biases API key from the Kaggle secret named "wandb-key"
# so the key never appears in the notebook itself.
user_secrets = UserSecretsClient()
wandb_key = user_secrets.get_secret("wandb-key")

# Authenticate this kernel session with W&B.
wandb.login(key=wandb_key)

# Create data

In [None]:
class MyConfig:
    # Notebook-wide settings: competition input paths, working directories for
    # the YOLO-format dataset, and switches selecting which stages run.
    # NOTE: both CSV files are read eagerly, when this class body executes.

    # Submission template and training annotations.
    sample_subm = pd.read_csv('../input/detection-of-human-silhouettes-in-forest-images/omsk/sample_solution.csv')
    train = pd.read_csv('../input/detection-of-human-silhouettes-in-forest-images/train_dataset_train/train.csv')
    # Source image folders (paths are relative to the notebook working directory).
    test_dir = '../input/detection-of-human-silhouettes-in-forest-images/test_dataset_test/test'
    train_dir = '../input/detection-of-human-silhouettes-in-forest-images/train_dataset_train/train'
    # Output folders for the generated train / validation images and label files.
    train_img_dir = './data/train/images'
    train_labels_dir = './data/train/labels'
    val_img_dir = './data/val/images'
    val_labels_dir = './data/val/labels'
    # Stage switches checked by the `if MyConfig....:` guards further down.
    make_inference = True
    train_val_split = True
    make_training = False

In [None]:
MyConfig.sample_subm['region_shape'] = MyConfig.sample_subm['region_shape'].astype('object')

In [None]:
def make_dirs_train_yolo():
    """Recreate an empty ``./data`` tree with train/val ``images`` and ``labels`` folders.

    Any existing ``./data`` directory is removed first (errors are ignored), so
    the function always leaves a clean layout behind.
    """
    shutil.rmtree('./data', ignore_errors=True)
    required_dirs = (
        "./data/train/images",
        "./data/train/labels",
        "./data/val/images",
        "./data/val/labels",
    )
    for required_dir in required_dirs:
        Path(required_dir).mkdir(parents=True, exist_ok=True)
make_dirs_train_yolo()

In [None]:
def yolobbox2bbox(x,y,w,h):
    """Convert a centre-based box ``(x, y, w, h)`` to corner form.

    Returns:
        Tuple ``(x1, y1, x2, y2)``: the top-left and bottom-right corners.
    """
    half_w = w/2
    half_h = h/2
    left, right = x-half_w, x+half_w
    top, bottom = y-half_h, y+half_h
    return left, top, right, bottom

# Convert a COCO box (top-left corner + size, in pixels) to YOLO format
def coco_to_yolo(x1, y1, w, h, image_w, image_h):
    """Return ``[x_center, y_center, width, height]`` normalised by the image size.

    Args:
        x1, y1: top-left corner of the box.
        w, h: box width and height.
        image_w, image_h: image width and height, used as the normalisers.
    """
    x_center = (2*x1 + w)/(2*image_w)
    y_center = (2*y1 + h)/(2*image_h)
    rel_w = w/image_w
    rel_h = h/image_h
    return [x_center, y_center, rel_w, rel_h]

In [None]:
def vis_example(path):
    """Show one training image with its annotated regions and print the YOLO labels.

    The annotation row is looked up by the image file name (``ID_img``), so the
    boxes drawn always belong to the image passed in. If the image has no
    annotated regions it is shown as-is and an empty label string is printed.

    Args:
        path: path to a training image; its base name must match ``ID_img``
            in ``MyConfig.train``.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(f'Cannot read image: {path}')
    # Keep the image size under dedicated names: reusing `w`/`h` for the box
    # size inside the loop would normalise every later box by the wrong values.
    img_h, img_w = img.shape[0], img.shape[1]

    train = MyConfig.train
    rows = train[(train.ID_img == os.path.basename(path)) & (train.count_region != 0)]

    circle_coords = []
    if len(rows) > 0:
        # region_shape is stored as a JSON-like string wrapped in single quotes.
        coords_str = rows.iloc[0]['region_shape'].replace("'", "")
        circle_coords = json.loads(coords_str)

    yolo_coords_f = ''
    for item in circle_coords:
        cx = item['cx']
        cy = item['cy']
        # Radius is shrunk by 1.5x for this visualisation only.
        r = int(item['r'] // 1.5)
        x1, y1 = cx - r, cy - r
        box_w = box_h = 2 * r

        yolo_coords = coco_to_yolo(x1, y1, box_w, box_h, img_w, img_h)
        img = cv2.rectangle(img, (x1, y1), (x1 + box_w, y1 + box_h), (255,0,0), 2)

        label = '0 ' + ' '.join(list(map(str, yolo_coords))) + '\n'
        yolo_coords_f = yolo_coords_f + label
    plt.figure(figsize=(30, 20))
    plt.imshow(img)
    print(yolo_coords_f)

vis_example(f'{MyConfig.train_dir}/4269.JPG')

In [None]:
def train_val_split(train_on_all_data=True):
    """Build the train / validation frames from ``MyConfig.train``.

    Validation takes 20% of the images that contain people plus 1% of the
    empty images. Training keeps every image with people and a 3% sample of
    the empty ones.

    Args:
        train_on_all_data: when True (default) the training part is drawn from
            the whole dataset, so it overlaps the validation part. When False
            the validation rows are excluded from training first.

    Returns:
        Tuple ``(train_part, val_part)`` of DataFrames.
    """
    train = MyConfig.train
    val_part = pd.concat([
        train[train.count_region != 0].sample(frac=0.2, random_state=42),
        train[train.count_region == 0].sample(frac=0.01, random_state=42),
    ])

    if train_on_all_data:
        candidates = train
    else:
        # Exclude validation rows by index label. Masking with
        # `train[~train.isin(val_part)].dropna()` would also drop any row that
        # already has a NaN in some column and would upcast numeric columns.
        candidates = train.drop(index=val_part.index)

    train_part = pd.concat([
        candidates[candidates.count_region != 0],
        candidates[candidates.count_region == 0].sample(frac=0.03, random_state=42),
    ])

    # Sizes: train rows, train rows with people, val rows, val rows with people.
    print(len(train_part), len(train_part[train_part.count_region > 0]), len(val_part), len(val_part[val_part.count_region > 0]))
    return train_part, val_part

if MyConfig.train_val_split:
    train_part, val_part = train_val_split()

In [None]:
def create_lables(df ,train_img_dir, train_labels_dir):
    """Copy images into ``train_img_dir`` and write one YOLO label file per image.

    Each annotated circle ``(cx, cy, r)`` becomes the square box enclosing it,
    written as ``0 x_center y_center width height`` (class 0, normalised by the
    image size). Images with ``count_region == 0`` get an empty label file.

    Label files are opened in write mode, so calling this again for the same
    image replaces its labels instead of appending duplicate boxes.

    Args:
        df: rows with ``ID_img``, ``count_region`` and ``region_shape`` columns.
        train_img_dir: destination folder for the copied images.
        train_labels_dir: destination folder for the ``.txt`` label files.

    Raises:
        FileNotFoundError: if a copied image cannot be read back.
    """
    print('imgs with labels: \n')
    for _, row in df.iterrows():
        img_name = row["ID_img"]
        shutil.copy(f'{MyConfig.train_dir}/{img_name}', train_img_dir)
        label_path = f'{train_labels_dir}/{img_name.split(".")[0]}.txt'

        yolo_coords_f = ''
        if row['count_region'] != 0:
            img_path = f'{train_img_dir}/{img_name}'
            print(img_path)
            img = cv2.imread(img_path)
            if img is None:
                raise FileNotFoundError(f'Cannot read image: {img_path}')
            h, w = img.shape[0], img.shape[1]

            # region_shape is stored as a JSON-like string wrapped in single quotes.
            circle_coords = json.loads(row['region_shape'].replace("'", ""))
            for item in circle_coords:
                cx = item['cx']
                cy = item['cy']
                r = item['r']
                top_left = (cx - r, cy - r)
                bottom_right = (cx + r, cy + r)

                yolo_coords = coco_to_yolo(*top_left, bottom_right[0] - top_left[0], bottom_right[1] - top_left[1], w, h)
                yolo_coords_f += '0 ' + ' '.join(map(str, yolo_coords)) + '\n'

        # 'w' keeps the operation idempotent; an empty string yields an empty file.
        with open(label_path, 'w') as label_txt:
            label_txt.write(yolo_coords_f)

In [None]:
# Build the YOLO-format train / val folders only when training is enabled.
# NOTE: train_part / val_part exist only if MyConfig.train_val_split was True above.
if MyConfig.make_training:
    create_lables(train_part, MyConfig.train_img_dir, MyConfig.train_labels_dir)
    create_lables(val_part, MyConfig.val_img_dir, MyConfig.val_labels_dir)

## Convert labels for mmdetection

In [None]:
# rename JPG to jpg

def rename_jpg_ext(folder: str):
    """Recursively rename every file ending in ``.JPG`` under ``folder`` to ``.jpg``.

    Only the exact upper-case extension is touched; other files are left alone.
    """
    for dir_path, _sub_dirs, file_names in os.walk(folder):
        for file_name in file_names:
            stem, ext = os.path.splitext(file_name)
            if ext != '.JPG':
                continue
            os.rename(
                os.path.join(dir_path, file_name),
                os.path.join(dir_path, stem + '.jpg'),
            )
                
# Lower-case the image extensions in both generated image folders.
rename_jpg_ext("./data/train/images/")
rename_jpg_ext("./data/val/images/")    

In [None]:
from globox import AnnotationSet

def yolo_to_coco(image_dir: str, labels_dir: str, coco_ds_name: str, label_to_id):
    """Convert a folder of YOLO label files into a COCO annotation JSON file.

    Args:
        image_dir: folder containing the images.
        labels_dir: folder containing the YOLO ``.txt`` label files.
        coco_ds_name: path of the COCO JSON file to write.
        label_to_id: mapping of class label to COCO category id.
    """
    annotation_set = AnnotationSet.from_yolo(
        folder=Path(labels_dir),
        image_folder=Path(image_dir),
    )
    coco = annotation_set.to_coco(label_to_id=label_to_id, auto_ids=True, verbose=False)

    # Set each annotation's area to its bbox width * height.
    for annotation in coco['annotations']:
        bbox = annotation['bbox']
        annotation['area'] = bbox[2]*bbox[3]

    # Replace the category list with one entry per label in `label_to_id`.
    categories = []
    for label, category_id in label_to_id.items():
        categories.append({"supercategory": "none", "id": category_id, "name": label})
    coco['categories'] = categories

    with open(coco_ds_name, "w") as out_file:
        json.dump(coco, fp=out_file, allow_nan=False)

yolo_to_coco(MyConfig.train_img_dir, MyConfig.train_labels_dir, './train.json', {'human': 0})
yolo_to_coco(MyConfig.val_img_dir, MyConfig.val_labels_dir, './val.json', {'human': 0})

In [None]:
# Write the single class name to classes.txt (no trailing newline).
with open("/kaggle/working/classes.txt", "w") as classes:
    classes.write("human")

# MMDetection

### Install

In [None]:
!pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torch-1.7.0+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
!pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torchvision-0.8.1+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
!pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torchaudio-0.7.0-cp37-cp37m-linux_x86_64.whl' --no-deps

In [None]:
!pip install '/kaggle/input/mmdetectionv2140/addict-2.4.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/yapf-0.31.0-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/terminal-0.4.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/terminaltables-3.1.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/mmcv_full-1_3_8-cu110-torch1_7_0/mmcv_full-1.3.8-cp37-cp37m-manylinux1_x86_64.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/pycocotools-2.0.2/pycocotools-2.0.2' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/mmpycocotools-12.0.3/mmpycocotools-12.0.3' --no-deps

!rm -rf mmdetection

!cp -r /kaggle/input/mmdetectionv2140/mmdetection-2.14.0 /kaggle/working/
!mv /kaggle/working/mmdetection-2.14.0 /kaggle/working/mmdetection
%cd /kaggle/working/mmdetection
!pip install -e .

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import sklearn
import torchvision
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import PIL
import json
from PIL import Image, ImageEnhance
import albumentations as A
import mmdet
import mmcv
from albumentations.pytorch import ToTensorV2
import seaborn as sns
import glob
from pathlib import Path
import pycocotools
from pycocotools import mask
import numpy.random
import random
import cv2
import re
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
from mmdet.apis import inference_detector, init_detector, show_result_pyplot, set_random_seed

Download the pretrained checkpoint used to initialise training

In [None]:
!mkdir checkpoints
!wget -c http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth \
      -O checkpoints/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth

### Config

In [None]:
from mmcv import Config
cfg = Config.fromfile('/kaggle/working/mmdetection/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py')

In [None]:
cfg.dataset_type = 'CocoDataset'
cfg.classes = ['human']
cfg.data_root = '/kaggle/working'

# Single-class problem: every bbox head predicts one class.
for bbox_head in cfg.model.roi_head.bbox_head:
    bbox_head.num_classes = 1

# (split name, annotation file, image folder); the test split reuses the
# validation annotations and images.
_dataset_splits = (
    ('test', './val.json', 'data/val/images'),
    ('train', './train.json', 'data/train/images'),
    ('val', './val.json', 'data/val/images'),
)
for _split_name, _ann_file, _img_prefix in _dataset_splits:
    _split_cfg = cfg.data[_split_name]
    _split_cfg.type = 'CocoDataset'
    _split_cfg.classes = ('human',)
    _split_cfg.data_root = '/kaggle/working'
    _split_cfg.ann_file = _ann_file
    _split_cfg.img_prefix = _img_prefix

# Albumentations transforms applied through the 'Albu' step of the training
# pipeline below: geometric jitter, brightness/contrast jitter, then one of two blurs.
albu_train_transforms = [
    dict(type='ShiftScaleRotate', shift_limit=0.0625,
         scale_limit=0.15, rotate_limit=15, p=0.4),
    dict(type='RandomBrightnessContrast', brightness_limit=0.2,
         contrast_limit=0.2, p=0.5),
#     dict(type='IAAAffine', shear=(-10.0, 10.0), p=0.4),
#     dict(type='CLAHE', p=0.5),
    dict(
        type="OneOf",
        transforms=[
            dict(type="GaussianBlur", p=1.0, blur_limit=7),
            dict(type="MedianBlur", p=1.0, blur_limit=7),
        ],
        p=0.4,
    ),
]

# Training pipeline: load image + boxes, resize, flip, Albu augmentations,
# normalise, pad, and collect the tensors used for training.
cfg.train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
#     dict(type='Resize', img_scale=[(440, 596), (480, 650), (520, 704), (580, 785), (620, 839)], multiscale_mode='value', keep_ratio=True),
#     dict(type='Resize', img_scale=[(880, 1192), (960, 130), (1040, 1408), (1160, 1570), (1240, 1678)], multiscale_mode='value', keep_ratio=True),
    # Two scales are given; no multiscale_mode is set, so the Resize default applies.
    dict(type='Resize', img_scale=[(1333, 800), (1690, 960)]),
#     dict(type='Resize', img_scale=(1333, 800)),
    
    

    dict(type='RandomFlip', flip_ratio=0.5),

    dict(
        type='Albu',
        transforms=albu_train_transforms,
        bbox_params=dict(
        type='BboxParams',
        format='pascal_voc',
        label_fields=['gt_labels'],
        min_visibility=0.0,
        filter_lost_elements=True),
        keymap=dict(img='image', gt_bboxes='bboxes'),
        update_pad_shape=False,
        skip_img_without_anno=True),
    # Same normalisation constants as the val / test pipelines.
    dict(
        type='Normalize',
        mean=[128, 128, 128],
        std=[11.58, 11.58, 11.58],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'), 
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]

def _make_multiscale_eval_step():
    """Return a fresh ``MultiScaleFlipAug`` step shared by the val and test pipelines.

    A new dict is built on every call so the pipelines never alias one another.
    The normalisation constants match the training pipeline.
    """
    return dict(
        type='MultiScaleFlipAug',
        img_scale=[(1333, 800), (1690, 960)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[128, 128, 128],
                std=[11.58, 11.58, 11.58],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])


# Validation and test use the same steps: load the image, then multi-scale inference.
cfg.val_pipeline = [
    dict(type='LoadImageFromFile'),
    _make_multiscale_eval_step(),
]

cfg.test_pipeline = [
    dict(type='LoadImageFromFile'),
    _make_multiscale_eval_step(),
]

cfg.data.train.pipeline = cfg.train_pipeline
cfg.data.val.pipeline = cfg.val_pipeline

# Keep the first step of the base config's test pipeline and replace
# everything after it with the shared multi-scale step.
cfg.data.test.pipeline[1:] = [_make_multiscale_eval_step()]


# --- Test-time limit on detections per image ---
cfg.model.test_cfg.rcnn.max_per_img = 400

# Initialise from the checkpoint downloaded into ./checkpoints above.
cfg.load_from = './checkpoints/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth'

# Checkpoints and logs are written here.
cfg.work_dir = '/kaggle/working/model_output'

# --- Optimiser and learning-rate schedule ---
# Base LR 0.02 divided by 8 (presumably scaling for a single GPU — TODO confirm).
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config = dict(
    policy='CosineAnnealing', 
    by_epoch=False,
    warmup='linear', 
    warmup_iters=125, 
    warmup_ratio=0.001,
    min_lr=1e-07)

# --- Data loading ---
cfg.data.samples_per_gpu = 1
cfg.data.workers_per_gpu = 0

# --- Evaluation, checkpointing and logging cadence ---
cfg.evaluation.metric = 'bbox'
cfg.evaluation.interval = 10

cfg.checkpoint_config.interval = 10
cfg.runner.max_epochs = 50 # 12
cfg.log_config.interval = 144

# cfg.model.rpn_head.anchor_generator.base_sizes = [4, 9, 17, 31, 64]
# cfg.model.rpn_head.anchor_generator.strides = [4, 8, 16, 32, 64]

# --- Proposal / sampler sizes ---
cfg.model.train_cfg.rpn.sampler.num = 1024
cfg.model.train_cfg.rpn_proposal.nms_post = 2000
for rcnn in cfg.model.train_cfg.rcnn:
    rcnn.sampler.num = 3072
cfg.model.test_cfg.rpn.nms_pre = 3000
cfg.model.test_cfg.rpn.nms_post = 3000
# cfg.model.test_cfg.rpn.max_num = 3000
# The train / test cfg edits above follow https://github.com/Media-Smart/SKU110K-DenseDet/blob/master/configs/SKU_fusion_bfp_x101_32x4d.py

# --- Reproducibility, device and mixed precision ---
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)
cfg.fp16 = dict(loss_scale=512.0)
# Snapshot of the final config text; passed to train_detector as `meta`.
meta = dict()
meta['config'] = cfg.pretty_text



print(f'Config:\n{cfg.pretty_text}')

In [None]:
# Train the detector only when training is enabled in MyConfig.
if MyConfig.make_training:
    datasets = [build_dataset(cfg.data.train)]
    model = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
    # Expose the dataset's class names on the model.
    model.CLASSES = datasets[0].CLASSES

    mmcv.mkdir_or_exist(os.path.abspath(cfg.work_dir))
    train_detector(model, datasets, cfg, distributed=False, validate=True, meta=meta)

Inference with saved model weights:

In [None]:
# Load saved weights for inference.
# NOTE: `model` is used by the cells below; if make_inference is False and no
# training ran in this session, it is never defined.
if MyConfig.make_inference:
    checkpoint_file = '../../input/crcnn-human-silhouettes-in-fores/model_output/epoch_50.pth'
    model = init_detector(cfg, checkpoint_file, device='cuda:0')

In [None]:
val_part[val_part.count_region > 0]

In [None]:
def visualize_predict(img_path, thrh=0.2, circles=True, put_score=True):
    """Run the global ``model`` on one image and plot the detections above ``thrh``.

    Args:
        img_path: path to the image.
        thrh: score threshold; only detections scoring strictly above it are drawn.
        circles: draw circles (radius = half the box height) instead of rectangles.
        put_score: also write each detection's score at the box's top-left corner.
    """
    img = mmcv.imread(img_path)
    detections = inference_detector(model, img)[0]
    colour = (255,0,0)

    for det in detections:
        score = det[4]
        if not score > thrh:
            continue

        left, top = int(det[0]), int(det[1])
        if circles:
            centre = (int((det[2] + det[0]) // 2), int((det[3] + det[1]) // 2))
            radius = int(det[3] - det[1]) // 2
            img = cv2.circle(img, centre, radius, colour, 2)
        else:
            box_w = int((det[2] - det[0]))
            box_h = int((det[3] - det[1]))
            img = cv2.rectangle(img, (left, top), (left+box_w, top+box_h), colour, 2)

        if put_score:
            img = cv2.putText(img, str(score), (left, top), cv2.FONT_HERSHEY_SIMPLEX, 1.5, 255, 2)

    plt.figure(figsize=(30, 20))
    plt.imshow(img)

In [None]:
# visualize_predict('../../input/detection-of-human-silhouettes-in-forest-images/test_dataset_test/test/890.JPG', thrh = 0.45, circles = True, put_score=True)
visualize_predict('../../input/detection-of-human-silhouettes-in-forest-images/train_dataset_train/train/3835.JPG', thrh = 0.45, circles = True, put_score=True)

In [None]:
sample_subm = pd.read_csv('../../input/detection-of-human-silhouettes-in-forest-images/omsk/sample_solution.csv')
sample_subm['region_shape'] = sample_subm['region_shape'].astype('object')
sample_subm.head()

In [None]:
%%time


test_image_dir = '../../input/detection-of-human-silhouettes-in-forest-images/test_dataset_test/test'
boxes_count_all = 0

# Score threshold for keeping a detection.
thrh = 0.3

# Raw per-box predictions (pixel coordinates), collected for the WBF stage later on.
mmdet_boxes_pred = {'img': [], 'Xmin': [], 'Ymin':[], 'Xmax':[], 'Ymax':[], 'Conf':[], 'img_h': [], 'img_w': []}

for i, row in tqdm(sample_subm.iterrows(), total=sample_subm.shape[0]):
    img = mmcv.imread(f'{test_image_dir}/{row.ID_img}')
    result = inference_detector(model, img)

    # (cx, cy, r) of every detection kept for this image.
    circles = []

    for box in result[0]:
        if box[4] > thrh:
            mmdet_boxes_pred['img'].append(row.ID_img)
            mmdet_boxes_pred['Xmin'].append(box[0])
            mmdet_boxes_pred['Ymin'].append(box[1])
            mmdet_boxes_pred['Xmax'].append(box[2])
            mmdet_boxes_pred['Ymax'].append(box[3])
            mmdet_boxes_pred['Conf'].append(box[4])
            mmdet_boxes_pred['img_h'].append(img.shape[0])
            mmdet_boxes_pred['img_w'].append(img.shape[1])

            cx = int((box[2] + box[0]) // 2)
            cy = int((box[3] + box[1]) // 2)
            # Radius = half the box height.
            r = int(box[3] - box[1]) // 2

            boxes_count_all += 1
            circles.append((cx, cy, r))

    if circles:
        # Unpack each circle's own radius. Unpacking into a differently named
        # variable would make the f-string read the radius left over from the
        # last detection, giving every circle in the image the same radius.
        arr_coords = [f'{{"cx":{cx}, "cy":{cy}, "r":{r}}}' for (cx, cy, r) in sorted(circles)]
        sample_subm.at[i, 'region_shape'] = arr_coords
    else:
        sample_subm.at[i, 'region_shape'] = 0

In [None]:
# Collect the raw boxes into a frame (column order matters for later cells).
test_predicts_crcnn = pd.DataFrame({
    'Xmin': mmdet_boxes_pred['Xmin'],
    'Ymin': mmdet_boxes_pred['Ymin'],
    'Xmax': mmdet_boxes_pred['Xmax'],
    'Ymax': mmdet_boxes_pred['Ymax'],
    'ID_image': mmdet_boxes_pred['img'],
    'Conf': mmdet_boxes_pred['Conf'],
    'h': mmdet_boxes_pred['img_h'],
    'w': mmdet_boxes_pred['img_w'],
})

# Normalise pixel coordinates: y by image height, x by image width.
for _coord_col, _size_col in (('Ymin', 'h'), ('Ymax', 'h'), ('Xmin', 'w'), ('Xmax', 'w')):
    test_predicts_crcnn[_coord_col] = test_predicts_crcnn[_coord_col] / test_predicts_crcnn[_size_col]

# Single class: every box gets label 0.
test_predicts_crcnn['label'] = 0

In [None]:
test_predicts_crcnn = test_predicts_crcnn[test_predicts_crcnn.Conf > 0.55]
len(test_predicts_crcnn['ID_image'].unique()), len(test_predicts_crcnn)

In [None]:
len(sample_subm[sample_subm.region_shape != 0]), boxes_count_all

In [None]:
sample_subm[sample_subm.region_shape != 0].head(15)

In [None]:
sample_subm.to_csv('../crcnn_60ep_0_3.csv', index = False)

In [None]:
visualize_predict('../../input/detection-of-human-silhouettes-in-forest-images/test_dataset_test/test/337.jpg', thrh = 0.3, circles = True, put_score=True)

# YOLOv5

In [None]:
%cd ..

In [None]:
def make_dataset_file():
    """Write the YOLOv5 dataset description to ``./data/dataset.yaml``.

    The file is assembled line by line so that every top-level key starts at
    column 0 and the class names form a properly nested mapping. Building it
    from an indented triple-quoted string leaves the top-level keys indented
    deeper than the ``names`` entries, which is not valid YAML.
    """
    category_names = ['human']
    yaml_lines = [
        'train: /kaggle/working/data/train/images',
        'val: /kaggle/working/data/val/images',
        '',
        '# number of classes',
        f'nc: {len(category_names)}',
        '',
        '# class names',
        'names:',
    ]
    # One "  <id>: <name>" entry per class, nested under `names:`.
    yaml_lines.extend(f'  {class_id}: {name}' for class_id, name in enumerate(category_names))

    with open('./data/dataset.yaml', 'w') as f:
        f.write('\n'.join(yaml_lines) + '\n')
        
make_dataset_file()
!cat ./data/dataset.yaml

In [None]:
!git clone https://github.com/ultralytics/yolov5.git
!pip install -r ./yolov5/requirements.txt

In [None]:
# YOLOv5l YOLOv5x YOLOv5s6 YOLOv5m6 YOLOv5l6 YOLOv5x6
# !cd yolov5 && python train.py --img 1280 --batch 16 --epochs 30 --data ../data/dataset.yaml --weights yolov5s.pt
# !cd yolov5 && python -m torch.distributed.run --nproc_per_node 2 train.py --img 1920 --batch 4 --epochs 90 --data ../data/dataset.yaml --weights yolov5l.pt --hyp hyp.scratch-med.yaml --device 0,1 --cache disk

# !cd yolov5 && python train.py --img 1920 --batch 16 --epochs 20 --data ../data/dataset.yaml --weights yolov5s.pt --hyp hyp.scratch-med.yaml

In [None]:
class YOLOv5(Enum):
    # Maps a YOLOv5 model size to the file name of its pretrained weights.
    yolov5s = 'yolov5s.pt'
    yolov5m = 'yolov5m.pt'
    yolov5l = 'yolov5l.pt'

class Yolo_hyp(Enum):
    # Maps an augmentation strength to a YOLOv5 hyper-parameter file name.
    low = 'hyp.scratch-low.yaml'
    med = 'hyp.scratch-med.yaml'
    high = 'hyp.scratch-high.yaml'
    
class YoloTrainingConfig:
    # Settings interpolated into the YOLOv5 training shell command below.
    resolution = 1920
    batch_size = 6
    epochs = 90
    # NOTE: the training command below hard-codes the same path and does not read this attribute.
    dataset_path = '../data/dataset.yaml'
    weights = YOLOv5.yolov5m.value
    hyp = Yolo_hyp.med.value

In [None]:
if MyConfig.make_training:
    !cd yolov5 && python -m torch.distributed.run --nproc_per_node 2 train.py --img $YoloTrainingConfig.resolution --batch $YoloTrainingConfig.batch_size \
    --epochs $YoloTrainingConfig.epochs --data ../data/dataset.yaml --weights $YoloTrainingConfig.weights --hyp $YoloTrainingConfig.hyp --device 0,1 --cache disk

YOLOv5 inference config

In [None]:
class YoloInferenceConfig:
    # Settings for the YOLOv5 detect.py command below. That command runs after
    # `cd yolov5`, so the paths it receives are relative to the yolov5 folder.
        
    yolov5_checkpoint_path = '../../input/detection-of-human-silhouettes-in-forest-weights/best_m_1920_90ep.pt'
    resolution = 1920
    model_arch = 'yolov5m'
    test_images_path = '../input/detection-of-human-silhouettes-in-forest-images/test_dataset_test/test'
    test_csv_path = ''
    sample_subm_path = '../input/detection-of-human-silhouettes-in-forest-images/omsk/sample_solution.csv'
    # IoU and confidence thresholds passed as --iou / --conf.
    iou = 0.45
    thresh = 0.25
    # Command-line flags stored as ready-to-interpolate strings.
    # NOTE: `agnostic` is defined here but not passed to the detect.py command below.
    agnostic = '--agnostic'
    augment = '--augment'
    half = '--half' # --half
    from_saved_weights = True
    # MyConfig.test_dir with one extra '../' prepended.
    source_path = f'../{MyConfig.test_dir}'

In [None]:
# last run
if MyConfig.make_training:
    exp_last = sorted(next(os.walk('./yolov5/runs/train/'))[1])[-1]
    !echo $exp_last

In [None]:
if MyConfig.make_inference:
    !cd yolov5 && python detect.py $YoloInferenceConfig.augment --img $YoloInferenceConfig.resolution --conf $YoloInferenceConfig.thresh \
    --iou $YoloInferenceConfig.iou --source $YoloInferenceConfig.source_path --weights $YoloInferenceConfig.yolov5_checkpoint_path --save-txt --save-conf $YoloInferenceConfig.half

In [None]:
if MyConfig.make_inference:
    # Choose the newest detect run by modification time. YOLOv5 names runs
    # exp, exp2, ..., exp10, and a lexicographic sort ranks 'exp9' after
    # 'exp10', so it would return a stale run once ten or more runs exist.
    detect_runs_root = './yolov5/runs/detect/'
    exp_detect_last = max(
        next(os.walk(detect_runs_root))[1],
        key=lambda run_dir: os.path.getmtime(os.path.join(detect_runs_root, run_dir)),
    )

In [None]:
# class YoloInferenceConfig():
    
#     def __init__(self):
#         self._agnostic  = True
    
#     @property
#     def agnostic(self):
#         if self._agnostic:
#             return '--agnostic'
#         else:
#             return ''
        
#     @agnostic.setter
#     def agnostic(self, value):
#         self._agnostic = value

# YoloConfig = YoloInferenceConfig()
# YoloConfig.agnostic

In [None]:
def get_solution_labels_df(path_to_txt_folder, sample_subm):
    """Parse YOLOv5 ``--save-txt --save-conf`` label files into a list of rows.

    Each label file is matched to its image by exact file stem. A substring
    match would let ``33.txt`` resolve to ``133.JPG``.

    Args:
        path_to_txt_folder: folder containing one ``<stem>.txt`` per image.
        sample_subm: DataFrame whose ``ID_img`` column lists the image file names.

    Returns:
        List of rows ``[img_name, cls, conf, xmin, xmax, ymin, ymax,
        center_x, center_y, width, height]`` with coordinates as read from the
        files (YOLO-normalised).

    Raises:
        KeyError: if a label file has no matching image in ``sample_subm``.
    """
    stem_to_img = {os.path.splitext(name)[0]: name for name in sample_subm['ID_img']}

    simple_solution = []
    # Sorted so the row order does not depend on the filesystem listing order.
    for detection_file in sorted(os.listdir(path_to_txt_folder)):
        stem, ext = os.path.splitext(detection_file)
        if ext != '.txt':
            continue
        if stem not in stem_to_img:
            raise KeyError(f'No image in sample_subm matches label file {detection_file!r}')
        img_name = stem_to_img[stem]

        with open(os.path.join(path_to_txt_folder, detection_file), 'r') as f:
            lines = [line for line in f.read().split('\n') if line != '']

        for line in lines:
            # Line layout: class x_center y_center width height confidence
            val = [float(i) for i in line.split()]
            cls, xywh, conf = val[0], val[1:5], val[5]
            center_x, center_y, width, height = xywh
            xmin = center_x - (width / 2)
            xmax = center_x + (width / 2)
            ymin = center_y - (height / 2)
            ymax = center_y + (height / 2)
            simple_solution.append([img_name, cls, conf, xmin, xmax, ymin, ymax, center_x, center_y, width, height])
    return simple_solution

In [None]:
test_predicts = get_solution_labels_df(f'./yolov5/runs/detect/{exp_detect_last}/labels/', MyConfig.sample_subm)
test_predicts = pd.DataFrame(test_predicts, columns=['ID_image', 'label', 'Conf', 'XMin', 'XMax', 'YMin', 'YMax', 'center_x', 'center_y', 'width', 'height'])

In [None]:
test_predicts = test_predicts[test_predicts.Conf > 0.44]
test_predicts.head(5)

In [None]:
len(test_predicts), len(test_predicts['ID_image'].unique())

In [None]:
detect_img_path = f'./yolov5/runs/detect/{exp_detect_last}'
uniq_imgs_list = test_predicts['ID_image'].unique()
img_path = f'{detect_img_path}/{uniq_imgs_list[5]}'
img = cv2.imread(img_path)
plt.figure(figsize=(30, 20))
plt.imshow(img)

## WBF

In [None]:
# https://github.com/ZFTurbo/Weighted-Boxes-Fusion

!pip install ensemble-boxes

In [None]:
# from ensemble_boxes import *

# boxes_list = test_predicts[['XMin', 'YMin', 'XMax', 'YMax']].values

# scores_list = test_predicts['Conf'].values
# labels_list = test_predicts['label'].values
# # weights = [2, 1]

# iou_thr = 0.5
# skip_box_thr = 0.0001
# sigma = 0.1
# thresh = 0.44

# # Merge boxes for single model predictions
# boxes, scores, labels = weighted_boxes_fusion([boxes_list], [scores_list], [labels_list], weights=None, iou_thr=iou_thr)

# boxes, scores, labels = nms(boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr)
# boxes, scores, labels = soft_nms(boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr, sigma=sigma, thresh=skip_box_thr)
# boxes, scores, labels = non_maximum_weighted(boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
# boxes, scores, labels = weighted_boxes_fusion(boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)

In [None]:
# def show_image(im):
#     plt.imshow(im.astype(np.uint8))
    
# def gen_color_list(model_num, labels_num):
#     color_list = np.zeros((model_num, labels_num, 3))
#     colors_to_use = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (0, 255, 255), (255, 0, 255), (255, 255, 0), (0, 0, 0)]
#     total = 0
#     for i in range(model_num):
#         for j in range(labels_num):
#             color_list[i, j, :] = colors_to_use[total]
#             total = (total + 1) % len(colors_to_use)
#     return color_list

# def show_boxes(boxes_list, scores_list, labels_list, image_size=800):
#     thickness = 5
#     color_list = gen_color_list(len(boxes_list), len(np.unique(labels_list)))
#     image = np.zeros((image_size, image_size, 3), dtype=np.uint8)
#     image[...] = 255
#     for i in range(len(boxes_list)):
#         for j in range(len(boxes_list[i])):
#             x1 = int(image_size * boxes_list[i][j][0])
#             y1 = int(image_size * boxes_list[i][j][1])
#             x2 = int(image_size * boxes_list[i][j][2])
#             y2 = int(image_size * boxes_list[i][j][3])
#             lbl = labels_list[i][j]
#             cv2.rectangle(image, (x1, y1), (x2, y2), color_list[i][lbl], int(thickness * scores_list[i][j]))
#     show_image(image)

In [None]:
#  show_boxes([boxes], [scores], [labels.astype(np.int32)])

#### WBF

In [None]:
from ensemble_boxes import *

def wbf_process(df, coord_cols: "List[str]"):
    """Fuse overlapping boxes per image with Weighted Boxes Fusion.

    Args:
        df: predictions with ``ID_image``, ``Conf``, ``label`` and the columns
            named in ``coord_cols``.
        coord_cols: names of the four coordinate columns, in
            ``[xmin, ymin, xmax, ymax]`` order.

    Returns:
        DataFrame with columns ``Xmin, Ymin, Xmax, Ymax, ID_image, Conf`` and
        one row per fused box. It is empty, with the same columns, when no box
        survives.
    """
    iou_thr = 0.2 # 0.2: 0.59850
    skip_box_thr = 0.44 # 0.44

    fused_rows = []
    # Grouping by a scalar key yields the plain image name as the group key.
    for image_id, group in df.groupby('ID_image'):
        # Use the group's own rows; re-selecting by index label would pull in
        # extra rows if the index were not unique.
        boxes, scores, _labels = weighted_boxes_fusion(
            [group[coord_cols].values],
            [group['Conf'].values],
            [group['label'].values],
            weights=None, iou_thr=iou_thr, skip_box_thr=skip_box_thr)

        # Report images where fusion changed the number of boxes.
        if len(group) != len(boxes):
            print(len(boxes))

        for box, score in zip(boxes, scores):
            fused_rows.append((box[0], box[1], box[2], box[3], image_id, score))

    # Building from records keeps the expected columns even with zero rows.
    return pd.DataFrame(fused_rows, columns=['Xmin', 'Ymin', 'Xmax', 'Ymax', 'ID_image', 'Conf'])

test_predicts_ = wbf_process(test_predicts, ['XMin', 'YMin', 'XMax', 'YMax'])
test_predicts_crcnn_wbf = wbf_process(test_predicts_crcnn, ['Xmin', 'Ymin', 'Xmax', 'Ymax'])

In [None]:
len(test_predicts_crcnn_wbf), len(test_predicts_crcnn_wbf['ID_image'].unique())

In [None]:
len(test_predicts_), len(test_predicts_['ID_image'].unique())

In [None]:
def add_c_h_w(df: pd.DataFrame) -> None:
    """Add ``center_x``, ``center_y``, ``r``, ``width`` and ``height`` columns in place.

    All values derive from the ``Xmin/Xmax/Ymin/Ymax`` columns; ``r`` is half
    the box width.
    """
    x_lo, x_hi = df['Xmin'], df['Xmax']
    y_lo, y_hi = df['Ymin'], df['Ymax']

    derived_columns = (
        ('center_x', (x_lo + x_hi) / 2),
        ('center_y', (y_lo + y_hi) / 2),
        ('r', (x_hi - x_lo) / 2),
        ('width', x_hi - x_lo),
        ('height', y_hi - y_lo),
    )
    for column_name, values in derived_columns:
        df[column_name] = values
    
add_c_h_w(test_predicts_)
add_c_h_w(test_predicts_crcnn_wbf)

In [None]:
def denorm_coords(df: pd.DataFrame) -> None:
    """Convert normalised centres / widths to integer pixel circles, in place.

    Adds ``cx`` and ``cy`` and overwrites ``r`` with integer pixel values;
    ``r`` is half the box width in pixels. Each test image is read once, not
    once per box, and an empty frame still gets the three columns.

    Args:
        df: frame with ``ID_image``, ``center_x``, ``center_y`` and ``width``
            columns holding values normalised by the image size.

    Raises:
        FileNotFoundError: if a referenced test image cannot be read.
    """
    heights, widths = {}, {}
    for img_name in df['ID_image'].unique():
        img_path = f'{MyConfig.test_dir}/{img_name}'
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Cannot read image: {img_path}')
        heights[img_name], widths[img_name] = img.shape[0], img.shape[1]

    img_h = df['ID_image'].map(heights).astype(float)
    img_w = df['ID_image'].map(widths).astype(float)

    # astype(int) truncates toward zero, like int() on a scalar.
    df['cx'] = (df['center_x'] * img_w).astype(int)
    df['cy'] = (df['center_y'] * img_h).astype(int)
    df['r'] = (df['width'] * img_w / 2).astype(int)

denorm_coords(test_predicts_)
denorm_coords(test_predicts_crcnn_wbf)

In [None]:
def make_subm_format(df: pd.DataFrame, sample_subm) -> None:
    """Fill ``sample_subm['region_shape']`` in place from the predictions in ``df``.

    For every row of ``sample_subm`` the matching detections, sorted by
    ``cx`` then ``cy``, are stored as a list of ``{"cx":…, "cy":…, "r":…}``
    strings. Images without detections get ``0``.

    Args:
        df: predictions with ``ID_image``, ``cx``, ``cy`` and ``r`` columns.
        sample_subm: submission frame with ``ID_img`` and an object-dtype
            ``region_shape`` column. Only this frame is iterated and modified.
    """
    for i, row in sample_subm.iterrows():
        detections = df[df.ID_image == row["ID_img"]]
        if len(detections) > 0:
            ordered = detections.sort_values(by=['cx', 'cy'])
            circle_coords = [
                f'{{"cx":{cx}, "cy":{cy}, "r":{int(r)}}}'
                for cx, cy, r in zip(ordered['cx'], ordered['cy'], ordered['r'])
            ]
            sample_subm.at[i, 'region_shape'] = circle_coords
        else:
            sample_subm.at[i, 'region_shape'] = 0
            
# YOLOv5 + WBF predictions are written into the shared submission template.
make_subm_format(test_predicts_, MyConfig.sample_subm)

# Copy the template for the Cascade R-CNN submission (taken after the YOLOv5
# call above has filled region_shape).
crnn_sample_sumb = MyConfig.sample_subm.copy()

# Cascade R-CNN + WBF predictions go into the copy.
make_subm_format(test_predicts_crcnn_wbf, crnn_sample_sumb)

In [None]:
MyConfig.sample_subm[MyConfig.sample_subm.region_shape != 0].head(20)

In [None]:
crnn_sample_sumb[crnn_sample_sumb.region_shape != 0].head(20)

In [None]:
def vis_img_with_pred(path: str, sample_subm: pd.DataFrame):
    """Plot an image with the circles stored for it in ``sample_subm``.

    The submission row is found from the image file name, so the function
    depends only on its arguments and not on a global row index.

    Args:
        path: path to the image; its base name must appear in ``ID_img``.
        sample_subm: frame with ``ID_img`` and ``region_shape`` columns, where
            ``region_shape`` is ``0`` or a list of ``{"cx":…, "cy":…, "r":…}`` strings.

    Raises:
        ValueError: if the image has no row in ``sample_subm``.
    """
    img = cv2.imread(path)

    rows = sample_subm[sample_subm['ID_img'] == os.path.basename(path)]
    if len(rows) == 0:
        raise ValueError(f'{os.path.basename(path)} is not listed in sample_subm')

    # str() of a list of JSON strings becomes valid JSON once the quotes are stripped.
    coords_str = str(rows.iloc[0]['region_shape']).replace("'", "")
    circle_coords = json.loads(coords_str)
    # A region_shape of 0 means "no detections" and parses to a bare number.
    if not isinstance(circle_coords, list):
        circle_coords = []

    for item in circle_coords:
        cx = item['cx']
        cy = item['cy']
        # The radius is halved for display only.
        r = int(item['r'] // 2) # old: 1.5  r = item['r']

        print(cx, cy, r)

        img = cv2.circle(img, (cx, cy), r, (255,0,0), 2)

    plt.figure(figsize=(30, 20))
    plt.imshow(img)

index = 503
img_path = f'{MyConfig.test_dir}/{MyConfig.sample_subm.iloc[index]["ID_img"]}'
vis_img_with_pred(img_path, sample_subm)

In [220]:
csv_subm_file_name = './yolov5m_1920_90ep_0_44_wbf.csv'
csv_subm_file_name_crnn = './crcnn_wbf.csv'
MyConfig.sample_subm.to_csv(csv_subm_file_name, index=False)
crnn_sample_sumb.to_csv(csv_subm_file_name_crnn, index=False)

display(FileLink(csv_subm_file_name))
FileLink(csv_subm_file_name_crnn)