## Подготовка данных для обучения YOLO

Возьмем только изображения с людьми, будем их аугментировать и вырезать из них фрагменты так, чтобы в каждом фрагменте оставался как минимум один человек.

Создадим для каждого исходного изображения по 200 тренировочных и 20 валидационных вариантов изображений.

Размер изображения приведем к 320x240 (для ускорения обучения).

*При планировании размеров изображения не был учтен тот факт, что YOLO требуются изображения с размерами, кратными 64.*

In [1]:
import json
from pathlib import Path

import cv2
import pandas as pd
import albumentations as A


# Widen the column display so the long region_shape strings are shown
# up to 160 characters before being truncated.
pd.set_option('display.max_colwidth', 160)

# Load the annotation table and force an integer dtype on the
# per-image region counter.
df_src = pd.read_csv('train.csv')
region_counts = df_src['count_region'].values
df_src['count_region'] = region_counts.astype(int)

# Preview the first ten rows that have at least one annotated region.
has_regions = df_src['count_region'] > 0
df_src.loc[has_regions].head(10)

Unnamed: 0,ID_img,count_region,region_shape
8,3444.jpg,1,"['{""cx"":2259,""cy"":391,""r"":64}']"
217,3653.JPG,1,"['{""cx"":2719,""cy"":2097,""r"":75}']"
254,3690.JPG,1,"['{""cx"":2914,""cy"":1693,""r"":90}']"
399,3835.JPG,2,"['{""cx"":2549,""cy"":2329,""r"":80}', '{""cx"":2437,""cy"":2321,""r"":74}']"
511,3947.JPG,1,"['{""cx"":3130,""cy"":1370,""r"":134}']"
717,4153.jpg,1,"['{""cx"":3731,""cy"":3049,""r"":75}']"
833,4269.JPG,6,"['{""cx"":3575,""cy"":1657,""r"":98}', '{""cx"":3284,""cy"":1706,""r"":81}', '{""cx"":4260,""cy"":1611,""r"":97}', '{""cx"":3350,""cy"":1632,""r"":60}', '{""cx"":3475,""cy"":1586,""r"":7..."
855,4291.JPG,2,"['{""cx"":3467,""cy"":1740,""r"":77}', '{""cx"":3587,""cy"":1785,""r"":79}']"
1029,4465.JPG,2,"['{""cx"":2735,""cy"":1711,""r"":102}', '{""cx"":2430,""cy"":1926,""r"":107}']"
1168,4604.JPG,2,"['{""cx"":3276,""cy"":1619,""r"":130}', '{""cx"":3114,""cy"":1611,""r"":133}']"


In [2]:
# Distribution of the number of annotated regions per image.
df_src['count_region'].value_counts()

0     5104
2       31
1       16
4        3
5        3
3        3
6        1
10       1
Name: count_region, dtype: int64

In [3]:
%%time

# Size of every generated sample. The crop transform and the label
# normalisation must agree on these numbers, so they are defined once.
IMG_WIDTH, IMG_HEIGHT = 320, 240
SRC_DIR = Path('train')
OUT_DIR = Path('omsk_320')


def _parse_keypoints(region_shape):
    """Convert a ``region_shape`` cell into a list of ``(cx, cy, r)`` int tuples.

    The cell holds a Python-style list of single-quoted JSON objects; dropping
    the single quotes turns it into a JSON array of objects. The result is
    ordered by ``(cx, cy)``.
    """
    regions = json.loads(region_shape.replace("'", ""))
    regions.sort(key=lambda reg: (int(reg['cx']), int(reg['cy'])))
    return [(int(reg['cx']), int(reg['cy']), int(reg['r'])) for reg in regions]


def _yolo_label(keypoint):
    """Format one transformed ``(x, y, s)`` keypoint as a YOLO label line.

    The line is ``class x_center y_center width height`` with class 0,
    coordinates divided by the sample size and a square box of side ``2 * s``.
    Values are not clipped, so a box centred near the border may extend
    past the image.
    """
    x, y, s = keypoint[0], keypoint[1], keypoint[2]
    return (f'0 {x/IMG_WIDTH:g} {y/IMG_HEIGHT:g} '
            f'{2 * s/IMG_WIDTH:g} {2 * s/IMG_HEIGHT:g}')


def _generate_split(df, transform, split, per_image, start_index):
    """Write ``per_image`` augmented samples for every row of ``df``.

    Args:
        df: rows to process; ``ID_img`` and ``region_shape`` columns are read.
        transform: albumentations pipeline called with ``image`` and
            ``keypoints``.
        split: sub-directory name under ``images/`` and ``labels/``.
        per_image: number of samples to keep for each source image.
        start_index: number used for the first written file.

    Returns:
        The index following the last written file.

    Raises:
        FileNotFoundError: a source image could not be read.
        OSError: an output image could not be written.
    """
    img_dir = OUT_DIR / 'images' / split
    lbl_dir = OUT_DIR / 'labels' / split
    # cv2.imwrite only reports a missing directory through its return value,
    # so make sure both targets exist before generating anything.
    img_dir.mkdir(parents=True, exist_ok=True)
    lbl_dir.mkdir(parents=True, exist_ok=True)

    index = start_index
    for _, row in df.iterrows():
        src_path = SRC_DIR / row['ID_img']
        bgr = cv2.imread(str(src_path))
        if bgr is None:
            # imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'cannot read image: {src_path}')
        # OpenCV loads BGR; the pipeline is fed RGB.
        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        keypoints = _parse_keypoints(row.region_shape)

        kept = 0
        # Retry until enough crops still contain at least one keypoint;
        # crops in which every keypoint was removed are discarded.
        while kept < per_image:
            result = transform(image=img, keypoints=keypoints)
            visible = result['keypoints']
            if len(visible) == 0:
                continue
            kept += 1

            stem = f'{index:05d}'
            out_path = img_dir / f'{stem}.png'
            out_bgr = cv2.cvtColor(result['image'], cv2.COLOR_RGB2BGR)
            if not cv2.imwrite(str(out_path), out_bgr):
                raise OSError(f'cannot write image: {out_path}')
            label_text = "\n".join(_yolo_label(kp) for kp in visible)
            (lbl_dir / f'{stem}.txt').write_text(label_text)
            index += 1
    return index


# Only images that contain at least one annotated region are used.
df_people = df_src[df_src.count_region > 0]

# Training pipeline: stronger geometric augmentation and looser crops.
trans = A.Compose([
    A.ShiftScaleRotate(shift_limit=0.05,
                       scale_limit=0.10,
                       rotate_limit=7.0,
                       p=0.3),
    A.Perspective(p=0.3),
    A.RandomRotate90(p=1.0),
    A.RandomResizedCrop(IMG_HEIGHT, IMG_WIDTH, scale=(0.5, 1.0)),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.ColorJitter(hue=0.05, p=1),
], keypoint_params=A.KeypointParams(format='xys', remove_invisible=True))

f_cnt = _generate_split(df_people, trans, 'train', 200, 0)

# Validation pipeline: milder augmentation and tighter crop scale.
trans = A.Compose([
    A.RandomRotate90(p=0.5),
    A.RandomResizedCrop(IMG_HEIGHT, IMG_WIDTH, scale=(0.8, 1.0)),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.ColorJitter(hue=0.05, p=1),
], keypoint_params=A.KeypointParams(format='xys', remove_invisible=True))

# Numbering continues from the training split, so file names in the two
# splits never collide.
f_cnt = _generate_split(df_people, trans, 'test', 20, f_cnt)

CPU times: user 55min 26s, sys: 23min 37s, total: 1h 19min 4s
Wall time: 28min 30s
