In [1]:
import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import random
import os

  # NOTE(review): `yaml` is not imported and `f` is not defined in any visible
  # cell, and this line is indented at top level, so the cell cannot run as
  # written. It looks like a stray fragment -- confirm, then remove it or
  # restore the surrounding context.
  # NOTE(review): if this line is kept, yaml.load without an explicit Loader is
  # unsafe on untrusted input; presumably yaml.safe_load is what is wanted.
  data = yaml.load(f.read()) or {}


Функция, производящая преобразования:

In [2]:
def gen_background(dir_name, x_max, y_max):
    """Load a randomly chosen background image and fit it to the target size.

    One entry of ``dir_name`` is picked at random, read with OpenCV, converted
    from BGR to BGRA, resized to ``x_max`` by ``y_max`` pixels and finally
    passed through albumentations' ``Rotate`` transform with default settings.

    Args:
        dir_name: Directory that holds the background image files.
        x_max: Target width in pixels.
        y_max: Target height in pixels.

    Returns:
        The prepared background as a BGRA image array.

    Raises:
        ValueError: If ``dir_name`` contains no entries.
        OSError: If the chosen entry cannot be decoded as an image.
    """
    backgrounds = os.listdir(dir_name)
    if not backgrounds:
        # Fail with a readable message instead of an opaque randrange error.
        raise ValueError(f'no background images found in {dir_name!r}')
    bg_name = os.path.join(dir_name, random.choice(backgrounds))

    background = cv2.imread(bg_name)
    if background is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError(f'cannot read background image {bg_name!r}')

    background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA)
    # cv2.resize takes the size as (width, height).
    background = cv2.resize(background, (x_max, y_max))
    background = A.Rotate()(image=background)['image']

    return background

def transform_image(input_name, output_name, backgrounds_dir='backgrounds'):
    """Write a randomly augmented copy of an image that imitates a photo of it.

    Pipeline: random perspective warp, random transparent margin, compositing
    over a random background, a lighting overlay made from a heavily blurred
    copy, optional Gaussian blur, random darkening and a random tone curve.
    The result is written to ``output_name`` as a 3-channel image.

    Args:
        input_name: Path of the source image.
        output_name: Path the augmented image is written to.
        backgrounds_dir: Directory with background images; defaults to the
            ``'backgrounds'`` directory relative to the working directory.

    Raises:
        OSError: If the source image cannot be read or the result cannot be
            written (for example because the output directory is missing).
    """
    image = cv2.imread(input_name)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError(f'cannot read input image {input_name!r}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
    y_max, x_max, _ = image.shape

    background = gen_background(backgrounds_dir, x_max, y_max)

    # Source corners: top-left, top-right, bottom-left, bottom-right.
    pts1 = np.float32([[0, 0], [x_max, 0], [0, y_max], [x_max, y_max]])

    x_left_k = np.random.uniform(0, 0.2)
    x_right_k = np.random.uniform(0.8, 1)
    # Row range of the warped background that is kept when cropping below.
    bg_top = 0
    bg_bottom = y_max

    # perspective distortion
    if random.random() < 0.75:  # view from below, looking up
        y_top_left_k = np.random.uniform(0, 0.2)
        y_top_right_k = np.random.uniform(0, 0.2)
        pts2 = np.float32([[x_max * x_left_k, y_max * y_top_left_k], [x_max * x_right_k, y_max * y_top_right_k],
                           [0, y_max], [x_max, y_max]])
        bg_top = int(y_max * max(y_top_left_k, y_top_right_k))
    else:  # view from above, looking down
        y_bottom_left_k = np.random.uniform(0.8, 1)
        y_bottom_right_k = np.random.uniform(0.8, 1)
        pts2 = np.float32([[0, 0], [x_max, 0],
                           [x_max * x_left_k, y_max * y_bottom_left_k], [x_max * x_right_k, y_max * y_bottom_right_k]])
        bg_bottom = int(y_max * min(y_bottom_left_k, y_bottom_right_k))

    transform_matrix = cv2.getPerspectiveTransform(pts1, pts2)
    # BORDER_TRANSPARENT leaves destination pixels outside the warped quad
    # untouched, so warp into a zeroed buffer; otherwise those pixels (and the
    # alpha channel the mask below relies on) hold uninitialised memory.
    dst = cv2.warpPerspective(image, transform_matrix, (x_max, y_max),
                              dst=np.zeros_like(image), borderMode=cv2.BORDER_TRANSPARENT)

    # Random margin on each side; all four widths are fractions of the image width.
    top, bottom, left, right = (x_max * np.random.uniform(0.1, 0.2, size=4)).astype(int).tolist()
    # Pad with fully transparent black so the margin is filled by the background.
    dst = cv2.copyMakeBorder(dst, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0, 0))
    dst = cv2.resize(dst, (x_max, y_max))

    # add the background
    background = cv2.warpPerspective(background, transform_matrix, (x_max, y_max), borderMode=cv2.BORDER_REFLECT101)
    background = background[bg_top:bg_bottom, int(x_max * x_left_k):int(x_max * x_right_k)]
    background = cv2.resize(background, (x_max, y_max))

    # Blank the background wherever the foreground is fully opaque, then
    # combine the two layers with a saturating add.
    mask = dst[:, :, 3] == 255
    background[mask] = 0
    dst = cv2.add(dst, background)

    # lighting: add 30% of a heavily blurred copy of the frame (odd kernel size)
    scale = random.randint(50, 500) * 2 + 1
    lighting = cv2.GaussianBlur(dst, (scale, scale), 0)
    dst = cv2.addWeighted(dst, 1, lighting, 0.3, 0)

    # blur
    if random.random() < 0.8:
        scale_x = random.randint(0, 15) * 2 + 1
        scale_y = random.randint(0, 15) * 2 + 1
        dst = cv2.GaussianBlur(dst, (scale_x, scale_y), 0)

    # brightness
    dst = A.RandomBrightnessContrast(brightness_limit=(-0.3, 0), contrast_limit=0, p=0.7)(image=dst)['image']
    # The tone curve is applied to the colour channels only; alpha is dropped here.
    dst = A.RandomToneCurve(p=1.)(image=dst[:, :, 0:3])['image']

    if not cv2.imwrite(output_name, dst):
        # cv2.imwrite returns False instead of raising when it cannot write.
        raise OSError(f'cannot write output image {output_name!r}')

Применение ко всем изображениям:

In [3]:
# Generate several augmented variants of every source image.
INPUT_DIR = 'images'
OUTPUT_DIR = 'aug_images'
VARIANTS_PER_IMAGE = 10

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting keeps the number in
# each output file name tied to the same source image across runs.
for image_num, image_name in enumerate(sorted(os.listdir(INPUT_DIR)), start=1):
    for variant in range(1, VARIANTS_PER_IMAGE + 1):
        transform_image(
            os.path.join(INPUT_DIR, image_name),
            os.path.join(OUTPUT_DIR, f'{image_num}_{variant}.png'),
        )