## Create cutmix augmentation

### The resulting images look like this:

In [None]:
import cv2
import matplotlib.pyplot as plt


# Load two previously generated cutmix samples and show them side by side.
# NOTE(review): cv2.imread returns BGR channel order while imshow interprets
# 3-channel arrays as RGB; this is only invisible if the samples are grayscale.
example1, example2 = (
    cv2.imread(example_path, cv2.IMREAD_COLOR)
    for example_path in (
        "../input/cutmix-examples/1c60d7a7695d_c1a8710f13a5_b2a7f3d06a50_caa06f9a4057_cutmix.png",
        "../input/cutmix-examples/3b70c0fef171_6b2f2fab222f_a28407ce196e_559904fcd4a2_cutmix.png",
    )
)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(17, 17))
for axis, picture in zip(ax, (example1, example2)):
    axis.imshow(picture)
fig.tight_layout()

### You can mix different cell classes, or apply cutmix within a single class.

In [None]:
# imports
from typing import List

import pandas
import pandas as pd
import random
import numpy as np
import cv2
import os
import torch
import shutil

from pathlib import Path
from random import shuffle

from pandas import DataFrame
from tqdm import tqdm

In [None]:
# constants
# Frame size in pixels used to build the quadrant index lists and to slice the
# source images (assumes every input image has exactly this size — TODO confirm).
IMAGE_WIDTH = 704
IMAGE_HEIGHT = 520
# Cell-type labels; a label's position in this list is the index used to look
# up its entry in MIN_SIZES.
CLASSES = ['astro', 'cort', 'shsy5y']
# Per-class minimum number of mask pixels that must remain inside a quadrant
# for a clipped annotation to be kept (parallel to CLASSES).
MIN_SIZES = [150, 75, 75]

In [None]:
# helper functions
def get_all_files_in_folder(folder: Path, types: List) -> List[Path]:
    """Recursively collect paths under *folder* that match any glob in *types*.

    Args:
        folder: Root directory to search (searched recursively via ``rglob``).
        types: Glob patterns such as ``"*.png"``.

    Returns:
        All matching paths in ascending order. A path matched by more than one
        pattern appears once per matching pattern.
    """
    matches = [found for pattern in types for found in folder.rglob(pattern)]
    matches.sort()
    return matches


def seed_everything(seed: int) -> None:
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    and configures cuDNN for deterministic behaviour.

    Args:
        seed: Seed value applied to all generators.
    """
    random.seed(seed)
    # Only affects child processes; the hash seed of the running interpreter
    # is fixed at start-up and cannot be changed from here.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe without CUDA: the call is silently ignored when no GPU is present.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode autotunes kernels and may pick different algorithms from
    # run to run, so it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False


def recreate_folder(path):
    """Ensure *path* is an empty directory.

    An existing directory at *path* is removed together with its contents and
    then created again; missing parent directories are created as needed.

    Args:
        path: Directory location, as a string or ``Path``.
    """
    target = Path(path)
    # is_dir() is already False for a missing path, so no separate exists() check.
    if target.is_dir():
        shutil.rmtree(target)
    target.mkdir(parents=True, exist_ok=True)
    
# get the flat pixel indices belonging to each of the 4 image quadrants
def get_points_for_parts():
    """Build the flat pixel indices covered by each of the four image parts.

    Indices are 0-based offsets into an IMAGE_WIDTH * IMAGE_HEIGHT frame,
    generated as runs of IMAGE_WIDTH // 2 consecutive values followed by a gap
    of the same size. For even width and height this yields, in order, the
    top-left, top-right, bottom-left and bottom-right quadrants.

    Returns:
        A list of four lists of ints, one per part.
    """
    half_width = IMAGE_WIDTH // 2
    total = IMAGE_WIDTH * IMAGE_HEIGHT
    half_total = total // 2

    # (first index, exclusive upper bound) for each part.
    spans = [
        (0, half_total),
        (half_width, half_total),
        (half_total, total),
        (half_total + half_width, total),
    ]

    result = []
    for cursor, stop in spans:
        part = []
        while cursor < stop:
            # Take half a row, then jump over the other half.
            part.extend(range(cursor, cursor + half_width))
            cursor += half_width + half_width
        result.append(part)

    return result

In [None]:
def calculate_mask_for_current_part(annot, points, label_index) -> str:
    """Clip a run-length-encoded mask to the pixels of one image part.

    Args:
        annot: Run-length encoding as whitespace-separated
            ``start length start length ...`` integers.
        points: Pixel indices belonging to the part (any iterable of ints;
            a ``set``/``frozenset`` is used as-is without copying).
        label_index: Index into ``MIN_SIZES`` giving the minimum number of
            pixels the clipped mask must keep.

    Returns:
        The clipped run-length encoding in the same text format, or ``""``
        when fewer than ``MIN_SIZES[label_index]`` pixels remain.

    Raises:
        ValueError: If *annot* has an odd number of tokens.
    """
    tokens = [int(x) for x in annot.split()]
    if len(tokens) % 2:
        raise ValueError("annotation must contain start/length pairs")

    # Build the lookup set once per call instead of once per run.
    point_set = points if isinstance(points, (set, frozenset)) else set(points)

    # NOTE(review): run starts are compared to `points` as-is. If the
    # annotations are 1-based while `points` is 0-based, the clip is shifted by
    # one pixel at the part boundary — confirm the indexing convention.
    segments = []
    for start, length in zip(tokens[::2], tokens[1::2]):
        stop = start + length
        seg_start = None
        for pixel in range(start, stop):
            if pixel in point_set:
                if seg_start is None:
                    seg_start = pixel
            elif seg_start is not None:
                # The run left the part: close the current contiguous piece so
                # a run that re-enters later is not merged across the gap.
                segments.append((seg_start, pixel - seg_start))
                seg_start = None
        if seg_start is not None:
            segments.append((seg_start, stop - seg_start))

    size = sum(seg_len for _, seg_len in segments)
    if size < MIN_SIZES[label_index]:
        return ""

    # delete samples on borders
    # if annot_result != "" and annot_result != annot:
    #     annot_result = ""

    return " ".join(f"{seg_start} {seg_len}" for seg_start, seg_len in segments)

In [None]:
def create_augmented_image(df: DataFrame, input_images_path: str, output_images_path: str, points_parts) -> List:
    """Compose one cutmix image from four random source images and save it.

    Four image ids are drawn independently (so the same image may be drawn
    more than once). Part ``i`` of the output is taken from the ``i``-th drawn
    image, and that image's annotations are clipped to ``points_parts[i]``.
    The composed image is written as grayscale to
    ``<output_images_path>/images/<id1>_<id2>_<id3>_<id4>_cutmix.png``.

    Args:
        df: Table with at least ``id``, ``annotation`` and ``cell_type``
            columns; the label of an image is read from its first row.
        input_images_path: Directory containing ``<id>.png`` source images.
        output_images_path: Directory whose ``images`` subfolder receives the
            composed image.
        points_parts: Four collections of pixel indices, one per part.

    Returns:
        One row per kept annotation:
        ``[cutmix_id, annotation, IMAGE_WIDTH, IMAGE_HEIGHT, cell_type, '', '', '', '']``.

    Raises:
        ValueError: If *df* contains no image ids.
        FileNotFoundError: If a source image cannot be read.
        OSError: If the composed image cannot be written.
    """
    # First-appearance order is stable across runs, unlike list(set(...)),
    # whose order depends on string hashing and defeats seeding.
    all_images_ids = df["id"].drop_duplicates().tolist()
    if not all_images_ids:
        raise ValueError("df contains no image ids to sample from")

    half_height = IMAGE_HEIGHT // 2
    half_width = IMAGE_WIDTH // 2

    cutmix_images = []
    cutmix_images_ids = []
    annotations = {}
    for part_index in range(4):
        image_id = random.choice(all_images_ids)
        image_path = str(Path(input_images_path).joinpath(str(image_id))) + ".png"
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        cutmix_images_ids.append(image_id)
        cutmix_images.append(image)

        image_rows = df[df["id"] == image_id]
        label = image_rows["cell_type"].iloc[0]
        label_index = CLASSES.index(label)
        for annot in image_rows["annotation"]:
            mask_annot = calculate_mask_for_current_part(annot, points_parts[part_index], label_index)
            if mask_annot:
                annotations.setdefault(label, []).append(mask_annot)

    # Stitch the four parts together: top row, then bottom row.
    top = np.hstack((cutmix_images[0][:half_height, :half_width],
                     cutmix_images[1][:half_height, half_width:]))
    bottom = np.hstack((cutmix_images[2][half_height:, :half_width],
                        cutmix_images[3][half_height:, half_width:]))
    image = cv2.cvtColor(np.vstack((top, bottom)), cv2.COLOR_BGR2GRAY)

    cutmix_id = "_".join(cutmix_images_ids) + "_cutmix"
    output_file = str(Path(output_images_path).joinpath("images").joinpath(cutmix_id + ".png"))
    if not cv2.imwrite(output_file, image):
        # cv2.imwrite returns False (no exception) when the file is not saved.
        raise OSError(f"Could not write image: {output_file}")

    return [
        [cutmix_id, an, IMAGE_WIDTH, IMAGE_HEIGHT, key, '', '', '', '']
        for key, value in annotations.items()
        for an in value
    ]

In [None]:
# main
# Fix all RNG seeds before any sampling happens.
seed_everything(42)

input_images_path = "../input/sartorius-cell-instance-segmentation/train"
output_images_path = "/kaggle/working/cutmix"

# Start from empty output folders (the parent first, then its images subfolder).
for folder in (output_images_path, output_images_path + "/images"):
    recreate_folder(folder)

df = pd.read_csv("../input/sartorius-cell-instance-segmentation/train.csv")
# df = df[df["cell_type"] == "cort"] #cutmix only one class

# Pixel indices of the four parts, computed once and reused for every image.
points_parts = get_points_for_parts()

count = 3 # count of cutmix images
annot_result = []
for _ in tqdm(range(count)):
    annot_result.extend(
        create_augmented_image(df, input_images_path, output_images_path, points_parts)
    )

# create annotations
annotation_columns = [
    'id', 'annotation', 'width', 'height', 'cell_type',
    'plate_time', 'sample_date', 'sample_id', 'elapsed_timedelta',
]
df = pd.DataFrame(annot_result, columns=annotation_columns)
df.to_csv("/kaggle/working/cutmix/" + "images_aug.csv", index=False)
print("Done!")

### If you find it useful please upvote 👍