In [1]:
# Install the third-party packages. The extra flag and the redirect to /dev/null keep the notebook
# free of installer messages that have nothing to do with the solution.
# NOTE(review): no versions are pinned, so a fresh run may install different releases - confirm which versions were used.
!pip install segmentation_models --root-user-action=ignore > /dev/null
!pip install keras-unet-collection --root-user-action=ignore > /dev/null
!pip install albumentations --root-user-action=ignore > /dev/null

In [2]:
# Подключаем модули
import os
from random import seed, shuffle
import cv2
import keras.backend as K
import segmentation_models as sm
import tensorflow as tf
from albumentations import Compose, ShiftScaleRotate, HueSaturationValue, RandomGamma, Sharpen, Blur, HorizontalFlip, VerticalFlip, RandomBrightnessContrast, CLAHE, ImageCompression, MultiplicativeNoise
from keras.callbacks import EarlyStopping
from numpy import array, zeros, expand_dims, uint8
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from tqdm.keras import TqdmCallback
from keras.losses import binary_crossentropy
from pathlib import Path
import gc
import shutil

Segmentation Models: using `keras` framework.


In [3]:
# Global settings of the solution

# Height and width (in pixels) of one tile handled by the dataset builders and the prediction loop
IMG_HEIGHT, IMG_WIDTH = 512, 512
# Number of colour channels of a tile
IMG_CHANNELS = 3

# Seed used for every random number generator that this notebook seeds
SEED = 500

# Cut-off applied to predictions after they are scaled to 0..255:
# values >= THRESHOLD become 255, everything below becomes 0
THRESHOLD = 220

In [4]:
# Loss function: Dice loss + binary cross-entropy
def diceLoss(targetsPure, inputsPure):
    """Return the Dice loss (1 - Dice coefficient) of two tensors.

    Both tensors are flattened and cast to float32, so the coefficient is computed
    over all of their elements together.

    targetsPure: ground-truth tensor.
    inputsPure:  predicted tensor.
                 NOTE(review): presumably probabilities with the same number of elements as targetsPure - confirm.

    Returns a scalar tensor. The small constant added to numerator and denominator
    keeps the ratio defined when both tensors contain only zeros.
    """
    smooth = 1e-6

    flatTruth = tf.cast(K.flatten(targetsPure), tf.float32)
    flatPrediction = tf.cast(K.flatten(inputsPure), tf.float32)

    overlap = K.sum(flatTruth * flatPrediction)
    numerator = 2 * overlap + smooth
    denominator = K.sum(flatTruth) + K.sum(flatPrediction) + smooth

    return 1 - numerator / denominator


def bceDiceLoss(y_true, y_pred):
    """Return the combined loss: mean binary cross-entropy plus the Dice loss.

    y_true: ground-truth tensor.
    y_pred: predicted tensor.
    """
    crossEntropyTerm = K.mean(binary_crossentropy(y_true, y_pred))
    diceTerm = diceLoss(y_true, y_pred)
    return crossEntropyTerm + diceTerm

In [5]:
# All settings for training and for working with the models
MODEL_TAG = "resnext101"


def _buildClassSettings(name, classNumber, maxTrainImages, maxPercent,
                        scaleLimit = 0.3, rotateLimit = 45, shiftScaleRotateP = 0.3):
    """Build the settings dictionary of one segmentation class.

    The three classes share the architecture, optimizer, loss, metrics, batch size, epochs,
    early stopping and most of the augmentation pipeline; only the values passed in differ.

    name:              class name; it is used in the folder name of the saved weights.
    classNumber:       value written into the result image for pixels of this class.
    maxTrainImages:    upper bound on the number of mask files used for training (None = no bound).
    maxPercent:        a predicted tile mask is used only if its share of positive pixels is below this value.
    scaleLimit, rotateLimit, shiftScaleRotateP:
                       parameters of the ShiftScaleRotate augmentation.

    Every call creates its own model, optimizer, callback and augmentation objects.
    """
    return \
    {
        "classNumber": classNumber,
        # NOTE(review): the model is constructed here, i.e. before the later cell that calls
        # sm.set_framework('tf.keras') and seeds the random generators - confirm that this order is intended.
        'model': sm.Unet("resnext101", classes = 1, activation = 'sigmoid'),
        # True: train the model; False: only load the saved weights
        'train': True,
        # True: load the saved weights before training starts (the weights file has to exist already)
        "continue-train": True,
        "max-train-images": maxTrainImages,

        "optimizer": tf.keras.optimizers.Adam(learning_rate = 0.001),
        "loss": bceDiceLoss,
        "metrics": [sm.metrics.iou_score],
        # Path of the weights file, relative to the root folder of the solution
        "saved-model": "models-" + MODEL_TAG + "-" + name + "/unet.ckpt",
        "batch-size": 4,
        "epochs": 10,
        "callbacks": [EarlyStopping(monitor = 'val_iou_score', mode = 'max', patience = 4, verbose = 0, restore_best_weights = True)],

        "max-percent": maxPercent,

        # Augmentations applied to the training images and masks
        "transforms": Compose(
        [
            RandomGamma(gamma_limit = (80, 120), p = 0.3),
            Sharpen(p = 0.15),
            Blur(blur_limit = 3, p = 0.2),
            HorizontalFlip(p = 0.5),
            VerticalFlip(p = 0.5),
            RandomBrightnessContrast(p = 0.25),
            CLAHE(p = 0.15),
            ShiftScaleRotate(shift_limit = 0, scale_limit = scaleLimit, rotate_limit = rotateLimit, interpolation = 1, p = shiftScaleRotateP),
            ImageCompression(quality_lower = 60, quality_upper = 100, p = 0.25),
            MultiplicativeNoise(p=0.1)
        ])
    }


# One entry per class; the insertion order defines the channel index of the class in the prediction loop
classes = \
{
    "wall":   _buildClassSettings("wall",   classNumber = 1, maxTrainImages = 4000, maxPercent = 0.4),
    "window": _buildClassSettings("window", classNumber = 2, maxTrainImages = 3000, maxPercent = 0.1),
    # Doors use a stronger and more frequent ShiftScaleRotate augmentation than the other classes
    "door":   _buildClassSettings("door",   classNumber = 3, maxTrainImages = 2500, maxPercent = 0.1,
                                  scaleLimit = 0.4, rotateLimit = 60, shiftScaleRotateP = 0.7),
}

Downloading data from https://github.com/qubvel/classification_models/releases/download/0.0.1/resnext101_imagenet_1000_no_top.h5


In [6]:
# Initialise Keras (select the 'tf.keras' framework for segmentation_models) and set the seed
# NOTE(review): the models stored in `classes` are created in an earlier cell, i.e. before this
# framework switch and before seeding - confirm that this order is intended.
sm.set_framework('tf.keras')
tf.keras.utils.set_random_seed(SEED)
# Seed Python's `random` module, whose shuffle() is used when file lists are sampled.
# NOTE(review): TensorFlow documents that set_random_seed above already seeds `random`,
# so this call looks redundant but harmless - confirm.
seed(SEED)

In [7]:
# All folders and archive names of the solution
root = "/notebooks"

# Training tiles; trainOrLoadModel looks for one sub-folder per class in here
trainPath = os.path.join(root, "splitted_train_512")

# Full-size test images and the folder with their tiles
testPath, splittedTestPath = (os.path.join(root, entry) for entry in ("test", "splitted_test_512"))

# Preview images (input blended with the predicted masks): folder and archive name without extension
solutionPath, solutionFilename = (os.path.join(root, entry) for entry in ("solution", "solution_file"))

# Images holding the class numbers: folder and archive name without extension
resultPath, resultFilename = (os.path.join(root, entry) for entry in ("result", "result_file"))

In [8]:
# Utility function that fills the image and mask arrays from lists of files
def createSimpleDataset(startIndex, imagesList, masksList, images, masks, transforms):
    """Read every listed image (and its mask, if any) into preallocated arrays.

    Parameters:
        startIndex: row of `images` / `masks` that receives the first file; file number i goes to row startIndex + i.
        imagesList: dict mapping an image file name ("..._preview.png") to its full path.
        masksList:  dict mapping a mask file name ("....png") to its full path, or None when there are no masks.
        images:     array that is filled in place with the (padded and optionally augmented) images.
        masks:      array that is filled in place with the masks; it is not touched when masksList is None.
        transforms: callable invoked as transforms(image = ..., mask = ...) that returns a dict with the
                    keys "image" and "mask", or None to skip augmentation.

    Raises:
        OSError:  when OpenCV cannot read an image or a mask file.
        KeyError: when an image has no entry in masksList.

    Nothing is returned; the result is written into `images` and `masks`.
    """
    def readOrFail(path):
        # cv2.imread reports a failure by returning None instead of raising
        picture = cv2.imread(path)
        if picture is None:
            raise OSError("Cannot read image file: " + str(path))
        return picture

    def padToTile(picture):
        # Place a picture of another size into the top-left corner of a zero-filled IMG_HEIGHT x IMG_WIDTH tile
        height, width, channels = picture.shape
        if height == IMG_HEIGHT and width == IMG_WIDTH:
            return picture
        tile = zeros((IMG_HEIGHT, IMG_WIDTH, channels), dtype = uint8)
        tile[0:height, 0:width, :] = picture
        return tile

    for idImage, (filename, fullName) in enumerate(tqdm(imagesList.items())):
        if masksList is not None:
            mask = readOrFail(masksList[filename.replace("_preview.png", ".png")])
            # Only the first channel of the mask file is used; a trailing axis of size 1 is kept
            mask = expand_dims(mask[:, :, 0], axis = -1)
            # The mask is padded exactly like its image; otherwise it would neither line up with
            # the padded image nor fit into its row of `masks`
            mask = padToTile(mask)
        else:
            mask = None

        image = padToTile(readOrFail(fullName))

        if transforms is not None:
            augmented = transforms(image = array(image), mask = mask)
            image = augmented["image"]
            if mask is not None:
                mask = augmented["mask"]

        if mask is not None:
            masks[startIndex + idImage] = mask

        images[startIndex + idImage] = image

In [9]:
# Build the data set used for training
def createDataset(imagesList, masksList, transformsBasic, transformsMore, useMoreProportion = 0.0):
    """Build the image and mask arrays, optionally extended with additional augmented samples.

    Parameters:
        imagesList:        dict mapping an image file name to its full path.
        masksList:         dict mapping a mask file name to its full path, or None when there are no masks.
        transformsBasic:   augmentation applied to the base samples (None = no augmentation).
        transformsMore:    augmentation applied to the additional samples; None disables them.
        useMoreProportion: number of additional samples as a fraction of the number of images;
                           values above 1 are allowed.

    Returns:
        (images, masks): images is a uint8 array of shape (N, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS) and
        masks a bool array of shape (N, IMG_HEIGHT, IMG_WIDTH, 1), or None when masksList is None.
        N is the number of images plus int(number of images * useMoreProportion) additional samples.
    """
    dataLength = len(imagesList)
    wantsMore = transformsMore is not None and useMoreProportion > 0
    moreSize = int(float(dataLength) * useMoreProportion) if wantsMore else 0
    totalLength = dataLength + moreSize

    images = zeros((totalLength, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype = uint8)
    masks = zeros((totalLength, IMG_HEIGHT, IMG_WIDTH, 1), dtype = bool) if masksList is not None else None

    # Base samples: every image exactly once
    createSimpleDataset(startIndex = 0, imagesList = imagesList, masksList = masksList, images = images, masks = masks, transforms = transformsBasic)

    nextIndex = dataLength
    remaining = moreSize

    # While more additional samples are requested than there are images, add complete passes over
    # all images. A dict cannot hold a file name twice, so every pass is a separate call.
    while remaining > dataLength:
        createSimpleDataset(startIndex = nextIndex, imagesList = imagesList, masksList = masksList, images = images, masks = masks, transforms = transformsMore)
        nextIndex += dataLength
        remaining -= dataLength

    # The rest (at most one pass) is a random selection of images, kept in the order of imagesList
    if remaining > 0:
        imagesNames = list(imagesList.keys())
        shuffle(imagesNames)
        chosenNames = set(imagesNames[:remaining])
        moreImagesList = {filename: fullName for filename, fullName in imagesList.items() if filename in chosenNames}
        createSimpleDataset(startIndex = nextIndex, imagesList = moreImagesList, masksList = masksList, images = images, masks = masks, transforms = transformsMore)

    return images, masks

In [10]:
# Train, continue training, or load an already trained model, depending on the model settings
def trainOrLoadModel(className, classData):
    """Train the model of one class or load its saved weights.

    Parameters:
        className: name of the class; also the name of its sub-folder inside trainPath.
        classData: settings dictionary of the class (an entry of `classes`).

    When classData["train"] is false, the saved weights are loaded and nothing else happens.
    Otherwise the mask files of the class are listed, optionally limited to "max-train-images"
    randomly chosen files, split 95% / 5% into training and validation files, loaded into memory
    (augmentation is applied to the training part only), the model is compiled and fitted,
    and the weights are written to the "saved-model" path.

    Nothing is returned; the model stored in classData is updated in place.
    """
    model = classData['model']
    weightsPath = os.path.join(root, classData["saved-model"])

    if not classData["train"]:
        model.load_weights(weightsPath).expect_partial()
        K.clear_session()
        return

    print("\nTrain class:", className)
    classPath = os.path.join(trainPath, className)

    # os.listdir returns the names in arbitrary order; sorting them first makes the seeded
    # shuffle and the seeded split below select the same files on every run
    validFilenames = sorted(filename for filename in os.listdir(classPath)
                            if filename.endswith(".png") and not filename.endswith("_preview.png"))

    if classData["max-train-images"] is not None:
        shuffle(validFilenames)
        validFilenames = validFilenames[:classData["max-train-images"]]

    validTrain, validTest = train_test_split(validFilenames, test_size = 0.05, random_state = SEED)

    def describeFiles(maskNames):
        # Map the mask names and the names of the matching "_preview" images to their full paths
        maskFiles = {name: os.path.join(classPath, name) for name in maskNames}
        previewNames = [name.replace(".png", "_preview.png") for name in maskNames]
        imageFiles = {name: os.path.join(classPath, name) for name in previewNames}
        return imageFiles, maskFiles

    imagesFilesTrain, maskFilesTrain = describeFiles(validTrain)
    imagesFilesTest, maskFilesTest = describeFiles(validTest)

    X_train, y_train = createDataset(imagesList = imagesFilesTrain, masksList = maskFilesTrain,
                                     transformsBasic = classData["transforms"], transformsMore = None, useMoreProportion = 0)

    # The validation data is not augmented
    X_test, y_test = createDataset(imagesList = imagesFilesTest, masksList = maskFilesTest,
                                   transformsBasic = None, transformsMore = None, useMoreProportion = 0)

    if classData["continue-train"]:
        # Continue from the previously saved weights; the weights file has to exist
        model.load_weights(weightsPath).expect_partial()

    model.compile(classData["optimizer"], classData["loss"], classData["metrics"])

    gc.collect()

    model.fit(x=X_train, y=y_train, batch_size=classData["batch-size"], epochs=classData["epochs"], validation_data=(X_test, y_test), verbose=1,
              shuffle=True, callbacks=classData["callbacks"])

    # Release the training arrays before the weights are written
    del X_train
    del y_train

    gc.collect()

    model.save_weights(weightsPath)

    K.clear_session()

In [11]:
# Train, continue training, or load the model of every class, in the order of the `classes` dictionary
for className, classData in classes.items():
    trainOrLoadModel(className, classData)


Train class: wall


100%|██████████| 3800/3800 [00:57<00:00, 66.01it/s]
100%|██████████| 200/200 [00:02<00:00, 92.43it/s] 
2022-11-24 11:51:38.858211: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2988441600 exceeds 10% of free system memory.
2022-11-24 11:51:43.047913: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2988441600 exceeds 10% of free system memory.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

Train class: window


100%|██████████| 2850/2850 [00:46<00:00, 61.77it/s]
100%|██████████| 150/150 [00:01<00:00, 107.73it/s]
2022-11-24 13:47:04.308505: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2241331200 exceeds 10% of free system memory.
2022-11-24 13:47:06.750399: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2241331200 exceeds 10% of free system memory.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

Train class: door


100%|██████████| 2375/2375 [00:40<00:00, 58.24it/s]
100%|██████████| 125/125 [00:01<00:00, 97.92it/s]
2022-11-24 14:58:15.288943: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1867776000 exceeds 10% of free system memory.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


In [12]:
# Build a dictionary with the names of all test files; for each of them collect the list of its tile files
testFiles = { filename: [] for filename in os.listdir(testPath) if filename.endswith(".png") }

# The name of a tile file starts with the complete name of its test file. The prefix lookup below
# relies on the test file names being exactly this many characters long.
TEST_NAME_LENGTH = 40

for filename in os.listdir(splittedTestPath):
    if filename.endswith(".png") and filename[:TEST_NAME_LENGTH] in testFiles:
        testFiles[filename[:TEST_NAME_LENGTH]].append(filename)

In [13]:
# Create the folders for the solution files; folders that already exist are left as they are
os.makedirs(solutionPath, exist_ok = True)
os.makedirs(resultPath, exist_ok = True)

In [14]:
# Delete the files that are already in the solution folders (sub-folders are left untouched)
for folder in (solutionPath, resultPath):
    for file in Path(folder).glob("*"):
        if file.is_file():
            file.unlink()

In [15]:
# Build the solution: analyse the tiles of every test image, then assemble the result file
# and the preview file of the whole image
for testFile, splitFiles in tqdm(testFiles.items()):
    image = cv2.imread(os.path.join(testPath, testFile))
    imageHeight, imageWidth = image.shape[:2]

    # Full-size mask image with one channel per class, and the stack of all tiles of this test image
    solution = zeros((imageHeight, imageWidth, IMG_CHANNELS), dtype = uint8)
    splitImages = zeros((len(splitFiles), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype = uint8)

    for indexSplit, splitFile in enumerate(splitFiles):
        splitImages[indexSplit, :, :, :] = cv2.imread(os.path.join(splittedTestPath, splitFile))

    # Per-tile masks; the last axis is indexed by the position of the class in `classes`.
    # NOTE(review): that axis is sized with IMG_CHANNELS, which only matches because there are
    # exactly three classes - confirm before adding or removing a class.
    masks = zeros((len(splitFiles), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=uint8)

    for indexClass, (className, classData) in enumerate(classes.items()):
        # Get the prediction of the model, scale it to 0..255 and binarise it with THRESHOLD
        predictedMasks = (classData["model"].predict(splitImages, verbose = 0).squeeze(axis = 3) * 255).astype(uint8)
        predictedMasks[predictedMasks >= THRESHOLD] = 255
        predictedMasks[predictedMasks < THRESHOLD]  = 0

        for indexSplit in range(len(splitFiles)):
            mask = predictedMasks[indexSplit, :, :]
            # Share of the tile's pixels that were assigned to this class
            foundPercent = cv2.countNonZero(mask) / mask.size

            # A tile mask is used only when the class covers less than "max-percent" of the tile
            if foundPercent < classData["max-percent"]:
                masks[indexSplit, :, :, indexClass] = mask

                # Clear these pixels in the channels of all other classes, so a class processed
                # later overrides the classes processed before it
                for indexAnotherClass in range(len(classes)):
                    if indexClass != indexAnotherClass:
                        masks[indexSplit, :, :, indexAnotherClass][mask > 0] = 0

    for indexSplit, splitFile in enumerate(splitFiles):
        # The part of the tile file name between the first and the second dot has the form
        # "<text>_<x>_<y>"; x and y are used as the top-left position of the tile in the full image
        _, stringX, stringY = (splitFile.split(".")[1]).split("_")
        x, y = int(stringX), int(stringY)

        mask = masks[indexSplit, :, :, :]

        # If the small mask extends beyond the bottom edge of the large mask, the small mask is cropped
        if y + IMG_HEIGHT > imageHeight:
            maskHeight = imageHeight - y
            mask       = mask[0:maskHeight, :, :]
        else:
            maskHeight = IMG_HEIGHT

        # If the small mask extends beyond the right edge of the large mask, the small mask is cropped
        if x + IMG_WIDTH > imageWidth:
            maskWidth = imageWidth - x
            mask    = mask[:, 0:maskWidth, :]
        else:
            maskWidth = IMG_WIDTH

        # Put the small mask onto the large one
        solution[y:y + maskHeight, x:x + maskWidth] = mask


    # Result image: every pixel holds the "classNumber" of its class, 0 where no class was found
    result = zeros((imageHeight, imageWidth), dtype=uint8)
    for index, (className, classData) in enumerate(classes.items()):
        result[solution[:,:, index] == 255] = classData["classNumber"]

    # Preview: the darkened input image blended with the class masks.
    # NOTE(review): the blend is a float array whose values can exceed 255; presumably OpenCV
    # converts and saturates it to 8 bit when writing the PNG - confirm.
    cv2.imwrite(os.path.join(solutionPath, testFile), image * 0.5 + solution * 0.9, [cv2.IMWRITE_PNG_COMPRESSION, 9])
    cv2.imwrite(os.path.join(resultPath, testFile),   result,   [cv2.IMWRITE_PNG_COMPRESSION, 9])

100%|██████████| 1500/1500 [27:08<00:00,  1.09s/it]


In [16]:
# Pack all preview files and all result files into zip archives, so that they are easy to
# download and to upload to the leaderboard. make_archive appends ".zip" to the given base name;
# the path returned by the last call is displayed as the output of the cell.
shutil.make_archive(solutionFilename, "zip", solutionPath)
shutil.make_archive(resultFilename,   "zip", resultPath)

'/notebooks/result_file.zip'

In [17]:
# Pack the model folders into zip archives, so that they are easy to download.
# The folder names must match the folders used in the "saved-model" settings of `classes`.
shutil.make_archive(os.path.join(root, "models-" + MODEL_TAG + "-wall"),   "zip", os.path.join(root, "models-" + MODEL_TAG + "-wall"))
shutil.make_archive(os.path.join(root, "models-" + MODEL_TAG + "-window"), "zip", os.path.join(root, "models-" + MODEL_TAG + "-window"))
shutil.make_archive(os.path.join(root, "models-" + MODEL_TAG + "-door"),   "zip", os.path.join(root, "models-" + MODEL_TAG + "-door"))

'/notebooks/models-resnext101-door.zip'