# Выбор архитектуры модели для сегментации типов ландшафта по данным аэрофотосъемки

## Содержание

* [Задача](#Задача)
* [Исследуемые параметры](#Исследуемые-параметры)
* [Измеряемые характеристики](#Измеряемые-характеристики)
* [Описание исследования](#Описание-исследования)
* [Результат исследования](#Результат-исследования)
* [Вывод](#Вывод)

## Задача

Изучить влияние параметров UNet на качество модели и выбрать оптимальные значения.

## Исследуемые параметры

Количество фильров в блоках.

## Измеряемые характеристики

Метрика IoU на тестовой части датасета.

## Описание исследования

Импорты библиотек и вспомогательных скриптов

In [1]:
import sys
sys.path.append('../')

from datetime import datetime
from functools import partial

import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf

from srr.batchflow import FilesIndex, Dataset, Pipeline, F, V, B, C, R, P
from srr.batchflow.models.tf import UNet
from srr.batchflow.research import Research, Results, Option, Grid

from srr import AerialBatch
from srr.core.utils import get_origs, ce_dice_loss, make_mask

Вспомогательная функция для работы с масками:

In [2]:
# Mask classes 
# 0 Unknown
# 1 Water
# 2 Forest land
# 3 Urban land
# 5 Rangeland
# 6 Agriculture land
# 7 Barren land

make_mask = partial(make_mask, classes=(0,1,2,3,5,6,7))

Определим функцию потерь:

Задание датасета

In [3]:
ind = FilesIndex(path='../ignore/deepglobe/train/*.jpg')
ads = Dataset(ind, AerialBatch)
ads.split(0.8, shuffle=False)

Задание модели

In [4]:
model_config = {
    'initial_block/inputs': 'images',
    'inputs': dict(images={'shape': (256,256,3)}, 
                   masks={'name':'targets',
                          'shape': (256,256,7)}),
    'filters': C('filters'),
    'head/num_classes': 7,
    'loss': ce_dice_loss,
    'optimizer': 'Adam',
    'output': ['proba']
}

Описание пайплайнов для обучения и оценки моделей

In [5]:
n_reps = 4
batch_size = 8
n_epochs = 200
n_iters = int(n_epochs * (len(ads.train) / batch_size))
iters_to_test = 1500
itt = "%{}".format(iters_to_test)

print("Total number of iterations: {}.\nTest after each {} iterations.".format(n_iters, iters_to_test))

folder = '../ignore/research/UNet_segmentation_result_' + 'test'#datetime.now().strftime(format='%Y%m%d%H%M')

Total number of iterations: 16050.
Test after each 1500 iterations.


In [6]:
crop_shape = (256, 256)

train_template = (
      Pipeline()
      .load(ind, fmt='image', dst='images')
      .load(ind, fmt='mask', dst='masks')
      .resize(size=(1224, 1224), src=['images', 'masks'], dst=['images', 'masks'])
      .apply_transform(get_origs, crop_shape, 1., src='masks', dst='origs')
      .crop(shape=crop_shape, src=[('images', 'origs'), ('masks', 'origs')], dst=['images', 'masks'])
      .rotate(P(R('randint', 0, 180)), src=['images', 'masks'], dst=['images', 'masks'], p=0.5)
      .flip('lr', src=['images', 'masks'], dst=['images', 'masks'], p=0.5)
      .enhance(factor=P(R('uniform', 0.5, 1.5)))
      .pil_convert(src='images', dst='images', p=C('grayscale'))
      .to_array(src=['images', 'masks'], dst=['images', 'masks'])
      .apply_transform(make_mask, src='masks', dst='masks')
      .init_variable('loss', init_on_each_run=list)
      .init_model('dynamic', UNet, 'unet', model_config)
      .train_model('unet', images=B('images'), targets=B('masks'),
                      fetches='loss', save_to=V('loss'), mode='w')
).run(batch_size, n_epochs=n_epochs, shuffle=True, lazy=True, drop_last=True)

test_template = (
      Pipeline()    
      .load(ind, fmt='image', dst='images')
      .load(ind, fmt='mask', dst='masks')
      .resize(size=(1224, 1224), src=['images', 'masks'], dst=['images', 'masks'])
      .apply_transform(get_origs, crop_shape, 1., src='masks', dst='origs')
      .crop(shape=crop_shape, src=[('images', 'origs'), ('masks', 'origs')], dst=['images', 'masks'])
      .pil_convert(src='images', dst='images', p=C('grayscale'))
      .to_array(src=['images', 'masks'], dst=['images', 'masks'])
      .apply_transform(make_mask, src='masks', dst='masks')
      .init_variable('predictions', init_on_each_run=list)
      .init_variable('metrics', init_on_each_run=None)
      .import_model('unet', C('import_from'))
      .predict_model('unet', images=B('images'), targets=B('masks'),
                   fetches=['proba'], save_to=[V('predictions')], mode='w')
      .gather_metrics('segmentation', axis=-1, targets=B('masks'), predictions=V('predictions'),
                      fmt='proba', save_to=V('metrics'), mode='u')
).run(batch_size, n_epochs=1, lazy=True)

Описание варьируемых параметров

In [7]:
gray = Option('grayscale', [0, 1])

filter_opts = Option('filters', [[8, 16, 32],
#                                  [32, 64, 128], [128, 256, 512],
#                                  [16, 32, 64, 128], [32, 64, 128, 256], [64, 128, 256, 512],
#                                  [8, 16, 32, 64, 128], [32, 64, 128, 256, 512],
                                 [8, 16, 32, 64, 128, 256, 512]])

opts = filter_opts * gray

Описание параметров эксперимента

Описание эксперимента

In [8]:
train_ppl = (train_template << ads.train)
test_ppl = (test_template << ads.test)

research = (Research()
            .pipeline(train_ppl, name='train', variables='loss')
            .pipeline(test_ppl, variables='metrics', execute=itt, dump=itt,
                      name='test', run=True, import_from='train')
            .grid(opts))

In [9]:
research.run(n_reps=n_reps, n_iters=n_iters, workers=8, name=folder, progress_bar=True, gpu=[0,1,2,3,4,5,6,7])

Research ../ignore/research/UNet_segmentation_result_test is starting...


  0%|          | 0/256800 [00:00<?, ?it/s]

Distributor has 16 jobs with 16050 iterations. Totally: 256800


100%|██████████| 256800/256800 [00:09<00:00, 27168.45it/s]


<srr.batchflow.research.research.Research at 0x7f416501ca90>

## Результаты исследования

In [None]:
# folder = '../ignore/research/UNet_segmentation_result_201903291534/'

In [None]:
df = Results(path=folder).load(use_alias=True, cv=['None'])

In [None]:
for conf in df.config.unique():
    print("Model configuration: \n{}".format(conf))
    for rep in df.repetition.unique():
        condition = (df.name=='test')&(df.config==conf)&(df.repetition==rep)
        values = df[condition]['metrics'].apply(lambda x: x.evaluate('iou')).values
        iters = np.arange(len(values)) * iters_to_test
        plt.plot(iters, values)
        plt.grid(True)
    plt.show()

In [None]:
for conf in df.config.unique():
    print("Model configuration: \n{}".format(conf))
    for rep in df.repetition.unique():
        condition = (df.name=='train')&(df.config==conf)&(df.repetition==rep)
        loss_ep = np.array(np.split(df[condition]['loss'], n_epochs))
        loss_mean = np.mean(loss_ep, axis=-1)
        plt.plot(loss_mean)
        plt.grid(True)
    plt.show()

In [None]:
for conf in df.config.unique():
    print("Model configuration: \n{}".format(conf))
    values = []
    for rep in df.repetition.unique():
        condition = (df.name=='test')&(df.config==conf)&(df.repetition==rep)
        values.append(df[condition]['metrics'].apply(lambda x: x.evaluate('iou')).values)
        iters = np.arange(len(values[0])) * iters_to_test
    values = np.array(values)
    plt.plot(iters, np.mean(values, axis=0))
    plt.grid(True)
    plt.show()

In [None]:
for conf in df.config.unique():
    print("Model configuration: \n{}".format(conf))
    loss_mean = []
    for rep in df.repetition.unique():
        condition = (df.name=='train')&(df.config==conf)&(df.repetition==rep)
        loss_ep = np.array(np.split(df[condition]['loss'], n_epochs))
        loss_mean.append(np.mean(loss_ep, axis=-1))
    loss_mean = np.mean(loss_mean, 0)
    plt.plot(loss_mean)
    plt.grid(True)
    plt.show()