# Segmentation networks

In [1]:
import sys
from time import time
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm
import pandas as pd

sys.path.append('..')
sys.path.append('../task_02')

from dataset import Pipeline, DatasetIndex, Dataset, B, V

from dataset.opensets import MNIST
from dataset.models.tf import UNet
from noised_mnist import NoisedMnist                                          # Batch subclass with loading and noise actions
from plot_functions import plot_noised_image, plot_examples_highlighted       # plot functions to demonstrate result
from utils import count_parameters, smooth

In [2]:
# Global experiment settings shared by the cells below.
IMAGE_SIZE = 64     # side of the square network input; used in placeholder shapes and random_location()
MNIST_SIZE = 65000  # MNIST database size; not referenced in the visible part of this notebook
BATCH_SIZE = 16     # batch size for NN training (passed to demonstrate_model)
MAX_ITER = 1000     # maximum number of training iterations per run (passed to demonstrate_model)

In [3]:
# Noise settings; passed positionally to create_noise() when the loading template is built.
level = 1           # the highest level of noise; [0, 1]
n_fragments = 90    # number of noise fragments per image
size = 5            # size of noise fragment; 1, ..., 27
distr = 'uniform'   # distribution of fragments over the image; 'uniform' or 'normal'

In [4]:
# MNIST dataset built with NoisedMnist as its batch class; its .train and .test
# parts are bound to pipelines further down.
mnistset = MNIST(batch_class=NoisedMnist)

ExtractingExtractingExtractingExtracting    C:\Users\kozhevin\AppData\Local\Temp\train-labels-idx1-ubyte.gzC:\Users\kozhevin\AppData\Local\Temp\train-images-idx3-ubyte.gzC:\Users\kozhevin\AppData\Local\Temp\t10k-images-idx3-ubyte.gzC:\Users\kozhevin\AppData\Local\Temp\t10k-labels-idx1-ubyte.gz





In [5]:
# Description of the model inputs, keyed by the names used in the feed dicts.
# Images are single-channel float tensors of IMAGE_SIZE x IMAGE_SIZE.
images_placeholder = dict(shape=(IMAGE_SIZE, IMAGE_SIZE, 1),
                          type='float32',
                          name='reshaped_images')

# Masks are integer maps with two classes; the 'ohe' transform is presumably
# one-hot encoding applied by the model (confirm against the dataset library).
masks_placeholder = dict(shape=(IMAGE_SIZE, IMAGE_SIZE),
                         type='int32',
                         transform='ohe',
                         classes=2,
                         name='targets')

placeholders_config = dict(images=images_placeholder, masks=masks_placeholder)

Create the feed dicts. Each key is the name of a tensor in the TF graph, and each value is the batch component fed into it.

In [6]:
# Training and inference feed the same two inputs, but each gets its own dict
# (and its own B(...) expressions), so the two can be changed independently.
train_feed_dict = dict(images=B('images'), masks=B('masks'))

test_feed_dict = dict(images=B('images'), masks=B('masks'))

In [7]:
# Dataset-independent preprocessing template; it is bound to a dataset later with `<<`.
load_template = Pipeline()
load_template = load_template.random_location(IMAGE_SIZE)   # put MNIST at random location
load_template = load_template.make_masks()                  # create mask for MNIST image location
load_template = load_template.create_noise('mnist_noise', level, n_fragments, size, distr)
load_template = load_template.add_noise()

In [8]:
def metric(masks, predictions, data_format='channels_last', mode='mse'):
    """Score predicted foreground probabilities against ground-truth masks.

    Only class 1 of ``predictions`` is used: channel 1 along the last axis for
    ``'channels_last'``, along axis 1 for any other ``data_format``.

    Parameters
    ----------
    masks : array-like
        Ground-truth masks, one 2D map per item (the IoU branch reduces over axes 1 and 2).
    predictions : array-like
        4D array of per-class scores with at least two classes.
    data_format : str
        ``'channels_last'`` or anything else (treated as channels-first).
    mode : str
        ``'mse'`` - mean squared error between masks and class-1 scores;
        ``'iou'`` - ``1 - mean IoU`` with scores thresholded at 0.5.

    Returns
    -------
    float
        The selected error value; lower is better in both modes.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'mse'`` nor ``'iou'``.
    """
    # Pipeline variables may arrive as plain lists; tuple indexing below needs ndarrays.
    masks = np.asarray(masks)
    predictions = np.asarray(predictions)

    ind = np.index_exp[:, :, :, 1] if data_format == 'channels_last' else np.index_exp[:, 1, :, :]
    predictions = predictions[ind]

    if mode == 'mse':
        return np.mean((masks - predictions) ** 2)

    if mode == 'iou':
        predicted = predictions > 0.5
        intersection = np.sum(np.logical_and(predicted, masks), axis=(1, 2))
        union = np.sum(np.logical_or(predicted, masks), axis=(1, 2))
        # An empty union means both the mask and the prediction are empty, which is
        # a perfect match (IoU = 1) rather than the NaN produced by 0 / 0.
        empty = union == 0
        iou = np.where(empty, 1.0, intersection / np.where(empty, 1, union))
        return 1 - np.mean(iou)

    # Previously an unknown mode fell through and silently returned None.
    raise ValueError("Unknown mode {!r}: expected 'mse' or 'iou'".format(mode))

In [9]:
# Module-level values mirroring demonstrate_model's arguments. In the visible
# notebook nothing reads them: demonstrate_model receives its own arguments and
# `layout` is rebound by the loop over layouts.
model = UNet
layout = 'b'
max_iter = 10
batch_size = 16
times = 10

In [10]:
def demonstrate_model(model, layout, max_iter=100, batch_size=16, times=10):
    """Train a model ``times`` times and collect timing and quality statistics.

    Each run trains on shuffled batches from ``mnistset.train``. After every
    training step one shuffled batch of 50 test items is predicted and scored with
    ``metric`` (default mode, i.e. MSE). A run stops early once more than 80% of
    the most recent ``window`` (20) scores are below ``stop_level`` (0.005).
    Between runs the model variables are re-initialized so that the same graph
    and pipelines can be reused.

    Parameters
    ----------
    model : class
        Model class handed to ``init_model`` (e.g. ``UNet``).
    layout : str
        Value of the ``body/upsample`` layout option; it is also the name under
        which the model is registered in the pipelines.
    max_iter : int
        Maximum number of training steps per run; must be at least 1.
    batch_size : int
        Training batch size.
    times : int
        Number of runs.

    Returns
    -------
    dict
        ``params`` - value returned by ``count_parameters``;
        ``loss_history``, ``quality_history``, ``time`` - one list per run;
        ``iters`` - zero-based index of the last executed step of each run;
        ``early_stop`` - whether each run stopped early;
        ``total_time`` - training time of each run in minutes;
        ``ppl`` - the training pipeline (holds the trained model).

    Raises
    ------
    ValueError
        If ``max_iter`` is less than 1.
    """
    # Fail fast: with no training steps there is no quality value to report and
    # the statistics below cannot be filled in.
    if max_iter < 1:
        raise ValueError('max_iter must be at least 1, got {}'.format(max_iter))

    config = {'body/upsample': {'layout': layout, 'factor': 2}}

    model_stat = dict()
    stop_level = 0.005   # quality (MSE) threshold for early stopping
    window = 20          # number of latest quality values inspected for early stopping

    print('Create pipelines...')

    model_config = {'inputs': placeholders_config,
                    'input_block/inputs': 'images',
                    'batch_norm': {'momentum': 0.1},
                    'output': dict(ops=['proba']),
                    'loss': 'ce',
                    'optimizer': 'Adam',
                    **config}

    ppl_train = ((load_template << mnistset.train)                         # load data from file
            .init_model('static', model, layout, config=model_config)
            .init_variable('loss', init_on_each_run=list)
            .train_model(layout,
                         fetches='loss',
                         feed_dict=train_feed_dict,
                         save_to=V('loss'), mode='a'))

    parameters = count_parameters(ppl_train, layout)
    model_stat['params'] = parameters

    model_stat['loss_history'] = list()
    model_stat['quality_history'] = list()
    model_stat['iters'] = list()
    model_stat['early_stop'] = list()
    model_stat['total_time'] = list()
    model_stat['time'] = list()

    print('Parameters: {}K'.format(parameters // (10 ** 3)))
    print('Start training...')

    # NOTE(review): if Pipeline methods extend the pipeline in place instead of
    # returning a copy, the actions below are also appended to the shared
    # `load_template` and leak into later calls - confirm against the dataset library.
    test_template = (load_template
                    .import_model(layout, ppl_train)
                    .init_variable('predictions', init_on_each_run=list)
                    .init_variable('masks', init_on_each_run=list)
                    .update_variable('masks', B('masks'), mode='w')
                    .predict_model(layout,
                                   fetches='output_proba',
                                   feed_dict=test_feed_dict,
                                   save_to=V('predictions'),
                                   mode='w'))
    ppl_test = test_template << mnistset.test

    for iteration in range(times):
        train_time = []
        quality = []
        early_stop = False
        print('Iteration:', iteration)
        for i in tqdm(range(max_iter)):
            # Only the training step is timed; evaluation below is excluded.
            start = time()
            ppl_train.next_batch(batch_size, n_epochs=None, shuffle=True)
            stop = time()
            train_time.append(stop-start)

            ppl_test.next_batch(50, n_epochs=None, shuffle=True)
            masks = ppl_test.get_variable('masks')
            predictions = ppl_test.get_variable('predictions')
            quality.append(metric(masks, predictions))

            # Share of the latest `window` scores (fewer at the start of a run)
            # that are already below the threshold.
            if np.mean(np.array(quality)[-window:] < stop_level) > 0.8:
                early_stop = True
                break

        print('Early stop: {}'.format(early_stop))
        print("Train time: {:05.3f} min".format(sum(train_time)/60))
        print('MSE:', quality[-1])

        model_stat['total_time'].append(sum(train_time)/60)
        model_stat['time'].append(train_time)
        # The 'loss' variable accumulates over all runs; keep this run's i + 1 values.
        model_stat['loss_history'].append(ppl_train.get_variable('loss')[-(i + 1):])
        model_stat['quality_history'].append(quality)
        model_stat['iters'].append(i)
        model_stat['early_stop'].append(early_stop)

        if iteration < times-1:
            # Reset the model before the next run. Every global variable is
            # re-initialized, not only the trainable ones: the 'variables'
            # collection (tf.GraphKeys.GLOBAL_VARIABLES) also holds optimizer
            # state (Adam moments and beta powers) and batch-norm moving
            # statistics, which would otherwise carry over between runs.
            tf_model = ppl_train.get_model_by_name(layout)
            session = tf_model.session
            for variable in tf_model.graph.get_collection('variables'):
                session.run(variable.initializer)

    model_stat['ppl'] = ppl_train
    print('=' * 20)

    return model_stat

# Per-layout results; filled with the dict returned by demonstrate_model.
stat = {}

In [11]:
# Upsampling layouts to compare. Each one is trained with the notebook-wide
# iteration limit and batch size; the number of runs keeps its default.
layouts = ['b', 'tna', 'B', 'X']
for layout in layouts:
    print("Layout:", layout)
    stat[layout] = demonstrate_model(model=UNet,
                                     layout=layout,
                                     max_iter=MAX_ITER,
                                     batch_size=BATCH_SIZE)

Layout: X
Create pipelines...
Parameters: 37041K
Start training...
Iteration: 0


 16%|█████████████                                                                  | 165/1000 [04:02<20:29,  1.47s/it]

Early stop: True
Train time: 1.594 min
MSE: 0.00437886456603
Iteration: 1



  0%|                                                                                         | 0/1000 [00:00<?, ?it/s]
  0%|                                                                                 | 1/1000 [00:01<23:48,  1.43s/it]
  0%|▏                                                                                | 2/1000 [00:02<24:04,  1.45s/it]
Exception in thread Thread-10:
Traceback (most recent call last):
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\threading.py", line 914, in _bootstrap_inner
    self.run()
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tqdm\_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration

 19%|███████████████▏                                                               | 192/1000 [04:35<19:19,  1.43s/it]


Early stop: True
Train time: 1.790 min
MSE: 0.00497228131465
Iteration: 2


 27%|█████████████████████▏                                                         | 268/1000 [06:30<17:45,  1.46s/it]

Early stop: True
Train time: 2.525 min
MSE: 0.00499818392951
Iteration: 3



  0%|                                                                                         | 0/1000 [00:00<?, ?it/s]
  0%|                                                                                 | 1/1000 [00:01<23:59,  1.44s/it]
  0%|▏                                                                                | 2/1000 [00:02<24:00,  1.44s/it]
  0%|▏                                                                                | 3/1000 [00:04<23:55,  1.44s/it]
  0%|▎                                                                                | 4/1000 [00:05<23:50,  1.44s/it]
  0%|▍                                                                                | 5/1000 [00:07<23:41,  1.43s/it]
Exception in thread Thread-68939:
Traceback (most recent call last):
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\threading.py", line 914, in _bootstrap_inner
    self.run()
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tqdm\_tqdm.py", line 144, in run
    for insta

Early stop: True
Train time: 2.843 min
MSE: 0.00461998089985
Iteration: 4


 28%|██████████████████████                                                         | 279/1000 [06:47<17:34,  1.46s/it]

Early stop: True
Train time: 2.637 min
MSE: 0.00428205080702
Iteration: 5



  0%|                                                                                         | 0/1000 [00:00<?, ?it/s]
Exception in thread Thread-178764:
Traceback (most recent call last):
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\threading.py", line 914, in _bootstrap_inner
    self.run()
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tqdm\_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration

 34%|██████████████████████████▌                                                    | 336/1000 [08:10<16:09,  1.46s/it]


Early stop: True
Train time: 3.165 min
MSE: 0.00394883690412
Iteration: 6


 37%|████████████████████████████▉                                                  | 366/1000 [09:13<15:58,  1.51s/it]

Early stop: True
Train time: 3.525 min
MSE: 0.00483597062257
Iteration: 7



  0%|                                                                                         | 0/1000 [00:00<?, ?it/s]
  0%|                                                                                 | 1/1000 [00:01<24:24,  1.47s/it]
  0%|▏                                                                                | 2/1000 [00:02<24:21,  1.46s/it]
  0%|▏                                                                                | 3/1000 [00:04<24:21,  1.47s/it]
Exception in thread Thread-297229:
Traceback (most recent call last):
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\threading.py", line 914, in _bootstrap_inner
    self.run()
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tqdm\_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration

 25%|███████████████████▊                   

Early stop: True
Train time: 2.373 min
MSE: 0.00420716612389
Iteration: 8


 29%|███████████████████████▏                                                       | 293/1000 [07:04<17:03,  1.45s/it]

Early stop: True
Train time: 2.744 min
MSE: 0.00383827809278
Iteration: 9



  0%|                                                                                         | 0/1000 [00:00<?, ?it/s]
  0%|                                                                                 | 1/1000 [00:01<23:53,  1.44s/it]
  0%|▏                                                                                | 2/1000 [00:02<23:58,  1.44s/it]
  0%|▏                                                                                | 3/1000 [00:04<24:01,  1.45s/it]
Exception in thread Thread-416078:
Traceback (most recent call last):
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\threading.py", line 914, in _bootstrap_inner
    self.run()
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tqdm\_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "C:\Anaconda3\envs\tensorflow-gpu\lib\_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration

 35%|███████████████████████████▌           

Early stop: True
Train time: 3.245 min
MSE: 0.00424615070052


In [24]:
# `pickle` is not imported anywhere else in the notebook, so import it here to
# keep this cell runnable on a fresh kernel.
import pickle

# Load previously saved timings from the file 'times' in the working directory.
# The file is not written by any visible cell of this notebook. The next cell
# treats `r` as a mapping from a name to a sequence of numbers.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
with open('times', 'rb') as f:
    r = pickle.load(f)

In [45]:
import pandas as pd

# One row of summary statistics per key of `r`. The "partial mean" drops the
# two smallest and the two largest values before averaging.
summary = {
    name: {'median': np.median(values),
           'mean': np.mean(values),
           'partial mean': np.mean(sorted(values)[2:-2])}
    for name, values in r.items()
}
df = pd.DataFrame(summary).transpose().sort_values(by=['median'])

In [46]:
# Display the summary table (rows were sorted by median when `df` was built).
df

Unnamed: 0,mean,median,partial mean
tna,2.323997,2.377511,2.260181
X,2.644189,2.690606,2.714648
b,2.940775,2.76729,2.792791
B,6.239011,5.886285,6.006232


In [None]:
# Evaluation template: record the ground-truth masks and the noised images of
# every processed batch in pipeline variables.
test_template = load_template.init_variable('masks', init_on_each_run=list)
test_template = test_template.init_variable('images', init_on_each_run=list)
test_template = test_template.update_variable('masks', B('masks'), mode='e')
test_template = test_template.update_variable('images', B('images'), mode='e')

# Attach every trained model; each writes to its own '<layout>_predictions' variable.
for layout in layouts:
    predictions_name = layout+'_predictions'
    test_template = test_template.import_model(layout, stat[layout]['ppl'])
    test_template = test_template.init_variable(predictions_name, init_on_each_run=list)
    test_template = test_template.predict_model(layout,
                                                fetches='output_proba',
                                                feed_dict=test_feed_dict,
                                                save_to=V(predictions_name),
                                                mode='e')

print('Start testing...')

ppl_test = test_template << mnistset.test

# 50 consecutive, unshuffled batches of 10 test items.
for batch_no in tqdm(range(50)):
    ppl_test.next_batch(10, n_epochs=1, shuffle=False)

In [None]:
# Pull the accumulated test data out of the pipeline as arrays.
masks = np.array(ppl_test.get_variable('masks'))
images = np.array(ppl_test.get_variable('images'))

# Score every layout on the same masks and keep both the raw predictions and
# the resulting quality next to its training statistics.
for layout in layouts:
    layout_predictions = np.array(ppl_test.get_variable(layout+'_predictions'))
    layout_quality = metric(masks, layout_predictions)

    layout_stat = stat[layout]
    layout_stat['predictions'] = layout_predictions
    layout_stat['quality'] = layout_quality

In [None]:
# Smoothed quality (left) and loss (right) of the last run of every layout,
# plotted against cumulative training time in minutes.
panels = [(1, 'quality_history', (0, 0.02), 'MSE'),
          (2, 'loss_history', (0, 1), 'Loss')]

plt.figure(figsize=(15, 5))
for layout in layouts:
    minutes = np.cumsum(stat[layout]['time'][-1]) / 60
    for position, history_key, y_limits, title in panels:
        plt.subplot(1, 2, position)
        curve = stat[layout][history_key][-1]
        plt.plot(minutes, smooth(curve), label=layout)
        plt.ylim(y_limits)
        plt.legend()
        plt.title(title)
plt.show()

In [None]:
def stat_table(stat):
    """Build a per-layout summary table from collected statistics.

    Only the last run of every layout (``layout_stat['time'][-1]``) is used for
    the timing columns.

    Parameters
    ----------
    stat : dict
        Maps a layout name to a dict with the keys ``'time'`` (list of runs, each
        a list of per-iteration durations in seconds), ``'quality'`` and ``'params'``.

    Returns
    -------
    pandas.DataFrame
        One row per layout with the columns ``'Time per iter, s'``,
        ``'Train time, m'``, ``'quality'`` and ``'Parameters'``.
    """
    df = dict()
    for layout, layout_stat in stat.items():
        last_run_times = layout_stat['time'][-1]
        new_stat = {
            'Time per iter, s': np.mean(last_run_times),
            # True division keeps fractions of a minute. Floor division rounded
            # every run down to whole minutes, which made layouts
            # indistinguishable when the table is sorted by this column.
            'Train time, m': np.sum(last_run_times) / 60,
            'quality': layout_stat['quality'],
            'Parameters': layout_stat['params']
        }
        df[layout] = new_stat
    df = pd.DataFrame(df).transpose()
    return df

In [None]:
# Show the summary of all layouts, fastest training first.
layout_summary = stat_table(stat)
layout_summary.sort_values(by=['Train time, m'])

In [None]:
# Grid of examples: one row per test image, the noised input in the first
# column followed by the class-1 prediction map of every layout.
n_examples = 20
n_columns = len(layouts)+1
plt.figure(figsize=(20,3.5*n_examples))
for row in range(n_examples):
    first_cell = n_columns*row+1
    plt.subplot(n_examples, n_columns, first_cell)
    plt.imshow(np.squeeze(images[row]))
    plt.title('image')
    for offset, layout in enumerate(layouts, start=1):
        prediction = stat[layout]['predictions'][row]
        plt.subplot(n_examples, n_columns, first_cell+offset)
        plt.imshow(prediction[:,:,1])
        plt.title(layout)
plt.show()