In [1]:
import os, glob, sys
import matplotlib.pyplot as plt
import numpy as np
import h5py
import tensorflow as tf
%matplotlib inline
import skimage
from skimage.io import imread, imshow, imsave
from tensorflow.python.keras.models import *
from tensorflow.python.keras.layers import *
from tensorflow.python.keras.optimizers import *
from tensorflow.python.keras.callbacks import ModelCheckpoint, LearningRateScheduler
import time
import functools
from eval import *
from ShowColors import *
from ImportUtil import *
import random
%env CITYSCAPES_DATASET = /home/rvygon/data/
from tensorflow.metrics import *
%load_ext autoreload
%autoreload 2
# Open a TensorFlow session whose config allows up to four GPU devices.
config = tf.ConfigProto(device_count={'GPU': 4})
sess = tf.Session(config=config)

env: CITYSCAPES_DATASET=/home/rvygon/data/


In [2]:
# --- Training schedule -------------------------------------------------------
EPOCHS = 50
EPOCHS_PER_EVAL = 1
BATCH_SIZE = 1

# --- Data selection ----------------------------------------------------------
START_INDEX = 0   # forwarded to importBatch as the starting index
TOTAL_SIZE = 1    # number of training samples requested from importBatch
VAL_SIZE = 10     # number of validation samples requested from importBatch
VERBOSE = 1

# --- Image geometry ----------------------------------------------------------
# Frames of 1024 x 2048 pixels are shrunk by SCALE_RATE along both axes;
# the trailing 3 is the channel count.
SCALE_RATE = 4
IMG_SHAPE = tuple(int(side / SCALE_RATE) for side in (1024, 2048)) + (3,)

In [3]:
### AUGMENTATION BLOCK

def flip_img(horizontal_flip, image, label):
    """Mirror an image/label pair left-to-right with probability 0.5.

    Args:
        horizontal_flip: when falsy, the pair is returned untouched and no
            TensorFlow ops are created.
        image: image tensor.
        label: label tensor; flipped together with `image` so both stay aligned.

    Returns:
        The `(image, label)` pair, either both mirrored or both unchanged.
    """
    if not horizontal_flip:
        return image, label

    def _mirrored():
        return tf.image.flip_left_right(image), tf.image.flip_left_right(label)

    def _unchanged():
        return image, label

    # A single uniform draw decides the fate of both tensors.
    coin = tf.random_uniform([], 0.0, 1.0)
    image, label = tf.cond(tf.less(coin, 0.5), _mirrored, _unchanged)
    return image, label

def crop_img(crop_rate, image, label):
    """Centrally crop an image/label pair and scale it back to IMG_SHAPE.

    Args:
        crop_rate: value handed to `tf.image.central_crop`; `None` disables
            cropping and the pair is returned untouched.
        image: image tensor.
        label: label tensor; processed exactly like `image`.

    Returns:
        The `(image, label)` pair, resized to `(IMG_SHAPE[0], IMG_SHAPE[1])`
        with nearest-neighbour interpolation when cropping is enabled.
    """
    if crop_rate is None:
        return image, label

    def _crop_and_restore(tensor):
        # Nearest-neighbour resizing is used for both tensors.
        return tf.image.resize_images(
            tf.image.central_crop(tensor, crop_rate),
            (IMG_SHAPE[0], IMG_SHAPE[1]),
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    image = _crop_and_restore(image)
    label = _crop_and_restore(label)
    return image, label

def _augment(image,
             label,
             hue_delta=0,
             horisontal_flip=False,
             width_shift_range=0,
             height_shift_range=0,
             crop_rate=0.25):
    """Apply the augmentation pipeline to one image/label pair.

    Steps, in order: optional random hue shift (image only), optional random
    horizontal flip, optional central crop followed by a resize.

    Args:
        image: image tensor.
        label: label tensor matching `image`.
        hue_delta: passed to `tf.image.random_hue`; a falsy value skips the step.
        horisontal_flip: forwarded to `flip_img` (spelling kept for callers).
        width_shift_range: accepted but not used by this function.
        height_shift_range: accepted but not used by this function.
        crop_rate: forwarded to `crop_img`; `None` skips cropping.

    Returns:
        The augmented `(image, label)` pair.
    """
    if hue_delta:
        # The hue shift touches the image only; the label is left as is.
        image = tf.image.random_hue(image, hue_delta)

    flipped_image, flipped_label = flip_img(horisontal_flip, image, label)
    out_image, out_label = crop_img(crop_rate, flipped_image, flipped_label)
    return out_image, out_label
def to_tensor(image, label):
    """Identity hook: hand the `(image, label)` pair back unchanged."""
    pair = (image, label)
    return pair

# Augmentation settings for training samples; the keys are keyword arguments
# of `_augment`.
tr_cfg = dict(
    hue_delta=0.05,
    horisontal_flip=True,
    crop_rate=0.25,
)
# `_augment` with the training settings baked in: call as fn(image, label).
tr_preprocessing_fn = functools.partial(_augment, **tr_cfg)

In [4]:
#run this cell once
#%run  cityscapesscripts/preparation/createTrainIdLabelImgs
def upd_print(str):
    """Overwrite the current console line with `str`.

    Emits a carriage return followed by the text on standard output and
    flushes immediately, so repeated calls redraw the same line.
    """
    out = sys.stdout
    for chunk in ('\r', str):
        out.write(chunk)
    out.flush()

def tversky_loss(y_true, y_pred):
    """Tversky loss summed over classes.

    For every class (last axis) a Tversky index TP / (TP + alpha*FP + beta*FN)
    is computed with alpha = beta = 0.5, the indices are summed, and the sum
    is subtracted from the number of classes.

    Args:
        y_true: ground-truth tensor; reduced over axes (0, 1, 2), so it is
            assumed to be 4-D with classes last - TODO confirm against callers.
        y_pred: predicted per-class probabilities, same shape as `y_true`.

    Returns:
        Scalar tensor `num_classes - sum_of_tversky_indices`.
    """
    alpha, beta = 0.5, 0.5
    reduce_axes = (0, 1, 2)

    ones = K.ones(K.shape(y_true))
    not_pred = ones - y_pred   # probability that a pixel is NOT class i
    not_true = ones - y_true

    true_pos = K.sum(y_pred * y_true, reduce_axes)
    false_pos = K.sum(y_pred * not_true, reduce_axes)
    false_neg = K.sum(not_pred * y_true, reduce_axes)
    # The small constant keeps the division defined for classes that are absent.
    denominator = true_pos + alpha * false_pos + beta * false_neg + 1e-8

    # Summed over classes, so the value lies in [0, number of classes].
    tversky_total = K.sum(true_pos / denominator)

    num_classes = K.cast(K.shape(y_true)[-1], 'float32')
    return num_classes - tversky_total

In [5]:
from tensorflow.python.keras.utils import to_categorical

# --- Training split ----------------------------------------------------------
# Labels stay as integer class ids (no one-hot encoding) and get a trailing
# channel axis appended.
x_train_data, y_train_data = importBatch(
    TOTAL_SIZE, START_INDEX, VERBOSE, 'train', SCALE_RATE)
x_train_data = x_train_data.astype('float32')
y_train_data = np.expand_dims(y_train_data.astype('int32'), axis=3)

# --- Validation split --------------------------------------------------------
# Validation labels, in contrast, are one-hot encoded.
x_val_data, y_val_data, files = importBatch(
    VAL_SIZE, START_INDEX, VERBOSE, 'val', SCALE_RATE)
x_val_data = x_val_data.astype('float32')
y_val_data = to_categorical(y_val_data).astype('float32')

y_train_data.shape

(1, 256, 512, 1)

In [6]:
"""DeepLab v3 models based on slim library."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import preprocessing
import tensorflow as tf

from tensorflow.contrib.slim.nets import resnet_v2
from tensorflow.contrib import layers as layers_lib
from tensorflow.contrib.framework.python.ops import arg_scope
from tensorflow.contrib.layers.python.layers import layers
from tensorflow.contrib.slim.python.slim.nets import resnet_utils
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import sys

import tensorflow as tf
from tensorflow.python import debug as tf_debug



# --- Dataset / input geometry -------------------------------------------------
_NUM_CLASSES = 20
_HEIGHT, _WIDTH, _DEPTH = 256, 512, 3
_MIN_SCALE, _MAX_SCALE = 0.5, 2.0
_IGNORE_LABEL = 255

# NOTE(review): these split sizes are not referenced anywhere in the visible
# cells - confirm they still describe the dataset in use.
_NUM_IMAGES = dict(train=10582, validation=1449)

# --- Optimisation ---------------------------------------------------------------
_POWER = 0.9               # exponent of the polynomial learning-rate decay
_MOMENTUM = 0.9
_WEIGHT_DECAY = 5e-4       # fallback L2 factor when params has no 'weight_decay'
_BATCH_NORM_DECAY = 0.9997


def atrous_spatial_pyramid_pooling(inputs, output_stride, batch_norm_decay, is_training, depth=256):
  """Atrous Spatial Pyramid Pooling.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    output_stride: The ResNet unit's stride. Determines the rates for atrous convolution.
      the rates are (6, 12, 18) when the stride is 16, and doubled when 8.
    batch_norm_decay: The moving average decay when estimating layer activation
      statistics in batch normalization.
    is_training: A boolean denoting whether the input is for training.
    depth: The number of output channels of every branch and of the final
      1x1 projection.

  Returns:
    The atrous spatial pyramid pooling output.

  Raises:
    ValueError: If `output_stride` is neither 8 nor 16.
  """
  # Validate the arguments before any device or variable scope is entered, so
  # a bad call fails fast and leaves the graph untouched.
  if output_stride not in [8, 16]:
    raise ValueError('output_stride must be either 8 or 16.')

  atrous_rates = [6, 12, 18]
  if output_stride == 8:
    atrous_rates = [2*rate for rate in atrous_rates]

  # NOTE(review): the device is hard-coded; on hosts exposing fewer than three
  # GPUs this relies on soft placement being enabled - confirm.
  with tf.device('/GPU:2'):
    with tf.variable_scope("aspp"):
      with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
        with arg_scope([layers.batch_norm], is_training=is_training):
          inputs_size = tf.shape(inputs)[1:3]
          # (a) one 1x1 convolution and three 3x3 convolutions with rates = (6, 12, 18) when output stride = 16.
          # the rates are doubled when output stride = 8.
          conv_1x1 = layers_lib.conv2d(inputs, depth, [1, 1], stride=1, scope="conv_1x1")
          conv_3x3_1 = resnet_utils.conv2d_same(inputs, depth, 3, stride=1, rate=atrous_rates[0], scope='conv_3x3_1')
          conv_3x3_2 = resnet_utils.conv2d_same(inputs, depth, 3, stride=1, rate=atrous_rates[1], scope='conv_3x3_2')
          conv_3x3_3 = resnet_utils.conv2d_same(inputs, depth, 3, stride=1, rate=atrous_rates[2], scope='conv_3x3_3')

          # (b) the image-level features
          with tf.variable_scope("image_level_features"):
            # global average pooling
            image_level_features = tf.reduce_mean(inputs, [1, 2], name='global_average_pooling', keepdims=True)
            # 1x1 convolution with `depth` filters (and batch normalization)
            image_level_features = layers_lib.conv2d(image_level_features, depth, [1, 1], stride=1, scope='conv_1x1')
            # bilinearly upsample features back to the spatial size of `inputs`
            image_level_features = tf.image.resize_bilinear(image_level_features, inputs_size, name='upsample')

          # Fuse all five branches along the channel axis and project back to `depth`.
          net = tf.concat([conv_1x1, conv_3x3_1, conv_3x3_2, conv_3x3_3, image_level_features], axis=3, name='concat')
          net = layers_lib.conv2d(net, depth, [1, 1], stride=1, scope='conv_1x1_concat')

          return net


def deeplab_v3_generator(num_classes,
                         output_stride,
                         base_architecture,
                         pre_trained_model,
                         batch_norm_decay,
                         data_format='channels_last'):
  """Generator for DeepLab v3 models.

  Args:
    num_classes: The number of possible classes for image classification.
    output_stride: The ResNet unit's stride. Determines the rates for atrous convolution.
      the rates are (6, 12, 18) when the stride is 16, and doubled when 8.
    base_architecture: The architecture of base Resnet building block;
      'resnet_v2_50' or 'resnet_v2_101'.
    pre_trained_model: Checkpoint location handed to
      `tf.train.init_from_checkpoint` when the model is built for training.
    batch_norm_decay: The moving average decay when estimating layer activation
      statistics in batch normalization. `None` selects `_BATCH_NORM_DECAY`.
    data_format: The input format. Only 'channels_last' is supported
      currently; `None` is accepted and treated like 'channels_last'.

  Returns:
    The model function that takes in `inputs` and `is_training` and
    returns the output tensor of the DeepLab v3 model.

  Raises:
    ValueError: If `base_architecture` is not one of the supported names.
  """
  if batch_norm_decay is None:
    batch_norm_decay = _BATCH_NORM_DECAY

  if base_architecture not in ['resnet_v2_50', 'resnet_v2_101']:
    raise ValueError("'base_architecture' must be either 'resnet_v2_50' or 'resnet_v2_101'.")

  if base_architecture == 'resnet_v2_50':
    base_model = resnet_v2.resnet_v2_50
  else:
    base_model = resnet_v2.resnet_v2_101

  def model(inputs, is_training):
    """Constructs the ResNet model given the inputs."""
    if data_format == 'channels_first':
      # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
      # NOTE(review): everything below indexes axes 1:3 as height/width and
      # concatenates on axis 3, i.e. still assumes NHWC - this path looks
      # unsupported; confirm before using it.
      inputs = tf.transpose(inputs, [0, 3, 1, 2])

    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
      # Only the intermediate end points are needed; the backbone's own
      # output is discarded.
      _, end_points = base_model(inputs,
                                 num_classes=None,
                                 is_training=is_training,
                                 global_pool=False,
                                 output_stride=output_stride)

    if is_training:
      # Initialise the backbone from the pre-trained checkpoint, skipping the
      # classification head and the step counter.
      exclude = [base_architecture + '/logits', 'global_step']
      variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)
      tf.train.init_from_checkpoint(pre_trained_model,
                                    {v.name.split(':')[0]: v for v in variables_to_restore})

    inputs_size = tf.shape(inputs)[1:3]
    net = end_points[base_architecture + '/block4']
    net = atrous_spatial_pyramid_pooling(net, output_stride, batch_norm_decay, is_training)
    with tf.variable_scope("upsampling_logits"):
      # Per-class scores at feature resolution, then bilinear upsampling back
      # to the input resolution.
      net = layers_lib.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='conv_1x1')
      logits = tf.image.resize_bilinear(net, inputs_size, name='upsample')

    return logits

  return model

def deeplabv3_model_fn(features, labels, mode, params):
  """Model function for a `tf.estimator.Estimator` running DeepLab v3.

  Args:
    features: Input image batch; fed to the network unchanged and cast to
      uint8 for the image summary.
    labels: Integer label batch with a trailing channel axis (axis 3 is
      squeezed below). Not used in PREDICT mode.
    mode: One of `tf.estimator.ModeKeys`.
    params: Dict of hyper-parameters. Keys read here: 'num_classes',
      'output_stride', 'base_architecture', 'pre_trained_model',
      'batch_norm_decay', 'batch_size', 'freeze_batch_norm',
      'learning_rate_policy', 'momentum', optionally 'weight_decay';
      for the 'piecewise' policy 'num_train'; for the 'poly' policy
      'initial_learning_rate', 'initial_global_step', 'max_iter',
      'end_learning_rate' and 'power'.

  Returns:
    A `tf.estimator.EstimatorSpec` for the requested mode.

  Raises:
    ValueError: In TRAIN mode, if 'learning_rate_policy' is neither
      'piecewise' nor 'poly'.
  """
  images = tf.cast(features,tf.uint8)
  #images = tf.cast(
  #    tf.map_fn(preprocessing.mean_image_addition, features),
  #    tf.uint8)

  network = deeplab_v3_generator(params['num_classes'],
                                 params['output_stride'],
                                 params['base_architecture'],
                                 params['pre_trained_model'],
                                 params['batch_norm_decay'])

  logits = network(features, mode == tf.estimator.ModeKeys.TRAIN)

  # Per-pixel arg-max class, kept 4-D ([batch, height, width, 1]).
  pred_classes = tf.expand_dims(tf.argmax(logits, axis=3, output_type=tf.int32), axis=3)

  pred_decoded_labels = tf.py_func(preprocessing.decode_labels,
                                   [pred_classes, params['batch_size'], params['num_classes']],
                                   tf.uint8)

  predictions = {
      'classes': pred_classes,
      'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
      'decoded_labels': pred_decoded_labels,
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    # Delete 'decoded_labels' from predictions because custom functions produce error when used with saved_model
    predictions_without_decoded_labels = predictions.copy()
    del predictions_without_decoded_labels['decoded_labels']

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'preds': tf.estimator.export.PredictOutput(
                predictions_without_decoded_labels)
        })

  # Currently not consumed by anything below.
  gt_decoded_labels = tf.py_func(preprocessing.decode_labels,
                                 [labels, params['batch_size'], params['num_classes']], tf.uint8)

  # Keep a handle on the 4-D labels for the image summary before the channel
  # axis is dropped.
  labels_4d = labels
  labels = tf.squeeze(labels, axis=3)  # reduce the channel dimension.

  logits_by_num_classes = tf.reshape(logits, [-1, params['num_classes']])
  labels_flat = tf.reshape(labels, [-1, ])

  # Pixels whose label id exceeds the last class index are excluded from the
  # loss and from all metrics (partition 1 holds the kept pixels).
  valid_indices = tf.to_int32(labels_flat <= params['num_classes'] - 1)
  valid_logits = tf.dynamic_partition(logits_by_num_classes, valid_indices, num_partitions=2)[1]
  valid_labels = tf.dynamic_partition(labels_flat, valid_indices, num_partitions=2)[1]

  preds_flat = tf.reshape(pred_classes, [-1, ])
  valid_preds = tf.dynamic_partition(preds_flat, valid_indices, num_partitions=2)[1]
  confusion_matrix = tf.confusion_matrix(valid_labels, valid_preds, num_classes=params['num_classes'])

  predictions['valid_preds'] = valid_preds
  predictions['valid_labels'] = valid_labels
  predictions['confusion_matrix'] = confusion_matrix

  cross_entropy = tf.losses.sparse_softmax_cross_entropy(
      logits=valid_logits, labels=valid_labels)

  # Create a tensor named cross_entropy for logging purposes.
  tf.identity(cross_entropy, name='cross_entropy')
  tf.summary.scalar('cross_entropy', cross_entropy)

  if not params['freeze_batch_norm']:
    train_var_list = [v for v in tf.trainable_variables()]
  else:
    # Frozen batch norm: leave the beta/gamma variables out of training.
    train_var_list = [v for v in tf.trainable_variables()
                      if 'beta' not in v.name and 'gamma' not in v.name]

  # Add weight decay to the loss.
  with tf.variable_scope("total_loss"):
    loss = cross_entropy + params.get('weight_decay', _WEIGHT_DECAY) * tf.add_n(
        [tf.nn.l2_loss(v) for v in train_var_list])
  # loss = tf.losses.get_total_loss()  # obtain the regularization losses as well

  if mode == tf.estimator.ModeKeys.TRAIN:
    tf.summary.image('images', images)
    # Summarise the 4-D label / prediction tensors directly. Reshaping the
    # filtered pixel vectors to a fixed (1, 256, 512, 1) only works for a
    # single 256x512 image in which no pixel was filtered out; in that case
    # the images produced here are identical. Summary tags are unchanged.
    tf.summary.image('valid_labels', tf.cast(labels_4d, tf.uint8))
    tf.summary.image('valid_preds', tf.cast(pred_classes, tf.uint8))

    global_step = tf.train.get_or_create_global_step()

    if params['learning_rate_policy'] == 'piecewise':
      # Scale the learning rate linearly with the batch size. When the batch size
      # is 128, the learning rate should be 0.1.
      initial_learning_rate = 0.1 * params['batch_size'] / 128
      batches_per_epoch = params['num_train'] / params['batch_size']
      # Multiply the learning rate by 0.1 at 100, 150, and 200 epochs.
      boundaries = [int(batches_per_epoch * epoch) for epoch in [100, 150, 200]]
      values = [initial_learning_rate * decay for decay in [1, 0.1, 0.01, 0.001]]
      learning_rate = tf.train.piecewise_constant(
          tf.cast(global_step, tf.int32), boundaries, values)
    elif params['learning_rate_policy'] == 'poly':
      learning_rate = tf.train.polynomial_decay(
          params['initial_learning_rate'],
          tf.cast(global_step, tf.int32) - params['initial_global_step'],
          params['max_iter'], params['end_learning_rate'], power=params['power'])
    else:
      raise ValueError('Learning rate policy must be "piecewise" or "poly"')

    # Create a tensor named learning_rate for logging purposes
    tf.identity(learning_rate, name='learning_rate')
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = tf.train.MomentumOptimizer(
        learning_rate=learning_rate,
        momentum=params['momentum'])

    # Batch norm requires update ops to be added as a dependency to the train_op
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step, var_list=train_var_list)
  else:
    train_op = None

  accuracy = tf.metrics.accuracy(
      valid_labels, valid_preds)
  mean_iou = tf.metrics.mean_iou(valid_labels, valid_preds, params['num_classes'])
  metrics = {'px_accuracy': accuracy, 'mean_iou': mean_iou}

  # Create a tensor named train_accuracy for logging purposes
  tf.identity(accuracy[1], name='train_px_accuracy')
  tf.summary.scalar('train_px_accuracy', accuracy[1])

  def compute_mean_iou(total_cm, name='mean_iou'):
    """Compute the mean intersection-over-union via the confusion matrix."""
    sum_over_row = tf.to_float(tf.reduce_sum(total_cm, 0))
    sum_over_col = tf.to_float(tf.reduce_sum(total_cm, 1))
    cm_diag = tf.to_float(tf.diag_part(total_cm))
    denominator = sum_over_row + sum_over_col - cm_diag

    # The mean is only computed over classes that appear in the
    # label or prediction tensor. If the denominator is 0, we need to
    # ignore the class.
    num_valid_entries = tf.reduce_sum(tf.cast(
        tf.not_equal(denominator, 0), dtype=tf.float32))

    # If the value of the denominator is 0, set it to 1 to avoid
    # zero division.
    denominator = tf.where(
        tf.greater(denominator, 0),
        denominator,
        tf.ones_like(denominator))
    iou = tf.div(cm_diag, denominator)

    # One named tensor and one scalar summary per class.
    for i in range(params['num_classes']):
      tf.identity(iou[i], name='train_iou_class{}'.format(i))
      tf.summary.scalar('train_iou_class{}'.format(i), iou[i])

    # If the number of valid entries is 0 (no classes) we return 0.
    result = tf.where(
        tf.greater(num_valid_entries, 0),
        tf.reduce_sum(iou, name=name) / num_valid_entries,
        0)
    return result

  # mean_iou[1] is the metric's update op; it is passed on as the confusion matrix.
  train_mean_iou = compute_mean_iou(mean_iou[1])

  tf.identity(train_mean_iou, name='train_mean_iou')
  tf.summary.scalar('train_mean_iou', train_mean_iou)

  return tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=predictions,
      loss=loss,
      train_op=train_op,
      eval_metric_ops=metrics)

def input_fn(is_training, dataset, batch_size, num_epochs=1):
    """Estimator input function backed by an already built tf.data dataset.

    Args:
      is_training: accepted for interface compatibility; not used here.
      dataset: dataset object exposing `make_one_shot_iterator()` whose
        elements are `(images, labels)` pairs.
      batch_size: accepted for interface compatibility; not used here
        (batching is expected to be configured on `dataset`).
      num_epochs: accepted for interface compatibility; not used here.

    Returns:
      A tuple of images and labels: the next element of a one-shot iterator
      over `dataset`.
    """
    images, labels = dataset.make_one_shot_iterator().get_next()
    return images, labels
    
    




In [None]:
# NOTE(review): this ConfigProto (with allow_growth) is built but never handed
# to a session or to the RunConfig below, so it has no effect in this cell.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): a tf.Session was already opened in the first cell; presumably
# the GPUs were enumerated at that point, so this may come too late to restrict
# device visibility - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

with tf.device('/GPU:2'):
    # NOTE(review): not passed to RunConfig either (the captured log reports
    # '_session_config': None).
    session_config=tf.ConfigProto(device_count={'GPU': 4})
    # Checkpoint after every single step (debug setting).
    run_config = tf.estimator.RunConfig().replace(save_checkpoints_steps=1)

    # NOTE(review): model_dir has no leading slash, so it resolves relative to
    # the working directory; existing checkpoints live there, hence unchanged.
    model = tf.estimator.Estimator(
        model_fn=deeplabv3_model_fn,
        config=run_config,
        model_dir='home/rvygon/SiriusCV/deeplab',
        params = {
            'output_stride': 8,
            'batch_size': BATCH_SIZE,
            'base_architecture': 'resnet_v2_101',
            'pre_trained_model': '/home/rvygon/data/DeepLab/resnet_v2_101.ckpt',
            'batch_norm_decay': _BATCH_NORM_DECAY,
            'num_classes': _NUM_CLASSES,
            'tensorboard_images_max_outputs': 6,
            'weight_decay': 2e-4,
            'learning_rate_policy': 'poly',
            'num_train': 1,
            'initial_learning_rate': 7e-3,
            'max_iter': 3000,
            'end_learning_rate': 1e-6,
            'power': _POWER,
            'momentum': _MOMENTUM,
            'freeze_batch_norm': False,
            'initial_global_step': 0
          })

    # Label -> tensor name of the values printed by the logging hook.
    # Built once: a loop that only rebuilt this same dict left it undefined
    # when it ran zero times and overwrote IPython's `_`.
    tensors_to_log = {
        'learning_rate': 'learning_rate',
        'cross_entropy': 'cross_entropy',
        'train_px_accuracy': 'train_px_accuracy',
        'train_mean_iou': 'train_mean_iou',
    }

    # Endless stream of training batches drawn from the in-memory arrays.
    dataset = tf.data.Dataset.from_tensor_slices((x_train_data, y_train_data))
    dataset = dataset.repeat().batch(BATCH_SIZE)

    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=10)
    train_hooks = [logging_hook]
    eval_hooks = None

    tf.logging.info("Start training.")
    model.train(
        input_fn=lambda: input_fn(True, dataset, BATCH_SIZE, EPOCHS_PER_EVAL),
        hooks=train_hooks,
        steps=1  # For debug
    )

    tf.logging.info("Start evaluation.")
    # Evaluation is currently disabled:
    # eval_results = model.evaluate(
    #     # Batch size must be 1 for testing because the images' size differs
    #     input_fn=lambda: input_fn(False, dataset, 1),
    #     hooks=eval_hooks,
    #     # steps=1  # For debug
    #     )
    # print(eval_results)

    # Predict on the same (training) dataset and show the class map of the
    # first yielded example.
    pred = model.predict(input_fn=lambda: input_fn(False, dataset,1))
    imshow(np.squeeze(next(pred)['classes']))




INFO:tensorflow:Using config: {'_model_dir': 'home/rvygon/SiriusCV/deeplab', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 1, '_save_checkpoints_secs': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc45fe9f9b0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Start training.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from home/rvygon/SiriusCV/deeplab/model.ckpt-60
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_ini