In [2]:
import numpy as np, pandas as pd
import keras
from keras import backend as K
from tqdm import tqdm
import cv2
import warnings
warnings.filterwarnings("ignore")

In [5]:
class DataGenerator(keras.utils.Sequence):
    def __init__(self, df, batch_size = 10, subset = "train", shuffle = False, preprocess = None):
        super().__init__()
        self.df = df
        self.shuffle = shuffle
        self.subset = subset
        self.batch_size = batch_size
        self.preprocess = preprocess

        
        if (self.subset == "train") or (self.subset == "val"):
            self.data_path = '../input/severstal-steel-defect-detection/train_images/'
        elif self.subset == "test":
            self.data_path = '../input/severstal-steel-defect-detection/test_images/'
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.df) / self.batch_size)) # возвращает количество батчей за эпоху
    
    def on_epoch_end(self):
        self.indexes = np.arange(len(self.df)) # возвращает одномерный массив с равномерно разнесенными значениями внутри заданного интервала
        if self.shuffle == True:
            np.random.shuffle(self.indexes) # перемешивает входной датасет df каждую эпоху
    
    def augment(self, images, masks):
        data_gen_args = dict(horizontal_flip = True, vertical_flip = True)
        image_datagen = ImageDataGenerator(**data_gen_args)
        mask_datagen = ImageDataGenerator(**data_gen_args)
        seed = random.randint(1, 1000)
        #image_datagen.fit(images, augment=False, rounds=1, seed=seed)
        #mask_datagen.fit(masks, augment=False, rounds=1, seed=seed)
        images_aug = image_datagen.flow(images, seed=seed, batch_size = self.batch_size)[0]
        masks_aug = mask_datagen.flow(masks, seed=seed, batch_size = self.batch_size)[0]
        return images_aug, masks_aug
    
    def augment1(self, img, mask):
        rand = random.randint(1, 1000)
        if(rand > 900): return cv2.flip(img, 0), cv2.flip(mask, 0) #отражает изображение по вертикали
        if(rand < 100): return cv2.flip(img, 1), cv2.flip(mask, 1) #отражает изображение по горизонтали
        else: return img, mask
    
    def __getitem__(self, index): 
        images = np.empty((self.batch_size, 256, 1600, 3), dtype=np.float32) # массив, состоящий из batch_size изображений (256x1600x3)
        masks = np.empty((self.batch_size, 256, 1600, 4), dtype=np.int8) # массив, состоящий из batch_size изображений с масками
        indexes = self.indexes[index * self.batch_size:(index+1) * self.batch_size]
        
        for i, img_id in enumerate(self.df['ImageId'].iloc[indexes]):
            img = cv2.imread(self.data_path + img_id) # цветовым пространством по умолчанию в OpenCV является BGR
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # чтобы исправить это, используется cvtColor(image, flag) и рассмотренный выше флаг
            images[i,] = img.astype(np.float32) / 255.
            if (self.subset == "train") or (self.subset == "val"): 
                for j in range(4):
                    masks[i,:,:,j] = rle2mask(self.df['e' + str(j+1)].iloc[indexes[i]], img.shape) # 4 канала, нулевой канал - дефект первого типа, третий канал - дефект четвертого типа
            if self.subset == "train":
                images[i,], masks[i,] = self.augment1(images[i,], masks[i,])
        if self.subset == 'train':
            #images, masks = self.augment(images, masks)
            return images, masks
        if self.subset == 'val':
            return images, masks
        else: return images # если test

In [6]:
def dice_coef(y_true, y_pred, smooth=1):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

In [7]:
from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2
from tensorflow.python import ops, math_ops, state_ops, control_flow_ops
from tensorflow.python.keras import backend as K

__all__ = ['RAdam']


class RAdam(OptimizerV2):
    """RAdam optimizer.
    According to the paper
    [On The Variance Of The Adaptive Learning Rate And Beyond](https://arxiv.org/pdf/1908.03265v1.pdf).
    """

    def __init__(self,
                 learning_rate=0.001,
                 beta_1=0.9,
                 beta_2=0.999,
                 epsilon=1e-7,
                 weight_decay=0.,
                 amsgrad=False,
                 total_steps=0,
                 warmup_proportion=0.1,
                 min_lr=0.,
                 name='RAdam',
                 **kwargs):
        r"""Construct a new Adam optimizer.
        Args:
            learning_rate: A Tensor or a floating point value.    The learning rate.
            beta_1: A float value or a constant float tensor. The exponential decay
                rate for the 1st moment estimates.
            beta_2: A float value or a constant float tensor. The exponential decay
                rate for the 2nd moment estimates.
            epsilon: A small constant for numerical stability. This epsilon is
                "epsilon hat" in the Kingma and Ba paper (in the formula just before
                Section 2.1), not the epsilon in Algorithm 1 of the paper.
            weight_decay: A floating point value. Weight decay for each param.
            amsgrad: boolean. Whether to apply AMSGrad variant of this algorithm from
                the paper "On the Convergence of Adam and beyond".
            total_steps: An integer. Total number of training steps.
                Enable warmup by setting a positive value.
            warmup_proportion: A floating point value. The proportion of increasing steps.
            min_lr: A floating point value. Minimum learning rate after warmup.
            name: Optional name for the operations created when applying gradients.
                Defaults to "Adam".    @compatibility(eager) When eager execution is
                enabled, `learning_rate`, `beta_1`, `beta_2`, and `epsilon` can each be
                a callable that takes no arguments and returns the actual value to use.
                This can be useful for changing these values across different
                invocations of optimizer functions. @end_compatibility
            **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, `lr`,
                `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is clip
                gradients by value, `decay` is included for backward compatibility to
                allow time inverse decay of learning rate. `lr` is included for backward
                compatibility, recommended to use `learning_rate` instead.
        """

        super(RAdam, self).__init__(name, **kwargs)
        self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
        self._set_hyper('beta_1', beta_1)
        self._set_hyper('beta_2', beta_2)
        self._set_hyper('decay', self._initial_decay)
        self._set_hyper('weight_decay', weight_decay)
        self._set_hyper('total_steps', float(total_steps))
        self._set_hyper('warmup_proportion', warmup_proportion)
        self._set_hyper('min_lr', min_lr)
        self.epsilon = epsilon or K.epsilon()
        self.amsgrad = amsgrad
        self._initial_weight_decay = weight_decay
        self._initial_total_steps = total_steps

    def _create_slots(self, var_list):
        for var in var_list:
            self.add_slot(var, 'm')
        for var in var_list:
            self.add_slot(var, 'v')
        if self.amsgrad:
            for var in var_list:
                self.add_slot(var, 'vhat')

    def set_weights(self, weights):
        params = self.weights
        num_vars = int((len(params) - 1) / 2)
        if len(weights) == 3 * num_vars + 1:
            weights = weights[:len(params)]
        super(RAdam, self).set_weights(weights)

    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        beta_1_t = self._get_hyper('beta_1', var_dtype)
        beta_2_t = self._get_hyper('beta_2', var_dtype)
        epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        beta_1_power = math_ops.pow(beta_1_t, local_step)
        beta_2_power = math_ops.pow(beta_2_t, local_step)

        if self._initial_total_steps > 0:
            total_steps = self._get_hyper('total_steps', var_dtype)
            warmup_steps = total_steps * self._get_hyper('warmup_proportion', var_dtype)
            min_lr = self._get_hyper('min_lr', var_dtype)
            decay_steps = K.maximum(total_steps - warmup_steps, 1)
            decay_rate = (min_lr - lr_t) / decay_steps
            lr_t = tf.where(
                local_step <= warmup_steps,
                lr_t * (local_step / warmup_steps),
                lr_t + decay_rate * K.minimum(local_step - warmup_steps, decay_steps),
            )

        sma_inf = 2.0 / (1.0 - beta_2_t) - 1.0
        sma_t = sma_inf - 2.0 * local_step * beta_2_power / (1.0 - beta_2_power)

        m_t = state_ops.assign(m,
                               beta_1_t * m + (1.0 - beta_1_t) * grad,
                               use_locking=self._use_locking)
        m_corr_t = m_t / (1.0 - beta_1_power)

        v_t = state_ops.assign(v,
                               beta_2_t * v + (1.0 - beta_2_t) * math_ops.square(grad),
                               use_locking=self._use_locking)
        if self.amsgrad:
            vhat = self.get_slot(var, 'vhat')
            vhat_t = state_ops.assign(vhat,
                                      math_ops.maximum(vhat, v_t),
                                      use_locking=self._use_locking)
            v_corr_t = math_ops.sqrt(vhat_t / (1.0 - beta_2_power))
        else:
            vhat_t = None
            v_corr_t = math_ops.sqrt(v_t / (1.0 - beta_2_power))

        r_t = math_ops.sqrt((sma_t - 4.0) / (sma_inf - 4.0) *
                            (sma_t - 2.0) / (sma_inf - 2.0) *
                            sma_inf / sma_t)

        var_t = tf.where(sma_t >= 5.0, r_t * m_corr_t / (v_corr_t + epsilon_t), m_corr_t)

        if self._initial_weight_decay > 0.0:
            var_t += self._get_hyper('weight_decay', var_dtype) * var

        var_update = state_ops.assign_sub(var,
                                          lr_t * var_t,
                                          use_locking=self._use_locking)

        updates = [var_update, m_t, v_t]
        if self.amsgrad:
            updates.append(vhat_t)
        return control_flow_ops.group(*updates)

    def _resource_apply_sparse(self, grad, var, indices):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        beta_1_t = self._get_hyper('beta_1', var_dtype)
        beta_2_t = self._get_hyper('beta_2', var_dtype)
        epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        beta_1_power = math_ops.pow(beta_1_t, local_step)
        beta_2_power = math_ops.pow(beta_2_t, local_step)

        if self._initial_total_steps > 0:
            total_steps = self._get_hyper('total_steps', var_dtype)
            warmup_steps = total_steps * self._get_hyper('warmup_proportion', var_dtype)
            min_lr = self._get_hyper('min_lr', var_dtype)
            decay_steps = K.maximum(total_steps - warmup_steps, 1)
            decay_rate = (min_lr - lr_t) / decay_steps
            lr_t = tf.where(
                local_step <= warmup_steps,
                lr_t * (local_step / warmup_steps),
                lr_t + decay_rate * K.minimum(local_step - warmup_steps, decay_steps),
            )

        sma_inf = 2.0 / (1.0 - beta_2_t) - 1.0
        sma_t = sma_inf - 2.0 * local_step * beta_2_power / (1.0 - beta_2_power)

        m = self.get_slot(var, 'm')
        m_scaled_g_values = grad * (1 - beta_1_t)
        m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
        with ops.control_dependencies([m_t]):
            m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)
        m_corr_t = m_t / (1.0 - beta_1_power)

        v = self.get_slot(var, 'v')
        v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
        v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
        with ops.control_dependencies([v_t]):
            v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)

        if self.amsgrad:
            vhat = self.get_slot(var, 'vhat')
            vhat_t = state_ops.assign(vhat,
                                      math_ops.maximum(vhat, v_t),
                                      use_locking=self._use_locking)
            v_corr_t = math_ops.sqrt(vhat_t / (1.0 - beta_2_power))
        else:
            vhat_t = None
            v_corr_t = math_ops.sqrt(v_t / (1.0 - beta_2_power))

        r_t = math_ops.sqrt((sma_t - 4.0) / (sma_inf - 4.0) *
                            (sma_t - 2.0) / (sma_inf - 2.0) *
                            sma_inf / sma_t)

        var_t = tf.where(sma_t >= 5.0, r_t * m_corr_t / (v_corr_t + epsilon_t), m_corr_t)

        if self._initial_weight_decay > 0.0:
            var_t += self._get_hyper('weight_decay', var_dtype) * var

        var_update = self._resource_scatter_add(var, indices, tf.gather(-lr_t * var_t, indices))

        updates = [var_update, m_t, v_t]
        if self.amsgrad:
            updates.append(vhat_t)
        return control_flow_ops.group(*updates)

    def get_config(self):
        config = super(RAdam, self).get_config()
        config.update({
            'learning_rate': self._serialize_hyperparameter('learning_rate'),
            'beta_1': self._serialize_hyperparameter('beta_1'),
            'beta_2': self._serialize_hyperparameter('beta_2'),
            'decay': self._serialize_hyperparameter('decay'),
            'weight_decay': self._serialize_hyperparameter('weight_decay'),
            'epsilon': self.epsilon,
            'amsgrad': self.amsgrad,
            'total_steps': self._serialize_hyperparameter('total_steps'),
            'warmup_proportion': self._serialize_hyperparameter('warmup_proportion'),
            'min_lr': self._serialize_hyperparameter('min_lr'),
        })
        return config

In [8]:
def jaccard_distance_loss(y_true, y_pred, smooth = 1):
    """
    Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|)
            = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|))
    
    The jaccard distance loss is usefull for unbalanced datasets. This has been
    shifted so it converges on 0 and is smoothed to avoid exploding or disapearing
    gradient.
    
    Ref: https://en.wikipedia.org/wiki/Jaccard_index
    
    @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96
    @author: wassname
    """
    #tf.to_float = lambda x: tf.cast(x, tf.float32)
    #y_pred = tf.to_float(y_pred > 0.45)
    intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
    sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
    jac = (intersection + smooth) / (sum_ - intersection + smooth)
    return (1 - jac) * smooth

In [9]:
def Dice_Coef(y_true, y_pred, smooth = 1):
    
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    
    intersection = K.sum(y_true_f * y_pred_f)
    
    return (2*intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def Dice_Loss(y_true, y_pred):
    return 1.0 - Dice_Coef(y_true, y_pred)

def bce_dice_loss(y_true, y_pred):
    return keras.losses.binary_crossentropy(y_true, y_pred) + Dice_Loss(y_true, y_pred)

def wbce_dice_loss(y_true, y_pred):
    return weighted_bce()(y_true, y_pred) + Dice_Loss(y_true, y_pred)

def weighted_bce(weight = 0.6):
    
    def convert_2_logits(y_pred):
        y_pred = tf.clip_by_value(y_pred, K.epsilon(), 1 - K.epsilon())
        return tf.math.log(y_pred / (1-y_pred))
    
    def weighted_binary_crossentropy(y_true, y_pred):
        y_pred = convert_2_logits(y_pred)
        loss = tf.nn.weighted_cross_entropy_with_logits(logits = y_pred, labels = y_true, pos_weight = weight)
        return loss
    
    return weighted_binary_crossentropy

In [10]:
from keras.models import load_model
model = load_model('../input/pre-model10/256_resnet34 0.908.h5', custom_objects = {'RAdam': RAdam, 'dice_coef': dice_coef, 'wbce_dice_loss': wbce_dice_loss})

In [11]:
test = pd.read_csv('../input/severstal-steel-defect-detection/sample_submission.csv')

In [16]:
def mask2rle(img):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels= img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

In [17]:
test_batches = DataGenerator(test, subset = 'test', batch_size = 2)
batch_size = 2

In [31]:
predictions = []
#vanish = [5*700, 5*1300, 2*1000, 4*1300]
vanish = [3000, 10000, 2000, 2*3000]
THRESHOLD = 0.45
sum = [0] * 4
for i, batch in enumerate(tqdm(test_batches)): # index, (batch_size, 256, 1600, 3)
    preds = model.predict_generator(batch) # массив из batch_size предсказаний, shape = (batch_size, 256, 1600, 4)
    for k, img in enumerate(batch): # прохожу по каждому изображению из батча
        mask = preds[k]
        mask[mask >= THRESHOLD] = 1
        mask[mask < THRESHOLD] = 0
        for cls in range(4): # прохожу по каждому из 4 классов дефектов
            sum[cls] = sum[cls] + np.sum(mask[:,:,cls])
            if np.sum(mask[:,:,cls]) <= vanish[cls]:
                mask[:,:,cls] = 0
            rle = mask2rle(mask[:,:,cls])
            name = test['ImageId'].iloc[i * batch_size + k] + f"_{cls + 1}"
            predictions.append([name, rle])

df = pd.DataFrame(predictions, columns=['ImageId_ClassId', 'EncodedPixels'])
df.to_csv("submission.csv", index=False)

100%|██████████| 2753/2753 [05:17<00:00,  8.67it/s]


In [32]:
df1 = df[df['EncodedPixels'] != '']
sum1 = 4 * [0]
for i in range(df1.shape[0]):
    if int(df1['ImageId_ClassId'].iloc[i].split('_')[1]) == 1:
        sum1[0] = sum1[0] + 1
    if int(df1['ImageId_ClassId'].iloc[i].split('_')[1]) == 2:
        sum1[1] = sum1[1] + 1
    if int(df1['ImageId_ClassId'].iloc[i].split('_')[1]) == 3:
        sum1[2] = sum1[2] + 1
    if int(df1['ImageId_ClassId'].iloc[i].split('_')[1]) == 4:
        sum1[3] = sum1[3] + 1
print(sum1)
for i in range(4):
    print(sum[i] / sum1[i])

[55, 0, 1255, 307]
22727.2
inf
23572.46374501992
33384.84364820847
