## Sparse - Kaggle Steel Defects - Segmentation (locate and identify defects)

Link to competition: https://www.kaggle.com/c/severstal-steel-defect-detection

This notebook was converted from prior Kaggle work: it was migrated to TF 2.x, and various methods were converted to be more native TF.  It was an experiment to see whether sparse labels could reduce the memory requirements and improve training.  Sparse labels did help with memory, but training results were similar to the other segmentation notebook, so this notebook was not used for the final merge of segmentation and classification.

- Pre-trained model is from Pavel Yakubovskiy (https://github.com/qubvel/segmentation_models)

Final dice_coef score from Training images:  0.6055773677232597

In [None]:
#"""
# Google Colab-specific setup: mount Google Drive, list its root folder, flag
# the Colab environment for later cells, and select TensorFlow 2.x.
# A later cell defaults USING_COLLAB to False when this cell has not been run.
from google.colab import drive
drive.mount('/content/drive')

import os
!ls "/content/drive/My Drive"

USING_COLLAB = True
%tensorflow_version 2.x
#"""

In [None]:
# Install the Kaggle command-line client.
# The plain install below is kept for reference but is disabled:
#!pip install -q kaggle

# Workaround to get the newest client version (see the linked question):
# https://stackoverflow.com/questions/58643979/google-colaboratory-use-kaggle-server-version-1-5-6-client-version-1-5-4-fai
# Upgrades and force-reinstalls the package without touching its dependencies.
!pip install kaggle --upgrade --force-reinstall --no-deps

In [None]:
# Upload the "kaggle.json" API token created from your Kaggle Account tab.
# If you downloaded it, it will be in your "Downloads" directory.
# The next cell copies the uploaded file from the working directory.

from google.colab import files
files.upload()

In [None]:
# On the VM, create the kaggle config directory, copy the token into it,
# list the directory to confirm, and restrict the token to owner read/write.
# NOTE(review): the chmod uses /root/.kaggle while the other commands use
# ~/.kaggle; these only match when the notebook runs as root - confirm.

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Download the competition archive into the working directory.
# (Listing all competitions is disabled; enable it to check the client works.)
#!kaggle competitions list
!kaggle competitions download -c severstal-steel-defect-detection

In [None]:
# Quietly extract the archive (-u only updates/creates files), then list one
# known training image as a quick check that extraction worked.
!unzip -uq severstal-steel-defect-detection.zip 
!ls train_images/a75bb4c01*.*

In [None]:
# Free disk space: remove the extracted test images and the downloaded archive.
# Neither is referenced by the later cells visible in this notebook.
!rm -r test_images
!rm severstal-steel-defect-detection.zip

In [None]:
# Set up sys.path so the project's MachineLearning/lib helpers can be imported.

# Default USING_COLLAB to False when the Colab setup cell was not run.
try: USING_COLLAB
except NameError: USING_COLLAB = False

# Reload edited helper modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2

import sys
# Only the first sys.path entry is checked, so re-running this cell after the
# insert below does not add the directory a second time.
if "MachineLearning" in sys.path[0]:
    pass
else:
    print(sys.path)
    if USING_COLLAB:
        sys.path.insert(0, '/content/drive/My Drive/GitHub/MachineLearning/lib')  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    else:
        sys.path.insert(0, '/Users/john/Documents/GitHub/MachineLearning/lib')  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    
    print(sys.path)

In [None]:
#%reload_ext autoreload


In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os, sys, random, warnings, time, copy, csv, gc
import numpy as np 

import matplotlib.pyplot as plt
%matplotlib inline

import cv2
from tqdm import tqdm_notebook, tnrange, tqdm
import pandas as pd

import tensorflow as tf
print(tf.__version__)

from tensorflow.keras.models import load_model 

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

AUTOTUNE = tf.data.experimental.AUTOTUNE
print("AUTOTUNE: ", AUTOTUNE)

from TrainingUtils import *
from losses_and_metrics.Losses_Babakhin import make_loss, Kaggle_IoU_Precision, dice_coef_loss_bce

#warnings.filterwarnings("ignore", category=DeprecationWarning)
#warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)

## Examine and understand data


In [None]:
# GLOBALS/CONFIG ITEMS
# Builds the `parms` configuration object that the rest of the notebook reads.
# GlobalParms comes from the TrainingUtils star-import; how it stores or
# combines these values is not visible in this notebook.

# Set root directory path to data
if USING_COLLAB:
    # On Colab the data is unzipped into the working directory, so the root is empty.
    #ROOT_PATH = "/content/drive/My Drive/ImageData/KaggleSteelDefects"  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    ROOT_PATH = ""
    MODEL_PATH= "/content/drive/My Drive/ImageData/KaggleSteelDefects"  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    
else:
    ROOT_PATH = "/Users/john/Documents/ImageData/KaggleSteelDefects"  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    MODEL_PATH= "/Users/john/Documents/ImageData/KaggleSteelDefects"  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    
# Establish global dictionary
# NUM_CLASSES=5 is background ("Outside") plus the four defect classes.
# IMAGE_ROWS x IMAGE_COLS is the size images and masks are resized to.
# NOTE(review): FINAL_ACTIVATION='sigmoid' is paired with a loss created with
# from_logits=True, and compile_model() does not read parms.LOSS at all -
# confirm which of these settings GlobalParms or the model actually uses.
parms = GlobalParms(MODEL_NAME="model-SteelDefects-Sparse-Segmentation-V01.h5",
                    ROOT_PATH=ROOT_PATH,

                    TRAIN_PATH="train_images", 
                    MODEL_PATH=MODEL_PATH,
                    SMALL_RUN=False,
                    NUM_CLASSES=5,
                    CLASS_NAMES=["Outside", "1", "2", "3", "4"],
                    IMAGE_ROWS=256,
                    IMAGE_COLS=800,
                    IMAGE_CHANNELS=3,
                    BATCH_SIZE=16,
                    EPOCS=20,
                    IMAGE_EXT=".jpg",
                    FINAL_ACTIVATION='sigmoid',
                    LOSS=tf.keras.losses.BinaryCrossentropy(from_logits=True))

# Other globals...
# (rows, cols) of a mask at the original image resolution; load_masks() decodes
# the run-length encodings at this size.
ORIG_MASK_SHAPE = (256, 1600)

parms.print_contents()

In [None]:
# Simple helper method to display batches of images with labels....  

def show_image_masks(image_in, masks_in):
    """Plot one image with the outline of every defect class drawn on top.

    Args:
        image_in: image as a tensor or numpy array. It is multiplied by 255
            for drawing and divided by 255 again for display, so values are
            presumably in the 0-1 range - TODO confirm against the pipeline.
        masks_in: sparse label mask (tensor or numpy array) in which each
            pixel holds a class id; 0 is treated as background.

    The plot title lists the class ids that occur in the mask, or "none".
    """
    if tf.is_tensor(image_in):
        image = image_in.numpy()
        masks = masks_in.numpy()
    else:
        image = image_in
        masks = masks_in

    # cv2.polylines and cv2.findContours display better when range is 0-255
    # https://docs.opencv.org/2.4/modules/core/doc/drawing_functions.html
    # The multiplication also creates a copy, so the caller's array is not drawn on.
    image = image * 255
    palet = [(100, 100, 100), (249, 192, 12), (0, 185, 241), (114, 0, 218), (249,50,12)]
    found_classes = []
    fig, ax = plt.subplots(1,1,figsize=(20,10))

    # Class 0 is background, so start at 1.
    for j in range(1, parms.NUM_CLASSES):
        mask = np.where(masks == j, 1, 0).astype(np.uint8)
        if np.count_nonzero(mask) > 0:
            # The mask value j already is the class id (no +1 offset needed).
            found_classes.append(str(j))
            contours, _ = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
            for i in range(0, len(contours)):
                cv2.polylines(image, contours[i], True, palet[j], 2)

    # Build the title from the list so nothing has to be trimmed afterwards;
    # trimming used to cut "Labels: " down to "Label" for defect-free images.
    title = "Labels: " + (",  ".join(found_classes) if found_classes else "none")
    ax.set_title(title)

    ax.imshow(image/255, cmap=plt.get_cmap('gray'))


def show_batch_image_masks(image, masks):
    """Plot every image of a batch together with its mask, one figure each.

    Args:
        image: indexable batch of images (its length sets the number of plots).
        masks: indexable batch of masks, aligned with ``image`` by position.
    """
    batch_size = len(image)
    for position in range(batch_size):
        show_image_masks(image[position], masks[position])

# Helper methods to create mask's or rle's
def mask2rle(img):
    """Run-length encode a binary mask.

    Args:
        img: numpy array, 1 - mask, 0 - background.

    Returns:
        A space-separated string of "start length" pairs. Pixels are counted
        column by column (top to bottom, then left to right) and the start
        positions are 1-based.
    """
    # Column-major flattening is the same as flattening the transpose.
    column_major = img.flatten(order="F")
    # Pad with a zero on both sides so runs touching either end are detected.
    padded = np.concatenate(([0], column_major, [0]))
    # 1-based positions where the value changes: run starts and run ends alternate.
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every end position into a run length.
    edges[1::2] = edges[1::2] - edges[::2]
    return ' '.join(map(str, edges))

def rle2mask(rle, input_shape):
    """Decode a run-length-encoded string into a binary mask.

    This is the inverse of ``mask2rle``: start positions are 1-based and
    pixels are counted column by column (top to bottom, then left to right).

    Args:
        rle: space-separated "start length" pairs; an empty string gives an
            all-zero mask.
        input_shape: shape of the mask to build; only the first two entries
            (rows, cols) are used.

    Returns:
        uint8 numpy array of shape (rows, cols) with 1 inside the runs.

    Raises:
        ValueError: if ``rle`` holds an odd number of values.
    """
    rows, cols = input_shape[:2]

    mask = np.zeros(rows * cols, dtype=np.uint8)

    values = np.asarray([int(x) for x in rle.split()])
    if values.size % 2 != 0:
        raise ValueError("RLE string must contain 'start length' pairs")

    # RLE starts are 1-based; shift them to 0-based array offsets. Without the
    # shift every run lands one pixel late and a run that ends on the last
    # pixel loses its final element.
    starts = values[0::2] - 1
    lengths = values[1::2]

    for start, length in zip(starts, lengths):
        mask[start:start + length] = 1

    # The flat buffer is column-major, so reshape to (cols, rows) and transpose.
    return mask.reshape(cols, rows).T

def build_masksORIG(rles, input_shape):
    """Build a dense mask stack with one channel per run-length encoding.

    Args:
        rles: sequence of encodings; entries that are not ``str`` (for
            example ``None``) leave their channel all zero.
        input_shape: (rows, cols) of each mask.

    Returns:
        numpy array of shape (rows, cols, len(rles)).
    """
    stacked = np.zeros((*input_shape, len(rles)))

    for channel, encoded in enumerate(rles):
        if type(encoded) is not str:
            continue
        stacked[:, :, channel] = rle2mask(encoded, input_shape)

    return stacked

def build_masks2(rles, input_shape):
    """Build a sparse label mask already scaled to the training resolution.

    Each string entry of ``rles`` is decoded at ``input_shape``, scaled to
    (parms.IMAGE_ROWS, parms.IMAGE_COLS) and written into the result as class
    id ``index + 1``; where masks overlap, later entries overwrite earlier ones.

    Args:
        rles: sequence of run-length encodings; non-``str`` entries are skipped.
        input_shape: (rows, cols) at which the encodings were produced.

    Returns:
        float numpy array of shape (parms.IMAGE_ROWS, parms.IMAGE_COLS, 1)
        holding 0 for background and the class id elsewhere.
    """
    target_rows, target_cols = parms.IMAGE_ROWS, parms.IMAGE_COLS
    sparse_mask = np.zeros((target_rows, target_cols, 1))

    for i, rle in enumerate(rles):
        if type(rle) is str:
            full_mask = np.ascontiguousarray(rle2mask(rle, input_shape))
            # np.resize only truncates or repeats the flat buffer, which
            # scrambles the mask; scale spatially instead. Nearest-neighbour
            # keeps the values binary. cv2.resize takes dsize as (width, height).
            scaled = cv2.resize(full_mask, (target_cols, target_rows),
                                interpolation=cv2.INTER_NEAREST)
            scaled = scaled.reshape((target_rows, target_cols, 1))

            sparse_mask = np.where(scaled > 0.5, i+1, sparse_mask)

    return sparse_mask

def build_masks(rles, input_shape):
    """Build a sparse label mask at the original resolution.

    Each string entry of ``rles`` is decoded and its pixels are labelled with
    class id ``index + 1``; where masks overlap, later entries overwrite
    earlier ones.

    Args:
        rles: sequence of run-length encodings; non-``str`` entries are skipped.
        input_shape: (rows, cols) of the mask.

    Returns:
        float numpy array of shape (rows, cols, 1) holding 0 for background
        and the class id elsewhere.
    """
    label_shape = (*input_shape, 1)
    sparse_mask = np.zeros(label_shape)

    for class_index, encoded in enumerate(rles):
        if type(encoded) is not str:
            continue
        class_mask = rle2mask(encoded, input_shape).reshape(label_shape)
        sparse_mask = np.where(class_mask == 1, class_index + 1, sparse_mask)

    return sparse_mask

def build_rles(masks):
    """Run-length encode every channel of a 3-D mask stack.

    Args:
        masks: numpy array with exactly three dimensions; the last one
            indexes the individual masks.

    Returns:
        List with one RLE string per channel, in channel order.
    """
    _, _, depth = masks.shape

    rles = []
    for channel in range(depth):
        rles.append(mask2rle(masks[:, :, channel]))

    return rles


In [None]:
# Load the training defect annotations; later cells read the ImageId, ClassId
# and EncodedPixels columns of this frame.
image_defect_df = pd.read_csv(os.path.join(parms.ROOT_PATH, "train.csv"))

# Record each image's file size in bytes; it is used below to stratify the split.
# NOTE(review): the image path is built from parms.TRAIN_PATH alone while the
# csv path uses parms.ROOT_PATH - confirm how GlobalParms resolves TRAIN_PATH.
image_defect_df['ImageSize'] = image_defect_df['ImageId'].map(lambda image_id: round(os.stat(os.path.join(parms.TRAIN_PATH, image_id)).st_size))
#image_defect_df['ImageSize'] = 50

# Spot-check the rows of one known image, then preview the frame.
print(image_defect_df.loc[image_defect_df["ImageId"] == "0025bde0c.jpg"])
image_defect_df.head()

In [None]:
# Stratifying by image file size; my prior notebook used the number of white
# pixels, but this was easier and gave a better spread.
# The bar chart shows how many rows fall into each size bin. The same bin edges
# are hard-coded again in group_by_image_size(), so keep the two in sync.
image_defect_df_cut = pd.cut(image_defect_df["ImageSize"], bins=[0, 85000, 104000, 115000, 1000000]) 
ax = image_defect_df_cut.value_counts(sort=False).plot.bar(rot=0, color="b", figsize=(20,6)) 
plt.show() 

In [None]:
# Apply method to create the group number
def group_by_image_size(x):
    """Map an image file size to its stratification group number.

    Groups are bounded by 85000, 104000 and 115000: a size strictly below the
    first bound is group 0, below the second group 1, below the third group 2,
    and anything else group 3.
    """
    upper_bounds = (85000, 104000, 115000)
    for group, bound in enumerate(upper_bounds):
        if x < bound:
            return group
    return len(upper_bounds)

image_defect_df['ImageGroup'] = image_defect_df['ImageSize'].apply(group_by_image_size)
image_defect_df.head()


In [None]:
# Select a balanced subset for training: any group with more than
# SAMPLES_PER_GROUP rows is randomly down-sampled to that size, smaller groups
# are kept whole.
# NOTE(review): the cap is presumably larger than every group, which would make
# this a pass-through - confirm against the printed row count.
SAMPLES_PER_GROUP = 200000
balanced_train_df = image_defect_df.groupby('ImageGroup').apply(lambda x: x.sample(SAMPLES_PER_GROUP) if len(x) > SAMPLES_PER_GROUP else x)
# One histogram bin per group, then the total number of rows kept.
balanced_train_df['ImageGroup'].hist(bins=balanced_train_df['ImageGroup'].max()+1)
print(balanced_train_df.shape[0], 'ImageGroup')


## Build an input pipeline

In [None]:
# Split train and val, stratified by the image-size group.
# The seed is fixed so the split is reproducible across kernel restarts; the
# tf.data caches further down are written to fixed file names, so a split that
# changed between runs could otherwise be paired with stale cached samples.
# NOTE(review): the frame has one row per annotation; if an image can have
# several rows it may land in both sets - confirm.
SPLIT_SEED = 42

train_df, valid_df = train_test_split(balanced_train_df,
                                      test_size = 0.2,
                                      stratify = balanced_train_df['ImageGroup'],
                                      random_state = SPLIT_SEED)

# Add some more training examples from the sparse examples
#print('Original Training len: ', train_df.shape[0], "  Validation len: ", valid_df.shape[0])
#add_more_df = train_df.loc[train_df["DefectCount"] > 1]
#add_more_df = pd.concat([add_more_df, add_more_df])
#train_df = pd.concat([train_df, add_more_df])
#train_df.reset_index(drop=True)

train_df = shuffle(train_df, random_state = SPLIT_SEED) # Shuffle

print('After Adjust, Training len: ', train_df.shape[0], "  Validation len: ", valid_df.shape[0])

In [None]:
# Set dataset lengths and the number of batches per epoch.
train_len = len(train_df)
val_len = len(valid_df)
images_list_len = train_len + val_len

# Use true division before the ceiling: with floor division the ceiling was a
# no-op, so the final partial batch was left out of every epoch and a dataset
# smaller than one batch produced zero steps. int() gives fit() whole numbers.
steps_per_epoch = int(np.ceil(train_len / parms.BATCH_SIZE)) # set step sizes based on train & batch
validation_steps = int(np.ceil(val_len / parms.BATCH_SIZE)) # set step sizes based on val & batch

print("Total number: ", images_list_len, "  Train number: ", train_len, "  Val number: ", val_len)
print("Steps/EPOC: ", steps_per_epoch, "  Steps/Validation: ", validation_steps)

In [None]:
# Final look at the distribution since we added more of the sparse cases
print(train_df["ImageGroup"].value_counts())
print(valid_df["ImageGroup"].value_counts())

### Training and Validation setup

In [None]:
# Read, decode the image, convert to float
def read_decode_image(image_id: tf.Tensor) -> tf.Tensor:
    """Read a training image from disk and decode it.

    Args:
        image_id: file name of the image, looked up under parms.TRAIN_PATH.

    Returns:
        The JPEG decoded with parms.IMAGE_CHANNELS channels and converted to
        parms.IMAGE_DTYPE (presumably a float dtype, in which case
        convert_image_dtype rescales values to 0-1 - TODO confirm).
    """
    raw_bytes = tf.io.read_file(parms.TRAIN_PATH + "/" + image_id)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=parms.IMAGE_CHANNELS)
    return tf.image.convert_image_dtype(decoded, parms.IMAGE_DTYPE)

# Build mask(s) from rles
def load_masks(image_id_in: tf.Tensor) -> tf.Tensor:
    """Build the sparse label mask for one image from its annotation rows.

    Intended to run eagerly (it calls ``.numpy()`` on its argument).

    Args:
        image_id_in: scalar string tensor holding the image file name.

    Returns:
        The array produced by build_masks() at ORIG_MASK_SHAPE.
    """
    image_id = image_id_in.numpy().decode("utf-8")
    annotations = image_defect_df.loc[image_defect_df['ImageId'] == image_id]

    # One slot per class; slots without an annotation stay None and are skipped
    # by build_masks(). ClassId is treated as 1-based, hence the -1.
    rles = [None] * parms.NUM_CLASSES
    for class_id, encoded in zip(annotations["ClassId"], annotations["EncodedPixels"]):
        rles[int(class_id) - 1] = encoded

    return build_masks(rles, input_shape=ORIG_MASK_SHAPE)

# Augmentations for training dataset, done after cache
def image_aug(image: tf.Tensor, masks: tf.Tensor) -> tf.Tensor:
    """Randomly flip an image and its mask together.

    Each of the two flips (left-right, up-down) is applied when its own
    uniform random draw is above 0.5.

    Returns:
        The (image, masks) pair. Note that the annotation says tf.Tensor but a
        2-tuple is returned.
    """
    # Must use a custom percentage via random.uniform (instead of the
    # tf.image.random_flip_* helpers) because image and mask must be flipped
    # identically.
    
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_left_right(image)
        masks = tf.image.flip_left_right(masks)
        # Older per-channel variant for dense masks, kept for reference:
        #for i in range(parms.NUM_CLASSES):
        #    masks[:, :, i] = tf.image.flip_left_right(masks[:, :, i])
        
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_up_down(image)
        masks = tf.image.flip_up_down(masks)
        # Older per-channel variant for dense masks, kept for reference:
        #for i in range(parms.NUM_CLASSES):
        #    masks[:, :, i] = tf.image.flip_up_down(masks[:, :, i])

    return image, masks

# pre-cache mapped method to load image and masks
def process_load_image_masks(image_id: tf.Tensor) -> tf.Tensor:
    """Load one image and its sparse label mask and resize both for training.

    Args:
        image_id: scalar string tensor holding the image file name.

    Returns:
        (image, masks): the image resized to (parms.IMAGE_ROWS,
        parms.IMAGE_COLS) and the label mask of shape (parms.IMAGE_ROWS,
        parms.IMAGE_COLS, 1) as float32 holding whole-number class ids.
    """
    image = read_decode_image(image_id)

    # load_masks needs eager execution (.numpy()), hence py_function; its
    # inputs must be tensors.
    [masks,] = tf.py_function(load_masks, [image_id], [tf.int32])
    # py_function loses the static shape. Declare the shape load_masks really
    # produces (original resolution), not the post-resize shape.
    masks.set_shape((*ORIG_MASK_SHAPE, 1))

    target_size = [parms.IMAGE_ROWS, parms.IMAGE_COLS]
    image = tf.image.resize(image, target_size)
    # Class ids are categorical: the default bilinear resize would blend
    # neighbouring ids into fractional or wrong classes along defect borders,
    # so resize the labels with nearest-neighbour instead.
    masks = tf.image.resize(masks, target_size, method="nearest")
    # Bilinear resizing used to return float32; keep that dtype so downstream
    # consumers receive the same element type as before.
    masks = tf.cast(masks, tf.float32)
    return image, masks

# post-cache mapped method, does image augmentation
def process_train_post_cache(image: tf.Tensor, masks: tf.Tensor) -> tf.Tensor:
    """Apply the random training augmentation to an (image, masks) pair.

    Mapped after the dataset cache so every epoch draws fresh augmentations.
    Returns the augmented (image, masks) pair.
    """
    return image_aug(image, masks)


In [None]:
# Create a dataset of image ids from the training DataFrame
train_dataset = tf.data.Dataset.from_tensor_slices(train_df["ImageId"].values)

# Verify the ids were loaded; the last id seen is kept so a later cell can
# cross-check its annotation rows.
for image_id in train_dataset.take(2):
    train_image_id = image_id.numpy().decode("utf-8")
    print("Image ID: ", train_image_id)

# Map each id to its (image, mask) pair; augmentation is added after the cache
train_dataset = train_dataset.map(process_load_image_masks, num_parallel_calls=AUTOTUNE)

# Verify the mapping worked and keep one un-augmented sample for display
for image, masks in train_dataset.take(1):
    print("Image shape: {}  Max: {}  Min: {}".format(image.numpy().shape, np.max(image.numpy()), np.min(image.numpy())))
    print("Masks shape: {}  Max: {}  Min: {}".format(masks.numpy().shape, np.max(masks.numpy()), np.min(masks.numpy())))
    some_image = image.numpy()
    some_masks = masks.numpy()

# Cache the loaded samples on disk, then augment, batch, prefetch and repeat.
# Remove the .cache(...) line if running under Kaggle.
# The chain is parenthesised: a comment line inside a backslash-continued
# statement ends the statement and made this cell a syntax error.
train_dataset = (
    train_dataset
    .cache("./steel_train_seg2.tfcache")
    .map(process_train_post_cache, num_parallel_calls=AUTOTUNE)
    .batch(parms.BATCH_SIZE)
    .prefetch(1)
    .repeat()
)

# Show one augmented batch; execute this cell multiple times to see other
# random augmentations.
for batch_image, batch_masks in train_dataset.take(1):
    show_batch_image_masks(batch_image, batch_masks)

# Show the single un-augmented sample captured above
show_batch_image_masks([some_image], [some_masks])

In [None]:
# Double check that training labels and image_id are all good, can use different image_id's
image_defect_df.loc[image_defect_df["ImageId"] == train_image_id]
#image_defect_df.loc[image_defect_df["ImageId"] == "3604dfc38.jpg"]

In [None]:
# Create a dataset of image ids from the validation DataFrame
val_dataset = tf.data.Dataset.from_tensor_slices(valid_df["ImageId"].values)


# Verify the ids were loaded; the last id seen is kept in val_image_id so a
# later cell can cross-check its annotation rows.
for image_id in val_dataset.take(2):
    val_image_id = image_id.numpy().decode("utf-8")
    print("Image ID: ", image_id.numpy().decode("utf-8"))

# Map each validation id to its (image, mask) pair. No augmentation is applied
# to the validation data.
val_dataset = val_dataset.map(process_load_image_masks, num_parallel_calls=AUTOTUNE)

# Verify the mapping worked and keep one sample (as tensors) for display
for image, masks in val_dataset.take(1):
    print("Image shape: {}  Max: {}  Min: {}".format(image.numpy().shape, np.max(image.numpy()), np.min(image.numpy())))
    print("Masks shape: {}  Max: {}  Min: {}".format(masks.numpy().shape, np.max(masks.numpy()), np.min(masks.numpy())))
    some_image = image
    some_masks = masks

# Cache the loaded samples on disk, then batch, prefetch and repeat.
# Remove cache if running under Kaggle
val_dataset = val_dataset.cache("./steel_val_seg2.tfcache2") \
                         .batch(parms.BATCH_SIZE) \
                         .prefetch(1) \
                         .repeat()


In [None]:
# Double check that val labels and image_id are all good, can use different image_id's
image_defect_df.loc[image_defect_df["ImageId"] == val_image_id]

In [None]:
# Final check before model training.  Test Validation or Train by changing the dataset

# Show one batch of the validation dataset; swap the commented line in to
# inspect the training dataset instead.
#for batch_image, batch_masks in train_dataset.take(1):
for batch_image, batch_masks in val_dataset.take(1):  
    show_batch_image_masks(batch_image, batch_masks)
    
# Also show the single sample kept by the most recently run dataset cell
show_batch_image_masks([some_image], [some_masks])

## Build  model
- add and validate pretrained model as a baseline

In [None]:
# Create the callbacks used for training: learning-rate reduction on plateau,
# early stopping, and best-model checkpointing.

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger

# Earlier variant that monitored a dice metric, kept for reference:
#reduce_lr = ReduceLROnPlateau(monitor='val_dice_coe', patience=3, verbose=1, min_lr=1e-6)
#earlystopper = EarlyStopping(patience=6, verbose=1)
#checkpointer = ModelCheckpoint(parms.MODEL_PATH, monitor='val_loss', verbose=1, mode="max", save_best_only=True)

# Active variant: lower the learning rate after 3 epochs without val_loss
# improvement, stop after 6 epochs without improvement of the callback's
# default monitored quantity, and save only the model with the lowest val_loss.
# NOTE(review): parms.MODEL_PATH was given a directory in the config cell, with
# the file name passed separately as MODEL_NAME; whether GlobalParms combines
# them into a file path is not visible here - confirm before relying on it.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, min_lr=1e-6)
earlystopper = EarlyStopping(patience=6, verbose=1)
checkpointer = ModelCheckpoint(parms.MODEL_PATH, monitor='val_loss', verbose=1, mode="min", save_best_only=True)


In [None]:
# Create model and compile it

from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, Conv2D, MaxPooling2D, BatchNormalization, UpSampling2D, Conv2DTranspose, Concatenate, Activation
from tensorflow.keras.losses import binary_crossentropy, categorical_crossentropy
from tensorflow.keras.optimizers import Adadelta, Adam, Nadam, SGD
########

# https://lars76.github.io/neural-networks/object-detection/losses-for-segmentation/
def combo_loss(y_true, y_pred):
    """Binary cross-entropy plus a per-sample dice loss.

    The dice term sums over axes 1-3, so 4-D inputs are expected, and it is
    reshaped to (-1, 1, 1) so it broadcasts over the cross-entropy map.

    NOTE(review): the cross-entropy term treats ``y_pred`` as logits
    (from_logits=True) while the dice term multiplies ``y_pred`` in as-is -
    confirm which the model's final layer actually produces.
    """
    sample_axes = (1, 2, 3)
    overlap = tf.reduce_sum(y_true * y_pred, axis=sample_axes)
    total = tf.reduce_sum(y_true + y_pred, axis=sample_axes)
    dice_term = tf.reshape(1 - (2 * overlap) / total, (-1, 1, 1))

    bce_term = tf.keras.losses.binary_crossentropy(y_true, y_pred, from_logits=True)
    return bce_term + dice_term

K = tf.keras.backend

def dice_coef(y_true, y_pred, smooth=1.0):
    """Smoothed dice coefficient computed over all elements of both tensors.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor with the same number of elements.
        smooth: constant added to numerator and denominator, which keeps the
            ratio defined when both tensors sum to zero.

    Returns:
        Scalar (2 * intersection + smooth) / (sum(y_true) + sum(y_pred) + smooth).
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return numerator / denominator


def compile_model(parms, model):
    """Compile ``model`` for sparse (integer-label) segmentation training.

    Args:
        parms: global configuration object; currently unused, kept so callers
            do not have to change.
        model: the Keras model to compile.

    Returns:
        The same model instance, compiled in place.
    """
    # Alternatives tried earlier: loss='binary_crossentropy', loss=combo_loss,
    # and per-class sample weights [0.1, .225, .225, .225, .225].
    # NOTE(review): from_logits=True assumes the model's last layer emits raw
    # logits, while the config cell sets FINAL_ACTIVATION='sigmoid' - confirm.
    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias
        # that newer Keras releases reject.
        optimizer=Adam(learning_rate=0.00005),
        metrics=['accuracy'])

    return model



## Train model

In [None]:
# Start from a previously saved segmentation model file, then re-compile it
# with this notebook's loss, optimizer and metrics.
# Comment this out if loading a different existing model.
# NOTE(review): the path is Colab/Drive specific and is used regardless of
# USING_COLLAB - change it for other environments.
STARTING_MODEL_PATH = "/content/drive/My Drive/GitHub/MachineLearning/2-KaggleSteelDefects/segmodel-256-800-c5-V01.h5"
model = load_model(STARTING_MODEL_PATH)
print("Loaded: ", STARTING_MODEL_PATH)

model = compile_model(parms, model)

In [None]:
# Train model
# Both datasets end in .repeat(), so they never run out on their own;
# steps_per_epoch and validation_steps define where each epoch and each
# validation pass ends.

history = model.fit(train_dataset,
                    validation_data=val_dataset,
                    epochs=parms.EPOCS, 
                    steps_per_epoch=steps_per_epoch,
                    validation_steps=validation_steps,
                    callbacks=[reduce_lr, earlystopper, checkpointer] 
                    )
