# Project D - Severstal Steel Defect Detection Kaggle Competition 
## Semantic Segmentation of steel defects
### Authors: Utkrisht Rajkumar, Subrato Chakravorty, Chi-Hsin Lo

This notebook loads the trained models and runs inference on the test images. We post-process the predictions by removing connected components that are too small.

In [13]:
import os
import json
import gc
import cv2
import keras
from keras import backend as K
from keras import layers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model, load_model
from keras.layers import concatenate, Input
from keras.layers import *
from keras.optimizers import Adam
from keras.callbacks import Callback, ModelCheckpoint
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.utils import multi_gpu_model
import numpy as np
import pandas as pd
from tqdm import tqdm
import sys

### Post Process

Connected components containing `min_size` pixels or fewer are ignored. This value is based on best practices observed in other Kaggle notebooks.

In [7]:
def post_process(probability, threshold=0.5, min_size=3000):
    """Binarize a predicted mask and drop small connected components.

    Args:
        probability: 2-D array of per-pixel scores (presumably sigmoid
            outputs in [0, 1] -- confirm against the model head).
        threshold: Pixels whose score is greater than or equal to this value
            are treated as foreground.
        min_size: A connected component is kept only when it contains
            strictly more than this many pixels.

    Returns:
        A float32 array with the same shape as `probability`, holding 1 on
        the pixels of every kept component and 0 everywhere else.
    """
    probability = np.asarray(probability)
    # Use the caller's threshold; it was previously accepted but ignored in
    # favour of a hard-coded 0.5.
    mask = probability >= threshold
    # Size the output from the input instead of assuming 256x1600 images.
    predictions = np.zeros(mask.shape, np.float32)
    if not mask.any():
        # No foreground pixel at all: the labelling pass would find nothing,
        # so skip it.
        return predictions
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    # Label 0 is skipped: OpenCV assigns it to the background.
    for c in range(1, num_component):
        p = component == c
        if p.sum() > min_size:
            predictions[p] = 1
    return predictions

### Loss and accuracy metrics

In [None]:
from keras.metrics import binary_crossentropy
def dice_coef(y_true, y_pred, smooth=1):
    """Return the smoothed Dice coefficient between two tensors.

    Both tensors are flattened, so the score is computed over all of their
    elements at once. `smooth` is added to the numerator and the denominator,
    which keeps the ratio defined when both sums are zero.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return numerator / denominator

def dice_loss(y_true, y_pred):
    """Return one minus the Dice coefficient, so higher overlap gives lower loss."""
    overlap_score = dice_coef(y_true, y_pred)
    return 1 - overlap_score

def BCE_loss(y_true, y_pred):
    """Return Keras' binary cross-entropy between `y_true` and `y_pred`."""
    cross_entropy = binary_crossentropy(y_true, y_pred)
    return cross_entropy

def bce_dice(y_true, y_pred):
    """Return the combined loss: binary cross-entropy plus Dice loss."""
    cross_entropy_term = BCE_loss(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return cross_entropy_term + dice_term

In [11]:
# Base folder for the competition data; used as a prefix (trailing slash
# included) when building input file paths such as the sample submission CSV.
root_dir = '../input/'

### Load trained model

In [12]:
# Inference settings: which checkpoint to load and where it lives on disk.
BATCH_SIZE = 64
model_name = 'unet'
model_path = './models/' + model_name + '.h5'

# custom_objects maps the names 'bce_dice' and 'dice_coef' to the functions
# defined in this notebook so that load_model can resolve them.
model = load_model(
    model_path,
    custom_objects={'bce_dice': bce_dice, 'dice_coef': dice_coef},
)

# The sample submission has one row per "<ImageId>_<ClassId>" pair; the part
# before the first underscore is kept as the image id.
sample_path = root_dir + 'sample_submission.csv'
sub_df = pd.read_csv(sample_path)
sub_df['ImageId'] = [
    image_class.split('_')[0] for image_class in sub_df['ImageId_ClassId']
]

# One row per distinct test image, in order of first appearance.
test_imgs = pd.DataFrame({'ImageId': sub_df['ImageId'].unique()})

### Predict on test images

In [None]:
# Number of test images pushed through the generator per predict call; the
# predictions for one chunk are held in memory at a time.
PREDICT_CHUNK_SIZE = 300
n_test_images = test_imgs.shape[0]

# One slice of sub_df per test image, each with its EncodedPixels filled in.
test_df = []
for i in range(0, n_test_images, PREDICT_CHUNK_SIZE):
    # Positional indices into test_imgs for this chunk (the last may be short).
    batch_idx = list(range(i, min(n_test_images, i + PREDICT_CHUNK_SIZE)))
    test_generator = DataGenerator(
        batch_idx,
        df=test_imgs,
        shuffle=False,
        mode='predict',
        # Derived from root_dir so the test images follow the data folder.
        base_path=root_dir + 'test_images',
        target_df=sub_df,
        batch_size=1,
        n_classes=4,
    )
    batch_pred_masks = model.predict_generator(
        test_generator, workers=1, verbose=1, use_multiprocessing=False
    )

    # enumerate() has no length, so pass the total explicitly to give the
    # progress bar a percentage and an ETA.
    for j, b in tqdm(enumerate(batch_idx), total=len(batch_idx)):
        filename = test_imgs['ImageId'].iloc[b]
        image_df = sub_df[sub_df['ImageId'] == filename].copy()

        # Round the predicted scores to hard 0/1 masks before encoding.
        # NOTE(review): post_process is not called here -- confirm whether
        # build_rles applies it or whether small components are left in.
        pred_masks = batch_pred_masks[j].round().astype(int)
        pred_rles = build_rles(pred_masks)

        image_df['EncodedPixels'] = pred_rles
        test_df.append(image_df)
    # Release the chunk's prediction arrays before the next chunk is scored.
    gc.collect()

### Generate submission csv for kaggle

In [None]:
# test_df is a list of per-image DataFrames; merge them into one table before
# selecting the submission columns (indexing the list directly raises TypeError).
submission_df = pd.concat(test_df, ignore_index=True)

# Create the output folder if needed so to_csv does not fail on a missing path.
os.makedirs('./submissions', exist_ok=True)
submission_path = './submissions/' + model_name + '.csv'
submission_df[['ImageId_ClassId', 'EncodedPixels']].to_csv(submission_path, index=False)