## Kaggle Steel Defects - Classifier (good vs has a defect)

Link to competition: https://www.kaggle.com/c/severstal-steel-defect-detection

This notebook was converted from my earlier Kaggle notebook: it was migrated to TF 2.x, and several methods were rewritten to be more TF-native.

Competition overview: https://www.kaggle.com/c/severstal-steel-defect-detection/overview

First Place: https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114254

People used his model: https://www.kaggle.com/lightforever/severstal-mlcomp-catalyst-infer-0-90672

https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/106462#latest-634450



## Final Classification Report and metrics from Training images:

Epoch 00010: val_loss improved from 0.15071 to 0.14525, saving model to /content/drive/My Drive/ImageData/KaggleSteelDefects/model-SteelDefects-Classification-V01.h5

563/563 [==============================] - 190s 338ms/step - loss: 0.0559 - accuracy: 0.9811 - val_loss: 0.1453 - val_accuracy: 0.9462 - lr: 5.0000e-06

Accuracy : 0.9461538461538461
Classification Report
              precision    recall  f1-score   support

           0    0.94389   0.94389   0.94389       998
           1    0.94824   0.94824   0.94824      1082

    accuracy                        0.94615      2080
   macro avg    0.94607   0.94607   0.94607      2080
weighted avg    0.94615   0.94615   0.94615      2080




In [None]:
#"""
# Google Colab-specific setup: mount Google Drive under /content/drive so that
# later cells can use paths below that mount point.
from google.colab import drive
drive.mount('/content/drive')

import os
# List the Drive root as a quick check that the mount succeeded.
!ls "/content/drive/My Drive"

# Flag read by later cells to choose between Colab and local paths.
USING_COLLAB = True
# Colab magic that selects the TensorFlow 2.x runtime.
%tensorflow_version 2.x
#"""

In [None]:
# To start, install the Kaggle client library (plain install kept for reference)
#!pip install -q kaggle

# Workaround to install the newest client version: force a reinstall of the
# package itself without touching its dependencies.  Background:
# https://stackoverflow.com/questions/58643979/google-colaboratory-use-kaggle-server-version-1-5-6-client-version-1-5-4-fai
!pip install kaggle --upgrade --force-reinstall --no-deps

In [None]:
# Upload your "kaggle.json" file that you created from your Kaggle Account tab.
# If you downloaded it, it would be in your "Downloads" directory.
# The next cell copies the uploaded kaggle.json from the working directory.

from google.colab import files
files.upload()

In [None]:
# On your VM, create kaggle directory and modify access rights

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Optional: list available competitions to confirm the Kaggle client is configured
#!kaggle competitions list
# Download the Severstal steel defect competition archive into the working directory
!kaggle competitions download -c severstal-steel-defect-detection

In [None]:
# Extract the downloaded competition archive into the working directory
!unzip -uq severstal-steel-defect-detection.zip 
# Spot check: list one known training image to confirm the extraction worked
!ls train_images/a75bb4c01*.*

In [None]:
# Setup sys.path to find MachineLearning lib directory

try: USING_COLLAB
except NameError: USING_COLLAB = False

%load_ext autoreload
%autoreload 2

import sys
if "MachineLearning" in sys.path[0]:
    pass
else:
    print(sys.path)
    if USING_COLLAB:
        sys.path.insert(0, '/content/drive/My Drive/GitHub/MachineLearning/lib')  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    else:
        sys.path.insert(0, '/Users/john/Documents/GitHub/MachineLearning/lib')  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    
    print(sys.path)

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os, sys, random, warnings, time, copy, csv, gc
import numpy as np 

import matplotlib.pyplot as plt
%matplotlib inline

import cv2
from tqdm import tqdm_notebook, tnrange, tqdm
import pandas as pd

import tensorflow as tf
print(tf.__version__)

from tensorflow.keras.models import load_model 

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Let tf.data pick parallelism levels; used as num_parallel_calls in the dataset maps below.
AUTOTUNE = tf.data.experimental.AUTOTUNE
print("AUTOTUNE: ", AUTOTUNE)

# NOTE(review): star import -- presumably the source of GlobalParms,
# load_file_names_Util, predictions_using_dataset, show_confusion_matrix and
# display_prediction_results used later, none of which are defined in this notebook.
from TrainingUtils import *

#warnings.filterwarnings("ignore", category=DeprecationWarning)
#warnings.filterwarnings("ignore", category=UserWarning)
# Silence only UserWarnings whose message matches the corrupt-EXIF pattern.
warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)

## Examine and understand data


In [None]:
# GLOBALS/CONFIG ITEMS

# Set the data root and the model output directory for the current environment.
if USING_COLLAB:
    #ROOT_PATH = "/content/drive/My Drive/ImageData/KaggleSteelDefects"  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    ROOT_PATH = ""  # data was downloaded and unzipped into the working directory
    MODEL_PATH = "/content/drive/My Drive/ImageData/KaggleSteelDefects"  ###### CHANGE FOR SPECIFIC ENVIRONMENT
else:
    ROOT_PATH = "/Users/john/Documents/ImageData/KaggleSteelDefects"  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    # The Colab Drive mount does not exist on a local machine, so keep the
    # model next to the data instead of pointing at /content/drive.
    MODEL_PATH = ROOT_PATH

# Establish global dictionary
# FINAL_ACTIVATION / LOSS / METRICS match the values used when the model is built.
parms = GlobalParms(MODEL_NAME="model-SteelDefects-Classification-V01.h5",
                    ROOT_PATH=ROOT_PATH,
                    TRAIN_PATH="train_images",
                    MODEL_PATH=MODEL_PATH,
                    SMALL_RUN=False,
                    NUM_CLASSES=2,
                    CLASS_NAMES=["Good", "Defect"],
                    IMAGE_ROWS=224,
                    IMAGE_COLS=224,
                    IMAGE_CHANNELS=3,
                    BATCH_SIZE=16,
                    EPOCS=20,
                    IMAGE_EXT=".jpg",
                    FINAL_ACTIVATION='sigmoid',
                    LOSS='binary_crossentropy',
                    METRICS=['accuracy'])

parms.print_contents()

In [None]:
# Simple helper method to display batches of images with labels....
def show_batch(image_batch, label_batch, number_to_show=25, r=5, c=5, print_shape=False):
    """Plot images from a batch in a grid, titled with their class names.

    Args:
        image_batch: Indexable batch of images; a single un-batched image is
            accepted when only one image is to be shown.
        label_batch: Labels aligned with ``image_batch``; the arg-max of each
            label indexes ``parms.CLASS_NAMES``.
        number_to_show: Upper bound on the number of images drawn.
        r: Number of grid rows (replaced by 4 when fewer than 8 images are shown).
        c: Number of grid columns (replaced by 2 when fewer than 8 images are shown).
        print_shape: When True, print each image's shape, max and min.
    """
    show_number = min(number_to_show, parms.BATCH_SIZE)

    if show_number < 8: #if small number, then change row, col and figure size
        if parms.IMAGE_COLS > 64 or parms.IMAGE_ROWS > 64:
            plt.figure(figsize=(25,25))
        else:
            plt.figure(figsize=(10,10))
        r = 4
        c = 2
    else:
        plt.figure(figsize=(10,10))

    if show_number == 1:
        # A lone image/label gets a leading batch axis so indexing below works.
        image_batch = np.expand_dims(image_batch, axis=0)
        label_batch = np.expand_dims(label_batch, axis=0)

    # Never index past the end of a short batch, nor request a subplot slot
    # outside the r x c grid.
    show_number = min(show_number, len(image_batch), r * c)

    for n in range(show_number):
        if print_shape:
            print("Image shape: {}  Max: {}  Min: {}".format(image_batch[n].shape,
                                                             np.max(image_batch[n]),
                                                             np.min(image_batch[n])))
        plt.subplot(r,c,n+1)
        # Grayscale colormap unless the image has exactly three channels.
        cmap="gray"
        if len(image_batch[n].shape) == 3:
            if image_batch[n].shape[2] == 3:
                cmap="viridis"

        plt.imshow(tf.keras.preprocessing.image.array_to_img(image_batch[n]), cmap=plt.get_cmap(cmap))
        plt.title(parms.CLASS_NAMES[np.argmax(label_batch[n])])
        plt.axis('off')


In [None]:
# Collect the training image file names
# (full_file_path=False presumably yields bare names without the directory -- confirm in TrainingUtils)
image_file_list = load_file_names_Util(
    parms.TRAIN_PATH,
    parms.IMAGE_EXT,
    full_file_path=False,
)
print(image_file_list[:5])

# Frame with one row per image; the class, defect-count and label columns
# start at 0 and are overridden later for images listed in the defect CSV.
all_df = pd.DataFrame(image_file_list, columns=["ImageId"]).assign(
    ClassId=0,
    DefectCount=0,
    Label=0,
)
# Spot check a single known image id
print(all_df.loc[all_df["ImageId"] == "0025bde0c.jpg"])
#all_df.head()

In [None]:
# Read the defect annotations: drop the encoded pixel masks and give every
# annotation row a count of one so the rows can be summed per image later.
defect_csv_path = os.path.join(parms.ROOT_PATH, "train.csv")
image_defect_df = (
    pd.read_csv(defect_csv_path)
    .drop(columns='EncodedPixels')
    .assign(DefectCount=1)
)
# Spot check a single known image id
print(image_defect_df.loc[image_defect_df["ImageId"] == "0025bde0c.jpg"])
#image_defect_df.head()

In [None]:
# Sum the annotation rows per image to get DefectCount, then mark every image
# that appears in the defect CSV with Label = 1.
# The redundant ``axis=0`` keyword is omitted: it is the default and is
# deprecated on groupby in recent pandas releases.  The chain builds a new frame
# rather than mutating in place.
image_defect_df = (
    image_defect_df
    .groupby(by=["ImageId"])
    .agg('sum')
    .reset_index()
    .assign(Label=1)
)
# Spot check a single known image id
print(image_defect_df.loc[image_defect_df["ImageId"] == "0025bde0c.jpg"])
#image_defect_df.head()

In [None]:
# Merge the two df's: rows from the defect frame come last in the concat, so
# keep='last' lets them replace the zero-filled placeholder rows of the same image.
# reset_index(drop=True) discards the old index instead of inserting it as an
# "index" column; with that column present, re-running this cell fails with
# "cannot insert index, already exists".
all_df = (
    pd.concat([all_df, image_defect_df])
    .drop_duplicates(['ImageId'], keep='last')
    .sort_values('Label')
    .reset_index(drop=True)
)
# Spot check a single known image id
print(all_df.loc[all_df["ImageId"] == "0025bde0c.jpg"])
#all_df.head()

In [None]:
#all_df.loc[all_df["ImageId"] == "0025bde0c.jpg"]
#all_df.loc[all_df["DefectCount"] > 1]
#all_df["ClassId"].value_counts()
#print(len(all_df))
#all_df.head()

In [None]:
# Number of images for each defect count (0 = no defect rows in train.csv)
print(all_df["DefectCount"].value_counts())

In [None]:
# Select a subset of the images

SAMPLES_PER_GROUP = 5000


def _cap_group_size(group_df):
    """Return the group unchanged, or a random SAMPLES_PER_GROUP-row sample if it is larger."""
    if len(group_df) > SAMPLES_PER_GROUP:
        return group_df.sample(SAMPLES_PER_GROUP)
    return group_df


# Cap every DefectCount group at SAMPLES_PER_GROUP rows; smaller groups are kept whole.
balanced_all_df = all_df.groupby('DefectCount').apply(_cap_group_size)
print(balanced_all_df.shape)
print(balanced_all_df["DefectCount"].value_counts())
#balanced_all_df.head()


## Build an input pipeline

In [None]:
# Split train and val, stratify by number of targets

train_df, valid_df = train_test_split(balanced_all_df,
                                      test_size = 0.2,
                                      stratify = balanced_all_df['DefectCount'])

# Add some more training examples from the sparse examples
print('Original Training len: ', train_df.shape[0], "  Validation len: ", valid_df.shape[0])
# Images with more than one defect are rare: append each of them two extra
# times so they appear three times in the training frame.
add_more_df = train_df.loc[train_df["DefectCount"] > 1]
train_df = pd.concat([train_df, add_more_df, add_more_df])

# Shuffle, then renumber the rows.  reset_index returns a new frame, so its
# result must be assigned (the bare call used previously had no effect).
train_df = shuffle(train_df).reset_index(drop=True)

print('After Adjust, Training len: ', train_df.shape[0], "  Validation len: ", valid_df.shape[0])

In [None]:
# set lengths and steps
train_len = len(train_df)
val_len = len(valid_df)
images_list_len = train_len + val_len

# Ceiling division: use true division before np.ceil so a final partial batch
# still gets its own step (ceil applied to the result of // never rounds up),
# and convert to int so Keras receives an integer step count.
# NOTE(review): validation_steps is also passed to predictions_using_dataset;
# confirm that helper copes with a final batch smaller than BATCH_SIZE.
steps_per_epoch = int(np.ceil(train_len / parms.BATCH_SIZE)) # set step sizes based on train & batch
validation_steps = int(np.ceil(val_len / parms.BATCH_SIZE)) # set step sizes based on val & batch

print("Total number: ", images_list_len, "  Train number: ", train_len, "  Val number: ", val_len)
print("Steps/EPOC: ", steps_per_epoch, "  Steps/Validation: ", validation_steps)

In [None]:
# Final look at the distribution since we added more of the sparse cases
# (training split first, then validation split)
for split_df in (train_df, valid_df):
    print(split_df["DefectCount"].value_counts())

In [None]:
#balanced_all_df.describe()
#all_df.head()

### Training and Validation setup

In [None]:
def image_rescale_1_neg_1(image: tf.Tensor) -> tf.Tensor:
    """Resize an image to the configured size and min-max scale it to [-1, 1].

    Args:
        image: Image tensor in any numeric range.

    Returns:
        The resized image, linearly rescaled so that its minimum maps to -1
        and its maximum to 1.  A constant image (max == min) maps to -1
        everywhere instead of producing NaN.
    """
    # tf.image.resize expects size as (new_height, new_width), i.e. rows first.
    image = tf.image.resize(image, (parms.IMAGE_ROWS, parms.IMAGE_COLS))
    # takes Any scale and converts to 1..-1
    low = tf.math.reduce_min(image)
    span = tf.math.reduce_max(image) - low
    # divide_no_nan returns 0 where span is 0, avoiding a division by zero.
    image = tf.constant(2., dtype=tf.float32) * tf.math.divide_no_nan(image - low, span) - 1
    return image

# Load one training image from disk, decode it, convert its dtype and resize it
def read_decode_image(image_id: tf.Tensor) -> tf.Tensor:
    """Read the JPEG named ``image_id`` under ``parms.TRAIN_PATH`` and prepare it.

    The file is decoded with ``parms.IMAGE_CHANNELS`` channels, converted to
    ``parms.IMAGE_DTYPE`` and resized to ``parms.IMAGE_ROWS`` x ``parms.IMAGE_COLS``.

    Args:
        image_id: File name of the image, relative to ``parms.TRAIN_PATH``.

    Returns:
        The decoded, dtype-converted and resized image tensor.
    """
    raw_bytes = tf.io.read_file(parms.TRAIN_PATH + "/" + image_id)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=parms.IMAGE_CHANNELS)
    converted = tf.image.convert_image_dtype(decoded, parms.IMAGE_DTYPE)
    return tf.image.resize(converted, [parms.IMAGE_ROWS, parms.IMAGE_COLS])

# Training-only augmentation, applied after the cache so it varies between epochs
def image_aug(image: tf.Tensor) -> tf.Tensor:
    """Rotate the image by a random multiple of 90 degrees.

    Args:
        image: Image tensor to augment.

    Returns:
        The image rotated 0, 1, 2 or 3 quarter turns.
    """
    # maxval is exclusive, so the draw is one of 0, 1, 2, 3 quarter turns.
    quarter_turns = tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
    return tf.image.rot90(image, quarter_turns)


def label_to_onehot(label: tf.Tensor) -> tf.Tensor:
    """One-hot encode an integer class label over ``parms.NUM_CLASSES`` classes."""
    return tf.one_hot(indices=label, depth=parms.NUM_CLASSES)

# Mapped before the cache: the deterministic work (image loading, label encoding)
def process_train_pre_cache(image_id: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
    """Load the image for ``image_id`` and one-hot encode ``label``.

    Returns:
        An ``(image, one_hot_label)`` pair.
    """
    one_hot_label = label_to_onehot(label)
    return read_decode_image(image_id), one_hot_label

# Mapped after the cache: applies the random image augmentation; the label
# passes through unchanged
def process_train_post_cache(image: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
    """Augment ``image`` and return it together with the untouched ``label``.

    Returns:
        An ``(augmented_image, label)`` pair.
    """
    return image_aug(image), label

# Mapped over the validation dataset: load the image, encode the label, no augmentation
def process_val(image_id: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
    """Load the image for ``image_id`` and one-hot encode ``label``.

    Returns:
        An ``(image, one_hot_label)`` pair.
    """
    one_hot_label = label_to_onehot(label)
    return read_decode_image(image_id), one_hot_label
  

In [None]:
# Create Dataset from the training DataFrame: (image file name, integer label) pairs
train_dataset = tf.data.Dataset.from_tensor_slices((train_df["ImageId"].values,
                                                    train_df["Label"].values)
                                                  )

# Verify image and label were loaded; train_image_id keeps the last id seen
# and is used by the lookup cell below
for image_id, label in train_dataset.take(2):
    train_image_id = image_id.numpy().decode("utf-8")
    print("Image ID: ", image_id.numpy().decode("utf-8"), "  Label: ", label.numpy())

# Map image ids to decoded images and labels to one-hot vectors (no augmentation yet)
train_dataset = train_dataset.map(process_train_pre_cache, num_parallel_calls=AUTOTUNE)

# Verify the mapping worked
for image, label in train_dataset.take(1):
    print("Image shape: {}  Max: {}  Min: {}".format(image.numpy().shape, np.max(image.numpy()), np.min(image.numpy())))
    print("Label: ", label.numpy())
    some_image = image.numpy()
    some_label = label.numpy()

# Remove cache if running under Kaggle
# Cache the decoded images on disk, then augment, batch, prefetch and repeat;
# augmentation runs after the cache so it is re-applied on every pass.
# NOTE(review): the cache is a file on disk -- presumably a file left by an
# earlier run with a different random split would be reused; confirm against
# the tf.data cache documentation and delete it when re-splitting.
train_dataset = train_dataset.cache("./steel_train2.tfcache") \
                             .map(process_train_post_cache, num_parallel_calls=AUTOTUNE) \
                             .batch(parms.BATCH_SIZE) \
                             .prefetch(1) \
                             .repeat()

# Show the images, execute this cell multiple times to see the images
for image, label in train_dataset.take(1):
    sample_image, sample_label = image, label
show_batch(sample_image, sample_label)

In [None]:
# Cross-check the sampled training image id against the full frame
# (assign a different id to train_image_id to inspect another image)
all_df.loc[all_df["ImageId"].eq(train_image_id)]

In [None]:
# Create Dataset from the validation DataFrame: (image file name, integer label) pairs
val_dataset = tf.data.Dataset.from_tensor_slices((valid_df["ImageId"].values,
                                                  valid_df["Label"].values)
                                                 )


# Verify image and label were loaded; val_image_id keeps the last id seen
# and is used by the lookup cell below
for image_id, label in val_dataset.take(2):
    val_image_id = image_id.numpy().decode("utf-8")
    print("Image ID: ", image_id.numpy().decode("utf-8"), "  Label: ", label.numpy())

# Map image ids to decoded images and labels to one-hot vectors (no augmentation for validation)
val_dataset = val_dataset.map(process_val, num_parallel_calls=AUTOTUNE)

# Verify the mapping worked
for image, label in val_dataset.take(1):
    print("Image shape: {}  Max: {}  Min: {}".format(image.numpy().shape, np.max(image.numpy()), np.min(image.numpy())))
    print("Label: ", label.numpy())
    some_image = image.numpy()
    some_label = label.numpy()

# Remove cache if running under Kaggle
# Cache the decoded images on disk, then batch, prefetch and repeat.
# NOTE(review): the cache is a file on disk -- presumably a file left by an
# earlier run with a different random split would be reused; confirm against
# the tf.data cache documentation and delete it when re-splitting.
val_dataset = val_dataset.cache("./steel_val.tfcache2") \
                         .batch(parms.BATCH_SIZE) \
                         .prefetch(1) \
                         .repeat()


In [None]:
# Cross-check the sampled validation image id against the full frame
# (assign a different id to val_image_id to inspect another image)
all_df.loc[all_df["ImageId"].eq(val_image_id)]

In [None]:
# Final visual check before model training.
# Point check_dataset at train_dataset instead to inspect a training batch.
check_dataset = val_dataset
for image, label in check_dataset.take(1):
    show_batch(image, label)  # Will show all of the batch


## Build  model
- Add and validate a pretrained model as a baseline

In [None]:
# Create any call backs for training...These are the most common.

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger

# Reduce the learning rate after 3 epochs without val_loss improvement,
# never going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    min_lr=1e-6,
    verbose=1,
)
# Stop training after 6 epochs without improvement of the default monitored metric.
earlystopper = EarlyStopping(
    patience=6,
    verbose=1,
)
# Write the model to parms.MODEL_PATH only when val_loss improves.
checkpointer = ModelCheckpoint(
    parms.MODEL_PATH,
    monitor='val_loss',
    mode="auto",
    save_best_only=True,
    verbose=1,
)


In [None]:
# Create model and compile it

from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, Conv2D, MaxPooling2D, BatchNormalization, UpSampling2D, Conv2DTranspose, Concatenate, Activation
from tensorflow.keras.losses import binary_crossentropy, categorical_crossentropy
from tensorflow.keras.optimizers import Adadelta, Adam, Nadam, SGD
########

# https://www.tensorflow.org/api_docs/python/tf/keras/applications
from tensorflow.keras.applications import MobileNet, imagenet_utils, ResNet50
from tensorflow.keras.layers import Dense,GlobalAveragePooling2D

# DenseNet121 backbone without its classification head.  The input shape comes
# from the same parms values the input pipeline resizes images to, so the two
# cannot drift apart if the image size is changed in the config cell.
densenet = tf.keras.applications.DenseNet121(
    include_top=False,
    input_shape=(parms.IMAGE_ROWS, parms.IMAGE_COLS, parms.IMAGE_CHANNELS))

#mobileNet = tf.keras.applications.mobilenet.MobileNet()
#resNet50 = tf.keras.applications.ResNet50()

# Build and compile model.  I used this model before, did not adjust parms.
# You can change to try different configurations.  (DO percentages, Dense layers, etc)
def build_compile_model():
    """Build and compile the classifier on top of the module-level ``densenet`` backbone.

    The head is global average pooling, dropout, a 1024-unit ReLU layer,
    dropout and a ``parms.NUM_CLASSES``-unit sigmoid output.  The model is
    compiled with binary cross-entropy, Adam (learning rate 5e-5) and accuracy.

    Note: the backbone is the shared ``densenet`` object, so models returned by
    repeated calls share that same backbone instance.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    model.add(densenet)
    model.add(GlobalAveragePooling2D())

    model.add(Dropout(0.3))
    model.add(Dense(1024,activation='relu'))
    model.add(Dropout(0.4))

    #model.add(Dropout(0.5))
    model.add(Dense(parms.NUM_CLASSES, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        # ``learning_rate`` is the TF 2.x argument name; ``lr`` is a deprecated alias.
        optimizer=Adam(learning_rate=0.00005),
        metrics=['accuracy'])

    return model



## Train model

In [None]:
# Reload the model from prior run
#model = load_model(parms.MODEL_PATH)


In [None]:
# Build a fresh model and train it on the repeating datasets; the step counts
# define how many batches make up one epoch and one validation pass.

model = build_compile_model()

training_callbacks = [reduce_lr, earlystopper, checkpointer]
history = model.fit(
    train_dataset,
    epochs=parms.EPOCS,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_dataset,
    validation_steps=validation_steps,
    callbacks=training_callbacks,
)

In [None]:

# Plot the training history
history_df = pd.DataFrame(history.history)
# DataFrame.plot creates its own figure, so no plt.figure() call is needed
# (the extra call produced an empty figure).  Labels are set on the axes
# returned by each plot call rather than on pyplot's implicit current axes.
loss_ax = history_df[['loss', 'val_loss']].plot(title="Loss")
loss_ax.set_xlabel('Epocs')
loss_ax.set_ylabel('Loss')
accuracy_ax = history_df[['accuracy', 'val_accuracy']].plot(title="Accuracy")
accuracy_ax.set_xlabel('Epocs')
accuracy_ax.set_ylabel('Accuracy')
plt.show()

## Validate model's predictions
- Create actual labels and predicted labels
- Calculate Confusion Matrix & Accuracy
- Display results


In [None]:
# Reload the model from parms.MODEL_PATH, the path the checkpoint callback writes to
model = load_model(filepath=parms.MODEL_PATH)

In [None]:
# Use model to generate predicted labels and probabilities over the validation
# dataset (presumably bad_results holds the misclassified samples -- confirm in TrainingUtils)

prediction_outputs = predictions_using_dataset(model, val_dataset, validation_steps, parms.BATCH_SIZE)
#prediction_outputs = predictions_using_dataset(model, val_dataset, 1, parms.BATCH_SIZE)
labels, predict_labels, predict_probabilities, bad_results = prediction_outputs


In [None]:
# Confusion matrix of actual vs. predicted labels, using the configured class names
show_confusion_matrix(labels, predict_labels, parms.CLASS_NAMES)

In [None]:
# Graph the results: passes the actual labels, predicted labels and predicted
# probabilities to the TrainingUtils display helper
display_prediction_results(labels, predict_labels, predict_probabilities, parms.NUM_CLASSES, parms.CLASS_NAMES)


In [None]:
# Put the bad results list into a DataFrame; it can be saved as csv or used
# for further analysis
bad_result_columns = ['actual', 'predict', 'prob', 'image']
bad_results_df = pd.DataFrame(bad_results, columns=bad_result_columns)
bad_results_df.head()

In [None]:
# Split the bad results into parallel tuples (actual, predicted, probability, image).
# zip(*[]) yields nothing to unpack, so an empty result list is handled explicitly.
if len(bad_results) > 0:
    bad_act, bad_pred, bad_prob, bad_images = zip(*bad_results)
else:
    bad_act, bad_pred, bad_prob, bad_images = (), (), (), ()


In [None]:
# display images....
def show_bad_batch(image_batch, bad_act, bad_pred, number_to_show=25):
    """Plot images in a 5x5 grid, titled with their actual and predicted labels.

    Args:
        image_batch: Sequence of images to draw.
        bad_act: Per-image actual labels; element ``[0]`` of each entry is shown.
        bad_pred: Per-image predicted labels; element ``[0]`` of each entry is shown.
        number_to_show: Upper bound on the number of images drawn; at most 25
            are drawn because the grid is fixed at 5x5.
    """
    grid_rows, grid_cols = 5, 5
    plt.figure(figsize=(10,10))
    # Cap by the available images and by the number of grid slots, so
    # plt.subplot is never asked for a position outside the grid.
    show_number = min(number_to_show, len(image_batch), grid_rows * grid_cols)

    for n in range(show_number):
        plt.subplot(grid_rows, grid_cols, n+1)
        plt.imshow(tf.keras.preprocessing.image.array_to_img(np.squeeze(image_batch[n])))
        s = "Act: "+ str(bad_act[n][0]) + " Pred: " + str(bad_pred[n][0])
        plt.title(s)
        plt.axis('off')

In [None]:

# Display the images from the bad results with their actual and predicted labels
show_bad_batch(bad_images, bad_act, bad_pred)