In [1]:
import os
import tensorflow as tf
tf.enable_eager_execution()
# `tf.executing_eagerly` is a function: it has to be *called*. Asserting the
# bare function object is always truthy, so the check could never fail.
# Eager mode is needed because later cells iterate the datasets directly and
# call `.numpy()` on the yielded tensors.
assert tf.executing_eagerly(), "TensorFlow eager execution is not enabled"
import sys
sys.path.append("../")
from dataset_utils.tf_data_loader import ImageInputPipeline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Analyze Ferritic steel dataset

## Analyze train dataset

In [2]:
# Root folder of the dataset on the machine this notebook was run on.
base_dir = "/mnt/sda/deep_learning/CSE527_FinalProject-master/images/"
# Folder names for the training images and their label masks
# (presumably resolved relative to base_dir by ImageInputPipeline - confirm).
train_rel_map = {
    "images": "train",
    "labels": "label",
}
# Stop right away when the dataset root is not present.
assert os.path.isdir(base_dir)

In [3]:
# Input pipeline over the training split's ".tif" files. shuffle=False is
# passed (presumably to keep the iteration order fixed - confirm in
# ImageInputPipeline).
train_input_pipeline = ImageInputPipeline(
    train_rel_map,
    ".tif",
    base_dir,
    shuffle=False,
)

Number of examples in dataset:  3940


InternalError: CUDA runtime implicit initialization on GPU:0 failed. Status: out of memory

In [None]:
# Size handed to the pipeline; num_pix_img is the pixel count of one image at
# that size and is the denominator of every percentage computed below.
size = (256, 256)
img_height, img_width = size
num_pix_img = img_height * img_width
# One (image, mask) pair per batch, without augmentation.
train_ds = train_input_pipeline._input_fn(
    size=size,
    batch_size=1,
    augment=False,
)

An example of what an image with a defect area between 0.1% and 0.14% of its pixels looks like:

In [None]:
%matplotlib notebook
# Scan the training set for the first image whose defect area (any non-zero
# mask label) lies strictly between 0.1% and 0.14% of the image, then show it
# twice: with the mask overlaid, and on its own in a second figure.
for img, mask in train_ds:
    defect_pixels = (mask.numpy() > 0).sum()
    if not defect_pixels > 0:
        continue  # no defect in this image

    percent_defect = 100 * defect_pixels / num_pix_img
    if not (0.1 < percent_defect < 0.14):
        continue  # defect area outside the range of interest

    print(percent_defect)
    # Batch size is 1: take the single example and its first channel.
    image_2d = img[0, ..., 0]
    mask_2d = mask[0, ..., 0]
    plt.imshow(image_2d, cmap="gray")
    plt.imshow(mask_2d, alpha=0.3)
    plt.figure()
    plt.imshow(image_2d, cmap="gray")
    break

In [None]:
# Per-image pixel counts: mask label 1 is tallied as crack, label 2 as extrusion.
pix_sum_crack, pix_sum_ext = [], []
for _image, mask in train_ds:
    mask_np = mask.numpy()  # convert once, reuse for both labels
    pix_sum_crack.append((mask_np == 1).sum())
    pix_sum_ext.append((mask_np == 2).sum())

In [None]:
# One row per training image. A count of 0 is turned into NaN so that
# NaN-skipping statistics only see images that actually contain the defect.
train_counts = {
    "#pix_crack": pix_sum_crack,
    "#pix_ext": pix_sum_ext,
}
train_df = pd.DataFrame(train_counts).replace(0, np.nan)
train_df.head()

In [None]:
# Defect area as a percentage of the image. The plain "%pix_*" columns use 0
# for images without the defect, the "*_na" columns keep NaN for them.
train_df["%pix_crack"] = (100 * train_df["#pix_crack"] / num_pix_img).fillna(0)
train_df["%pix_crack_na"] = 100 * train_df["#pix_crack"] / num_pix_img
train_df["%pix_ext"] = (100 * train_df["#pix_ext"] / num_pix_img).fillna(0)
train_df["%pix_ext_na"] = 100 * train_df["#pix_ext"] / num_pix_img
train_df.head()

The summary below shows that:

- Percentage of pixels with crack in the dataset is 0.06%
- Out of 3940 images, only 211 contain cracks
- If only images with cracks are considered, then the percentage of pixels with crack is 1.21%
- --
- Percentage of pixels with extrusion in the dataset is 2.34%
- Out of 3940 images, only 1328 contain extrusions
- If only images with extrusions are considered, then the percentage of pixels with extrusion is 6.95%

In [None]:
# Summary statistics for every column. describe() skips NaN entries, so the
# "#pix_*" and "%pix_*_na" columns summarise only the images that contain the
# defect, while "%pix_crack" / "%pix_ext" are computed over all images.
train_df.describe()

The cumulative histogram below shows that in 80% of the images with cracks, the cracks cover between 0 and 1.15% of the image area

In [None]:
%matplotlib notebook
# Cumulative histogram of crack area over the training images that contain
# cracks. Rows without cracks are NaN in this column; they are dropped
# explicitly so that no NaN reaches the histogram's range detection.
fig, ax = plt.subplots()
ax.hist(train_df["%pix_crack_na"].dropna(), bins=50, cumulative=True)
ax.set(
    title="Train: cumulative distribution of crack area",
    xlabel="Crack area (% of image pixels)",
    ylabel="Cumulative number of images",
);

The histogram below is restricted to crack areas between 0 and 1.15% of the image area

In [None]:
%matplotlib notebook
# Histogram of crack area restricted to 0-1.15% of the image, over the training
# images that contain cracks (NaN rows, i.e. images without cracks, dropped).
fig, ax = plt.subplots()
ax.hist(train_df["%pix_crack_na"].dropna(), bins=50, cumulative=False, range=(0, 1.15))
ax.set(
    title="Train: distribution of crack area (0 to 1.15%)",
    xlabel="Crack area (% of image pixels)",
    ylabel="Number of images",
);

The cumulative histogram below shows that in 80% of the images with extrusions, the extrusions cover between 0 and 10.75% of the image area

In [None]:
%matplotlib notebook
# Cumulative histogram of extrusion area over the training images that contain
# extrusions. Rows without extrusions are NaN in this column; they are dropped
# explicitly so that no NaN reaches the histogram's range detection.
fig, ax = plt.subplots()
ax.hist(train_df["%pix_ext_na"].dropna(), bins=50, cumulative=True)
ax.set(
    title="Train: cumulative distribution of extrusion area",
    xlabel="Extrusion area (% of image pixels)",
    ylabel="Cumulative number of images",
);

The histogram below is restricted to extrusion areas between 0 and 10.75% of the image area

In [None]:
%matplotlib notebook
# Histogram of extrusion area restricted to 0-10.75% of the image, over the
# training images that contain extrusions (NaN rows dropped).
fig, ax = plt.subplots()
ax.hist(train_df["%pix_ext_na"].dropna(), bins=50, cumulative=False, range=(0, 10.75))
ax.set(
    title="Train: distribution of extrusion area (0 to 10.75%)",
    xlabel="Extrusion area (% of image pixels)",
    ylabel="Number of images",
);

## Analyze test dataset

In [None]:
# Root folder of the dataset on the machine this notebook was run on.
base_dir = "/mnt/sda/deep_learning/CSE527_FinalProject-master/images"
# Folder names for the test images and their label masks
# (presumably resolved relative to base_dir by ImageInputPipeline - confirm).
test_rel_map = {
    "images": "test",
    "labels": "test_label",
}
# Stop right away when the dataset root is not present.
assert os.path.isdir(base_dir)

In [None]:
# Input pipeline over the test split's ".tif" files. shuffle=False is passed
# (presumably to keep the iteration order fixed - confirm in
# ImageInputPipeline).
test_input_pipeline = ImageInputPipeline(
    test_rel_map,
    ".tif",
    base_dir,
    shuffle=False,
)

In [None]:
# Size handed to the pipeline; num_pix_img is the pixel count of one image at
# that size and is the denominator of every percentage computed below.
size = (256, 256)
img_height, img_width = size
num_pix_img = img_height * img_width
# One (image, mask) pair per batch, without augmentation.
test_ds = test_input_pipeline._input_fn(
    size=size,
    batch_size=1,
    augment=False,
)

An example of what an image with a defect area between 0.1% and 0.14% of its pixels looks like:

In [None]:
%matplotlib notebook
# Scan the test set for the first image whose defect area (any non-zero mask
# label) lies strictly between 0.1% and 0.14% of the image, then show it twice:
# with the mask overlaid, and on its own in a second figure.
for img, mask in test_ds:
    defect_pixels = (mask.numpy() > 0).sum()
    if not defect_pixels > 0:
        continue  # no defect in this image

    percent_defect = 100 * defect_pixels / num_pix_img
    if not (0.1 < percent_defect < 0.14):
        continue  # defect area outside the range of interest

    print(percent_defect)
    # Batch size is 1: take the single example and its first channel.
    image_2d = img[0, ..., 0]
    mask_2d = mask[0, ..., 0]
    plt.imshow(image_2d, cmap="gray")
    plt.imshow(mask_2d, alpha=0.3)
    plt.figure()
    plt.imshow(image_2d, cmap="gray")
    break

In [None]:
# Per-image pixel counts: mask label 1 is tallied as crack, label 2 as extrusion.
pix_sum_crack, pix_sum_ext = [], []
for _image, mask in test_ds:
    mask_np = mask.numpy()  # convert once, reuse for both labels
    pix_sum_crack.append((mask_np == 1).sum())
    pix_sum_ext.append((mask_np == 2).sum())

In [None]:
# One row per test image. A count of 0 is turned into NaN so that
# NaN-skipping statistics only see images that actually contain the defect.
test_counts = {
    "#pix_crack": pix_sum_crack,
    "#pix_ext": pix_sum_ext,
}
test_df = pd.DataFrame(test_counts).replace(0, np.nan)
test_df.head()

In [None]:
# Defect area as a percentage of the image. The plain "%pix_*" columns use 0
# for images without the defect, the "*_na" columns keep NaN for them.
test_df["%pix_crack"] = (100 * test_df["#pix_crack"] / num_pix_img).fillna(0)
test_df["%pix_crack_na"] = 100 * test_df["#pix_crack"] / num_pix_img
test_df["%pix_ext"] = (100 * test_df["#pix_ext"] / num_pix_img).fillna(0)
test_df["%pix_ext_na"] = 100 * test_df["#pix_ext"] / num_pix_img
test_df.head()

The summary below shows that:

- Percentage of pixels with crack in the dataset is 0.09%
- Out of 860 images, only 60 contain cracks
- If only images with cracks are considered, then the percentage of pixels with crack is 1.39%
- --
- Percentage of pixels with extrusion in the dataset is 2.21%
- Out of 860 images, only 295 contain extrusions
- If only images with extrusions are considered, then the percentage of pixels with extrusion is 6.43%

In [None]:
# Summary statistics for every column. describe() skips NaN entries, so the
# "#pix_*" and "%pix_*_na" columns summarise only the images that contain the
# defect, while "%pix_crack" / "%pix_ext" are computed over all images.
test_df.describe()

The cumulative histogram below shows that in 80% of the images with cracks, the cracks cover between 0 and 1.2% of the image area

In [None]:
%matplotlib notebook
# Cumulative histogram of crack area over the test images that contain cracks.
# Rows without cracks are NaN in this column; they are dropped explicitly so
# that no NaN reaches the histogram's range detection.
fig, ax = plt.subplots()
ax.hist(test_df["%pix_crack_na"].dropna(), bins=50, cumulative=True)
ax.set(
    title="Test: cumulative distribution of crack area",
    xlabel="Crack area (% of image pixels)",
    ylabel="Cumulative number of images",
);

The histogram below is restricted to crack areas between 0 and 1.2% of the image area

In [None]:
%matplotlib notebook
# Histogram of crack area restricted to 0-1.2% of the image, over the test
# images that contain cracks (NaN rows, i.e. images without cracks, dropped).
fig, ax = plt.subplots()
ax.hist(test_df["%pix_crack_na"].dropna(), bins=50, cumulative=False, range=(0, 1.2))
ax.set(
    title="Test: distribution of crack area (0 to 1.2%)",
    xlabel="Crack area (% of image pixels)",
    ylabel="Number of images",
);

The cumulative histogram below shows that in 80% of the images with extrusions, the extrusions cover between 0 and 10% of the image area

In [None]:
%matplotlib notebook
# Cumulative histogram of extrusion area over the test images that contain
# extrusions. Rows without extrusions are NaN in this column; they are dropped
# explicitly so that no NaN reaches the histogram's range detection.
fig, ax = plt.subplots()
ax.hist(test_df["%pix_ext_na"].dropna(), bins=50, cumulative=True)
ax.set(
    title="Test: cumulative distribution of extrusion area",
    xlabel="Extrusion area (% of image pixels)",
    ylabel="Cumulative number of images",
);

The histogram below is restricted to extrusion areas between 0 and 10% of the image area

In [None]:
%matplotlib notebook
# Histogram of extrusion area restricted to 0-10% of the image, over the test
# images that contain extrusions (NaN rows dropped).
fig, ax = plt.subplots()
ax.hist(test_df["%pix_ext_na"].dropna(), bins=50, cumulative=False, range=(0, 10))
ax.set(
    title="Test: distribution of extrusion area (0 to 10%)",
    xlabel="Extrusion area (% of image pixels)",
    ylabel="Number of images",
);