In [1]:
import numpy as np
import glob
import skimage.io as io
import os.path
import tensorflow as tf

In [3]:
def fimg_to_fmask(img_path):
    """Return the path of the mask file that belongs to the image at ``img_path``.

    The directory part is kept and every ``.tif`` in the file name is replaced
    by ``_mask.tif`` (``train/1_1.tif`` -> ``train/1_1_mask.tif``).  A file
    name that contains no ``.tif`` is left as it is.
    """
    folder, filename = os.path.split(img_path)
    return os.path.join(folder, filename.replace(".tif", "_mask.tif"))

In [4]:
# glob returns paths in arbitrary order.  Sort them so that the index-based
# train/validation slices taken from this list are the same on every run.
origin_images_subset = sorted(img for img in glob.glob("train_subset/*.tif") if 'mask' not in img)
# Pair every image path with the path of its mask file.
paired_images_subset = [(img, fimg_to_fmask(img)) for img in origin_images_subset]
print("number of image segmentation pairs: ", len(paired_images_subset))

number of image segmentation pairs:  599


In [5]:
# glob returns paths in arbitrary order.  Sort them so that the index-based
# train/validation slices taken from this list are the same on every run.
origin_images_full = sorted(img for img in glob.glob("train/*.tif") if 'mask' not in img)
# Pair every image path with the path of its mask file.
paired_images_full = [(img, fimg_to_fmask(img)) for img in origin_images_full]
print("number of image segmentation pairs: ", len(paired_images_full))

number of image segmentation pairs:  5635


In [None]:
%matplotlib inline
# Sanity check: load one image together with its mask, report the array type
# and shape of the image, then display both pictures.
img = io.imread('train_subset/1_1.tif')
mask = io.imread('train_subset/1_1_mask.tif')
print(type(img))
print(img.shape)
for picture in (img, mask):
    io.imshow(picture)
    io.show()

In [6]:
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

def image_augmentation(img, save_dir):
    """Generate randomly rotated / flipped variants of ``img`` and save them as JPEGs.

    ``img`` gets a leading batch axis and a trailing channel axis before it is
    handed to Keras.  The generator writes its output into ``save_dir`` with the
    fixed file prefix ``mask``; iteration stops once eleven batches have been
    drawn (the counter must exceed 10).
    """
    augmenter = ImageDataGenerator(
        rotation_range=180,
        horizontal_flip=True,
        fill_mode='nearest')
    batch_input = np.expand_dims(np.expand_dims(img, 0), -1)

    flow = augmenter.flow(batch_input, batch_size=1,
                          save_to_dir=save_dir, save_prefix="mask", save_format='jpeg')
    for produced, _ in enumerate(flow, start=1):
        if produced > 10:
            break

Using TensorFlow backend.


In [5]:
def images_split(paired_images, full=False, train=True):
    """Cut each image/mask pair into a 6x5 grid of 70x116 tiles and save them as JPEGs.

    Two datasets are written for every tile:

    * ``data_fcn[_full]/<split>/images/images/<k>.jpg`` and
      ``data_fcn[_full]/<split>/masks/masks/<k>_mask.jpg`` receive the image
      tile and the matching mask tile.
    * The image tile is also saved to ``data_simple_cnn[_full]/<split>/mask/``
      when its mask tile sums to at least 400 (mask values are divided by 255
      first), otherwise to ``data_simple_cnn[_full]/<split>/no_mask/``.

    Parameters
    ----------
    paired_images : iterable of (str, str)
        ``(image_path, mask_path)`` pairs that ``io.imread`` can load.
    full : bool
        Append ``_full`` to both dataset root folders.
    train : bool
        Write below ``train/`` when true, below ``validation/`` otherwise.

    Notes
    -----
    File numbering restarts at 1 on every call, so calling the function again
    with the same ``full`` / ``train`` flags reuses the same file names.
    """
    grid_rows, grid_cols = 6, 5   # tiles per image: 6 down, 5 across
    tile_h, tile_w = 70, 116      # tile height / width in pixels
    min_mask_sum = 400            # mask-tile sum from which a tile counts as "mask"

    root_suffix = "_full" if full else ""
    split_dir = "/train/" if train else "/validation/"
    fcn_img = "data_fcn" + root_suffix + split_dir + "images/images/"
    fcn_mask = "data_fcn" + root_suffix + split_dir + "masks/masks/"
    simple_cnn_img = "data_simple_cnn" + root_suffix + split_dir + "no_mask/"
    simple_cnn_mask = "data_simple_cnn" + root_suffix + split_dir + "mask/"

    # Nothing else creates these folders; make sure they exist before saving.
    for out_dir in (fcn_img, fcn_mask, simple_cnn_img, simple_cnn_mask):
        os.makedirs(out_dir, exist_ok=True)

    count_no_mask = 1
    count_mask = 1
    count_fcn = 1
    for raw_img, raw_mask in paired_images:
        img = io.imread(raw_img)
        mask = io.imread(raw_mask) / 255
        for i in range(grid_rows):
            rows = slice(i * tile_h, (i + 1) * tile_h)
            for j in range(grid_cols):
                cols = slice(j * tile_w, (j + 1) * tile_w)
                small_img = img[rows, cols]
                small_mask = mask[rows, cols]
                io.imsave(fcn_img + str(count_fcn) + ".jpg", small_img / 255)
                io.imsave(fcn_mask + str(count_fcn) + "_mask.jpg", small_mask)
                count_fcn += 1
                # Classification dataset: the *image* tile is filed under the
                # class decided by its mask tile.
                if np.sum(small_mask) >= min_mask_sum:
                    io.imsave(simple_cnn_mask + str(count_mask) + "_mask.jpg", small_img)
                    count_mask += 1
                else:
                    io.imsave(simple_cnn_img + str(count_no_mask) + ".jpg", small_img)
                    count_no_mask += 1
    print("Finished splitting and saving images and segmentations")

In [None]:
# Pairs 0-479 of the subset feed the training folders, the remaining pairs the
# validation folders.
images_split(paired_images=paired_images_subset[:480], full=False, train=True)
images_split(paired_images=paired_images_subset[480:], full=False, train=False)

In [9]:
# List the tiles saved in each class folder of the subset training data.
imgs_no_mask = glob.glob("data_simple_cnn/train/no_mask/*")
imgs_mask = glob.glob("data_simple_cnn/train/mask/*")

print("images with no mask: ", len(imgs_no_mask))
print("images with mask: ", len(imgs_mask))
print("mask to no mask ratio", len(imgs_mask) / float(len(imgs_no_mask)))

images with no mask:  13982
images with mask:  10275
mask to no mask ratio 0.7348734086682878


In [None]:
# 5635 pairs are available; the first 5000 become the training tiles of the
# full dataset.
images_split(paired_images=paired_images_full[:5000], full=True, train=True)

In [None]:
# Every pair from index 5000 onwards becomes validation tiles of the full dataset.
images_split(paired_images=paired_images_full[5000:], full=True, train=False)

In [10]:
# List the tiles saved in each class folder of the full training data.
imgs_no_mask = glob.glob("data_simple_cnn_full/train/no_mask/*")
imgs_mask = glob.glob("data_simple_cnn_full/train/mask/*")

print("images with no mask: ", len(imgs_no_mask))
print("images with mask: ", len(imgs_mask))
print("mask to no mask ratio", len(imgs_mask) / float(len(imgs_no_mask)))

images with no mask:  323227
images with mask:  350497
mask to no mask ratio 1.0843679519347085


In [11]:
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

def image_augmentation(img, save_dir, save_prefix, num_images=5):
    """Generate randomly rotated / flipped variants of ``img`` and save them as JPEGs.

    Parameters
    ----------
    img : array
        A single image.  A leading batch axis and a trailing channel axis are
        added before it is handed to Keras (presumably a 2-D grayscale tile;
        confirm against the callers).
    save_dir : str
        Folder passed to Keras as ``save_to_dir``.
    save_prefix : str
        File name prefix passed to Keras as ``save_prefix``.
    num_images : int, optional
        Number of augmented batches to draw.  Defaults to 5, the per-call count
        that the ``// 5`` loop bounds of the callers in this notebook assume.
    """
    datagen = ImageDataGenerator(
            rotation_range=180,
            horizontal_flip=True,
            fill_mode='nearest')
    batch = np.expand_dims(np.expand_dims(img, 0), -1)

    flow = datagen.flow(batch, batch_size=1,
                        save_to_dir=save_dir, save_prefix=save_prefix, save_format='jpg')
    # zip() asks range() for its next item first and stops as soon as range() is
    # exhausted, so exactly num_images batches are drawn from the endless flow.
    for _ in zip(range(num_images), flow):
        pass

### augment train/mask

In [12]:
import random

# Top up the "mask" class of the training data: every pass augments one
# randomly chosen tile from imgs_mask.
for count in range((len(imgs_no_mask) - len(imgs_mask))//5):
    # random.choice draws uniformly from the whole list, index 0 included, and
    # also works when the list holds a single path.
    small_img = io.imread(random.choice(imgs_mask))
    image_augmentation(small_img, "data_simple_cnn_full/train/mask/", "aug_"+str(count))

#### check image numbers

In [12]:
# Re-list both class folders of the full training data and report their sizes.
imgs_no_mask = glob.glob("data_simple_cnn_full/train/no_mask/*")
imgs_mask = glob.glob("data_simple_cnn_full/train/mask/*")

print("images with no mask: ", len(imgs_no_mask))
print("images with mask: ", len(imgs_mask))
print("mask to no mask ratio", len(imgs_mask) / float(len(imgs_no_mask)))

images with no mask:  323227
images with mask:  350497
mask to no mask ratio 1.0843679519347085


### augment both train/mask and train/no_mask

In [14]:
# Grow both training classes: every pass augments one random "mask" tile and
# one random "no_mask" tile, using the same file prefix in the two folders.
for count in range(150000//5):
    # random.choice draws uniformly from the whole list, index 0 included.
    small_img = io.imread(random.choice(imgs_mask))
    image_augmentation(small_img, "data_simple_cnn_full/train/mask/", "more_aug_"+str(count))
    small_img = io.imread(random.choice(imgs_no_mask))
    image_augmentation(small_img, "data_simple_cnn_full/train/no_mask/", "more_aug_"+str(count))

#### check image numbers

In [13]:
# Re-list both class folders of the full training data and report their sizes.
imgs_no_mask = glob.glob("data_simple_cnn_full/train/no_mask/*")
imgs_mask = glob.glob("data_simple_cnn_full/train/mask/*")

print("images with no mask: ", len(imgs_no_mask))
print("images with mask: ", len(imgs_mask))
print("mask to no mask ratio", len(imgs_mask) / float(len(imgs_no_mask)))

images with no mask:  323227
images with mask:  350497
mask to no mask ratio 1.0843679519347085


### check validation image numbers

In [14]:
# List the tiles saved in each class folder of the full validation data.
imgs_no_mask_val = glob.glob("data_simple_cnn_full/validation/no_mask/*")
imgs_mask_val = glob.glob("data_simple_cnn_full/validation/mask/*")

print("images with no mask: ", len(imgs_no_mask_val))
print("images with mask: ", len(imgs_mask_val))
print("mask to no mask ratio", len(imgs_mask_val) / float(len(imgs_no_mask_val)))

images with no mask:  43955
images with mask:  47440
mask to no mask ratio 1.0792856330337846


### augment validation/mask

In [17]:
# Top up the "mask" class of the validation data: every pass augments one
# randomly chosen tile from imgs_mask_val.
for count in range((len(imgs_no_mask_val) - len(imgs_mask_val))//5):
    # random.choice draws uniformly from the whole list, index 0 included, and
    # also works when the list holds a single path.
    small_img = io.imread(random.choice(imgs_mask_val))
    image_augmentation(small_img, "data_simple_cnn_full/validation/mask/", "aug_"+str(count))

### check validation image numbers

In [15]:
# Re-list both class folders of the full validation data and report their sizes.
imgs_no_mask_val = glob.glob("data_simple_cnn_full/validation/no_mask/*")
imgs_mask_val = glob.glob("data_simple_cnn_full/validation/mask/*")

print("images with no mask: ", len(imgs_no_mask_val))
print("images with mask: ", len(imgs_mask_val))
print("mask to no mask ratio", len(imgs_mask_val) / float(len(imgs_no_mask_val)))

images with no mask:  43955
images with mask:  47440
mask to no mask ratio 1.0792856330337846


### augment both validation/mask and validation/no_mask

In [20]:
for count in range(15000//5):
    i = random.randint(1, len(imgs_mask_val)-1)
    small_img = io.imread(imgs_mask_val[i])
    image_augmentation(small_img, "data_simple_cnn_full/validation/mask/", "more_aug_"+str(count))
    j = random.randint(1, len(imgs_no_mask_val)-1)
    small_img = io.imread(imgs_no_mask_val[j])
    image_augmentation(small_img, "data_simple_cnn_full/validation/no_mask/", "more_aug_"+str(count))

#### check image numbers

In [16]:
# Re-list both class folders of the full validation data and report their sizes.
imgs_no_mask_val = glob.glob("data_simple_cnn_full/validation/no_mask/*")
imgs_mask_val = glob.glob("data_simple_cnn_full/validation/mask/*")

print("images with no mask: ", len(imgs_no_mask_val))
print("images with mask: ", len(imgs_mask_val))
print("mask to no mask ratio", len(imgs_mask_val) / float(len(imgs_no_mask_val)))

images with no mask:  43955
images with mask:  47440
mask to no mask ratio 1.0792856330337846


# Augment data_fcn and data_fcn_full for FCN model training

## augment images and corresponding masks if defected

In [19]:
def defected(mask):
    """Tell whether the values of a 2-D ``mask`` array add up to at least 400."""
    total = np.sum(mask[:, :])
    return total >= 400

# Load every FCN training mask tile and count how many are flagged by defected().
train_masks = glob.glob("data_fcn/train/masks/masks/*")
defected_masks = [defected(io.imread(mask_path)) for mask_path in train_masks]
print("No. of train masks: ", len(train_masks))
print("No. of defected masks: ", sum(defected_masks))
print("defected ratio: ", float(sum(defected_masks)) / len(train_masks))

data_fcn/train/masks/masks/9952_mask.jpg
No. of train masks:  14400
No. of defected masks:  528
defected ratio:  0.03666666666666667


In [None]:
def image_augmentation(img, save_dir, save_prefix, seed, num_images=5):
    """Generate seeded random rotations / flips of ``img`` and save them as JPEGs.

    Parameters
    ----------
    img : array
        A single image.  A leading batch axis and a trailing channel axis are
        added before it is handed to Keras (presumably a 2-D grayscale tile;
        confirm against the callers).
    save_dir : str
        Folder passed to Keras as ``save_to_dir``.
    save_prefix : str
        File name prefix passed to Keras as ``save_prefix``.
    seed : int
        Random seed passed to ``datagen.flow``.
    num_images : int, optional
        Number of augmented batches to draw.  Defaults to 5, the per-call count
        that the ``// 5`` loop bounds elsewhere in this notebook assume.
    """
    datagen = ImageDataGenerator(rotation_range=180, horizontal_flip=True, fill_mode='nearest')
    batch = np.expand_dims(np.expand_dims(img, 0), -1)
    flow = datagen.flow(batch, batch_size=1, save_to_dir=save_dir,
                        save_prefix=save_prefix, save_format='jpg', seed=seed)
    # zip() asks range() for its next item first and stops as soon as range() is
    # exhausted, so exactly num_images batches are drawn from the endless flow.
    for _ in zip(range(num_images), flow):
        pass

In [23]:
def fimg_to_fmask(img_path, mask_folder):
    """Return the path inside ``mask_folder`` of the mask for the image ``img_path``.

    Only the file name of ``img_path`` is used; every ``.jpg`` in it is
    replaced by ``_mask.jpg`` before it is joined onto ``mask_folder``.
    """
    filename = os.path.split(img_path)[1]
    return os.path.join(mask_folder, filename.replace(".jpg", "_mask.jpg"))

# Folders holding the FCN training image tiles and their mask tiles.
img_folder = "data_fcn/train/images/images/"
mask_folder = "data_fcn/train/masks/masks/"

# Pair every image tile path with the path of its mask tile.
fcn_train_imgs = glob.glob(img_folder + "*")
fcn_train_pairs = [(img_path, fimg_to_fmask(img_path, mask_folder)) for img_path in fcn_train_imgs]

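# Sketch (not implemented yet): run the image and its mask through two flows
# that share one seed, presumably so that both receive the same random transform.
#mask=[] img=[]
#for batch in datagen.flow(x, batch_size=1, seed=1337): img.append(batch)
#for batch in datagen.flow(ymask, batch_size=1, seed=1337): mask.append(batch)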

14400
