In [127]:
import pandas as pd
import numpy as np
import os
import sys
from PIL import Image, ImageFilter
from skimage.io import imread, imshow
from skimage.transform import resize
from skimage.color import rgb2gray
from skimage import data, img_as_float
from skimage import exposure
from tqdm import tqdm
import warnings
import pickle

warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

In [128]:
# Target geometry: every image and mask is brought to IMG_HEIGHT x IMG_WIDTH
# with a single channel.
IMG_HEIGHT = IMG_WIDTH = 256
IMG_CHANNELS = 1

# Input data: the names of the sub-directories directly under train_path
# are used as the sample ids.
train_path = 'C:/CompetitionData/stage1_train/'
train_ids = next(os.walk(train_path))[1]

# Per-file output folders (currently disabled)
#output_image_folder = 'C:/CompetitionData/train_images/'
#output_image_masks = 'C:/CompetitionData/train_masks/'

# Destinations of the pickled X_train / Y_train arrays
output_train_pickle = 'C:/CompetitionData/X_train.p'
output_masks_pickle = 'C:/CompetitionData/Y_train.p'

In [129]:
def rgbToGray(img):
    """Convert an RGB image to a single-channel luminance image.

    Parameters
    ----------
    img : array indexable as ``img[:, :, c]`` with channel 0 = red,
        1 = green, 2 = blue (the order produced by ``skimage.io.imread``).

    Returns
    -------
    Float array with a trailing channel axis of length 1, i.e. shape
    ``(height, width, 1)`` for a ``(height, width, 3)`` input.
    """
    # Rec. 709 luma coefficients listed in R, G, B order. The previous version
    # applied them in B, G, R order, which swapped the red and blue weights.
    grayImg = 0.2126*img[:,:,0] + 0.7152*img[:,:,1] + 0.0722*img[:,:,2]
    return np.expand_dims(grayImg, axis=2)

In [130]:
def paddingImg(img):
    """Place ``img`` in the top-left corner of a zero-filled uint8 canvas.

    The canvas has shape ``(IMG_HEIGHT, IMG_WIDTH, 1)``; pixels not covered
    by ``img`` stay 0. Values are cast to uint8 on assignment.
    """
    canvas = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.uint8)
    rows, cols = img.shape[0], img.shape[1]
    # Copy the image into the upper-left region; the remainder is padding.
    canvas[:rows, :cols, :1] = img
    return canvas

In [131]:
def resizer(img):
    """Resize ``img`` to ``(IMG_HEIGHT, IMG_WIDTH, 1)``.

    ``preserve_range=True`` is passed so the input value range is kept
    instead of being rescaled.
    """
    target_shape = (IMG_HEIGHT, IMG_WIDTH, 1)
    return resize(img, target_shape, mode='constant', preserve_range=True)

In [132]:
def paddingMask(mask):
    """Place a 2-D mask in the top-left corner of an all-False boolean canvas.

    Parameters
    ----------
    mask : 2-D array; it is written into channel 0 of the canvas and its
        values are cast to bool on assignment.

    Returns
    -------
    Boolean array of shape ``(IMG_HEIGHT, IMG_WIDTH, 1)``.
    """
    # Builtin ``bool`` replaces the ``np.bool`` alias, which was removed from NumPy.
    desired_mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)
    desired_mask[:mask.shape[0],:mask.shape[1], 0] = mask
    return desired_mask

In [133]:
def maskResizer(mask):
    """Resize ``mask`` to ``(IMG_HEIGHT, IMG_WIDTH)`` and add an axis at position 2.

    ``preserve_range=True`` is passed so the mask values are not rescaled.
    """
    target_shape = (IMG_HEIGHT, IMG_WIDTH)
    return np.expand_dims(
        resize(mask, target_shape, mode='constant', preserve_range=True),
        axis=2,
    )

In [134]:
def padAndResize(img):
    """Pad ``img`` onto a larger zero canvas, then resize to the target size.

    The image is copied into the top-left corner of a uint8 canvas of shape
    ``(MAX_HEIGHT_TOLERANCE, MAX_WIDTH_TOLERANCE, IMG_CHANNELS)`` and the
    canvas is then resized to ``(IMG_HEIGHT, IMG_WIDTH)``.

    NOTE(review): MAX_HEIGHT_TOLERANCE and MAX_WIDTH_TOLERANCE are not defined
    in the visible notebook cells; they must be defined before this function
    is called.
    """
    # First create a shape big enough to hold the odd shaped image
    # (fixed the ``MAX_WIDTH_TOLERANCe`` typo, which raised NameError).
    desired_img = np.zeros((MAX_HEIGHT_TOLERANCE,MAX_WIDTH_TOLERANCE,IMG_CHANNELS),dtype=np.uint8)
    # Put that image into the array
    desired_img[:img.shape[0],:img.shape[1],:3] = img
    # Now resize to minimize the stretching
    desired_img = resize(desired_img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    return desired_img

In [140]:
# Get and resize train images and masks.
# X_train holds one grayscale uint8 image per sample; Y_train holds the
# matching boolean mask (union of all mask files of that sample).
# Builtin ``bool`` replaces the ``np.bool`` alias, which was removed from NumPy.
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=bool)

print('Getting and resizing train images and masks ... ')
sys.stdout.flush()

for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    fn_ = ''.join([train_path,id_,'/images/',id_,'.png'])

    # Keep only the first three channels of the loaded image.
    img = imread(fn_)[:,:,:3]
    orig_file_height, orig_file_width = img.shape[0:2]

    # Image and masks of a sample share the original size, so decide once
    # whether they need resizing.
    needs_resize = (orig_file_height != IMG_HEIGHT) or (orig_file_width != IMG_WIDTH)

    # Grey scale conversion, then resize if necessary
    img = rgbToGray(img)
    if needs_resize:
        img = resizer(img)

    # Float pixel values are cast to uint8 on assignment.
    X_train[n] = img

    # Consolidate masks: element-wise maximum over every mask file of the sample
    mask_dir = ''.join([train_path,id_,'/masks/'])
    mask_X = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)
    for mask_file in next(os.walk(mask_dir))[2]:
        mask_ = imread(''.join([mask_dir,mask_file]))

        if needs_resize:
            mask_ = maskResizer(mask_)
        else:
            mask_ = np.expand_dims(mask_, axis=2)
        mask_X = np.maximum(mask_X, mask_)

    # Any non-zero value becomes True when stored in the boolean array.
    Y_train[n] = mask_X


print("Done.")

Getting and resizing train images and masks ... 


100%|████████████████████████████████████████████████████████████████████████████████| 670/670 [02:18<00:00,  4.84it/s]


Done.


In [141]:
#save the pickle files
# Context managers make sure each file is flushed and closed once the dump
# finishes (the bare open() calls left the handles open).
with open(output_train_pickle, "wb") as train_file:
    pickle.dump(X_train, train_file)
with open(output_masks_pickle, "wb") as masks_file:
    pickle.dump(Y_train, masks_file)
print("Pickle files generated.")

Pickle files generated.
