In [41]:
import os
from PIL import Image
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
%matplotlib inline
import sys
from tqdm import tqdm
from skimage.io import imread, imshow
from skimage.transform import resize

In [42]:
train_images_folder = 'C:/CompetitionData/train_images'
train_masks_folder = 'C:/CompetitionData/train_masks'
new_train_images_folder = 'C:/newData/new_train_images'
new_train_masks_folder = 'C:/newData/new_train_masks'
train_path = 'C:/CompetitionData/stage1_train/'
train_ids = next(os.walk(train_path))[1]
#image shape to resize to
IMG_HEIGHT = 256
IMG_WIDTH = 256
IMG_CHANNELS = 1

In [43]:
def rgbToGray(img):
    grayImg = 0.0722*img[:,:,0] + 0.7152*img[:,:,1] + 0.2126*img[:,:,2]
    return np.expand_dims(grayImg, axis=2)

In [None]:
def resizer(img):
    desired_size = resize(img, (IMG_HEIGHT, IMG_WIDTH,1), mode='constant', preserve_range=True)
    return desired_size

In [45]:
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.bool)

print('Getting and resizing train images and masks ... ')
sys.stdout.flush()

for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    fn_ = ''.join([train_path,id_,'/images/',id_,'.png'])
    
    img = imread(fn_)[:,:,:3]
    orig_file_height, orig_file_width = img.shape[0:2]
    
    #grey scale conversion
    img = rgbToGray(img)
    
    #pad if necessary
    #if (img.shape[0] <= IMG_HEIGHT) and (img.shape[1] <= IMG_WIDTH):
    #    img = paddingImg(img)
            
    #resize if necessary
    #else:
    if (orig_file_height != IMG_HEIGHT) or (orig_file_width != IMG_WIDTH):
        img = resizer(img)
 
    #Convert to float before exposure shift
    #img = np.array(img, dtype=np.float)
    
    #imgadapt = img_as_float(img)
    
    
    #change exposure to adaptive histogram
    #imgadapt = exposure.equalize_adapthist(img, clip_limit=0.03)
    
    #convert it back to uint
    #img = np.array(imgadapt, dtype=np.uint8)
    

    X_train[n] = img
    
    #consolidate masks

        
    mask_X = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool)
    for mask_file in next(os.walk(''.join([train_path,id_,'/masks/'])))[2]:
        mask_ = imread(''.join([train_path,id_,'/masks/',mask_file]))
        
        #if (orig_file_height <= IMG_HEIGHT) and (orig_file_width <= IMG_WIDTH):
            #print('padding')
         #   mask_ = paddingMask(mask_)
        #else:
            #print('sizer')
        if (orig_file_height != IMG_HEIGHT) or (orig_file_width != IMG_WIDTH):
            
            mask_ = maskResizer(mask_)
        else:
            mask_ = np.expand_dims(mask_, axis=2)
            #print(mask_.shape)     
        mask_X = np.maximum(mask_X, mask_)
    
    Y_train[n] = mask_X


print("Done.")

Getting and resizing train images and masks ... 



  0%|                                                                                          | 0/670 [00:00<?, ?it/s]
  0%|▎                                                                                 | 3/670 [00:00<01:17,  8.60it/s]
  1%|▍                                                                                 | 4/670 [00:00<02:00,  5.53it/s]
  1%|▉                                                                                 | 8/670 [00:00<01:18,  8.47it/s]
  1%|█▏                                                                               | 10/670 [00:01<01:10,  9.33it/s]
  2%|█▎                                                                               | 11/670 [00:01<01:10,  9.35it/s]
  2%|█▍                                                                               | 12/670 [00:01<01:34,  6.96it/s]
  2%|█▊                                                                               | 15/670 [00:01<01:23,  7.86it/s]
  3%|██▏                               

Done.


In [72]:
# we create two instances with the same arguments
data_gen_args = dict(featurewise_center=True,
                     featurewise_std_normalization=True,
                     rotation_range=90.,
                     horizontal_flip=True,
                     vertical_flip=True,
                     width_shift_range=0.2,
                     height_shift_range=0.2,
                     zoom_range=0.2)
image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

# Provide the same seed and keyword arguments to the fit and flow methods
seed = 1
image_datagen.fit(X_train, augment=True)
mask_datagen.fit(Y_train, augment=True)

i = 0    
for batch in image_datagen.flow(
    X_train,
    seed=seed,
    save_to_dir=new_train_images_folder,
    batch_size=32):
         i += 1
         if i > 200:
            break
        
n = 0

for batch in mask_datagen.flow(
    Y_train,
    seed=seed,
    save_to_dir=new_train_masks_folder,
    batch_size=32):
        n +=1
        if n > 200:
            break



In [73]:
train_ids_new = os.listdir('C:/newData/new_train_images/')

# Get and resize train images and masks
X_train_new = np.zeros((len(train_ids_new), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_train_new = np.zeros((len(train_ids_new), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool)

print('Getting and resizing train images and masks ... ')
sys.stdout.flush()

for n, id_ in tqdm(enumerate(train_ids_new), total=len(train_ids_new)):

    fn_ = ''.join(['C:/newData/new_train_images/',id_])

    img = imread(fn_)[:,:,:IMG_CHANNELS]
    orig_file_height, orig_file_width = img.shape[0:2]


    X_train_new[n] = img
    
#Get masks
for z, id_ in tqdm(enumerate(train_ids_new), total=len(train_ids_new)):
    mask_ = ''.join(['C:/newData/new_train_masks/',id_])
    #mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool)
    mask = imread(mask_)
    mask = np.expand_dims(mask, axis=-1)
    
    Y_train_new[z] = mask    

print("Done.")

Getting and resizing train images and masks ... 


100%|█████████████████████████████████████████████████████████████████████████████| 6409/6409 [00:10<00:00, 602.18it/s]
100%|████████████████████████████████████████████████████████████████████████████| 6409/6409 [00:03<00:00, 1874.54it/s]


Done.


In [1]:
import pickle

#output pickle locations for X_train and Y_train
output_train_pickle = 'C:/CompetitionData/X_train.p'
output_masks_pickle = 'C:/CompetitionData/Y_train.p'

#save the pickle files
pickle.dump(X_train_new, open(output_train_pickle, "wb"))
pickle.dump(Y_train_new, open(output_masks_pickle, "wb"))
print("Pickle files generated.")

NameError: name 'X_train_new' is not defined