# Image Preprocessing

In [None]:
from skimage import io
from skimage import exposure

import gzip
import os
import gc
import pprint as pp
import numpy as np
import pickle
import matplotlib.pyplot as plt

# Sliding-window geometry: the side length of each square slice cut from an
# image, and half of it (used as the padding width and as the offset from a
# window's corner to the pixel that decides its label).
kernel_size = 16
half_kernel = kernel_size // 2

# Relative locations of the training input and of the generated artefacts.
PATH_TO_INTERMEDIATE = "../data/intermediate"
PATH_TO_GENERATED_SLICES = "../data/intermediate/generated_slices.pickle"
PATH_TO_TRAIN = "../data/input/data_stage1_train.zip"


In [None]:
# Index the training set: one entry per sample directory, holding the sample
# id plus the full paths of every file under its "images" and "masks" folders.
# NOTE(review): PATH_TO_TRAIN ends in ".zip" but is listed with os.listdir,
# so it presumably names an already-extracted directory -- confirm.
images_to_process = []

for current_directory in os.listdir(PATH_TO_TRAIN):
    images_dir = os.path.join(PATH_TO_TRAIN, current_directory, "images")
    masks_dir = os.path.join(PATH_TO_TRAIN, current_directory, "masks")

    image_paths = [os.path.join(images_dir, file_name) for file_name in os.listdir(images_dir)]
    mask_paths = [os.path.join(masks_dir, file_name) for file_name in os.listdir(masks_dir)]

    images_to_process.append({
        "id": current_directory,
        "images": image_paths,
        "masks": mask_paths,
    })


In [None]:
# Cell-level state: the mask and image most recently handled by the slicing
# loop, and a switch deciding whether each slice is also saved as its own PNG.
masked_img = original_img = None
generate_images = False

def is_part_of_nuclei(current_x, current_y, masked_img):
    """Label a window by the mask value at its centre.

    The window is identified by its corner (current_x, current_y); the pixel
    probed in `masked_img` lies `half_kernel` further along both axes.

    Returns 1 when that mask pixel is greater than zero, otherwise 0.
    """
    centre_value = masked_img[current_x + half_kernel, current_y + half_kernel]
    if centre_value > 0:
        return 1
    return 0

def _load_combined_mask(mask_paths, fallback_shape):
    """Return the union (bitwise OR) of every mask image in `mask_paths`.

    When `mask_paths` is empty an all-zero uint8 mask of `fallback_shape` is
    returned, so a mask left over from a previously processed image is never
    reused by accident.
    """
    combined = None
    for mask_path in mask_paths:
        # NOTE(review): newer scikit-image releases spell this keyword
        # `as_gray`; confirm against the installed version.
        current_mask = io.imread(mask_path, as_grey=True)
        if combined is None:
            combined = current_mask
        else:
            combined = np.bitwise_or(combined, current_mask)
    if combined is None:
        combined = np.zeros(fallback_shape, dtype=np.uint8)
    return combined


def _slice_records(base_record, window, augment):
    """Return the record for `window`, plus flipped variants when `augment`.

    Every returned dict is `base_record` extended with a trailing "slice" key.
    With `augment` set, three extra records carry the window flipped
    vertically, horizontally, and in both directions.
    """
    variants = [window]
    if augment:
        variants.append(np.flipud(window))
        variants.append(np.fliplr(window))
        variants.append(np.fliplr(np.flipud(window)))
    return [dict(base_record, slice=variant) for variant in variants]


# Same padding on both axes: half a kernel of zeros on every side.
_PAD_WIDTH = ((half_kernel, half_kernel), (half_kernel, half_kernel))

for image_to_process in images_to_process:

    generated_slices = []

    # Load the first image of the sample, apply adjust_gamma with its default
    # arguments and pad it.
    # NOTE(review): newer scikit-image releases spell this keyword `as_gray`;
    # confirm against the installed version.
    original_img = io.imread(image_to_process['images'][0], as_grey=True)
    original_img = exposure.adjust_gamma(original_img)
    original_shape = original_img.shape
    original_img = np.pad(original_img, _PAD_WIDTH, 'constant')

    # Merge all per-nucleus masks into one mask and pad it the same way.
    masked_img = _load_combined_mask(image_to_process['masks'], original_shape)
    masked_img = np.pad(masked_img, _PAD_WIDTH, 'constant')

    # Keep the padded image and mask next to the generated pickles.
    padded_image_path = os.path.join(PATH_TO_INTERMEDIATE, image_to_process['id'] + '.png')
    padded_mask_path = os.path.join(PATH_TO_INTERMEDIATE, image_to_process['id'] + '.mask.png')
    io.imsave(padded_image_path, original_img)
    io.imsave(padded_mask_path, masked_img)

    slice_path_prefix = os.path.join(PATH_TO_INTERMEDIATE, image_to_process['id'])

    # Slide a kernel_size x kernel_size window over the padded image.
    # current_x indexes axis 0 and current_y indexes axis 1.
    for current_y in range(original_img.shape[1] - kernel_size):
        current_y_hex = hex(current_y).replace('0x', '').zfill(3)
        for current_x in range(original_img.shape[0] - kernel_size):
            current_x_hex = hex(current_x).replace('0x', '').zfill(3)
            is_nuclei = is_part_of_nuclei(current_x, current_y, masked_img)

            current_slice = original_img[current_x : current_x + kernel_size,
                                         current_y : current_y + kernel_size]
            # File name layout: <id>.<x hex>.<y hex>.<label>.png
            current_slice_path = '{}.{}.{}.{}.png'.format(
                slice_path_prefix, current_x_hex, current_y_hex, is_nuclei)

            if generate_images:
                io.imsave(current_slice_path, current_slice)

            base_record = {
                "current_x" : current_x,
                "current_y" : current_y,
                "current_x_hex" : current_x_hex,
                "current_y_hex" : current_y_hex,
                "png_path" : current_slice_path,
                "sum" : current_slice.sum(),
                "is_nuclei" : is_nuclei,
            }

            # Augmentation: windows labelled as nuclei are additionally stored
            # flipped vertically, horizontally and both ways. The flipped
            # records keep the metadata (path, sum, coordinates) of the
            # unflipped window.
            generated_slices.extend(
                _slice_records(base_record, current_slice, is_nuclei > 0))

    image_to_pickle = {
        'shape' : original_img.shape,
        'original_shape' : original_shape,
        'slices' : generated_slices,
        'id' : image_to_process['id'],
        'images' : image_to_process['images'],
        'masks' : image_to_process['masks']
    }
    # One gzip-compressed pickle per image: <PATH_TO_INTERMEDIATE>/<id>.pickle
    with gzip.open(os.path.join(PATH_TO_INTERMEDIATE, image_to_process['id']) + '.pickle', 'wb') as handle:
        pickle.dump(image_to_pickle, handle, protocol=pickle.HIGHEST_PROTOCOL)
    # Drop the reference to the large dict and collect before the next image.
    del image_to_pickle
    gc.collect()
   


In [None]:
# The entries of images_to_process only carry "id", "images" and "masks", so
# indexing them with 'slices' raises KeyError. The slices of each image are
# stored in its gzip pickle under PATH_TO_INTERMEDIATE; load the first
# image's slices back from there.
first_image_pickle = os.path.join(PATH_TO_INTERMEDIATE, images_to_process[0]['id']) + '.pickle'
with gzip.open(first_image_pickle, 'rb') as handle:
    first_image_slices = pickle.load(handle)['slices']

# Rank all slices by descending pixel sum, keep the nuclei ones, and show the
# path (relative to PATH_TO_INTERMEDIATE) of the one at position 10.
ranked_slices = sorted(first_image_slices, key=lambda x: x['sum'], reverse=True)
nuclei_slices = [x for x in ranked_slices if x['is_nuclei'] == 1]
print([x['png_path'].replace(PATH_TO_INTERMEDIATE + "/", "") for x in nuclei_slices[10:11]])

In [None]:
# Show the shapes of the mask and image left over from the last loop pass.
for leftover_array in (masked_img, original_img):
    print(leftover_array.shape)

In [None]:
# How many times 16 fits into 696 and into 520
# (presumably image dimensions divided by kernel_size -- confirm).
print(*(extent / 16 for extent in (696, 520)))