In [1]:
# library import
import cv2
import imutils
from imutils import paths
import numpy as np
import os

In [2]:
# Locations of the source .npy archives, grouped fold by fold.
# Every fold provides three files: images, tissue types and masks.

# --- fold 1 ---
img_paths_fold1 = "../../data/panuke/Fold_1/images/fold1/images.npy"
mask_paths_fold1 = "../../data/panuke/Fold_1/masks/fold1/masks.npy"
type_paths_fold1 = "../../data/panuke/Fold_1/images/fold1/types.npy"

# --- fold 2 ---
img_paths_fold2 = "../../data/panuke/Fold_2/images/fold2/images.npy"
mask_paths_fold2 = "../../data/panuke/Fold_2/masks/fold2/masks.npy"
type_paths_fold2 = "../../data/panuke/Fold_2/images/fold2/types.npy"

# --- fold 3 ---
img_paths_fold3 = "../../data/panuke/Fold_3/images/fold3/images.npy"
mask_paths_fold3 = "../../data/panuke/Fold_3/masks/fold3/masks.npy"
type_paths_fold3 = "../../data/panuke/Fold_3/images/fold3/types.npy"

# Output folders for the exported PNG files (both end with a separator)
img_dest_path = "./datset/images/"
mask_dest_path = "./datset/masks/"

In [3]:
# Iterable list with one (images, masks, types) path triple per fold,
# in fold order. zip() transposes the three per-kind tuples into triples.
folds = list(zip(
    (img_paths_fold1, img_paths_fold2, img_paths_fold3),
    (mask_paths_fold1, mask_paths_fold2, mask_paths_fold3),
    (type_paths_fold1, type_paths_fold2, type_paths_fold3),
))

In [4]:
# Export every sample that contains neoplastic nuclei as a PNG image/mask pair.
# Images are written as  img_<NNNN>_<type>.png  and masks as  mask_<NNNN>.png,
# where <NNNN> is a running counter (zero-padded to at least four digits)
# shared by both files, so the two folders list their files in the same order.

# cv2.imwrite does not create missing folders; it only reports the failure
# through its return value, so make sure both destinations exist up front.
os.makedirs(img_dest_path, exist_ok=True)
os.makedirs(mask_dest_path, exist_ok=True)

count = 0
for img_path, mask_path, type_path in folds:
    # load the raw arrays of this fold along with the tissue type of each image
    images = np.load(img_path)
    masks = np.load(mask_path)
    types = np.load(type_path)

    for i in range(len(images)):
        # convert to 8-bit before handing the arrays to OpenCV
        # NOTE(review): astype('uint8') wraps values above 255 -- confirm that
        # the ids stored in mask channel 0 never exceed that range.
        img = images[i].astype('uint8')
        mask = masks[i].astype('uint8')
        type_t = types[i]

        # channel 0 holds the neoplastic masks
        mask_neo = mask[:, :, 0]

        # a single distinct value means the channel is uniform, i.e. no
        # neoplastic instance next to the background -> skip the sample
        if len(np.unique(mask_neo)) <= 1:
            continue

        count += 1

        # zero-pad the counter so the files sort in export order
        sample_id = str(count).zfill(4)
        img_save_path = os.path.join(
            img_dest_path, "img_" + sample_id + "_" + str(type_t) + ".png")
        mask_save_path = os.path.join(
            mask_dest_path, "mask_" + sample_id + ".png")

        # write the PNG files to disk
        # NOTE(review): cv2.imwrite interprets 3-channel arrays as BGR; if the
        # .npy images are stored as RGB the saved files have red and blue
        # swapped -- confirm against how the PNGs are read back later.
        for save_path, pixels in ((img_save_path, img), (mask_save_path, mask_neo)):
            if not cv2.imwrite(save_path, pixels):
                raise IOError("could not write %s" % save_path)

### Post-training validation data and tissue type setup

In [5]:
# Read all the validation data paths, one entry per line.
# readlines() keeps the line terminators; the `with` block closes the file
# on exit, so no explicit close() is needed.
with open("datset/val_img_paths.txt", "r") as f:
    arr = f.readlines()

In [6]:
# Clean the paths: keep only the text before the first line break of each
# entry (this drops the trailing newline left by readlines()).
val_images = []
for raw_line in arr:
    val_images.append(raw_line.splitlines()[0])

In [12]:
def _image_number(path):
    """Return whatever follows the last '_' in the file name of `path`.

    The file name is taken as the part after the last backslash; the
    extension, if any, stays attached to the returned piece.
    """
    file_name = path.split('\\')[-1]
    return file_name.split('_')[-1]


# extract the image numbers of the validation set; they serve as lookup keys
val_images_numbers = list(map(_image_number, val_images))

In [11]:
# Collect the paths of all exported images; their file names carry the type
images_with_types = []
images_with_types.extend(paths.list_images(img_dest_path))

In [20]:
# Build, for every typed image, the key "<number>.png" from its file name:
# the number is the second '_'-separated field of the name after the last '/'.
images_with_types_numbers = []
for image_file in images_with_types:
    file_name = image_file.split('/')[-1]
    number = file_name.split('_')[1]
    images_with_types_numbers.append(number + ".png")

In [28]:
# Find which validation entries also exist among the typed images and collect,
# in validation order, the position of each match inside
# images_with_types_numbers.

# One pass builds a key -> position table. setdefault keeps the FIRST
# position of a repeated key, which is what list.index() would return,
# while avoiding a full list scan for every validation entry.
first_position = {}
for position, number in enumerate(images_with_types_numbers):
    first_position.setdefault(number, position)

type_idx = [first_position[val_n]
            for val_n in val_images_numbers
            if val_n in first_position]

In [31]:
# Write the tissue type of every matched validation image, one per line and
# in the same order as type_idx. The `with` block closes the file on exit.
with open('val_img_types.txt', 'w') as f:
    for i in type_idx:
        image_path = images_with_types[i]

        # file name without folder and extension, e.g. "img_0001_<type>"
        stem = os.path.splitext(os.path.basename(image_path))[0]

        # Split on the first two underscores only, so a type that itself
        # contains '_' is kept whole instead of being cut to its last piece.
        img_type = stem.split('_', 2)[-1]
        f.write("%s\n" % img_type)