In [1]:
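# Prerequisite (shell command, not Python): find . -name '*.DS_Store' -type f -delete
#   It removes macOS .DS_Store files, which os.walk would otherwise list next to the images.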
import os
import random as rnd
import numpy as np
import matplotlib.pyplot as plt

import matplotlib
# Default size (width, height) in inches for every figure created in this notebook.
matplotlib.rcParams['figure.figsize'] = (20,20)

In [2]:
# Fraction of tiles that the subsampler functions route to EACH of the
# validation and test folders: a random draw above 1 - divideData goes to
# validation, the next divideData-wide band goes to test, and everything else
# goes to train.
# NOTE(review): no rnd.seed(...) call is visible in this notebook, so the
# split presumably differs between runs - confirm whether that is intended.
divideData = 0.2

def ihcFormatter(imgIn, mask):
    """Multiply an image by a 2-D mask, applying the mask to every channel.

    Args:
        imgIn: image array; presumably (rows, cols, channels) - confirm.
        mask: array indexed as mask[rows, cols]; it gets a trailing axis of
            length 1 so that it broadcasts over the last axis of imgIn.

    Returns:
        The element-wise product imgIn * mask (pixels where mask is 0 become 0).
    """
    # Append a length-1 axis so one mask value scales all channels of a pixel.
    maskWithChannelAxis = mask[:, :, np.newaxis]
    return imgIn * maskWithChannelAxis

def subsamplerHE(path, imgIn, imgLabel, imgNum, dim, step):
    """Cut an HE image and its label into dim x dim tiles and save each tile
    into a randomly chosen train/, test/ or validation/ sub-folder of `path`.

    Every tile is stacked with the matching label tile (see layerImages) and
    written as "HE_i<index>.png", where <index> is the running total imgNum[0].

    Args:
        path: output folder prefix; it is concatenated directly with
            "train/", "test/" or "validation/", so it must end with a separator.
        imgIn: image indexed as imgIn[rows, cols]; presumably an array returned
            by plt.imread - confirm.
        imgLabel: label array covering the same rows/cols as imgIn.
        imgNum: list of four counters [total, train, test, validation];
            updated in place.
        dim: tile edge length in pixels.
        step: distance in pixels between the origins of consecutive tiles.

    Returns:
        The same imgNum list, after updating its counters.
    """
    # plt.imsave does not create missing folders, so make sure they exist.
    for folder in ("validation/", "test/", "train/"):
        os.makedirs(path + folder, exist_ok=True)

    # The last valid tile origin is len - dim, hence the "+ 1" on the
    # exclusive range bound; without it a tile that fits exactly is dropped.
    for i in range(0, len(imgIn) - dim + 1, step):
        for j in range(0, len(imgIn[i]) - dim + 1, step):
            subImg = imgIn[i:i + dim, j:j + dim]
            subImgLabel = imgLabel[i:i + dim, j:j + dim]

            # One uniform draw decides the split: the top divideData share goes
            # to validation, the next divideData share to test, the rest to train.
            threshold = rnd.random()
            if threshold > 1. - divideData:
                folder, counterIdx = "validation/", 3
            elif threshold > 1. - divideData * 2:
                folder, counterIdx = "test/", 2
            else:
                folder, counterIdx = "train/", 1

            # Name the file after the running total imgNum[0]; str(imgNum)
            # would embed the whole counter list in the file name.
            fileName = path + folder + "HE_i" + str(imgNum[0]) + ".png"
            plt.imsave(fileName, layerImages(subImg, subImgLabel))
            imgNum[counterIdx] += 1
            imgNum[0] += 1
    return imgNum

def subsamplerIHC(path,imgIn, imgLabel, imgNum, dim, step):
    """Cut an IHC image and its label into dim x dim tiles, keep only tiles
    without any pure-black pixel, and save each kept tile into a randomly
    chosen train/, test/ or validation/ sub-folder of `path`.

    Every kept tile is stacked with the matching label tile (see layerImages)
    and written as "IHC_Ki67_i<index>.png", where <index> is the running total
    imgNum[0].

    Args:
        path: output folder prefix; it is concatenated directly with
            "train/", "test/" or "validation/", so it must end with a separator.
        imgIn: image indexed as imgIn[rows, cols] whose last axis is compared
            against [0, 0, 0]; presumably the masked RGB image produced by
            ihcFormatter - confirm.
        imgLabel: label array covering the same rows/cols as imgIn.
        imgNum: list of four counters [total, train, test, validation];
            updated in place.
        dim: tile edge length in pixels.
        step: distance in pixels between the origins of consecutive tiles.

    Returns:
        The same imgNum list, after updating its counters.
    """
    # plt.imsave does not create missing folders, so make sure they exist.
    for folder in ("validation/", "test/", "train/"):
        os.makedirs(path + folder, exist_ok=True)

    # The last valid tile origin is len - dim, hence the "+ 1" on the
    # exclusive range bound; without it a tile that fits exactly is dropped.
    for i in range(0, len(imgIn) - dim + 1, step):
        for j in range(0, len(imgIn[i]) - dim + 1, step):
            subImg = imgIn[i:i + dim, j:j + dim]

            # Count pixels whose channels are all exactly 0 and skip the tile
            # if it contains any (presumably area outside the segmented
            # region - confirm against the caller).
            blackPix = np.count_nonzero(np.all(subImg == [0, 0, 0], axis=2))
            if blackPix != 0:
                continue

            subImgLabel = imgLabel[i:i + dim, j:j + dim]

            # One uniform draw decides the split: the top divideData share goes
            # to validation, the next divideData share to test, the rest to train.
            threshold = rnd.random()
            if threshold > 1. - divideData:
                folder, counterIdx = "validation/", 3
            elif threshold > 1. - divideData * 2:
                folder, counterIdx = "test/", 2
            else:
                folder, counterIdx = "train/", 1

            # Name the file after the running total imgNum[0]; str(imgNum)
            # would embed the whole counter list in the file name.
            fileName = path + folder + "IHC_Ki67_i" + str(imgNum[0]) + ".png"
            plt.imsave(fileName, layerImages(subImg, subImgLabel))
            imgNum[counterIdx] += 1
            imgNum[0] += 1
    return imgNum

def layerImages(img1, img2):
    """Stack img2 after img1 along the third (depth) axis.

    Args:
        img1: first array; its channels come first in the result.
        img2: second array; a 2-D input contributes one extra channel.

    Returns:
        A numpy array holding both inputs stacked depth-wise.
    """
    layers = (img1, img2)
    return np.asarray(np.dstack(layers))
#     nImg2 = img2 * 250
#     newImg2 = np.asarray(np.dstack((nImg2, nImg2, nImg2)), dtype=np.uint8)
#     return np.hstack((img1, newImg2))
    


def augmenter(array):
    """Save `array` as the image file 'pic.jpg' in the current directory.

    No augmentation is performed; the function only writes the array to disk.

    Args:
        array: image data accepted by plt.imsave.
    """
    # Use the parameter itself; the name `imgIn` is not defined in this scope.
    plt.imsave('pic.jpg', array)
    
def HEcrawler(path, targetPath, dim, step):
    """Walk `path`, tile every folder that holds exactly two files, and print
    the running and final tile counts.

    In each folder the first file (in sorted order) is read as the image and
    the second as its label mask; both are handed to subsamplerHE.
    NOTE(review): which file is the image and which is the mask is decided by
    name order only - confirm that the naming scheme puts the image first.

    Args:
        path: root folder that is walked recursively.
        targetPath: output folder prefix forwarded to subsamplerHE.
        dim: tile edge length in pixels.
        step: distance in pixels between the origins of consecutive tiles.
    """
    # Counters: [total, train, test, validation].
    imageCounter = [0, 0, 0, 0]
    for subdir, dirs, files in os.walk(path):
        # os.walk yields names in arbitrary order; sort in place so folders
        # are visited (and tiles numbered) in a reproducible order.
        dirs.sort()
        # Ignore hidden files such as .DS_Store and fix the file order, since
        # the image/mask roles below are assigned by position.
        files = sorted(name for name in files if not name.startswith("."))
        if len(files) != 2:
            print("This folder " + subdir + " does not contain only 2 images", files)
            continue
        image = plt.imread(os.path.join(subdir, files[0]))
        mask = plt.imread(os.path.join(subdir, files[1]))
        imageCounter = subsamplerHE(targetPath, image, mask, imageCounter, dim, step)
        print("Now there are " + str(imageCounter[0]) + " HE images")
    print("Train:      " + str(imageCounter[1]))
    print("Test:       " + str(imageCounter[2]))
    print("validation: " + str(imageCounter[3]))
    print()
        
def IHCcrawler(path, targetPath, dim, step):
    """Walk `path`, tile every image triple found in each folder, and print the
    running and final tile counts.

    The sorted file list of a folder is split into three equal parts: entry i
    of the first part is read as the label mask, entry i of the second part as
    the image, and entry i of the third part as the segmented-area mask. The
    image is multiplied by the segmented-area mask (ihcFormatter) before being
    handed to subsamplerIHC.
    NOTE(review): this grouping relies on the file naming scheme sorting into
    those three groups - confirm against the data folder.

    Args:
        path: root folder that is walked recursively.
        targetPath: output folder prefix forwarded to subsamplerIHC.
        dim: tile edge length in pixels.
        step: distance in pixels between the origins of consecutive tiles.
    """
    # Counters: [total, train, test, validation].
    imageCounter = [0, 0, 0, 0]
    for subdir, dirs, files in os.walk(path):
        # os.walk yields names in arbitrary order; sort in place so folders
        # are visited (and tiles numbered) in a reproducible order.
        dirs.sort()
        # Ignore hidden files such as .DS_Store and fix the file order, since
        # the three roles below are assigned purely by position in the list.
        files = sorted(name for name in files if not name.startswith("."))
        imgAmount = len(files) // 3
        if imgAmount <= 0:
            continue
        if len(files) % 3 != 0:
            # Leftover files shift the three groups against each other.
            print("Warning: folder " + subdir + " holds " + str(len(files))
                  + " files, which is not a multiple of 3; images and masks may be mismatched")
        for i in range(imgAmount):
            image = plt.imread(os.path.join(subdir, files[i + imgAmount]))
            segmentedAreaMask = plt.imread(os.path.join(subdir, files[i + imgAmount * 2]))
            mask = plt.imread(os.path.join(subdir, files[i]))
            # Zero out everything outside the segmented area before tiling.
            imageSegmented = ihcFormatter(image, segmentedAreaMask)
            imageCounter = subsamplerIHC(targetPath, imageSegmented, mask, imageCounter, dim, step)

        print("Now there are " + str(imageCounter[0]) + " IHC_Ki67 images")
    print("Train:      " + str(imageCounter[1]))
    print("Test:       " + str(imageCounter[2]))
    print("validation: " + str(imageCounter[3]))
    print()
    
def crawler(path, targetPath, dim, step ):
    """Run the HE crawler and then the IHC crawler over their data folders.

    Args:
        path: input root; "HE_merged/" and "IHC_Ki67/" are appended to it.
        targetPath: output root; "HE/" and "IHC_Ki67/" are appended to it.
        dim: tile edge length, forwarded unchanged to both crawlers.
        step: tile stride, forwarded unchanged to both crawlers.
    """
    # HE data: read from <path>HE_merged/ and write to <targetPath>HE/.
    heSource = path + "HE_merged/"
    heTarget = targetPath + "HE/"
    HEcrawler(heSource, heTarget, dim, step)

    # IHC data: the same sub-folder name is used for input and output.
    ihcSource = path + "IHC_Ki67/"
    ihcTarget = targetPath + "IHC_Ki67/"
    IHCcrawler(ihcSource, ihcTarget, dim, step)


In [None]:
# Tile edge length in pixels. Passing step=dim moves the sampling window by
# one full tile each time, so consecutive tiles do not overlap.
dim = 128
# Read the raw images under ./data/ and write the tiles under ./processedData/.
crawler(path = "./data/", targetPath = "./processedData/", dim=dim, step=dim)

This folder ./data/HE_merged/ does not contain only 2 images []
Now there are 49 HE images
Now there are 98 HE images
Now there are 147 HE images
Now there are 196 HE images
Now there are 245 HE images
Now there are 294 HE images
Now there are 343 HE images
Now there are 392 HE images
Now there are 441 HE images
Now there are 490 HE images
Now there are 539 HE images
Now there are 588 HE images
Now there are 637 HE images
Now there are 686 HE images
Now there are 735 HE images
Now there are 784 HE images
Now there are 833 HE images
Now there are 882 HE images
Now there are 931 HE images
Now there are 980 HE images
Now there are 1029 HE images
Now there are 1078 HE images
Now there are 1127 HE images
Now there are 1176 HE images
Now there are 1225 HE images
Now there are 1274 HE images
Now there are 1323 HE images


In [None]:
# loadImg = plt.imread("processedData/IHC_Ki67/IHC_Ki67_i700.png")
# # print(np.unique(loadImg))
# realImg = (loadImg[:,:,:3]*255).astype(int)
# maskImg = loadImg[:,:,3]
# loadImg1 = plt.imread("processedData/HE/HE_i1000.png")
# # print(np.unique(loadImg))
# realImg1 = (loadImg1[:,:,:3]*255).astype(int)
# maskImg1 = loadImg1[:,:,3]
# # print(np.unique(maskImg*255))
# # print(np.unique(im_tissue-(realImg)))
# fig, ax = plt.subplots(2,2)
# ax[0,0].imshow(realImg)
# ax[0,1].imshow(maskImg)
# ax[1,0].imshow(realImg1)
# ax[1,1].imshow(maskImg1)

In [None]:
# result = np.count_nonzero(np.all(im_tissueHE==[0,0,0],axis=2))
# print(result)