In [None]:
#==================================================================
#Program: blockExtraction
#Version: 1.0
#Author: David Helminiak
#Date Created: September 7, 2024
#Date Last Modified: September 7, 2024
#Changelog: 1.0 - Assembly and cropping - September 2024
#Description: Extract .tif block images for samples from the associated WSI .jpg files using empirical thresholds
#Operation: Move back into main program directory before running.
#Status: Verified that the settings provided are not those used to extract blocks from WSI for original work.
#The first few results look correct, but the threshold values are off...
#See blockReExtraction.ipynb
#==================================================================

#Have the notebook fill more of the display width
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("<style>.output_result { max-width:80% !important; }</style>"))

#RNG seed value to ensure run-to-run consistency (-1 to disable)
#NOTE(review): not read in this notebook; presumably consumed by ./CODE/EXTERNAL.py - confirm
manualSeedValue = 0

#Debugging mode
#NOTE(review): not read in this notebook; presumably consumed by ./CODE/EXTERNAL.py - confirm
debugMode = False

#TQDM progress bar visualization flag (passed as the 'ascii' argument of the progress bar)
asciiFlag = False

#Load external libraries by executing the shared setup script in this namespace
#The settings above are defined first so that they are visible to the executed script
#A context manager is used so the file handle is closed as soon as its contents have been read
with open("./CODE/EXTERNAL.py", encoding='utf-8') as externalFile: exec(externalFile.read())

#When splitting WSI images, what size should the resulting blocks be (default: 400)
#Should remain consistent with block sizes given for training
blockSize = 400

#Specify original empirical threshold values
#A block is kept when the fraction of its red channel values above thresholdRedChannel exceeds thresholdPercentage
thresholdRedChannel = 5
thresholdPercentage = 0.8

#Compute block area for percent coverage computation
blockArea = blockSize*blockSize

#Whether to visually confirm that new block images match those of a previous block set
visualConfirmation = False


In [None]:
#Build the directory layout used by this notebook
#A trailing empty component makes each directory string end with a path separator
dir_data = os.path.join('.', 'DATA', '')
dir_blocks_data = os.path.join(dir_data, 'BLOCKS', '')
dir_blocks_inputBlocks = os.path.join(dir_blocks_data, 'INPUT_BLOCKS', '')
dir_blocks_inputWSI = os.path.join(dir_blocks_data, 'INPUT_WSI', '')
dir_blocks_outputBlocks = os.path.join(dir_blocks_data, 'OUTPUT_BLOCKS', '')
file_blocks_labels = os.path.join(dir_blocks_inputBlocks, 'Patch_list.xlsx')

#Start from an empty output location; anything left over from a previous run is deleted
if os.path.exists(dir_blocks_outputBlocks):
    shutil.rmtree(dir_blocks_outputBlocks)
os.makedirs(dir_blocks_outputBlocks)

#When visual confirmation is requested, read the list of previous blocks (name and label per row)
#The sample name is taken as the second field after splitting each block name on 'PS' or '_'
#Previous block files are looked up as INPUT_BLOCKS/S<sample>/<block name>.tif
if visualConfirmation:
    metadata = pd.read_excel(file_blocks_labels, header=None, names=['name', 'label'], converters={'name':str,'label':str})
    blockNamesAll_blocks = np.array(metadata['name'])
    blockLabelsAll_blocks = np.array(metadata['label'])
    blockSampleNamesAll_blocks = np.array([re.split('PS|_', name)[1] for name in blockNamesAll_blocks])
    blockFilenamesAll_blocks = [
        dir_blocks_inputBlocks + 'S' + sample + os.path.sep + name + '.tif'
        for sample, name in zip(blockSampleNamesAll_blocks, blockNamesAll_blocks)
    ]

#Collect the WSI .jpg files in natural sort order; each sample is named after its file (text before '.jpg')
filenamesWSI = natsort.natsorted(glob.glob(dir_blocks_inputWSI + '*.jpg'))
sampleNames = [os.path.basename(pathWSI).split('.jpg')[0] for pathWSI in filenamesWSI]
#sampleNames = ['11', '16', '17', '19', '2', '20', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '33', '34', '35', '37', '4', '44', '46', '47', '5', '50', '51', '54', '57', '58', '60', '61', '62', '64', '65', '67', '68', '7', '70', '9']

#Give every sample its own output subfolder for the block images derived from it
sampleFolders = [dir_blocks_outputBlocks + name + os.path.sep for name in sampleNames]
for sampleFolder in sampleFolders:
    os.makedirs(sampleFolder)

#Extract blocks for each sample
#Each WSI is tiled into non-overlapping blockSize x blockSize blocks; partial blocks at the right/bottom edges are dropped
#Blocks that pass the red channel threshold are written to the sample's output folder as
#PS<sample>_<block index>_<row>_<column>.tif, with the block index counting kept blocks from 1 in column-major order
for sampleIndex, sampleName in tqdm(enumerate(sampleNames), total=len(sampleNames), desc='Samples', leave=True, ascii=asciiFlag):
    
    #Get filenames and indices for blocks that are intended for visual comparison
    #The index of a previous block is the last '_'-separated field of its filename
    if visualConfirmation: 
        blockFilenames = np.asarray(blockFilenamesAll_blocks)[np.where(blockSampleNamesAll_blocks == sampleName)[0]]
        blockIndices = np.asarray([int(os.path.basename(blockFilename).split('.tif')[0].split('_')[-1]) for blockFilename in blockFilenames])
    
    #Load the sample WSI; cv2.imread returns None instead of raising when a file cannot be read
    imageWSI = cv2.imread(filenamesWSI[sampleIndex], cv2.IMREAD_UNCHANGED)
    if imageWSI is None: raise IOError('Unable to read WSI image: ' + filenamesWSI[sampleIndex])
    imageWSI = cv2.cvtColor(imageWSI, cv2.COLOR_BGR2RGB)
    
    #Compute number of whole blocks that fit along each axis
    numRows, numColumns = imageWSI.shape[0]//blockSize, imageWSI.shape[1]//blockSize
    
    #Extract blocks where their red channel holds a specified percentage of values above the specified background threshold
    blockIndex = 0
    for colNum in range(0, numColumns):
        startCol, endCol = colNum*blockSize, (colNum+1)*blockSize
        for rowNum in range(0, numRows): 
            startRow, endRow = rowNum*blockSize, (rowNum+1)*blockSize
            blockImage = imageWSI[startRow: endRow, startCol:endCol]
            areaPercentageR = np.sum(blockImage[:,:,0]>thresholdRedChannel)/blockArea
            if areaPercentageR <= thresholdPercentage: continue
            
            #Store the block; a failed write is raised rather than ignored so that the output set cannot be silently incomplete
            blockIndex += 1
            filenameOutput = sampleFolders[sampleIndex] + 'PS' + sampleName + '_' + str(blockIndex) + '_' + str(rowNum) + '_' + str(colNum) + '.tif'
            writeSuccess = cv2.imwrite(filenameOutput, cv2.cvtColor(blockImage, cv2.COLOR_RGB2BGR), params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
            if not writeSuccess: raise IOError('Unable to write block image: ' + filenameOutput)
            if not visualConfirmation: continue
            
            #Find the previous block with the same index; the previous set may hold no counterpart for this block
            matchLocations = np.where(blockIndices==blockIndex)[0]
            if len(matchLocations) == 0:
                print('Sample ' + sampleName + ': no previous block with index ' + str(blockIndex) + ' available for comparison')
                continue
            filenameCompare = blockFilenames[matchLocations[0]]
            compareBlockImage = cv2.imread(filenameCompare, cv2.IMREAD_UNCHANGED)
            if compareBlockImage is None: raise IOError('Unable to read previous block image: ' + filenameCompare)
            compareBlockImage = cv2.cvtColor(compareBlockImage, cv2.COLOR_BGR2RGB)
            
            #Show both blocks side by side whenever they differ in shape or in any value
            #An exact comparison avoids subtracting unsigned images (which wraps around) and broadcasting mismatched shapes
            if not np.array_equal(blockImage, compareBlockImage):
                fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
                ax[0].set_title('Original')
                ax[0].imshow(compareBlockImage)
                ax[1].set_title('New')
                ax[1].imshow(blockImage)
                fig.suptitle('Sample ' + sampleName)
                plt.tight_layout()
                plt.show()
                plt.close()
                        