In [None]:
#==================================================================
#Program: blockIndexing
#Version: 1.0
#Author: David Helminiak
#Date Created: September 7, 2024
#Date Last Modified: September 4, 2024
#Changelog: 1.0 - Assembly and cropping - September 2024
#Description: Assign row and column numbers to block images extracted from reference WSI. 
#Operation: Move back into main program directory before running.
#Status: Deprecated - WSI block extraction code (MATLAB-based) was received.
#This code was partially readopted in blockReExtraction.ipynb after the provided block extraction code
#(in favor of which this code had been discontinued) failed to replicate the original block extraction.
#==================================================================

#Have the notebook fill more of the display width
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("<style>.output_result { max-width:80% !important; }</style>"))

#RNG seed value to ensure run-to-run consistency (-1 to disable)
manualSeedValue = 0

#Debugging mode
debugMode = False

#TQDM progress bar visualization flag (passed as the 'ascii' argument of tqdm)
asciiFlag = False

#Load external libraries; the file contents are executed in this notebook's namespace,
#so whatever EXTERNAL.py imports/defines becomes available to the cells that follow
#The with-statement guarantees that the file handle is closed once its contents are read
with open("./CODE/EXTERNAL.py", encoding='utf-8') as externalFile:
    exec(externalFile.read())

#When splitting WSI images, what size should the resulting blocks be (default: 400)
#Should remain consistent with block sizes given for training
blockSize = 400


In [None]:
#Store directory references; every directory string ends with a path separator so that
#file/folder names can be appended to it directly
dir_data = os.path.join('.', 'DATA', '')
dir_blocks_data = os.path.join(dir_data, 'BLOCKS', '')
dir_blocks_inputBlocks = os.path.join(dir_blocks_data, 'INPUT_BLOCKS', '')
dir_blocks_inputWSI = os.path.join(dir_blocks_data, 'INPUT_WSI_NEW', '')
dir_blocks_outputBlocksOLD = os.path.join(dir_blocks_data, 'OUTPUT_BLOCKS_OLD', '')
dir_blocks_outputBlocksNEW = os.path.join(dir_blocks_data, 'OUTPUT_BLOCKS_NEW', '')
dir_blocks_outputBlocksDIF = os.path.join(dir_blocks_data, 'OUTPUT_BLOCKS_DIF', '')
dir_blocks_outputWSIOLD = os.path.join(dir_blocks_data, 'OUTPUT_WSI_OLD', '')
dir_blocks_outputWSINEW = os.path.join(dir_blocks_data, 'OUTPUT_WSI_NEW', '')
file_blocks_labels = os.path.join(dir_blocks_inputBlocks, 'Patch_list.xlsx')

#Create storage locations for renamed original, newly extracted, and difference block files,
#as well as for the reassembled WSI; anything left over from a previous run is deleted first
for outputDirectory in (
    dir_blocks_outputBlocksOLD,
    dir_blocks_outputBlocksNEW,
    dir_blocks_outputBlocksDIF,
    dir_blocks_outputWSIOLD,
    dir_blocks_outputWSINEW,
):
    if os.path.exists(outputDirectory):
        shutil.rmtree(outputDirectory)
    os.makedirs(outputDirectory)

#Get list of all the available blocks; the spreadsheet has no header row and both of its
#columns (block name, block label) are read as strings
metadata = pd.read_excel(
    file_blocks_labels,
    header=None,
    names=['name', 'label'],
    converters={'name': str, 'label': str},
)
blockNamesAll_blocks = np.array(metadata['name'])
blockLabelsAll_blocks = np.array(metadata['label'])

#The sample name is the second piece obtained when splitting a block name on 'PS' or '_'
blockSampleNamesAll_blocks = np.array([re.split('PS|_', blockName)[1] for blockName in blockNamesAll_blocks])

#Each block file is expected at INPUT_BLOCKS/S<sample name>/<block name>.tif
blockFilenamesAll_blocks = [
    dir_blocks_inputBlocks + 'S' + blockSampleName + os.path.sep + blockName + '.tif'
    for blockSampleName, blockName in zip(blockSampleNamesAll_blocks, blockNamesAll_blocks)
]


In [None]:
#Process each of the samples for which blocks exist
#For every sample: locate one block inside the sample WSI, infer the crop that was applied to the WSI
#before it was split, re-split the cropped WSI, match every original block to its best WSI block, and
#store the (row, column)-indexed original block, matched WSI block, and their difference image to disk
#Blocks that are not pixel-identical to their best match are collected in issueFiles
#NOTE(review): compare_MSE is presumably provided through EXTERNAL.py - confirm

def _loadImageRGB(filename):
    #Read an image from disk and convert it from OpenCV's BGR channel order to RGB
    #cv2.imread returns None (rather than raising) when a file cannot be read, so fail with a clear message
    image = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
    if image is None: raise FileNotFoundError('Unable to read image file: ' + str(filename))
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

issueFiles = []
for sampleName in tqdm(np.unique(blockSampleNamesAll_blocks), desc='Samples', leave=True, ascii=asciiFlag):
    
    #Make sample specific subdirectories
    dir_OLD = dir_blocks_outputBlocksOLD + 'S' + sampleName + os.path.sep
    dir_NEW = dir_blocks_outputBlocksNEW + 'S' + sampleName + os.path.sep
    dir_DIF = dir_blocks_outputBlocksDIF + 'S' + sampleName + os.path.sep
    os.makedirs(dir_OLD)
    os.makedirs(dir_NEW)
    os.makedirs(dir_DIF)
    
    #Isolate blocks for the sample
    blockIndices = np.where(blockSampleNamesAll_blocks == sampleName)[0]
    blockFilenames = np.asarray(blockFilenamesAll_blocks)[blockIndices]
    
    #Load the sample WSI
    imageWSI = _loadImageRGB(dir_blocks_inputWSI + sampleName + '.jpg')
    
    #Find the location of one of the blocks (assuming WSI was not resized before blocks were extracted)
    blockImage = _loadImageRGB(blockFilenames[0])
    heat_map = cv2.matchTemplate(imageWSI, blockImage, cv2.TM_CCOEFF_NORMED)
    startRow, startColumn = (int(value) for value in np.unravel_index(np.argmax(heat_map), heat_map.shape))

    #Assuming that the blocks are non-overlapping and match blockSize expectations, extrapolate crop parameters applied to the WSI before splitting
    #The block grid is anchored at (startRow, startColumn); the crop keeps every whole block of that grid that fits inside the WSI
    #Left/top: smallest non-negative grid offset; right/bottom: largest grid offset that does not exceed the WSI extent
    #(a grid line that coincides exactly with an image border is kept, rather than discarding a whole valid block)
    wsiHeight, wsiWidth = imageWSI.shape[:2]
    cropLeft, cropTop = startColumn % blockSize, startRow % blockSize
    cropRight = startColumn + ((wsiWidth - startColumn) // blockSize) * blockSize
    cropBottom = startRow + ((wsiHeight - startRow) // blockSize) * blockSize

    #Crop the original WSI, as was done to the original WSI before splitting
    imageWSI = imageWSI[cropTop:cropBottom, cropLeft:cropRight]
    
    #Split the cropped WSI into blocks and flatten, creating a matched indexed locations list
    numRows, numColumns = math.ceil(imageWSI.shape[0]/blockSize), math.ceil(imageWSI.shape[1]/blockSize)
    splitWSI = imageWSI.reshape(numRows, blockSize, numColumns, blockSize, imageWSI.shape[2]).swapaxes(1,2)
    splitWSI = splitWSI.reshape(-1, splitWSI.shape[2], splitWSI.shape[3], splitWSI.shape[4])
    locations = [[rowNum, colNum] for rowNum in range(0, numRows) for colNum in range(0, numColumns)]
    
    #For each block find the best matching location
    locationIndices, combinedOLD, combinedNEW = [], np.zeros(imageWSI.shape, dtype=np.uint8), np.zeros(imageWSI.shape, dtype=np.uint8)
    for blockFilename in tqdm(blockFilenames, desc='Blocks', ascii=asciiFlag):

        #Load the block
        blockImage = _loadImageRGB(blockFilename)

        #Compare against all blocks from the original WSI
        scores = [compare_MSE(blockImage, compareImage) for compareImage in splitWSI]

        #Get location data for the best match (lowest error)
        bestIndex = np.argmin(scores)
        location = locations[bestIndex]
        startRow, startColumn = location[0]*blockSize, location[1]*blockSize
        
        #Compare the blocks and if they were not identical, note the file for future evaluation
        if not np.array_equal(blockImage, splitWSI[bestIndex]): issueFiles.append(blockFilename)

        #Merge block into combined visuals
        combinedOLD[startRow:startRow+blockSize, startColumn:startColumn+blockSize] = blockImage
        combinedNEW[startRow:startRow+blockSize, startColumn:startColumn+blockSize] = splitWSI[bestIndex]

        #Original block filename with the matched row and column indices appended; shared by all three outputs
        indexedName = os.path.basename(blockFilename).split('.tif')[0]+'_'+str(location[0])+'_'+str(location[1])+'.tif'

        #Store original block data with new index data to disk
        filenameOLD = dir_OLD + indexedName
        writeSuccess = cv2.imwrite(filenameOLD, cv2.cvtColor(blockImage, cv2.COLOR_RGB2BGR), params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))

        #Store matched block data with new index data to disk
        filenameNEW = dir_NEW + indexedName
        writeSuccess = cv2.imwrite(filenameNEW, cv2.cvtColor(splitWSI[bestIndex], cv2.COLOR_RGB2BGR), params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
        
        #Store difference between the block images to disk
        #Differences are taken in a signed type; subtracting unsigned 8-bit arrays directly wraps around (e.g. 10-20 -> 246)
        absDiff = np.abs(blockImage.astype(np.int32) - splitWSI[bestIndex].astype(np.int32))
        maeDiff = np.mean(absDiff, -1).astype('uint8')
        filenameDIF = dir_DIF + indexedName
        #The channel-averaged difference has a single channel, so it is written directly as a grayscale image
        #(an RGB->BGR conversion is not valid for single-channel data)
        writeSuccess = cv2.imwrite(filenameDIF, maeDiff, params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
    
    #Store combined OLD WSI to disk; faster than previously conceived blockAssembly code
    filenameDif = dir_blocks_outputWSIOLD + sampleName + '.tif'
    writeSuccess = cv2.imwrite(filenameDif, cv2.cvtColor(combinedOLD, cv2.COLOR_RGB2BGR), params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
    
    #Store combined NEW WSI to disk; can use to help verify block assignment by comparison with original WSI
    filenameDif = dir_blocks_outputWSINEW + sampleName + '.tif'
    writeSuccess = cv2.imwrite(filenameDif, cv2.cvtColor(combinedNEW, cv2.COLOR_RGB2BGR), params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
    