In [None]:
#==================================================================
#Program: patchBasicReExtractor
#Version: 1.0
#Author: David Helminiak
#Date Created: 29 April 2025
#Date Last Modified: 30 April 2025
#Description: Re-extract patches from complete WSI and attempt to match them against originals
#Operation: Move back into main program directory before running.
#==================================================================

#Have the notebook fill more of the display width
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("<style>.output_result { max-width:80% !important; }</style>"))

#Items otherwise covered when not running this code in a notebook
#NOTE: The TemporaryDirectory object is never bound to a name, so (in CPython) it is finalized as soon as
#this statement completes and the directory it created is removed again; dir_tmp only holds a unique path string
import tempfile
dir_tmp = tempfile.TemporaryDirectory(prefix='TMP_').name
configFileName = './CONFIG_0-TEST'

#Should parallelization calls be used
parallelization = True

#If parallelization is enabled, how many CPU threads should be used? (0 will use any/all available)
#Recommend starting at half of the available system threads if using hyperthreading,
#or 1-2 less than the number of system CPU cores if not using hyperthreading.
#Can adjust down to help manage RAM overhead, but it may have limited impact.
availableThreads = 4

#Which GPU(s) devices should be used; NA, but need to initialize (default: [])
gpus = []

#RNG seed value to ensure run-to-run consistency (-1 to disable)
manualSeedValue = 0

#Debugging mode
debugMode = True

#TQDM progress bar visualization flag
asciiFlag = False

#Load external libraries; each file is executed in this namespace, so its definitions become notebook globals
#The files are opened through context managers so that their handles are closed deterministically
with open("./CODE/EXTERNAL.py", encoding='utf-8') as codeFile: exec(codeFile.read())
with open("./CODE/COMPUTE.py", encoding='utf-8') as codeFile: exec(codeFile.read())

#When splitting WSI images, what size should the resulting patches be (4x default: 400; 10x default: 1000)
#Should remain consistent with original patch sizes
patchSize = 1000

#If crashed, what index should be restarted from (default: 0)
startIndex = 0


In [None]:
#Store directory references
dir_data = '.' + os.path.sep + 'DATA' + os.path.sep
dir_patches_data = dir_data + 'PATCHES' + os.path.sep
dir_patches_inputPatches = dir_patches_data + 'INPUT_PATCHES' + os.path.sep
dir_patches_inputWSI = dir_patches_data + 'INPUT_WSI' + os.path.sep
dir_patches_outputPatches = dir_patches_data + 'OUTPUT_PATCHES' + os.path.sep
dir_patches_outputVisuals = dir_patches_data + 'OUTPUT_patches_VISUALS' + os.path.sep
file_patches_labels = dir_patches_inputPatches + 'Patch_list.xlsx'

#Create storage locations for new patch files and visuals
#A fresh run (startIndex == 0) clears any stale output first; a restart (startIndex != 0) must keep
#the results that were already produced before the crash, so the existing folders are left untouched
for dir_output in [dir_patches_outputPatches, dir_patches_outputVisuals]:
    if startIndex == 0 and os.path.exists(dir_output): shutil.rmtree(dir_output)
    os.makedirs(dir_output, exist_ok=True)

#Dataset 2 - 10x
#Collect every original patch file from the input sample folders, skipping those before the restart index
patchSampleFolders = natsort.natsorted(glob.glob(dir_patches_inputPatches+'*'))[startIndex:]
patchFilenamesAll_patches = []
for patchSampleFolder in patchSampleFolders: patchFilenamesAll_patches += natsort.natsorted(glob.glob(patchSampleFolder+os.path.sep+'*.tif'))

#Derive the sample name of each patch from its filename (second token when splitting on 'P' or '_')
patchSampleNamesAll_patches = np.asarray([re.split('P|_', os.path.basename(patchFilename))[1] for patchFilename in patchFilenamesAll_patches])

#Specify all samples that need to be processed
sampleNames = np.unique(patchSampleNamesAll_patches)

#Create subfolders for the patch images derived from each sample and comparisons between original and new patch images
#exist_ok allows folders kept from an interrupted run to be reused on restart
sampleFolders = np.asarray([dir_patches_outputPatches + sampleName + os.path.sep for sampleName in sampleNames])
for sampleFolder in sampleFolders: os.makedirs(sampleFolder, exist_ok=True)
sampleVisualsFolders = np.asarray([dir_patches_outputVisuals + sampleName + os.path.sep for sampleName in sampleNames])
for sampleVisualsFolder in sampleVisualsFolders: os.makedirs(sampleVisualsFolder, exist_ok=True)


In [None]:
#Compute the element-wise absolute difference between two images (or a stack of images and a single image)
#Direct subtraction of unsigned integer pixel data wraps around for negative results (e.g. 5-10 -> 251 in uint8),
#so the difference is formed as max-min, which is always non-negative and stays within the original dtype
def _absoluteDifference(imageA, imageB):
    return np.maximum(imageA, imageB) - np.minimum(imageA, imageB)

#Find best match for each original patch
def matchPatch(newPatchImages, patchFilenames, sampleFolder, sampleVisualsFolder, showProgressBars):
    """Match each original patch file against the candidate patches re-extracted from the WSI.

    For every original patch, the candidate with the lowest mean absolute error (MAE) is written
    to sampleFolder. When the best match is not pixel-identical, a side-by-side comparison figure
    is also written to sampleVisualsFolder and the original filename is flagged for manual review.

    Besides its arguments, this function reads the notebook globals newPatchLocations (row/column
    origin of each entry in newPatchImages), sampleName, and asciiFlag.

    Parameters:
        newPatchImages: stacked candidate patch images, indexed along the first axis
        patchFilenames: paths of the original patch images to be matched
        sampleFolder: output folder (with trailing separator) for the matched patches
        sampleVisualsFolder: output folder (with trailing separator) for the comparison figures
        showProgressBars: forwarded unchanged to tqdm's 'disable' argument, i.e. True hides the
            progress bar and False shows it (note that this is the opposite of what the name suggests)

    Returns:
        List of the original patch filenames whose best match was not an exact match.
    """
    samplesToCheck = []
    for patchFilename in tqdm(patchFilenames, total=len(patchFilenames), desc='Patches', leave=False, ascii=asciiFlag, disable=showProgressBars):
        originalPatchImage = cv2.cvtColor(cv2.imread(patchFilename, cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB)
        patchIndex = os.path.basename(patchFilename).split('.')[0].split('_')[1]

        #Score every candidate by its MAE against the original; a score of exactly 0 is a pixel-identical match
        scores = np.mean(_absoluteDifference(newPatchImages, originalPatchImage), axis=(1, 2, 3))
        bestIndex = np.argmin(scores)
        newPatchImage, startRow, startColumn = newPatchImages[bestIndex], newPatchLocations[bestIndex][0], newPatchLocations[bestIndex][1]

        #Store the best matching patch, encoding its location in the WSI within the filename
        filenameOutput = sampleFolder + 'PS' + sampleName + '_' + patchIndex + '_' + str(startRow) + '_' + str(startColumn) + '.tif'
        writeSuccess = cv2.imwrite(filenameOutput, cv2.cvtColor(newPatchImage, cv2.COLOR_RGB2BGR), params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
        if not writeSuccess: print('Warning - Failed to write patch image: ' + filenameOutput)

        #If the best match is not exact, flag the patch and store a visual comparison for manual review
        if scores[bestIndex] != 0:
            maeImage = np.mean(_absoluteDifference(newPatchImage, originalPatchImage), -1)
            samplesToCheck.append(patchFilename)
            fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10, 4))
            ax[0].imshow(newPatchImage)
            ax[1].imshow(originalPatchImage)
            ax[2].imshow(maeImage)
            ax[0].set_title('New')
            ax[1].set_title('Original')
            ax[2].set_title('MAE')
            fig.suptitle('Sample ' + sampleName)
            plt.tight_layout()
            filenameOutput = sampleVisualsFolder + 'PS' + sampleName + '_' + patchIndex + '_' + str(startRow) + '_' + str(startColumn) + '.tif'
            plt.savefig(filenameOutput)
            plt.close(fig)
    return samplesToCheck


In [None]:
#Extract patches for each sample
#samplesToCheck accumulates the original patch filenames for which no exact match was found
samplesToCheck = []
for sampleIndex, sampleName in tqdm(enumerate(sampleNames), total=len(sampleNames), desc='Samples', leave=True, ascii=asciiFlag):
    
    print(sampleName)
    
    #Create subfolders for the revised patch images derived from each sample and comparisons between original and new patch images
    sampleFolder = dir_patches_outputPatches + sampleName + os.path.sep
    os.makedirs(sampleFolder, exist_ok=True)
    sampleVisualsFolder = dir_patches_outputVisuals + sampleName + os.path.sep
    os.makedirs(sampleVisualsFolder, exist_ok=True)
    
    #Get filenames of the original patches belonging to this sample and setup indices for such
    patchFilenames = np.asarray(patchFilenamesAll_patches)[np.where(patchSampleNamesAll_patches == sampleName)[0]]
    originalPatchIndices = np.arange(0, len(patchFilenames))    

    #Load the sample WSI (crop for even division); cv2.imread returns None rather than raising when a file cannot be read
    filenameWSI = dir_patches_inputWSI + sampleName + '.jpg'
    imageWSI = cv2.imread(filenameWSI, cv2.IMREAD_UNCHANGED)
    if imageWSI is None: raise FileNotFoundError('Unable to load WSI for sample ' + sampleName + ': ' + filenameWSI)
    imageWSI = cv2.cvtColor(imageWSI, cv2.COLOR_BGR2RGB)
    blocksY, blocksX = imageWSI.shape[0]//patchSize, imageWSI.shape[1]//patchSize
    imageWSI = imageWSI[:blocksY*patchSize, :blocksX*patchSize]
    
    #Extract patches from WSI and reduce set according to threshold criteria
    #A patch is kept when more than 80% of its first-channel pixel values are >= 5
    #newPatchLocations stores the [row, column] origin of each kept patch and is read as a global by matchPatch
    patchImages = imageWSI.reshape(blocksY, patchSize, blocksX, patchSize, 3).swapaxes(1,2)
    newPatchImages, newPatchLocations = [], []
    for posX in range(0, blocksX):
        for posY in range(0, blocksY):
            patchImage = patchImages[posY, posX]
            if np.mean(patchImage[:,:,0] >= 5) > 0.8: 
                newPatchImages.append(patchImage)
                newPatchLocations.append([posY*patchSize, posX*patchSize])
    del patchImages 
    newPatchImages = np.asarray(newPatchImages)
    
    #Find patch in WSI, storing it, and a visual of the original/new patches if they do not match exactly
    if parallelization:
        #Split the original patches across workers; progress bars are suppressed inside the workers
        futures = [(newPatchImages, patchFilenames[indices], sampleFolder, sampleVisualsFolder, True) for indices in np.array_split(originalPatchIndices, numberCPUS)]
        computePool = Pool(numberCPUS)
        results = computePool.starmap_async(matchPatch, futures)
        computePool.close()
        computePool.join()
        #Flatten the per-worker lists directly; avoids array dtype promotion between empty and non-empty lists
        samplesToCheckPartial = [patchFilename for partialResult in results.get() for patchFilename in partialResult]
    else: 
        samplesToCheckPartial = matchPatch(newPatchImages, patchFilenames, sampleFolder, sampleVisualsFolder, False)
    samplesToCheck += samplesToCheckPartial
    