In [1]:
#==================================================================
#Program: AnnotationTransfer
#Version: 1.0
#Author(s): David Helminiak
#Date Created: 20 June 2025
#Date Last Modified: 20 June 2025
#Description: Transfer annotations from corrected WSI to uncorrected patches
#WARNING - Written to handle only one side per sample
#==================================================================

#Have the notebook fill more of the display width
from IPython.display import display, HTML
#Inject two CSS overrides into the page: the `.container` element is widened to 100%,
#and `.output_result` elements are capped at 80% of the available width
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("<style>.output_result { max-width:80% !important; }</style>"))

#Block resolution (height and width of a single block image)
resolutionHeight = 2200
resolutionWidth = 2748

#Specify input/output patch size (symmetric, i.e. patches are patchSize x patchSize)
patchSize = 400




In [2]:
#Raise the maximum image size for opencv; note that this can allow for decompression bomb DOS attacks if an untrusted image ends up as an input
#The variable is set before cv2 is imported further down in this cell
import os 
os.environ["OPENCV_IO_MAX_IMAGE_PIXELS"] = str(2**40)

from IPython import get_ipython
#Flag whether the active shell is the ZMQ-based one; used below to pick the matching tqdm progress bar
jupyterNotebook = get_ipython().__class__.__name__ == 'ZMQInteractiveShell'

import copy
import cv2
import glob
import logging
import matplotlib.pyplot as plt
import natsort
import numpy as np
import pandas as pd
import ray
import shutil
import time

from basicpy import BaSiC
from contextlib import nullcontext
from ray.util.multiprocessing import Pool

if jupyterNotebook: from tqdm.notebook import tqdm
else: from tqdm.auto import tqdm

#Pandas option to prevent terminal outputs
#(presumably the downcasting FutureWarning that the .replace() calls on the metadata columns would otherwise emit - TODO confirm)
pd.set_option('future.no_silent_downcasting', True)

#Define logging levels and behaviors: the root logger only passes ERROR and above,
#and errors raised inside the logging machinery itself are ignored rather than reported
logging.getLogger().setLevel(logging.ERROR)
logging.raiseExceptions = False

#Ray actor for holding global progress in parallel sampling operations
@ray.remote(num_cpus=0)
class SamplingProgress_Actor:
    """Counter held in a Ray actor (declared with num_cpus=0) for tracking overall progress."""

    def __init__(self):
        #Running total; starts as a float and grows by one per update() call
        self.current = 0.0

    def update(self):
        """Register one more completed unit of work."""
        self.current += 1

    def getCurrent(self):
        """Return the running total accumulated so far."""
        return self.current



In [3]:
#Store directory references (inputs: corrected WSI/patches and uncorrected blocks)
dir_in_cWSI = './INPUTS/WSI_CORRECTED/'
dir_in_cPatches = './INPUTS/PATCHES_CORRECTED/'
dir_in_uBlocks = './INPUTS/BLOCKS_UNCORRECTED/'

#Outputs: rebuilt WSI and patches (uncorrected and corrected) plus the block comparison figures
dir_out_uWSI = './OUTPUTS/WSI_UNCORRECTED/'
dir_out_uPatches = './OUTPUTS/PATCHES_UNCORRECTED/'
dir_out_cWSI = './OUTPUTS/WSI_CORRECTED/'
dir_out_cPatches = './OUTPUTS/PATCHES_CORRECTED/'
dir_out_blockComparison = './OUTPUTS/BLOCK_COMPARISON/'

#Ensure output directories exist and are empty; anything left over from a previous run is deleted
for outputDirectory in (dir_out_uWSI, dir_out_uPatches, dir_out_cWSI, dir_out_cPatches, dir_out_blockComparison):
    if os.path.exists(outputDirectory): shutil.rmtree(outputDirectory)
    os.makedirs(outputDirectory)

#Derive how blocks should be cropped to allow for even patch splits
#The rows/columns left over after fitting whole patches are trimmed as evenly as possible;
#when the leftover is odd, the extra pixel is taken from the bottom/right
cropHeight = resolutionHeight % patchSize
cropWidth = resolutionWidth % patchSize
cropTop, cropBottom = cropHeight//2, cropHeight-(cropHeight//2)
cropLeft, cropRight = cropWidth//2, cropWidth-(cropWidth//2)


In [5]:
#Obtain list of sample names of available WSI .jpg images
#Only .jpg files are kept, since the sample loop loads '<name>.jpg'; any other file in the folder would otherwise crash it
sampleNames = [os.path.splitext(name)[0] for name in os.listdir(dir_in_cWSI) if os.path.splitext(name)[1].lower() == '.jpg']

#Read in all patch metadata; converting annotations for ease of manipulation
#Seven names are supplied for the data columns; the exported file shows one extra leading column (the patch filename),
#which pandas then uses as the index
metadataFile = dir_in_cPatches+'Patches_Label.csv'
metadata = pd.read_csv(metadataFile, header=0, names=['Sample Number', 'Index', 'Row', 'Column', 'Label', 'Edge', 'Boundary'], converters={'Sample Number':int, 'Index':int, 'Row':int, 'Column':int, 'Label':str, 'Edge':str, 'Boundary':str})

#Encode the character codes numerically: 'N' -> 1 in every column; 'T' (Label) or 'Y' (Edge/Boundary) -> 2
#The value 0 is left free; the pixel-level annotation maps start out zero-filled
for columnName, positiveCode in (('Label', 'T'), ('Edge', 'Y'), ('Boundary', 'Y')):
    metadata[columnName] = pd.to_numeric(metadata[columnName].replace({'N': 1, positiveCode: 2}))

#For each sample transfer annotations to uncorrected blocks and form crop-stitched variant of WSI
#Per sample: patch-level annotations are painted onto the corrected WSI, every block image is located inside that WSI
#to pull out its annotations, and the cropped blocks are then tiled into new WSI that are exported whole and as patches
for sampleName in tqdm(natsort.natsorted(sampleNames), total=len(sampleNames), desc='Samples', leave=True):
    
    #Load WSI and create array to hold pixel-level annotations; 
    #the 3 channels hold Label, Edge, and Boundary codes, with 0 wherever no patch annotation is written
    imageWSI = cv2.imread(dir_in_cWSI + sampleName + '.jpg', cv2.IMREAD_UNCHANGED)
    annotationWSI = np.zeros((imageWSI.shape[0], imageWSI.shape[1], 3))
    grayImageWSI = cv2.cvtColor(imageWSI, cv2.COLOR_BGR2GRAY)
    
    #Extract applicable patch data (sample number is the text before the first underscore in the sample name)
    sampleMetadata = metadata.loc[metadata['Sample Number'] == int(sampleName.split('_')[0])]
    patchAnnotations = np.stack((sampleMetadata['Label'].to_numpy(), sampleMetadata['Edge'].to_numpy(), sampleMetadata['Boundary'].to_numpy())).T
    patchPositions = np.vstack((sampleMetadata['Row'].to_numpy(), sampleMetadata['Column'].to_numpy())).T
    
    #Convert annotations from patch-level to pixel-level (Row/Column are used as the top-left pixel of each patch)
    for index, position in enumerate(patchPositions): annotationWSI[position[0]:position[0]+patchSize, position[1]:position[1]+patchSize] = patchAnnotations[index]
    
    #Load uncorrected sample block images
    blockFilenames = natsort.natsorted(glob.glob(dir_in_uBlocks+sampleName+'/*'))
    blockImages = [cv2.imread(filename, cv2.IMREAD_UNCHANGED) for filename in blockFilenames]
    
    #Extract WSI block-level dimensions, assuming block filenames are 1 indexed and the last indexed is the bottom right
    #(the filename stem must split into exactly five '_'-separated fields, the last two holding 'R<row>' and 'C<column>')
    _, _, _, row, column = os.path.basename(blockFilenames[-1]).split('.')[0].split('_')
    blocksY, blocksX = int(row.split('R')[1]), int(column.split('C')[1])
    
    #Create corrected versions of block images; BaSiC is fitted on and applied to the Value channel only,
    #which is then rescaled so that its maximum is 255 before converting back to BGR
    H, S, V = np.split(np.asarray([cv2.cvtColor(image.astype('float32'), cv2.COLOR_BGR2HSV_FULL) for image in blockImages]), 3, -1)
    H, S, V = H[..., 0], S[..., 0], V[..., 0]
    basic = BaSiC(fitting_mode='approximate', optimization_tol=1e-6, reweighting_tol=1e-3, sort_intensity=True)
    V = basic.fit_transform(V)
    V = (V / V.max())*255.0
    
    blockImagesCorrected = np.asarray([np.round(cv2.cvtColor(image, cv2.COLOR_HSV2BGR_FULL)).astype('uint8') for image in np.stack([H, S, V], -1)])
    del H, S, V
    
    #Extract annotations for blocks and visualize for manually conducted comparative audit
    blockAnnotations = []
    for index, blockImage in tqdm(enumerate(blockImages), total=len(blockImages), desc='Blocks', leave=False): 
        
        #Find block in WSI, extracting and storing its annotations; the best match is taken as the block's top-left corner
        #NOTE(review): the uncorrected block is matched against the corrected WSI - confirm this is the intended template
        matchMap = cv2.matchTemplate(grayImageWSI, cv2.cvtColor(blockImage, cv2.COLOR_BGR2GRAY), cv2.TM_CCOEFF_NORMED)
        startRow, startColumn = np.unravel_index(np.argmax(matchMap), matchMap.shape)
        blockAnnotations.append(annotationWSI[startRow:startRow+resolutionHeight, startColumn:startColumn+resolutionWidth])
        
        #Extract and create side-by-side comparison of the block images for visual audit
        extractedBlock = imageWSI[startRow:startRow+resolutionHeight, startColumn:startColumn+resolutionWidth]
        blockName = os.path.basename(blockFilenames[index]).split('.')[0]
        fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
        ax[0].imshow(cv2.cvtColor(extractedBlock, cv2.COLOR_BGR2RGB))
        ax[1].imshow(cv2.cvtColor(blockImage, cv2.COLOR_BGR2RGB))
        ax[2].imshow(cv2.cvtColor(blockImagesCorrected[index], cv2.COLOR_BGR2RGB))
        ax[0].set_title('Original')
        ax[1].set_title('Uncorrected')
        ax[2].set_title('BaSiC Flatfield Corrected')
        fig.suptitle('Sample ' + sampleName)
        plt.tight_layout()
        filenameOutput = dir_out_blockComparison + blockName + '.tif'
        plt.savefig(filenameOutput)
        plt.close()
    
    #Crop block images and matched pixel-level annotation maps
    #End indices are computed explicitly: a slice ending in -0 would be empty whenever no bottom/right crop is needed
    blockImages, blockImagesCorrected, blockAnnotations = [
        stack[:, cropTop:stack.shape[1]-cropBottom, cropLeft:stack.shape[2]-cropRight, :]
        for stack in (np.asarray(blockImages), np.asarray(blockImagesCorrected), np.asarray(blockAnnotations))
    ]

    #Form new complete WSI and annotation map by tiling the cropped blocks in row-major (blocksY x blocksX) order
    blockHeight, blockWidth = blockImages[0].shape[0], blockImages[0].shape[1]
    height, width = blocksY*blockHeight,  blocksX*blockWidth
    uImageWSI = blockImages.reshape(blocksY, blocksX, blockHeight, blockWidth, 3)
    uImageWSI = uImageWSI.swapaxes(1,2).reshape(height, width, 3)
    cImageWSI = blockImagesCorrected.reshape(blocksY, blocksX, blockHeight, blockWidth, 3)
    cImageWSI = cImageWSI.swapaxes(1,2).reshape(height, width, 3)
    annotationWSI = blockAnnotations.reshape(blocksY, blocksX, blockHeight, blockWidth, 3)
    annotationWSI = annotationWSI.swapaxes(1,2).reshape(height, width, 3)
    
    #Crop WSI and annotation map to the foreground area, using red-channel values >= 5 as a threshold
    #(channel index 2 of the BGR image); the box found on the uncorrected WSI is applied to all three arrays
    x, y, w, h = cv2.boundingRect((uImageWSI[:,:,2]>=5)*np.uint8(1))
    uImageWSI = uImageWSI[y:y+h, x:x+w]
    cImageWSI = cImageWSI[y:y+h, x:x+w]
    annotationWSI = annotationWSI[y:y+h, x:x+w]
    
    #Pad (as symmetrically as possible) for an even division by the configured patch size; padding is zero-valued
    padHeight = (int(np.ceil(uImageWSI.shape[0]/patchSize))*patchSize)-uImageWSI.shape[0]
    padWidth = (int(np.ceil(uImageWSI.shape[1]/patchSize))*patchSize)-uImageWSI.shape[1]
    padTop, padLeft = padHeight//2, padWidth//2
    padBottom, padRight = padTop+(padHeight%2), padLeft+(padWidth%2)
    uImageWSI = np.pad(uImageWSI, ((padTop, padBottom), (padLeft, padRight), (0, 0)))
    cImageWSI = np.pad(cImageWSI, ((padTop, padBottom), (padLeft, padRight), (0, 0)))
    annotationWSI = np.pad(annotationWSI, ((padTop, padBottom), (padLeft, padRight), (0, 0)))
    
    #Export the new WSI
    #NOTE(review): the imwrite return values are stored but never checked, so a failed write goes unnoticed
    writeSuccess = cv2.imwrite(dir_out_uWSI+sampleName+'.tif', uImageWSI, params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
    writeSuccess = cv2.imwrite(dir_out_cWSI+sampleName+'.tif', cImageWSI, params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
    
    #Split the WSI and annotation map into patches and flatten to (patch, patchSize, patchSize, channel) in row-major patch order
    numPatchesRow, numPatchesCol = uImageWSI.shape[0]//patchSize, uImageWSI.shape[1]//patchSize
    uImageWSI = uImageWSI.reshape(numPatchesRow, patchSize, numPatchesCol, patchSize, uImageWSI.shape[2]).swapaxes(1,2)
    uImageWSI = uImageWSI.reshape(-1, uImageWSI.shape[2], uImageWSI.shape[3], uImageWSI.shape[4])
    cImageWSI = cImageWSI.reshape(numPatchesRow, patchSize, numPatchesCol, patchSize, cImageWSI.shape[2]).swapaxes(1,2)
    cImageWSI = cImageWSI.reshape(-1, cImageWSI.shape[2], cImageWSI.shape[3], cImageWSI.shape[4])
    annotationWSI = annotationWSI.reshape(numPatchesRow, patchSize, numPatchesCol, patchSize, annotationWSI.shape[2]).swapaxes(1,2)
    annotationWSI = annotationWSI.reshape(-1, annotationWSI.shape[2], annotationWSI.shape[3], annotationWSI.shape[4])
    
    #Export patches; np.ndindex walks the patch grid in the same row-major order as the flattened patch arrays
    patchLocations, patchNames, uPatchFilenames, cPatchFilenames = [], [], [], []
    for patchIndex, (rowNum, colNum) in tqdm(enumerate(np.ndindex((numPatchesRow,numPatchesCol))), total=numPatchesRow*numPatchesCol, desc='Patches', leave=False):
        locationRow, locationColumn = rowNum*patchSize, colNum*patchSize
        patchLocations.append([locationRow, locationColumn])
        patchName = sampleName+'_'+str(patchIndex)+'_'+str(locationRow)+'_'+str(locationColumn)
        patchNames.append(patchName)
        uPatchFilename = 'PS' + patchName + '.tif'
        cPatchFilename = 'PS' + patchName + '.tif'
        #Uncorrected and corrected patches share a filename but go to their own directories, each from its own WSI
        writeSuccess = cv2.imwrite(dir_out_uPatches+uPatchFilename, uImageWSI[patchIndex], params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
        writeSuccess = cv2.imwrite(dir_out_cPatches+cPatchFilename, cImageWSI[patchIndex], params=(cv2.IMWRITE_TIFF_COMPRESSION, 1))
        uPatchFilenames.append(uPatchFilename)
        cPatchFilenames.append(cPatchFilename)

    #Merge and export labels
    patchLabels = []
    
    
    #NOTE(review): this break stops the loop after the first sample; it looks like a development leftover
    #while the label export above is unfinished - remove it to process every sample
    break
    

Samples:   0%|          | 0/76 [00:00<?, ?it/s]

Blocks:   0%|          | 0/36 [00:00<?, ?it/s]

In [32]:
#Scratch check: collect the distinct values present in `annotation`
#NOTE(review): `annotation` is only assigned by the label-merging loop in a later cell, so this relies on out-of-order execution
annotationValues = np.unique(annotation)

In [33]:
#Numeric re-encoding of the annotation columns ('N' -> 1; 'T'/'Y' -> 2)
#NOTE(review): repeats the conversion already performed when the metadata is loaded; candidate for removal
for columnName, positiveCode in (('Label', 'T'), ('Edge', 'Y')):
    metadata[columnName] = metadata[columnName].replace('N', 1).replace(positiveCode, 2).apply(pd.to_numeric)
#Boundary is only re-coded here; no numeric conversion follows in this cell
metadata['Boundary'] = metadata['Boundary'].replace('N', 1).replace('Y', 2)




array([1., 2.])

In [40]:
#Label "edge" refers to patches located at the interface between tissue and background.
#Label "boundary" refers to patches situated between normal and tumor tissue.




(400, 3)

In [63]:

#Merge pixel-level class labels back to patch-level class labels
#Expects annotationWSI as left by the sample loop: one (patchSize, patchSize, 3) map per patch with
#Label/Edge/Boundary channels, coded 0 = nothing written, 1 = 'N', 2 = 'T' (Label) or 'Y' (Edge/Boundary)
patchLabels = []
for index, annotation in enumerate(annotationWSI):
    
    #Reset tags
    tagBenign, tagMalignant, tagBoundary, tagEdge = False, False, False, False
    
    #Extract unique values present in each class annotation map
    dataLabel, dataEdge, dataBoundary = np.split(annotation, 3, -1)
    uniqueLabel, uniqueEdge, uniqueBoundary = np.unique(dataLabel), np.unique(dataEdge), np.unique(dataBoundary)
    
    #Caveat: the source annotations are per-patch, not per-pixel, so a new patch that merely overlaps an old
    #mixed (or benign plus malignant) area is not necessarily mixed itself; the rules below cannot tell the difference
    
    #Combine and validate class labels
    if (2 in uniqueLabel): tagMalignant = True
    if (2 in uniqueBoundary): tagBoundary = True
    if (1 in uniqueLabel): tagBenign = True
    
    #Any pixel without a label (value 0) marks the patch as an edge
    if (0 in uniqueLabel): tagEdge = True
    
    #A transferred edge annotation also marks the patch as an edge; report when no unlabeled pixel backs it up
    if (2 in uniqueEdge): 
        tagEdge = True
        if not (0 in uniqueLabel): print('Error - Edge label was found but was not present natively for: ', index)
    
    #NOTE(review): this reports every patch that has both benign pixels and a transferred boundary annotation - confirm the intended condition
    if tagBenign and (2 in uniqueBoundary): print('Error - Boundary label was found but was not present natively for: ', index)
    
    #Benign and malignant pixels together make the patch a boundary patch
    if tagBenign and tagMalignant: tagBoundary = True
    
    #Convert the flags to 'Y'/'N' codes
    #NOTE(review): stored order is Label, Boundary, Edge while the metadata columns are ordered Label, Edge, Boundary,
    #and the source CSV codes Label as 'N'/'T' rather than 'N'/'Y' - confirm before exporting
    tagMalignant = 'Y' if tagMalignant else 'N'
    tagEdge = 'Y' if tagEdge else 'N'
    tagBoundary = 'Y' if tagBoundary else 'N'
    patchLabels.append([tagMalignant, tagBoundary, tagEdge])
patchLabels = np.asarray(patchLabels)


Error - Edge label was found but was not present natively for:  41
Error - Edge label was found but was not present natively for:  42
Error - Edge label was found but was not present natively for:  43
Error - Edge label was found but was not present natively for:  50
Error - Edge label was found but was not present natively for:  54
Error - Edge label was found but was not present natively for:  55
Error - Edge label was found but was not present natively for:  56
Error - Edge label was found but was not present natively for:  57
Error - Edge label was found but was not present natively for:  58
Error - Edge label was found but was not present natively for:  70
Error - Edge label was found but was not present natively for:  71
Error - Edge label was found but was not present natively for:  72
Error - Edge label was found but was not present natively for:  93
Error - Edge label was found but was not present natively for:  94
Error - Edge label was found but was not present natively for:

In [69]:
#Scratch check: distinct label codes among the metadata rows of the sample last processed by the loop
np.unique(sampleMetadata['Label'].to_numpy())

array([1], dtype=int64)

In [55]:
#Scratch check: label codes across the full metadata table
metadata['Label'].to_numpy()

array([1, 1, 1, ..., 2, 2, 2], dtype=int64)

In [None]:
#Salutations,

#Kept thinking about the need for raw data for classifier training/testing and was able to come up with a script to transfer the annotations back out.

#Please find links here for both new uncorrected and corrected patches/WSI/annotations for dataset2-4x (72-179).
#Annotations were transferred from Tianling's most recent generation (to include benign/malignant-label, edge, and boundary classes; 20% background thresholding).
#If needed for dataset1-4x and/or dataset2-10x, we will need to perform the same re-annotation process for those sets. 

#The processing for the uncorrected data matches current GUI acquisition behaviors (absolutely no flatfield or darkfield correction)
#Since no stitching is being performed (just some minor boundary cropping), this does add duplicate information among the extracted patches (from the overlap between blocks).
#Likely to have more false positives/negatives than the corrected/stitched data produces, but results will be more realistic. 

#For a more direct/fair 1-to-1 comparison against 'corrected' data, this same crop processing and patch extraction was done with BaSiC flatfield corrected block images.
#The exact implementation of the BaSiC algorithm being applied is different than that used in the previous/original sets, but matches exactly with what is available/possible in the new GUI (algorithm can only be applied to all blocks after scanning finishes).
#Primary differences:
# -More recently released implementation (different optimization algorithm and parameters)
# -Only applies to the Value channel (preserves saturation channel entirely)
# -Scales intensities to the correct/allowable range before conversion back to RGB
# -Removes rounding errors previously observed with MATLAB processing
# -Does not apply darkfield correction

#Broadly, the procedure for the actual annotation transfer was:
#1) Load patch-level annotations as assigned to corrected patches (extracted from corrected/stitched WSI)
#2) 'Fill' encompassed patch area with annotations in corrected/stitched WSI coordinate space
#3) Locate corrected block image inside corrected/stitched WSI
#4) Transfer annotations from corrected/stitched WSI coordinate space to that of the corrected block images (which match directly with uncorrected block images)
#5) Perform cropping/padding/WSI-combination/patch-extraction (for both uncorrected and corrected block images) exactly as currently done in acquisition GUI
#6) Export label data (exactly the same for both exported uncorrected and corrected patch sets)
#7) Visually confirm every block image was correctly matched from the original corrected/stitched WSI


In [70]:
# Labels based on the sample level: pure normal and pure tumor first
#Sample numbers (stored as strings) annotated as purely normal
label_Normal = {str(sampleNumber) for sampleNumber in (
    72, 74, 80, 83, 96, 98, 100, 118, 119, 120, 122, 123,
    124, 126, 127, 128, 129, 130, 131, 134, 137, 138, 140, 141,
    143, 144, 145, 146, 149, 154, 165, 169, 172, 173,
)}

#Sample numbers (stored as strings) annotated as purely tumor
label_Tumor = {str(sampleNumber) for sampleNumber in (75, 84, 88, 91, 99, 101, 105)}

In [71]:
#Scratch check: number of samples listed as purely normal
len(label_Normal)

34

In [126]:
#Sample numbers of every available WSI (text before the first underscore), in natural sort order
allSamples = np.asarray([name.split('_')[0] for name in natsort.natsorted(sampleNames)]).astype(int)

#Samples labeled at the sample level as purely normal
normalSamples = np.asarray([
    72, 74, 80, 83, 96, 98, 100, 118, 119, 120, 122, 123, 124, 126, 127, 128,
    129, 130, 131, 134, 137, 138, 140, 141, 143, 144, 145, 146, 149, 154, 165,
    169, 172, 173,
])

#Samples labeled at the sample level as purely malignant
malignantSamples = np.asarray([75, 84, 88, 91, 99, 101, 105])

#Whatever appears in neither pure list is treated as mixed
mixedSamples = np.asarray([sampleNumber for sampleNumber in allSamples if not (sampleNumber in normalSamples or sampleNumber in malignantSamples)])

#[ 73,  76,  77,  78,  79,  81,  82,  85,  86,  90,  92,  93,  94, 95,  97, 103, 104, 106, 109, 110, 111, 112, 113, 114, 125, 132, 133, 135, 136, 139, 142, 160, 166, 174, 179]

#Mixed samples with available WSI/blocks/patches, but no annotation maps...
#[ 73,  76,  77,  78,  79,  81,  82,  85,  86,  90,  92,  93,  94, 95,  97, 103, 104, 106, 109, 110, 111, 112, 113, 114, 125, 132, 133, 135, 136, 139, 142, 160, 166, 174, 179]




In [127]:
#Scratch check: number of samples classified as mixed
len(mixedSamples)

35

[73,
 75,
 76,
 77,
 78,
 79,
 81,
 82,
 84,
 85,
 86,
 88,
 90,
 91,
 92,
 93,
 94,
 95,
 97,
 99,
 101,
 103,
 104,
 105,
 106,
 109,
 110,
 111,
 112,
 113,
 114,
 125,
 132,
 133,
 135,
 136,
 139,
 142,
 160,
 166,
 174,
 179]

In [98]:
#Scratch check: all metadata rows belonging to sample number 90
metadata.loc[metadata['Sample Number'] == 90]

Unnamed: 0,Sample Number,Index,Row,Column,Label,Edge,Boundary
PS90_1_100_2800_5600.tif,90,100,2800,5600,1,1,1
PS90_1_101_2800_6000.tif,90,101,2800,6000,1,1,1
PS90_1_102_2800_6400.tif,90,102,2800,6400,1,1,1
PS90_1_103_2800_6800.tif,90,103,2800,6800,1,1,1
PS90_1_104_2800_7200.tif,90,104,2800,7200,1,1,1
...,...,...,...,...,...,...,...
PS90_1_96_2800_4000.tif,90,96,2800,4000,1,1,1
PS90_1_97_2800_4400.tif,90,97,2800,4400,1,1,1
PS90_1_98_2800_4800.tif,90,98,2800,4800,1,1,1
PS90_1_99_2800_5200.tif,90,99,2800,5200,1,1,1


In [90]:
#Scratch check: name of the sample the processing loop stopped on
sampleName

'72_1'