In [2]:
#==================================================================
#Program: metadataTransfer
#Version: 1.0
#Author: David Helminiak
#Date Created: 15 April 2025
#Date Last Modified: 15 April 2025
#Description: Transfer Patch_list.xlsx to metadata_patches.csv format; use after patchDeeperExtraction.ipynb
#Was not written to handle samples with multiple sides
#Operation: Move back into main program directory before running.
#==================================================================

#Widen the rendered notebook: each entry is a <style> tag injected into the page as its own HTML output
from IPython.display import display, HTML
notebookStyleTags = [
    "<style>.container { width:100% !important; }</style>",
    "<style>.output_result { max-width:80% !important; }</style>",
]
for styleTag in notebookStyleTags:
    display(HTML(styleTag))

#Import remaining libraries
import glob
import natsort
import numpy as np
import os 
import pandas as pd


In [66]:
#Build directory and file references
#Joining with a final '' keeps a trailing path separator on every directory string,
#so filenames and glob patterns can be appended to them directly
#Alternative data directory: '.' + os.path.sep + 'DATA' + os.path.sep
dir_data = os.path.join('.', 'DATA-4-9-2025', '')
dir_patches_data = os.path.join(dir_data, 'PATCHES', '')
dir_patches_inputPatches = os.path.join(dir_patches_data, 'INPUT_PATCHES', '')
dir_patches_inputWSI = os.path.join(dir_patches_data, 'INPUT_WSI', '')
dir_patches_outputPatches = os.path.join(dir_patches_data, 'OUTPUT_PATCHES', '')
#NOTE(review): 'OUTPUT_patches_VISUALS' mixes casing unlike the sibling folder names; confirm it matches the directory on disk
dir_patches_outputVisuals = os.path.join(dir_patches_data, 'OUTPUT_patches_VISUALS', '')

#Existing label spreadsheet (input) and the metadata file to be generated (output)
file_patches_labels = os.path.join(dir_patches_inputPatches, 'Patch_list.xlsx')
file_patches_labels_new = os.path.join(dir_patches_inputPatches, 'metadata_patches.csv')

#Load the previous patch list; the sheet is read without a header row
#First column is read as 'name' and second as 'label', both forced to strings
metadata = pd.read_excel(
    file_patches_labels,
    header=None,
    names=['name', 'label'],
    converters={'name': str, 'label': str},
)
patchNamesAll_patches = np.array(metadata['name'])
patchLabelsAll_patches = np.array(metadata['label'])

#Derive the identifier used to match against patch filenames
#Drop everything from the first '.tif' onward, then keep the text following the first 'P' (up to any later 'P')
names = np.asarray([entry.split('.tif')[0].split('P')[1] for entry in patchNamesAll_patches])




In [74]:
#For every patch image, find its label and assemble metadata from its filename
#Filenames are parsed as <prefix>PS<sampleNumber>_<sampleIndex>_<row>_<column>.tif
#The first row of metadata_patches holds the column titles that are written as the first line of the CSV
metadata_patches = [['Sample', 'Index', 'Row', 'Column', 'Label']]
for sampleFolder in natsort.natsorted(glob.glob(dir_patches_inputPatches + '*')):
    #Only sub-directories are searched for patches; plain files in the input folder are ignored
    if os.path.isdir(sampleFolder):
        for patchName in natsort.natsorted(glob.glob(sampleFolder + os.path.sep + '*.tif')):
            #Split the text between 'PS' and '.tif' into its four underscore-separated fields
            sampleNumber, sampleIndex, row, column = os.path.basename(patchName).split('.tif')[0].split('PS')[1].split('_')
            sampleName = sampleNumber+'_'+sampleIndex

            #Locate the entry in the previous patch list whose extracted name matches this patch
            labelLocation = np.argwhere(names == sampleName)
            if labelLocation.shape != (1,1):
                print('Issue with: ' + patchName)
                #No matching entry: there is no label to assign, so skip this patch
                #Indexing the empty result would otherwise raise an IndexError and abort before the CSV is written
                if labelLocation.shape[0] == 0: continue

            #Exactly one match is expected; when several matched (reported above) the first one is used
            label = patchLabelsAll_patches[labelLocation[0, 0]]
            metadata_patches.append([sampleNumber, sampleIndex, row, column, label])

#Save results to disk; manually add sample side labels
#The column titles are already the first row of the data, so pandas' own header/index are not written
pd.DataFrame(metadata_patches).to_csv(file_patches_labels_new, index=False, header=False)