In [None]:
#==================================================================
#Program: patchCheck
#Author: David Helminiak
#Date Created: 1 October 2024
#Date Last Modified: 4 October 2024
#Description: Quick visualization and count of patch data
#Operation: Run from the main RANDS program directory (move this notebook back there first); the paths used below are relative to it.
#==================================================================


#SETUP
#==================================================================

#Widen the notebook container and cap the width of rendered outputs
from IPython.display import display, HTML
for cssRule in ("<style>.container { width:100% !important; }</style>",
                "<style>.output_result { max-width:80% !important; }</style>"):
    display(HTML(cssRule))

#NOTE(review): none of the four settings below are referenced again in this notebook;
#presumably they are read by the scripts loaded in the PACKAGES section - confirm before removing any of them

#RNG seed value to ensure run-to-run consistency (-1 to disable)
manualSeedValue = 0

#Should warnings and info messages be shown during operation
debugMode = True

#Should progress bars be visualized with ascii characters
asciiFlag = False

#Specify a placeholder configuration filename
configFileName = './CONFIG_0-TEST'


#L2-1-1: ORIGINAL CLASSIFICATION MODEL; relevant portions copied from CONFIG_0-Original.py
#******************************************************************

#Folds for XGB classifier cross validation; either define them manually (e.g. [['S1', 'S3'], ['S4', 'S2']]) or specify the number of folds to generate
#Default matches folds used in prior work (https://doi.org/10.3389/fonc.2023.1179025)
#Omits 6 available samples, with folds holding: 11, 12, 12, 12, 13 samples respectively; this may have been to better balance class distribution
#Presently, all available (non-excluded) samples (not just those in manualFolds) are used for training the exported/utilized final classifier
manualFolds = [
    ['2_1', '9_3', '11_3', '16_3', '34_1', '36_2', '40_2', '54_2', '57_2', '60_1', '62_1'],
    ['17_5', '20_3', '23_3', '24_2', '28_2', '30_2', '33_3', '51_2', '52_2', '59_2', '63_3', '66_2'],
    ['12_1', '14_2', '22_3', '26_3', '35_4', '44_1', '45_1', '47_2', '49_1', '53_2', '56_2', '68_1'],
    ['4_4', '5_3', '8_1', '10_3', '25_3', '27_1', '29_2', '37_1', '42_3', '48_3', '50_1', '69_1'],
    ['7_2', '15_4', '19_2', '31_1', '43_1', '46_2', '55_2', '58_2', '61_1', '64_1', '65_1', '67_1', '70_1'],
]


#L4-1: CLASSIFICATION; copied from CONFIG_0-Original.py
#==================================================================

#Labels treated as normal/benign tissue
labelsBenign = [
    'a',  #normal adipose
    's',  #normal stroma tissue excluding adipose
    'o',  #other normal tissue including parenchyma, adenosis, lobules, blood vessels, etc.
    'normal',
]

#Labels treated as malignant tissue
labelsMalignant = [
    'd',  #IDC tumor
    'l',  #ILC tumor
    'ot',  #other tumor areas including DCIS, biopsy site, and slightly defocused tumor regions
    'tumor',
]

#Labels for tissue that is to be excluded
labelsExclude = [
    'ft',  #defocused but still visually tumor-like areas
    'f',  #severely out-of-focus areas
    'b',  #background
    'e',  #bubbles
    'exclude',
]


#PACKAGES
#==================================================================

#Load external libraries (EXTERNAL.py) and shared definitions (DEFINITIONS.py) into this notebook's namespace
#Each file is opened in a context manager so its handle is closed promptly, and compiled with its own path
#so that any traceback raised while loading points at the real file instead of '<string>'
#NOTE: exec runs the file contents verbatim; only use with the project's own trusted scripts
for _scriptPath in ("./CODE/EXTERNAL.py", "./CODE/DEFINITIONS.py"):
    with open(_scriptPath, encoding='utf-8') as _scriptFile:
        exec(compile(_scriptFile.read(), _scriptPath, 'exec'))


#DERIVED VARIABLES; relevant portions copied from ./CODE/DERIVED.py
#==================================================================

#General string labels for the benign, malignant and excluded classes
labelBenign = '0'
labelMalignant = '1'
labelExclude = '2'

#Integer values: benign/malignant are derived from their labels; background is set directly to 2
valueBenign = int(labelBenign)
valueMalignant = int(labelMalignant)
valueBackground = 2


#DIRECTORIES; relevant portions copied from ./CODE/INTERNAL.py
#==================================================================

#Global locations; the trailing empty component makes each path end with a separator
dir_data = os.path.join('.', 'DATA', '')
dir_results = os.path.join('.', 'RESULTS', '')
dir_classifier_models = os.path.join(dir_results, 'MODELS', '')

#Patch classification locations, nested under the data directory
dir_patches_data = os.path.join(dir_data, 'PATCHES', '')
dir_patches_inputPatches = os.path.join(dir_patches_data, 'INPUT_PATCHES', '')
dir_patches_inputWSI = os.path.join(dir_patches_data, 'INPUT_WSI', '')

#METADATA; relevant portions copied from ./CODE/RUN_CLASS.py
#==================================================================

#Attempt to load and process metadata for patch images and their specific WSI
#On success this defines: per-patch sample names, indices, locations and integer labels, plus derived
#patch names, patch filenames, the unique sample names and the WSI filename for each of those samples
try:
    patchSampleNames_patches, indices_patches, locations_patches, patchLabels_patches = loadMetadata_patches(dir_patches_inputPatches + 'metadata_patches.csv')
    patchLabels_patches = patchLabels_patches.astype(int)
    patchNames_patches = np.asarray([sampleName + '_' + patchIndex for sampleName, patchIndex in zip(patchSampleNames_patches, indices_patches)])
    patchFilenames_patches = np.asarray([dir_patches_inputPatches + sampleName + os.path.sep + 'PS'+sampleName+'_'+str(patchIndex)+'_'+str(location[0])+'_'+str(location[1])+'.tif' for sampleName, patchIndex, location in zip(patchSampleNames_patches, indices_patches, locations_patches)])
    sampleNames_patches = np.unique(patchSampleNames_patches)
    WSIFilenames_patches = np.asarray([dir_patches_inputWSI + sampleName + '.jpg' for sampleName in sampleNames_patches])
except Exception as loadError:
    #Catch Exception rather than everything, so that Ctrl-C/SystemExit are not swallowed
    #classifierTrain/classifierExport are never assigned in this notebook; look them up defensively so this handler
    #cannot itself fail with a NameError, while still aborting if a loaded script did define and enable them
    if globals().get('classifierTrain', False) or globals().get('classifierExport', False): sys.exit('\nError - Failed to load data needed for classifierTrain\n')
    print('\nWarning - Failed to find/load data in: ' + dir_patches_inputPatches + '\n')
    print('Reason: ' + repr(loadError))
    #Fall back to empty arrays for every name the try-branch defines, so later cells can still run
    #String/int dtypes keep later comparisons against sample names and class values elementwise
    patchSampleNames_patches, indices_patches = np.asarray([], dtype=str), np.asarray([], dtype=str)
    locations_patches, patchLabels_patches = np.empty((0, 2), dtype=int), np.asarray([], dtype=int)
    patchNames_patches, patchFilenames_patches = np.asarray([], dtype=str), np.asarray([], dtype=str)
    sampleNames_patches, WSIFilenames_patches = np.asarray([], dtype=str), np.asarray([], dtype=str)

#Load and determine sample names for all WSI (not just those needed for labeled patch images)
#Files are naturally sorted within each directory; patch-classification WSI come first
WSIFilenames_recon = natsort.natsorted(glob.glob(dir_patches_inputWSI + '*.jpg'))

#dir_recon_inputWSI is not defined in this notebook's DIRECTORIES section; only include it when a loaded
#script has defined it, instead of failing with a NameError before the checks below can run
if 'dir_recon_inputWSI' in globals(): WSIFilenames_recon += natsort.natsorted(glob.glob(dir_recon_inputWSI + '*.jpg'))
WSIFilenames_recon = np.asarray(WSIFilenames_recon)

#Strip only the trailing extension; splitting on '.jpg' would truncate names that contain '.jpg' before the extension
sampleNames_recon = np.asarray([os.path.splitext(os.path.basename(filename))[0] for filename in WSIFilenames_recon])


#CHECKS; relevant portions copied from ./CODE/RUN_CLASS.py
#==================================================================

#Summary over every WSI that has labeled patch data
numWSI_all = len(np.unique(patchSampleNames_patches))
numBenign_all = np.sum(patchLabels_patches==valueBenign)
numMalignant_all = np.sum(patchLabels_patches==valueMalignant)

print('All WSI with labeled patch data')
print('Number of WSI being considered: ', numWSI_all)
print('Number of Benign Patches: ', numBenign_all)
print('Number of Malignant Patches: ', numMalignant_all)


#Summary restricted to the samples listed in manualFolds (the paper's original cross validation)
print('\nWSI with labeled patch data, used in the original paper')

#Flatten the folds into a single array of sample names
manualFoldsSampleNames = np.concatenate(manualFolds)

#Gather the patch labels belonging to each listed sample, then join them into one array
labelsPerFoldSample = [patchLabels_patches[np.where(patchSampleNames_patches == foldSampleName)] for foldSampleName in manualFoldsSampleNames]
foldsPatchLabels = np.concatenate(labelsPerFoldSample)

#The WSI count is the number of distinct names listed in manualFolds, whether or not patches were found for them
numWSI_folds = len(np.unique(manualFoldsSampleNames))
numBenign_folds = np.sum(foldsPatchLabels==valueBenign)
numMalignant_folds = np.sum(foldsPatchLabels==valueMalignant)
print('Number of WSI being considered: ', numWSI_folds)
print('Number of Benign Patches: ', numBenign_folds)
print('Number of Malignant Patches: ', numMalignant_folds)



