### Data source: https://www.bracs.icar.cnr.it/

### Import packages

In [None]:
import numpy as np
#from geojson import GeoJSON
import pandas as pd
import json
import os
import glob
import shapely
from rtree import index
from shapely.ops import cascaded_union, unary_union
from shapely.plotting import plot_polygon
from collections import Counter
import matplotlib.pyplot as plt
from openslide import OpenSlide

from tiatoolbox import utils
from tiatoolbox.wsicore import wsireader
from tiatoolbox import data
from tiatoolbox.tools import stainnorm
from tqdm import tqdm
import h5py
import cv2

from matplotlib.patches import Polygon
from matplotlib.colors import ListedColormap

from omegaconf import OmegaConf
# Read the preprocessing settings and keep only the sub-section this notebook uses
preproc_conf = OmegaConf.load("../conf/preproc.yaml")['classic_mil_on_embeddings_bag']['bracs_224_224_patches']

In [None]:
# Create the directory configured as `cv_split_dir` (no error if it already exists).
os.makedirs( preproc_conf.cv_split_dir, exist_ok=True ) 

In [None]:
# Display the spreadsheet path under `data_root_dir` together with `cv_split_dir`.
preproc_conf.data_root_dir+'BRACS.xlsx', preproc_conf.cv_split_dir

In [None]:
# Copy BRACS.xlsx from `data_root_dir` into `cv_split_dir`.
cp_from = preproc_conf.data_root_dir+'BRACS.xlsx'
cp_to = preproc_conf.cv_split_dir
# IPython shell escape. NOTE(review): the paths are interpolated unquoted, so this
# breaks if either path contains spaces.
!cp -rp $cp_from $cp_to 

### Locate annotations

In [None]:
# Folder that is listed below for annotation files; used as a string prefix,
# so it presumably ends with a path separator (verify in the config).
annotation_folder = preproc_conf.annotation_root_dir

In [None]:
# Sorted names of all entries in the annotation folder whose name contains 'geo'
annotation_list = np.array( sorted( filter( lambda fname: 'geo' in fname, os.listdir(annotation_folder) ) ) )
annotation_list.shape, annotation_list[:10]
# (author's note: annotations exist for only 387 slides)

In [None]:
# Load the copy of BRACS.xlsx located in `cv_split_dir` and preview it.
bracs_df = pd.read_excel(preproc_conf.cv_split_dir+'BRACS.xlsx')
bracs_df.head()

##### CHECK FOR LEAKS IN BRACS SPLIT !

In [None]:
# For every patient, count how many distinct values of 'Set' its rows carry
patient_set_overlap = bracs_df.groupby('Patient Id')['Set'].nunique()

# A patient present in more than one set is a leak between partitions
leaked_patients = patient_set_overlap.loc[patient_set_overlap.gt(1)]
leaked_patients.index

In [None]:
# Show all rows that belong to patient 67.
bracs_df[ bracs_df['Patient Id'] == 67 ]

In [None]:
# Update the 'Set' column to 'Validation' for all rows where 'Patient Id' is 67
# NOTE(review): the patient id is hard-coded; re-run the leak check if the spreadsheet changes.
bracs_df.loc[bracs_df['Patient Id'] == 67, 'Set'] = 'Validation'

##### check again for leaks

In [None]:
# Recompute, per patient, the number of distinct sets after the reassignment
patient_set_overlap = bracs_df.groupby('Patient Id')['Set'].nunique()

# Anything still listed here appears in more than one set
leaked_patients = patient_set_overlap.loc[patient_set_overlap.gt(1)]
leaked_patients.index

#### extract info from their splits

In [None]:
# Expected annotation file name ('<WSI Filename>.geojson') for every row marked 'Training'
annotation_list_training_candidate = np.array( [
    wsi_name+'.geojson'
    for wsi_name, set_name in zip(bracs_df['WSI Filename'], bracs_df.Set)
    if set_name == 'Training'
] )
annotation_list_training_candidate.shape, annotation_list_training_candidate[:5]

In [None]:
# Expected annotation file name ('<WSI Filename>.geojson') for every row marked 'Validation'
annotation_list_validation_candidate = np.array( [
    wsi_name+'.geojson'
    for wsi_name, set_name in zip(bracs_df['WSI Filename'], bracs_df.Set)
    if set_name == 'Validation'
] )
annotation_list_validation_candidate.shape, annotation_list_validation_candidate[:5]

In [None]:
# Expected annotation file name ('<WSI Filename>.geojson') for every row marked 'Testing'
annotation_list_test_candidate = np.array( [
    wsi_name+'.geojson'
    for wsi_name, set_name in zip(bracs_df['WSI Filename'], bracs_df.Set)
    if set_name == 'Testing'
] )
annotation_list_test_candidate.shape, annotation_list_test_candidate[:5]

#### Merge this info into WSI-level annotations (not ROIs), as described in their approach

We are extracting at exactly 2.5x magnification!

In [None]:
# Keep only the training candidates that are present in the listed annotation files.
# np.isin replaces np.in1d (deprecated since NumPy 2.0); for 1-D inputs the mask is identical.
annotation_list_training = annotation_list_training_candidate[ np.isin( annotation_list_training_candidate, annotation_list ) ]
annotation_list_training.shape, annotation_list_training[:5]

In [None]:
# Keep only the validation candidates that are present in the listed annotation files.
# np.isin replaces np.in1d (deprecated since NumPy 2.0); for 1-D inputs the mask is identical.
annotation_list_validation = annotation_list_validation_candidate[ np.isin( annotation_list_validation_candidate, annotation_list ) ]
annotation_list_validation.shape, annotation_list_validation[:5]

In [None]:
# Keep only the test candidates that are present in the listed annotation files.
# np.isin replaces np.in1d (deprecated since NumPy 2.0); for 1-D inputs the mask is identical.
annotation_list_test = annotation_list_test_candidate[ np.isin( annotation_list_test_candidate, annotation_list ) ]
annotation_list_test.shape, annotation_list_test[:5]

### Test process

In [None]:
# Load one training annotation file (hard-coded index 260) to explore its structure.
json_fname = annotation_folder + annotation_list_training[260]

# NOTE(review): this rebinds the name `data`, which the import cell bound to `tiatoolbox.data`.
with open(json_fname) as f:
    data = json.load(f)

data.keys()

In [None]:
# Display the path of the example annotation file.
json_fname

In [None]:
# Polygon built from the first coordinate ring of the first feature, with coordinates floor-divided by 16.
shapely.Polygon(np.array(data['features'][0]['geometry']['coordinates'][0])//16)

In [None]:
# Classification name stored on the first feature.
data['features'][0]['properties']['classification']['name']

In [None]:
# Gather geometry type, first coordinate ring and class name of every feature in the example file
data_type_all, data_coords_all, data_label_all = [], [], []
for feature in data['features']:
    geometry = feature['geometry']
    data_type = geometry['type']
    data_type_all.append(data_type)
    data_coords = geometry['coordinates'][0]
    data_coords_all.append(data_coords)
    data_label = feature['properties']['classification']['name']
    data_label_all.append(data_label)

### Automate

In [None]:
# (raw annotation name, harmonised class name) pairs, kept in the original order
_annot_pairs = [
    ('ADH', 'ADH'),
    ('ADH-sure', 'ADH'),
    ('BENIGN', 'NORMAL'),
    ('Benign sure', 'NORMAL'),
    ('Benign-sure', 'NORMAL'),
    ('DCIS', 'DCIS'),
    ('DCIS-sure', 'DCIS'),
    ('FEA', 'FEA'),
    ('FEA-sure', 'FEA'),
    ('MALIGNANT', 'INVASIVE-CARCINOMA'),
    ('Malignant', 'INVASIVE-CARCINOMA'),
    ('Malignant-sure', 'INVASIVE-CARCINOMA'),
    ('Pathologica benign', 'PATHOLOGICAL-BENIGN'),
    ('Pathological-benign', 'PATHOLOGICAL-BENIGN'),
    ('Pathological-benign (Benign-sure)', 'PATHOLOGICAL-BENIGN'),
    ('UDH', 'UDH'),
    ('UDH-sure', 'UDH'),
]

annot_bracs_old = np.array([old_name for old_name, _ in _annot_pairs])
annot_bracs_new = np.array([new_name for _, new_name in _annot_pairs])

# Lookup used to translate a raw annotation name into its harmonised class
annot_map = dict(zip(annot_bracs_old, annot_bracs_new))

In [None]:
def extract_json( abspath ):
    """Read one GeoJSON annotation file and return its polygons and mapped labels.

    For every feature, the first coordinate ring is floor-divided by 16 and
    turned into a `shapely.Polygon`; the feature's classification name is
    translated through `annot_map`.

    A feature whose geometry cannot be turned into a polygon is reported and
    skipped *together with its label*, so the two returned lists always have
    the same length and stay index-aligned.

    Parameters
    ----------
    abspath : str
        Path of the GeoJSON file.

    Returns
    -------
    (list, list)
        Polygons and their mapped labels, in file order.

    Raises
    ------
    KeyError
        If a feature has no classification name or the name is not in `annot_map`.
    """
    with open(abspath) as f:
        data = json.load(f)

    data_polygon_all = []
    data_label_all = []
    for feature in data['features']:
        try:
            data_polygon = shapely.Polygon(np.array(feature['geometry']['coordinates'][0])//16)
        except (ValueError, TypeError, KeyError, IndexError, shapely.errors.GEOSException) as err:
            # Report and skip the whole feature; appending only the label would
            # desynchronise polygons and labels.
            print(f'Skipping feature without a usable polygon ({err!r}):', feature)
            continue

        data_label = annot_map[feature['properties']['classification']['name']] # map annots

        data_polygon_all.append(data_polygon)
        data_label_all.append(data_label)

    return data_polygon_all, data_label_all

In [None]:
def give_back_labels_and_polygons_for_set(annotation_list):
    """Parse every annotation file of a set.

    Each name in `annotation_list` is appended to `annotation_folder` and parsed
    with `extract_json`. The per-file label lists and polygon lists are kept
    separate (one entry per file, not concatenated). The lengths of both
    outer lists are printed before returning.

    Returns
    -------
    (list, list)
        `labels_all`, `polygons_all` — note the order: labels first.
    """
    labels_all, polygons_all = [], []

    for annotation_name in annotation_list:
        polygons, labels = extract_json(annotation_folder+annotation_name)

        # every polygon must come with exactly one label
        assert len(polygons) == len(labels)

        labels_all.append(labels)
        polygons_all.append(polygons)

    print( len(labels_all), len(polygons_all) )

    return labels_all, polygons_all

### Get whole dataset and each partition according to BRACS paper ! 

In [None]:
# Parse every listed annotation file, regardless of partition.
labels_all, polygons_all = give_back_labels_and_polygons_for_set(annotation_list) # whole set without partition !

In [None]:
# Parse the annotation files of each partition separately.
labels_training, polygons_training = give_back_labels_and_polygons_for_set(annotation_list_training) # training set !
labels_validation, polygons_validation = give_back_labels_and_polygons_for_set(annotation_list_validation) # validation set !
labels_test, polygons_test = give_back_labels_and_polygons_for_set(annotation_list_test) # test set !

### Check labels

In [None]:
# Number of annotated regions per class over all annotation files.
Counter( np.concatenate(labels_all) )

In [None]:
# Same count, summed over the three partitions, for comparison with the whole-set count.
Counter( np.concatenate(labels_training) ) + Counter( np.concatenate(labels_validation) ) + Counter( np.concatenate(labels_test) )

### Check images and annotations

In [None]:
# Annotation files with more than 50 annotated regions.
# The mask is built from labels_all itself (one entry per file in annotation_list)
# instead of a hard-coded count, so its length always matches the indexed array.
annotation_list[ [ len(slide_labels) > 50 for slide_labels in labels_all ] ]

In [None]:
# Annotation file at index 384 and its number of annotated regions.
annotation_list[384], len(labels_all[384]), 

In [None]:
# Open BRACS_773.svs and read pyramid level 2 in full as an RGB array.
wsi = OpenSlide(preproc_conf.WSI_root_dir+'BRACS_773.svs')
print(wsi.level_dimensions[2])
img = np.array( wsi.read_region((0,0), 2, wsi.level_dimensions[2]).convert('RGB') )

In [None]:
# Add a subplot
# figsize is (width, height) whereas img.shape is (rows, cols, channels):
# the column count drives the width and the row count drives the height.
# max(1, ...) keeps the figure valid for images smaller than 200 px.
fig = plt.figure(figsize=(max(1, img.shape[1]//200), max(1, img.shape[0]//200)))
ax = fig.add_subplot(111)

#transposed_image = img_to_vis.transpose(Image.TRANSPOSE)
ax.imshow(img)
ax.axis('off')

# Outline every annotation polygon of the file at index 384
for pol in polygons_all[384]:
    plot_polygon(pol, ax, edgecolor='blue')


#plt.savefig('save_bracs_slides_'+wsi.properties['aperio.Filename'], dpi=100)

#### Plotting big images

In [None]:
# Assuming you have the 'labels_all' array with string labels
# Function to create a colormap based on unique string labels
#def create_color_map(labels):
#    unique_labels = np.unique(labels)
#    num_colors = len(unique_labels)
#    color_map = plt.cm.get_cmap('Set1', num_colors)  # You can choose any colormap you like
#    label_to_color = {label: color_map(idx) for idx, label in enumerate(unique_labels)}
#    return label_to_color

def create_color_map(labels):
    """Map every distinct label to a colour from a fixed palette.

    The distinct labels are taken in the sorted order returned by `np.unique`
    and assigned palette entries in that order; the palette is reused
    cyclically when there are more labels than colours.

    Parameters
    ----------
    labels : array-like
        Labels, possibly with repetitions.

    Returns
    -------
    dict
        `{label: hex colour string}`; empty for empty input.
    """
    # Hand-picked colors that are visible on purple-red-pink background
    good_colors = ['#61d2ff', '#ff7700', '#00ffaa', '#1687f7', '#02c415','#aa00ff', '#b8c202' ]

    return {
        label: good_colors[idx % len(good_colors)]
        for idx, label in enumerate(np.unique(labels))
    }

# One colour per class, derived from every label found in the annotations
label_to_color = create_color_map(np.concatenate(labels_all))

def plot_polygon_without_nodes(polygon, ax, edgecolor):
    """Draw the exterior of `polygon` on `ax` as a half-transparent patch filled with `edgecolor`."""
    xs, ys = polygon.exterior.xy
    outline = np.column_stack((xs, ys))
    ax.add_patch(Polygon(outline, edgecolor=edgecolor, facecolor=edgecolor, linewidth=3, alpha=0.5))

# Single-axes canvas
fig = plt.figure(figsize=(20, 20))  # Adjust the width and height as desired
ax = fig.add_subplot(111)

# Read pyramid level 2 of the slide in full
wsi = OpenSlide(preproc_conf.WSI_root_dir+'BRACS_773.svs')
print(wsi.level_dimensions[2])
img = np.array( wsi.read_region((0,0), 2, wsi.level_dimensions[2]).convert('RGB') )

ax.imshow(img)
ax.axis('off')

# Overlay each annotation of the file at index 384 in the colour of its class
for pol, label in zip(polygons_all[384], labels_all[384]):
    color = label_to_color[label]
    plot_polygon_without_nodes(pol, ax, edgecolor=color)

# Save as raster and as vector graphics
_out_stem = 'save_bracs_slides_' + wsi.properties['aperio.Filename']
plt.savefig(_out_stem+'.png', dpi=300)
plt.savefig(_out_stem+'.svg', dpi=300)

In [None]:
# Annotation file at index 358 and its number of annotated regions.
annotation_list[358], len(labels_all[358]), 

In [None]:
# Display the class-to-colour mapping.
label_to_color

In [None]:
def plot_polygon_without_nodes(polygon, ax, edgecolor):
    """Draw the exterior of `polygon` on `ax` as a half-transparent patch filled with `edgecolor`."""
    xs, ys = polygon.exterior.xy
    outline = np.column_stack((xs, ys))
    ax.add_patch(Polygon(outline, edgecolor=edgecolor, facecolor=edgecolor, linewidth=3, alpha=0.5))

# Single-axes canvas
fig = plt.figure(figsize=(20, 20))  # Adjust the width and height as desired
ax = fig.add_subplot(111)

# Read pyramid level 2 of the slide in full
wsi = OpenSlide(preproc_conf.WSI_root_dir+'BRACS_311.svs')
print(wsi.level_dimensions[2])
img = np.array( wsi.read_region((0,0), 2, wsi.level_dimensions[2]).convert('RGB') )
ax.imshow(img)
ax.axis('off')

# Overlay each annotation of the file at index 358 in the colour of its class
for pol, label in zip(polygons_all[358], labels_all[358]):
    color = label_to_color[label]
    plot_polygon_without_nodes(pol, ax, edgecolor=color)

# Save as raster and as vector graphics
_out_stem = 'save_bracs_slides_' + wsi.properties['aperio.Filename']
plt.savefig(_out_stem+'.png', dpi=300)
plt.savefig(_out_stem+'.svg', dpi=300)

### Read out reference patches for each class

In [None]:
# Normal:
# Read one 3584 x 3584 (224 * 2**4) region from level 0 of BRACS_311.svs and show it.
wsi = OpenSlide(preproc_conf.WSI_root_dir+'BRACS_311.svs')
x_wsi, y_wsi = wsi.level_dimensions[0] # get level0 coordinate bounds
print(x_wsi, y_wsi)
x_start = 60000
print(x_start, 46400)
# The y offset 46400 is subtracted from the slide height, i.e. it is counted from the bottom edge.
y_start = np.abs( y_wsi - 46400 )
print(x_start, y_start)
# NOTE(review): the region below is read at level 0; "level2" presumably refers to the
# pyramid level with the matching 16x downsample — confirm.
x_width, y_height = 224*2**4, 224*2**4 # read out from level2
print(x_width, y_height)
img = np.array(wsi.read_region( ( x_start, y_start), 0, (x_width, y_height) ).convert('RGB') ) #.rotate(180)
plt.imshow(img)

In [None]:
# Display the class-to-colour mapping again (the colours are used for the captions below).
label_to_color

In [None]:
def plot_image(ax, image, text, color):
    """Show `image` on `ax` without axes and write a bold, centred caption `text` in `color` just below it."""
    caption_style = dict(
        transform=ax.transAxes,
        color=color,
        fontsize=50,
        horizontalalignment='center',
        verticalalignment='center',
        weight="bold",
    )
    ax.imshow(image)
    # (0.5, -0.1) in axes coordinates: horizontally centred, slightly below the image
    ax.text(0.5, -0.1, text, **caption_style)
    ax.axis('off')

# Figure with one reference patch per class.
# Changes versus the previous version: the slide that was opened up front and never
# used is gone, every slide handle is closed after its region is read, and the
# output directory is created before saving.

# Side length of each patch in level-0 pixels
x_width, y_height = 224 * 2 ** 4, 224 * 2 ** 4

# Slide path, (x, y-from-bottom) offset, caption and caption colour of every patch
images_info = [
    {'path': preproc_conf.WSI_root_dir+'BRACS_311.svs',
     'offset': (60000, 46400), 'label': 'N', 'color': label_to_color['NORMAL'] },
    {'path': preproc_conf.WSI_root_dir+'BRACS_311.svs',
     'offset': (60000, 50500), 'label': 'PB', 'color': label_to_color['PATHOLOGICAL-BENIGN']},
    {'path': preproc_conf.WSI_root_dir+'BRACS_773.svs',
     'offset': (75600, 13200), 'label': 'UDH', 'color': label_to_color['UDH']},
    {'path': preproc_conf.WSI_root_dir+'BRACS_311.svs',
     'offset': (60000, 32000), 'label': 'FEA', 'color': label_to_color['FEA']},
    {'path': preproc_conf.WSI_root_dir+'BRACS_1911.svs',
     'offset': (34000, 10000), 'label': 'ADH', 'color': label_to_color['ADH']},
    {'path': preproc_conf.WSI_root_dir+'BRACS_773.svs',
     'offset': (62000, 14000), 'label': 'DCIS', 'color': label_to_color['DCIS']},
    {'path': preproc_conf.WSI_root_dir+'BRACS_773.svs',
     'offset': (62000, 25000), 'label': 'IC', 'color': label_to_color['INVASIVE-CARCINOMA']}
]

# One subplot per entry of images_info
fig, axes = plt.subplots(1, len(images_info), figsize=(30, 12))

# Plot each image
for ax, image_info in zip(axes, images_info):
    img_path = image_info['path']
    img_label = image_info['label']
    x_start, y_start = image_info['offset']
    color = image_info['color']

    wsi = OpenSlide(img_path)  # Load the specific image
    try:
        # The stored y offset is subtracted from the level-0 slide height
        img = np.array(wsi.read_region((x_start, wsi.level_dimensions[0][1] - y_start), 0, (x_width, y_height)).convert('RGB'))
    finally:
        wsi.close()  # release the slide handle once the region is in memory

    plot_image(ax, img, img_label, color)

plt.tight_layout()
#plt.show()
os.makedirs('paper_figures', exist_ok=True)  # savefig does not create missing directories
plt.savefig('paper_figures/save_bracs_patches_7classes.png', dpi=300)
plt.savefig('paper_figures/save_bracs_patches_7classes.svg', dpi=300)

In [None]:
# Display the offset and patch size that are currently bound (values left by the preceding cell).
(x_start, y_start), x_width, y_height

#### United figure

#### PLOT IDEA into paper:

- extract few of these images with and without stainnorm
- put the polygons of the extracted patches as grid into these plots

### Load all slides with annotation

In [None]:
# NOTE(review): os.path.join receives a single, already concatenated argument here,
# so it returns the string unchanged.
slide_dir = os.path.join(preproc_conf.WSI_root_dir+'BRACS_WSI/')

In [None]:
# Directory with the pre-extracted arrays and the sorted names in it that contain '_level4.npy'.
data_dir = os.path.join(preproc_conf.img_dir_lvl4)
slide_list = np.array(sorted([j for j in os.listdir(data_dir) if '_level4.npy' in j]))
slide_list[:5], slide_list.shape

In [None]:
def get_slide_file_path(slide_id):
    """Return the first path matching `<data_dir>/<slide_id>_level4.npy`.

    Raises IndexError when nothing matches.
    """
    matches = glob.glob(os.path.join(data_dir, f'{slide_id}_level4.npy'))
    return matches[0]

In [None]:
# Cut the '.geojson' suffix off explicitly: str.strip('.geojson') removes any of the
# characters '.', 'g', 'e', 'o', 'j', 's', 'n' from both ends and can eat into the slide id.
get_slide_file_path( annotation_list[0].split('.geojson')[0] )

In [None]:
# Same path built by plain concatenation; relies on data_dir ending with a path separator.
current_filename = data_dir+annotation_list[0].split('.geojson')[0]+'_level4.npy'

In [None]:
def intersect_annots_with_patches( patch_polygons, annot_polygons ):
    """Intersect every patch with the annotations whose bounding box touches it.

    An R-tree over the annotation bounding boxes selects, for each patch, the
    candidate annotations; those are merged with `unary_union` and intersected
    with the patch. Each intersection is computed once and reused for both
    outputs.

    Parameters
    ----------
    patch_polygons : iterable of shapely geometries
        Patches to test.
    annot_polygons : indexable sequence of shapely geometries
        Annotation polygons.

    Returns
    -------
    (list, list)
        The intersection geometries and their areas, one entry per patch and
        in patch order.
    """
    # Populate R-tree index with the bounds of the annotation polygons
    idx = index.Index()
    for pos, cell in enumerate(annot_polygons):
        idx.insert(pos, cell.bounds)

    intersections_list_area = []
    intersections_list = []

    for patch in patch_polygons:
        # Merge only the annotations whose bounding box overlaps the patch
        merged_region = unary_union([annot_polygons[pos] for pos in idx.intersection(patch.bounds)])
        # Exact geometric intersection, computed once
        overlap = patch.intersection(merged_region)
        intersections_list_area.append(overlap.area)
        intersections_list.append(overlap)

    return intersections_list, intersections_list_area

In [None]:
def intersect_annots_with_patches_notree( patch_polygons, annot_polygons ):
    """Intersect every patch with the union of all annotations (no spatial index).

    All annotation polygons are merged once; every patch is then intersected
    with that merged region. If an intersection fails, the failure is reported
    and an empty polygon (area 0) is recorded, so both returned lists keep one
    entry per patch and stay index-aligned with `patch_polygons`.

    Parameters
    ----------
    patch_polygons : iterable of shapely geometries
        Patches to test.
    annot_polygons : iterable of shapely geometries
        Annotation polygons.

    Returns
    -------
    (list, list)
        The intersection geometries and their areas, in patch order.
    """
    # merge all annotation polygons into one geometry
    merged_region = unary_union(annot_polygons)

    intersections_list_area = []
    intersections_list = []

    for patch in patch_polygons:
        try:
            overlap = patch.intersection(merged_region)
        except shapely.errors.GEOSException as err:
            # Best effort: keep going, but leave a placeholder instead of silently
            # dropping the patch (which would shift every later index).
            print(f'Intersection failed for patch {patch}: {err}')
            overlap = shapely.Polygon()
        intersections_list_area.append(overlap.area)
        intersections_list.append(overlap)

    return intersections_list, intersections_list_area

In [None]:
# Intersect the first polygon of file 100 with the union of all polygons of that file.
polygons_all[100][0].intersection( unary_union( polygons_all[100] ) )

In [None]:
def generate_patches(annotation_list_current_set, labels_current_set, polygons_current_set, patch_size=224):
    """Cut annotated patches out of the pre-extracted slide arrays of one set.

    For every annotation file the matching `<slide_id>_level4.npy` array is
    loaded through `get_slide_file_path`, covered with a regular grid of
    `patch_size` x `patch_size` cells (incomplete border cells are dropped),
    and, separately for every distinct label of that slide, all grid cells
    whose intersection with the annotations of that label has non-zero area
    are copied out. A cell overlapped by annotations of several labels is
    therefore emitted once per label.

    NOTE(review): assumes the polygons are expressed in the pixel coordinates
    of the loaded array — confirm against the annotation preprocessing.

    Parameters
    ----------
    annotation_list_current_set : sequence of str
        Annotation file names (`<slide_id>.geojson`).
    labels_current_set : sequence of sequences
        Labels per file, aligned with `annotation_list_current_set`.
    polygons_current_set : sequence of sequences
        Polygons per file, aligned with the labels.
    patch_size : int, optional
        Side length of the grid cells in pixels (default 224).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        uint8 patches of shape (N, patch_size, patch_size, 3) and the N labels.
        Both are empty (N == 0) when nothing was extracted.
    """
    suffix = '.geojson'
    container_all = []
    container_labels_all = []

    for n in tqdm( range( len(annotation_list_current_set) ) ):
        # Cut the suffix explicitly; str.strip('.geojson') would strip a character
        # set from both ends and could damage the slide id.
        annotation_name = str(annotation_list_current_set[n])
        slide_id = annotation_name[:-len(suffix)] if annotation_name.endswith(suffix) else annotation_name

        current_img_np = np.load(get_slide_file_path(slide_id))
        current_label = np.array(labels_current_set[n])
        current_annots = np.array(polygons_current_set[n])
        current_ydim, current_xdim = current_img_np.shape[:2]

        # regular grid of complete cells: shapely boxes plus their top-left corners
        grid_cells_all = []
        grid_cells_all_np = []
        for x in range(0, current_xdim//patch_size*patch_size, patch_size):
            for y in range(0, current_ydim//patch_size*patch_size, patch_size):
                grid_cells_all.append(shapely.geometry.box(x, y, x+patch_size, y+patch_size))
                grid_cells_all_np.append((x, y))
        # reshape keeps the (n_cells, 2) layout even when the image is smaller than one cell
        grid_cells_all_np = np.array(grid_cells_all_np, dtype=int).reshape(-1, 2)

        # handle every annotation type present in the current slide on its own
        for current_uq in np.unique(current_label):
            filt_uq = current_label == current_uq

            _, intersections_list_area = intersect_annots_with_patches( grid_cells_all, current_annots[filt_uq] )

            # top-left corners of the cells with any overlap at all
            topleft_read_from = grid_cells_all_np[np.nonzero(intersections_list_area)[0]]

            container = np.empty( (topleft_read_from.shape[0], patch_size, patch_size, 3), dtype=np.uint8 )
            container_labels = np.repeat(current_uq, container.shape[0])
            for p, (x0, y0) in enumerate(topleft_read_from):
                # arrays are indexed [row, column], i.e. [y, x]
                container[p] = current_img_np[y0:y0+patch_size, x0:x0+patch_size, :]
            container_all.append(container)
            container_labels_all.append(container_labels)

    if not container_all:
        # np.concatenate rejects an empty list; return well-formed empty arrays instead
        return np.empty((0, patch_size, patch_size, 3), dtype=np.uint8), np.empty((0,), dtype=str)

    container_all = np.concatenate(container_all)
    container_labels_all = np.concatenate(container_labels_all)

    return container_all, container_labels_all

### Do extraction and stratification according to BRACS paper and their splits

In [None]:
# Extract the annotated patches of the training partition.
container_training, container_training_labels = generate_patches(annotation_list_training, labels_training, polygons_training )
container_training.shape, container_training_labels.shape

In [None]:
# Extract the annotated patches of the validation partition.
container_validation, container_validation_labels = generate_patches(annotation_list_validation, labels_validation, polygons_validation)
container_validation.shape, container_validation_labels.shape

In [None]:
# Extract the annotated patches of the test partition.
container_test, container_test_labels = generate_patches(annotation_list_test, labels_test, polygons_test)
container_test.shape, container_test_labels.shape

In [None]:
# was when there was leakage at patient 67: 6099 + 606 + 1058
# NOTE(review): presumably training + validation + test patch counts — confirm against the shapes above.
6067 + 638 + 1058

### Save nonorm

In [None]:
# Output folder for the patch arrays; used as a string prefix below, so it must end with a path separator.
base_folder = preproc_conf.img_dir_lvl4
base_folder

In [None]:
%%time
# Write patches and labels of every partition; (file name, array) pairs in the original order
for _fname, _array in (
    ('bracs_level4_regions_224_training_data.npy', container_training),
    ('bracs_level4_regions_224_training_label.npy', container_training_labels),
    ('bracs_level4_regions_224_validation_data.npy', container_validation),
    ('bracs_level4_regions_224_validation_label.npy', container_validation_labels),
    ('bracs_level4_regions_224_test_data.npy', container_test),
    ('bracs_level4_regions_224_test_label.npy', container_test_labels),
):
    np.save(base_folder+_fname, _array)

In [None]:
# Per-class patch counts of each partition, and their sum over all partitions.
training_info, validation_info, test_info = Counter( container_training_labels ), Counter( container_validation_labels ), Counter( container_test_labels )
training_info + validation_info + test_info

In [None]:
# Per-class patch counts, shown separately for training, validation and test.
Counter( container_training_labels ), Counter( container_validation_labels ), Counter( container_test_labels )

In [None]:
# Class histogram of every partition
_counts_training = Counter( container_training_labels )
_counts_validation = Counter( container_validation_labels )
_counts_test = Counter( container_test_labels )

# Number of patches per partition
total_training = sum(_counts_training.values())
total_validation = sum(_counts_validation.values())
total_test = sum(_counts_test.values())

# Share of every class within its partition
ratios_training = {label: count / total_training for label, count in _counts_training.items()}
ratios_validation = {label: count / total_validation for label, count in _counts_validation.items()}
ratios_test = {label: count / total_test for label, count in _counts_test.items()}

# Print the shares as percentages, one partition after the other
for _header, _ratios in (
    ("Training Set Ratios:", ratios_training),
    ("\nValidation Set Ratios:", ratios_validation),
    ("\nTest Set Ratios:", ratios_test),
):
    print(_header)
    for label, ratio in _ratios.items():
        print(f"{label}: {ratio * 100:.2f}%")

In [None]:
# Spot check: training patch 100 and its label.
plt.imshow( container_training[100]), container_training_labels[100]

In [None]:
# Spot check: training patch 1000 and its label.
plt.imshow(container_training[1000]), container_training_labels[1000]

In [None]:
# Spot check: training patch 2000 and its label.
plt.imshow(container_training[2000]), container_training_labels[2000]

### Save Macenko norm

#### this comes from nightingale 

#### norm on bracs

#### Norm only on bracs training

In [None]:
# Per-patch colour statistics of the training set: columns 0-2 hold the mean and
# columns 3-5 the standard deviation of image channels 0, 1 and 2 (same column
# order as before).
# The array is float64: the previous uint8 storage truncated every mean/std to an
# integer, which coarsened the distances used to pick the reference patch.
stats_array_slides = np.zeros( (container_training.shape[0], 6), dtype=np.float64)

# iterate over all images in training set
for i in tqdm(range(container_training.shape[0])):
    img_curr = container_training[i]

    # flatten the spatial axes so that each column is one channel
    pixels = img_curr.reshape(-1, img_curr.shape[-1])
    stats_array_slides[i, :3] = pixels.mean(axis=0)
    stats_array_slides[i, 3:] = pixels.std(axis=0)

stats_array_all = stats_array_slides
#np.save('stats_array_all_reference_image_patches_level4_224_224_3.npy', stats_array_all)

#### Norm on bracs training and validation

In [None]:
# First and last five rows of the per-patch statistics.
stats_array_all[:5], stats_array_all[-5:]

In [None]:
# Column-wise average of the statistics, divided by 255.
stats_array_all.mean(0) / 255

In [None]:
# Column-wise median of the per-patch statistics.
median_values = np.median(stats_array_all, axis=0)

In [None]:
# Euclidean distance of every patch's statistics vector from the median vector.
distances = np.linalg.norm(stats_array_all - median_values, axis=1)

In [None]:
# Index of the patch whose statistics are closest to the median.
reference_image_idx = np.argmin(distances)
reference_image_idx # this index falls into first part, into the training container

In [None]:
# Take that training patch as the stain reference and show its label.
reference_image = container_training[reference_image_idx]
container_training_labels[reference_image_idx]

In [None]:
# Display the selected reference patch.
plt.imshow(reference_image)

In [None]:
# Store the reference patch in the current working directory.
np.save('reference_image_patches_level4_224_224_3_bracs.npy', reference_image)

#### this comes from bracs training set

In [None]:
# Reload the saved reference patch and fit a Macenko stain normaliser to it.
reference_image = np.load('reference_image_patches_level4_224_224_3_bracs.npy')
stain_normalizer = stainnorm.MacenkoNormalizer()
stain_normalizer.fit(reference_image)

In [None]:
# Stain-normalise every training patch into a fresh uint8 array of the same shape
container_training_normed = np.zeros(container_training.shape, dtype=np.uint8)

for c, _patch in enumerate(tqdm(container_training)):
    # a copy is handed over so the raw patch array is never passed to the normaliser
    container_training_normed[c] = stain_normalizer.transform(_patch.copy())

In [None]:
# Stain-normalise every validation patch into a fresh uint8 array of the same shape
container_validation_normed = np.zeros(container_validation.shape, dtype=np.uint8)

for c, _patch in enumerate(tqdm(container_validation)):
    # a copy is handed over so the raw patch array is never passed to the normaliser
    container_validation_normed[c] = stain_normalizer.transform(_patch.copy())

In [None]:
# Stain-normalise every test patch into a fresh uint8 array of the same shape
container_test_normed = np.zeros(container_test.shape, dtype=np.uint8)

for c, _patch in enumerate(tqdm(container_test)):
    # a copy is handed over so the raw patch array is never passed to the normaliser
    container_test_normed[c] = stain_normalizer.transform(_patch.copy())

In [None]:
%%time
#was before: np.save(base_folder+'bracs_level4_regions_224_data_macenkonorm_bracs.npy', container_all_normed)

# Write the stain-normalised patches of each partition next to the raw ones.
np.save(base_folder+'bracs_level4_regions_224_training_data_macenkonorm_bracs.npy', container_training_normed)
np.save(base_folder+'bracs_level4_regions_224_validation_data_macenkonorm_bracs.npy', container_validation_normed)
np.save(base_folder+'bracs_level4_regions_224_test_data_macenkonorm_bracs.npy', container_test_normed)

In [None]:
# 3 x 2 grid: raw training patch on the left, its normalised version on the right
fig, ax = plt.subplots(3, 2, figsize=(6, 9))

for i, _patch_idx in enumerate((11, 111, 205)):
    ax[i, 0].imshow(container_training[_patch_idx])
    ax[i, 1].imshow(container_training_normed[_patch_idx])
    for j in range(2):
        ax[i, j].axis('off')  # hide ticks and frame

# Nearly remove the gaps between the panels
plt.subplots_adjust(wspace=0.02, hspace=0.02)

In [None]:
# 3 x 2 grid: raw test patch on the left, its normalised version on the right
fig, ax = plt.subplots(3, 2, figsize=(6, 9))

for i, _patch_idx in enumerate((11, 105, 205)):
    ax[i, 0].imshow(container_test[_patch_idx])
    ax[i, 1].imshow(container_test_normed[_patch_idx])
    for j in range(2):
        ax[i, j].axis('off')  # hide ticks and frame

# Nearly remove the gaps between the panels
plt.subplots_adjust(wspace=0.02, hspace=0.02)

In [None]:
# Read the saved raw and normalised training arrays back from disk and compare their shapes.
train_nonormed_npy_for_testing = np.load(base_folder+'bracs_level4_regions_224_training_data.npy')
train_normed_npy_for_testing = np.load(base_folder+'bracs_level4_regions_224_training_data_macenkonorm_bracs.npy')
train_nonormed_npy_for_testing.shape, train_normed_npy_for_testing.shape

In [None]:
# 3 x 2 grid from the reloaded files: raw patch on the left, normalised on the right
fig, ax = plt.subplots(3, 2, figsize=(6, 9))

for i, _patch_idx in enumerate((11, 105, 205)):
    ax[i, 0].imshow(train_nonormed_npy_for_testing[_patch_idx])
    ax[i, 1].imshow(train_normed_npy_for_testing[_patch_idx])
    for j in range(2):
        ax[i, j].axis('off')  # hide ticks and frame

# Nearly remove the gaps between the panels
plt.subplots_adjust(wspace=0.02, hspace=0.02)

In [None]:
# Histogram of the signed per-pixel difference between the raw and the normalised patch.
# Both arrays are cast to int16 first: subtracting uint8 arrays wraps around modulo 256,
# which would turn every negative difference into a large positive value.
plt.hist( (train_nonormed_npy_for_testing[11].astype(np.int16) - train_normed_npy_for_testing[11].astype(np.int16)).flatten() )