# 3) Norm Tiling

This notebook runs on a batch (directory) of WSI files; it is not tailored for use on a single file.

To generate confidence heatmaps, each WSI must first go through a process referred to as tiling. This process involves breaking down the large WSI into small images, or tiles (akin to applying a grid to the image), and saving these tiles to a directory in a predefined order. This notebook performs tiling for all the images in this study and applies Reinhard color normalization to each image before its tiles are saved. The color stats used in Reinhard color normalization are obtained from the entire WSI using pyvips.

*Notes: the color stats used in color normalization are calculated from the entire image, including the white space. This is a source of noise, as the white space should not be included. Also, the entire image gets normalized, even the white space, which is another source of error. In short, I would hypothesize that, because of this, the CNN is ignoring some of the color information instead of taking it into account.*

In [None]:
import sys
import os
import pyvips as Vips
import numpy as np
from tqdm import tqdm_notebook
from time import time 
from os.path import join as oj
import matplotlib.pyplot as plt
sys.path.append('../modules/')
import normalize, vips_utils, utils

In [None]:
# ***Path related parameters to check before running***
data_dir = '/mnt/Data/'  # mount data directory to this location when running Docker container

# location of data, each dataset is in its own dir
wsi_dirs = [
    oj(data_dir, 'wsi/Dataset_1a_Development_train/'),
    oj(data_dir, 'wsi/Dataset_1b_Development_validation/'),
    oj(data_dir, 'wsi/Dataset_2_Hold-out/'),
    oj(data_dir, 'wsi/Dataset_3_CERAD-like_hold-out/'),
    oj(data_dir, 'wsi/Dataset_Emory/'),
]

# corresponding saving location for each data dir in wsi_dirs (same order, one entry
# per dataset); every entry ends with '/' so all of them resolve the same way
# regardless of how downstream code builds paths from them
save_dirs = [
    oj(data_dir, 'norm_tiles/norm_tiles_dataset_1a/'),
    oj(data_dir, 'norm_tiles/norm_tiles_dataset_1b/'),
    oj(data_dir, 'norm_tiles/norm_tiles_dataset_2/'),
    oj(data_dir, 'norm_tiles/norm_tiles_dataset_3/'),
    oj(data_dir, 'norm_tiles/norm_tiles_dataset_emory/'),
]

# the two lists are paired positionally; fail loudly rather than silently dropping
# a dataset if they ever get out of sync
if len(wsi_dirs) != len(save_dirs):
    raise ValueError(
        "wsi_dirs and save_dirs must have the same length "
        "({} != {})".format(len(wsi_dirs), len(save_dirs))
    )

# some images were scanned at 40x, provide a list of these to rescale to 20x before norm tiling
rescale = [
    "NA5005-02_AB.svs", "NA4053-02_AB.svs", "NA4129-2_AB.svs", "NA4133_2_AB.svs",
    "NA4156-2_AB.svs", "NA4167-2_AB.svs", "NA4853-02_AB17-24.svs",
    "NA4953-02_AB17-24.svs", "NA4954-02_AB17-24.svs", "NA4964-02_AB17-24.svs",
    "NA4974-02_AB17-24.svs",
]

# reference image for Reinhard color norm - from Tang et al choice
ref_im_path = oj(data_dir, 'wsi/Dataset_1a_Development_train/NA5002_2AB.svs')

# Preview of the reference slide: open the file at level=2 with pyvips and convert
# it to a numpy array (via the project's utils.vips2numpy) so matplotlib can show it.
ref_thumbnail = utils.vips2numpy(Vips.Image.new_from_file(ref_im_path, level=2))

# only the file name (not the full path) goes into the plot title
ref_name = os.path.basename(ref_im_path)

# tick marks and tick labels are switched off on both axes
hidden_x_ticks = dict(axis='x', which='both', bottom=False, labelbottom=False)
hidden_y_ticks = dict(axis='y', which='both', left=False, labelleft=False)

plt.imshow(ref_thumbnail)
plt.title("Reference Image ({})".format(ref_name), fontsize=12)
plt.tick_params(**hidden_x_ticks)
plt.tick_params(**hidden_y_ticks)
plt.show()

In [None]:
# Fit the Reinhard color normalizer (custom class in the project's `normalize`
# module) on the reference slide opened at level=0.
normalizer = normalize.Reinhard()
ref_image = Vips.Image.new_from_file(ref_im_path, level=0)
normalizer.fit(ref_image)

# Bookkeeping: maps each WSI file name to normalizer.image_stats as read right after
# that WSI was transformed.
# NOTE(review): presumably transform() refreshes image_stats per image, and returns a
# fresh object each time rather than mutating one in place - confirm in modules/normalize.
stats_dict = {}

# Normalize and tile every WSI, one dataset directory at a time.
for wsi_dir, save_dir in zip(wsi_dirs, save_dirs):
    print(wsi_dir)
    os.makedirs(save_dir, exist_ok=True)

    # only the .svs files of this dataset directory are processed
    wsis = [name for name in os.listdir(wsi_dir) if name.endswith(".svs")]

    for wsi in tqdm_notebook(wsis, total=len(wsis)):
        vips_img = Vips.Image.new_from_file(oj(wsi_dir, wsi), level=0)
        filename = os.path.splitext(wsi)[0]

        # slides listed in `rescale` are halved in size before normalization
        if wsi in rescale:
            vips_img = vips_img.resize(0.5)

        # transform runs for every slide (even ones whose tiles already exist)
        # because its stats are recorded below regardless
        out = normalizer.transform(vips_img)
        out.filename = filename

        # tiling is skipped when this slide's output directory already exists and
        # holds at least one entry
        im_dir = oj(save_dir, filename)
        already_tiled = os.path.isdir(im_dir) and bool(os.listdir(im_dir))
        if not already_tiled:
            vips_utils.save_and_tile(out, save_dir)

        stats_dict[wsi] = normalizer.image_stats

# last expression of the cell: displays the collected stats in the notebook
stats_dict