### 1.1 Preprocessing - Reinhard Normalization and WSI Tiling

As a first preprocessing step, all slides were color-normalized with respect to a reference image selected by an expert neuropathologist. Color normalization was performed using the method described by [Reinhard et al.](https://ieeexplore.ieee.org/document/946629).

The resulting color-normalized whole slide images were tiled using PyVips to generate 1536 x 1536 image patches.

In [1]:
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyvips as Vips
from tqdm import tqdm

from utils import vips_utils, normalize

In [2]:
# Input directories holding the whole slide images (WSIs), one per dataset split.
# Keep the trailing '/': later cells build file paths by plain string concatenation.
TRAIN_WSI_DIR = 'data/Dataset 1a Development_train/'              # WSIs in the training set
#VAL_WSI_DIR = 'data/Dataset 1b Development_validation/'           # WSIs in the validation set
TEST_WSI_DIR = 'data/box/'                                         # presumably the test-set WSIs (going by the name) — confirm

# Output directory handed to the tiling step for the normalized tiles.
SAVE_DIR = 'data/norm_tiles/'

In [3]:
# Make sure the output directory exists before any tiles are written.
# exist_ok=True makes this safe to re-run and avoids the check-then-create race.
os.makedirs(SAVE_DIR, exist_ok=True)

In [4]:
# File name of the slide whose colors serve as the reference when the
# normalizer is fitted (it is loaded from TRAIN_WSI_DIR further down).
ref_imagename = 'NA5002_2AB.svs'
# Alternative reference slide, currently disabled:
#ref_imagename = 'NA3777-02_AB.svs'

In [5]:
# Directory listings, one per dataset split (the validation split is disabled).
wsi_train = os.listdir(TRAIN_WSI_DIR)
#wsi_val = os.listdir(VAL_WSI_DIR)
wsi_test = os.listdir(TEST_WSI_DIR)

# Names to process: every entry of the test directory plus the reference slide,
# in sorted order.
#imagenames = sorted(wsi_val + wsi_train)
imagenames = sorted([*wsi_test, ref_imagename])
# Disabled special case for a slide scanned at a different magnification:
#imagenames.remove('NA5005-02_AB.svs')             # this WSI was digitalized at 40x, need resize down to 20x
#imagenames.append('NA5005-02_AB.svs')
print(imagenames)

['NA3777-02_AB.svs', 'NA4077-02_AB.svs', 'NA4092-02_AB.svs', 'NA4107-02_AB.svs', 'NA4160-02_AB.svs', 'NA4195-02_AB.svs', 'NA4256-02_AB.svs', 'NA4299-02_AB.svs', 'NA4391-02_AB.svs', 'NA4450-02_AB.svs', 'NA4463-02_AB.svs', 'NA4471-02_AB.svs', 'NA4553-02_AB.svs', 'NA4626-02_AB.svs', 'NA4672-02_AB.svs', 'NA4675-02_AB.svs', 'NA4691-02_AB.svs', 'NA4695-02_AB.svs', 'NA5002_2AB.svs']


In [6]:
%%time
# Load reference image, fit Reinhard normalizer
ref_image = Vips.Image.new_from_file(TRAIN_WSI_DIR + ref_imagename, level=0)
#ref_image = Vips.Image.new_from_file(TEST_WSI_DIR + ref_imagename, level=0)

normalizer = normalize.Reinhard()
normalizer.fit(ref_image)

CPU times: user 34min 42s, sys: 1min 15s, total: 35min 58s
Wall time: 11min 9s


In [7]:
# Normalize every slide with the fitted normalizer, tile the result into
# SAVE_DIR, and record the statistics the normalizer reports for each slide.
stats_dict = {}
# NOTE(review): the [:-1] slice skips the last name in sorted order; in the
# listing printed earlier that is the reference slide. Confirm the exclusion
# is intended and does not merely depend on how the names happen to sort.
for imagename in tqdm(imagenames[:-1]):
    # A slide may live in either directory: try the training set first and fall
    # back to the test set only when libvips fails to open the file. Catching
    # Vips.Error (rather than a bare except) lets KeyboardInterrupt and genuine
    # programming errors propagate instead of being silently retried.
    try:
        wsi_path = TRAIN_WSI_DIR + imagename
        vips_img = Vips.Image.new_from_file(wsi_path, level=0)
    except Vips.Error:
        wsi_path = TEST_WSI_DIR + imagename
        vips_img = Vips.Image.new_from_file(wsi_path, level=0)
    print("Loaded Image: " + wsi_path)

    out = normalizer.transform(vips_img)
    # Carry the source file name over to the normalized image before tiling.
    out.filename = vips_img.filename
    vips_utils.save_and_tile(out, SAVE_DIR)
    stats_dict[imagename] = normalizer.image_stats

  0%|          | 0/18 [00:00<?, ?it/s]

Loaded Image: data/box/NA3777-02_AB.svs


  6%|▌         | 1/18 [17:48<5:02:39, 1068.23s/it]

Loaded Image: data/box/NA4077-02_AB.svs


 11%|█         | 2/18 [41:55<5:15:11, 1181.99s/it]

Loaded Image: data/box/NA4092-02_AB.svs


 17%|█▋        | 3/18 [1:03:22<5:03:23, 1213.53s/it]

Loaded Image: data/box/NA4107-02_AB.svs


 22%|██▏       | 4/18 [1:20:34<4:30:24, 1158.87s/it]

Loaded Image: data/box/NA4160-02_AB.svs


 28%|██▊       | 5/18 [1:38:00<4:03:47, 1125.23s/it]

Loaded Image: data/box/NA4195-02_AB.svs


 33%|███▎      | 6/18 [1:53:46<3:34:17, 1071.48s/it]

Loaded Image: data/box/NA4256-02_AB.svs


 39%|███▉      | 7/18 [2:14:26<3:25:40, 1121.86s/it]

Loaded Image: data/box/NA4299-02_AB.svs


 44%|████▍     | 8/18 [2:32:34<3:05:16, 1111.65s/it]

Loaded Image: data/box/NA4391-02_AB.svs


 50%|█████     | 9/18 [2:49:25<2:42:14, 1081.61s/it]

Loaded Image: data/box/NA4450-02_AB.svs


 56%|█████▌    | 10/18 [3:08:40<2:27:08, 1103.54s/it]

Loaded Image: data/box/NA4463-02_AB.svs


 61%|██████    | 11/18 [3:26:00<2:06:32, 1084.64s/it]

Loaded Image: data/Dataset 1a Development_train/NA4471-02_AB.svs


 67%|██████▋   | 12/18 [3:43:08<1:46:45, 1067.51s/it]

Loaded Image: data/box/NA4553-02_AB.svs


 72%|███████▏  | 13/18 [4:05:04<1:35:11, 1142.22s/it]

Loaded Image: data/box/NA4626-02_AB.svs


 78%|███████▊  | 14/18 [4:19:11<1:10:13, 1053.41s/it]

Loaded Image: data/box/NA4672-02_AB.svs


 83%|████████▎ | 15/18 [4:31:32<47:59, 959.93s/it]   

Loaded Image: data/box/NA4675-02_AB.svs


 89%|████████▉ | 16/18 [4:48:49<32:45, 982.96s/it]

Loaded Image: data/box/NA4691-02_AB.svs


 94%|█████████▍| 17/18 [5:04:20<16:07, 967.46s/it]

Loaded Image: data/box/NA4695-02_AB.svs


100%|██████████| 18/18 [5:20:38<00:00, 970.56s/it]


In [8]:
# DISABLED: every line in this cell is commented out, so nothing here runs.
# Kept for reference: it handled the last entry of imagenames separately by
# halving its resolution before normalization and tiling.
# Resize the single 40x image down to 20x
# for imagename in tqdm(imagenames[-1:]):
#     vips_img = Vips.Image.new_from_file(TRAIN_WSI_DIR + imagename, level=0)
#     vips_img = vips_img.resize(0.5)
#     out = normalizer.transform(vips_img)
#     out.filename = vips_img.filename
#     vips_utils.save_and_tile(out, SAVE_DIR)
#     stats_dict[imagename] = normalizer.image_stats

In [9]:
# Build the per-slide statistics table in a single, re-runnable step.
# pd.DataFrame(stats_dict) puts one slide per column, so transpose to get one
# row per slide. Rebuilding from stats_dict each time keeps the cell idempotent;
# a standalone `stats = stats.transpose()` cell flips the frame back on re-run.
stats = pd.DataFrame(stats_dict).transpose()
stats.columns = ['means', 'stds']

print(stats)

                                                              means  \
NA3777-02_AB.svs  (84.87683078375069, 1.883834276086116, 6.54897...   
NA4077-02_AB.svs  (91.31315579600997, 0.7904653477819062, 0.5140...   
NA4092-02_AB.svs  (87.88871953607287, 1.1075666993212976, 0.8033...   
NA4107-02_AB.svs  (90.94294739156933, 0.5887239399026771, 0.9401...   
NA4160-02_AB.svs  (88.89899260672101, 0.9974418293237804, 0.3308...   
NA4195-02_AB.svs  (86.47197838386131, 1.6366618861228275, -0.850...   
NA4256-02_AB.svs  (87.56450328069307, 0.8650946384410637, 0.6600...   
NA4299-02_AB.svs  (86.76713213580769, 1.4506292525488813, -1.039...   
NA4391-02_AB.svs  (82.77101907232307, 1.8684124681105576, -2.024...   
NA4450-02_AB.svs  (88.854731444645, 0.6891529168668056, 0.287143...   
NA4463-02_AB.svs  (87.63791836861964, 1.2089569343004831, -2.268...   
NA4471-02_AB.svs  (91.11351936054756, 0.6167910832749283, 1.1381...   
NA4553-02_AB.svs  (89.445133374048, 0.8088347968056673, -2.10258...   
NA4626