### 1.1 Preprocessing - Reinhard Normalization and WSI Tiling

As a first preprocessing step, all slides were color normalized with respect to a reference image selected by an expert neuropathologist. Color normalization was performed using the method described by [Reinhard et. al](https://ieeexplore.ieee.org/document/946629).

The resulting color normalized whole slide images were tiled using PyVips to generate 1536 x 1536 images patches.

In [8]:
import os
import glob
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pyvips as Vips
from tqdm import tqdm

from utils import vips_utils, normalize

In [9]:
#TRAIN_WSI_DIR = 'data/Dataset 1a Development_train/'              # WSIs in the training set
#VAL_WSI_DIR = 'data/Dataset 1b Development_validation/'           # WSIs in the validation set
TEST_WSI_DIR = 'data/tests/'

SAVE_DIR = 'data/norm_tiles/'

In [10]:
if not os.path.exists(SAVE_DIR):
        os.makedirs(SAVE_DIR)

In [11]:
#ref_imagename = 'NA5002_2AB.svs'
ref_imagename = 'NA3777-02_AB.svs'

In [12]:
#wsi_train = os.listdir(TRAIN_WSI_DIR)
#wsi_val = os.listdir(VAL_WSI_DIR)
wsi_test = os.listdir(TEST_WSI_DIR)

#imagenames = sorted(wsi_val + wsi_train)
imagenames = sorted(wsi_test)
#imagenames.remove('NA5005-02_AB.svs')             # this WSI was digitalized at 40x, need resize down to 20x
#imagenames.append('NA5005-02_AB.svs')

In [13]:
%%time
# Load reference image, fit Reinhard normalizer
#ref_image = Vips.Image.new_from_file(TRAIN_WSI_DIR + ref_imagename, level=0)
ref_image = Vips.Image.new_from_file(TEST_WSI_DIR + ref_imagename, level=0)

normalizer = normalize.Reinhard()
normalizer.fit(ref_image)

CPU times: user 23min 57s, sys: 2min 14s, total: 26min 12s
Wall time: 2min 51s


In [14]:
stats_dict = {}
for imagename in tqdm(imagenames[:-1]):
    try:
        #vips_img = Vips.Image.new_from_file(TRAIN_WSI_DIR + imagename, level=0)
        vips_img = Vips.Image.new_from_file(TEST_WSI_DIR + imagename, level=0)
    except:
        #vips_img = Vips.Image.new_from_file(VAL_WSI_DIR + imagename, level=0)
        vips_img = Vips.Image.new_from_file(TEST_WSI_DIR + imagename, level=0)
    out = normalizer.transform(vips_img)
    out.filename = vips_img.filename
    vips_utils.save_and_tile(out, SAVE_DIR)
    stats_dict[imagename] = normalizer.image_stats


0it [00:00, ?it/s][A

In [15]:
# Resize the single 40x image down to 20x
# for imagename in tqdm(imagenames[-1:]):
#     vips_img = Vips.Image.new_from_file(TRAIN_WSI_DIR + imagename, level=0)
#     vips_img = vips_img.resize(0.5)
#     out = normalizer.transform(vips_img)
#     out.filename = vips_img.filename
#     vips_utils.save_and_tile(out, SAVE_DIR)
#     stats_dict[imagename] = normalizer.image_stats

In [16]:
import pandas as pd
stats = pd.DataFrame(stats_dict)

In [17]:
stats = stats.transpose()

In [18]:
stats.columns = 'means', 'stds'

ValueError: Length mismatch: Expected axis has 0 elements, new values have 2 elements