# Pre-processing images as center-surround
### a.k.a. retinal ganglion cell processing

The processing pipeline involves converting to grayscale, CLAHE filtering, resizing to a standard size, and finally whitening.

First, define the functions for the preprocessing pipeline.

## Preparing the CovidNet dataset
Now import the CovidNet data and pre-process it by:
* converting to grayscale (some CovidNet images are RGB)
* applying a small-window CLAHE
* resizing to a standard size
* whitening

In [None]:
def img2grayscale(img):
    """Return a single-channel version of ``img``.

    Two-dimensional input is returned unchanged. Input with a trailing
    channel axis is reduced to one channel:

    * 1 or 2 channels (gray, or gray + alpha): the first channel is
      returned as-is.
    * 3 channels (RGB): converted with ``rgb2gray`` and then to ``uint8``.
    * 4 channels (RGBA): the alpha channel is dropped before the RGB
      conversion, because recent scikit-image releases reject input whose
      last axis is not of length 3.

    Any other layout is handed to ``rgb2gray`` unchanged, which decides
    whether to accept it.
    """
    if len(img.shape) <= 2:
        return img

    channels = img.shape[-1]
    if channels in (1, 2):
        # The first channel already holds the gray values; a second channel
        # would be alpha, which is ignored.
        return img[..., 0]

    # Imported lazily so that already-gray input needs no scikit-image.
    from skimage.color import rgb2gray
    from skimage.util import img_as_ubyte

    if channels == 4:
        # Keep only R, G and B.
        img = img[..., :3]
    return img_as_ubyte(rgb2gray(img))

import cv2
# Shared CLAHE operator: clip limit 2.0 on an 8x8 tile grid.
clahe = cv2.createCLAHE(
    clipLimit=2.0,
    tileGridSize=(8, 8),
)

def filter_img(img):
    """Run the module-level CLAHE operator over ``img`` and return the result."""
    return clahe.apply(img)

# Default edge length, in pixels, of the square pre-processed images.
sz = 128
def resize_img(img, sz=128):
    """Resize ``img`` to ``sz`` x ``sz`` with scikit-image's ``resize``."""
    from skimage.transform import resize
    target_shape = (sz, sz)
    return resize(img, target_shape)

def whiten_img(img):
    """Linearly rescale ``img`` so that its values span [0, 1].

    The minimum is mapped to 0 and the maximum to 1 (min-max
    normalisation). A constant image has no value range to stretch; it is
    returned as all zeros instead of dividing by zero and producing
    NaN/inf values.
    """
    import numpy as np
    lowest = np.min(img)
    width = np.max(img) - lowest
    if width == 0:
        # Constant image: divide by 1 so the result is finite zeros with the
        # same floating-point dtype the normal path would produce.
        width = 1
    return (img - lowest) / width

We will use a standard helper for showing thumbnails of the original and processed images as we progress.

In [None]:
# %load show_original_decoded.py
import matplotlib.pyplot as plt
def show_grayscale(rows, columns, at, pixel_array, sz):
    """Draw one grayscale thumbnail into a subplot of the current figure.

    ``rows``, ``columns`` and ``at`` are passed to ``plt.subplot`` to pick
    the grid cell. ``pixel_array`` is reshaped to ``(sz, sz)`` and enlarged
    4x with order-5 spline interpolation before it is displayed; both axes
    are hidden.
    """
    # Import the submodule explicitly: on older SciPy releases a bare
    # ``import scipy`` does not make ``scipy.ndimage`` available.
    from scipy import ndimage
    ax = plt.subplot(rows, columns, at)
    # Zooming an (sz, sz) array by 4.0 already yields (sz*4, sz*4), so no
    # further reshape is needed before plotting.
    interp_array = ndimage.zoom(pixel_array.reshape(sz, sz), 4.0, order=5)
    plt.imshow(interp_array, cmap='gray')
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

def show_original_decoded(original, decoded, sz, n=10):
    """Plot the first ``n`` originals in a row above their decoded versions.

    Each image is drawn with ``show_grayscale`` into a 2 x ``n`` grid, and
    the figure is shown with a blocking ``plt.show``.
    """
    fig_size = (n * 2, 4)
    plt.figure(figsize=fig_size)
    for idx in range(n):
        top_slot = idx + 1          # position in the first row
        bottom_slot = top_slot + n  # same column, second row
        show_grayscale(2, n, top_slot, original[idx], sz)
        show_grayscale(2, n, bottom_slot, decoded[idx], sz)
    plt.show(block=True)

Define helpers for keeping track of the original and processed images, stored in dictionaries keyed by the original file stem.

In [None]:
# Images as passed in by the caller before processing, keyed by name.
original_imgs = {}
# Corresponding pre-processed images, stored under the same keys.
processed_imgs = {} 

def show_thumbnail_progress():
    """Print the keys of ten recent images and plot them as thumbnails.

    The originals are converted to grayscale and resized to ``sz`` so they
    can be shown in a row above their processed counterparts.
    """
    # NOTE(review): the slice [-11:-1] leaves out the most recently added
    # entry — confirm whether that is intended.
    keys = list(original_imgs)[-11:-1]
    print(keys)
    originals = [resize_img(img2grayscale(original_imgs[key]), sz) for key in keys]
    processed = [processed_imgs[key] for key in keys]
    show_original_decoded(originals, processed, sz)

def add_processed_img(name, original_img, processed_img, all_count):
    """Record an original/processed image pair and report progress.

    Both images are stored under ``name`` in the module-level dictionaries.
    A one-line progress counter is printed on every call; on every 100th
    stored image the notebook output is cleared and, once more than ten
    images are stored, a row of thumbnails is shown.
    """
    original_imgs[name] = original_img
    processed_imgs[name] = processed_img

    done = len(processed_imgs)
    print(f'{done} of {all_count}', end = '\r')

    # Nothing more to do unless this is a multiple-of-100 milestone.
    if done % 100 != 0:
        return

    from IPython.display import clear_output
    clear_output(True)
    if done > 10:
        show_thumbnail_progress()

Scan the data in its original location, then compare it against the temp directory to see how many images are left to process.

In [None]:
import os
from pathlib import Path
# The 'chest-nihcc' directory is located under the path given by $DATA_ALL.
chest_root = Path(os.environ['DATA_ALL']) / 'chest-nihcc'
# Materialise the glob so the file list can be counted and iterated repeatedly.
nofindings_png_filenames = list(chest_root.glob('by_class/no_finding/*.png'))
print(f"{len(nofindings_png_filenames)} original files")

In [None]:
# Imported once here rather than on every loop iteration.
import imageio
import numpy as np

# Directory that holds one .npy file per already-processed image.
clahe_temp = Path(os.environ['DATA_TEMP']) / 'anat0mixer_temp' / 'clahe_processed'
# np.save does not create missing directories, so make sure the target
# exists before the first image is written.
clahe_temp.mkdir(parents=True, exist_ok=True)

existing_clahe = [fn.stem for fn in clahe_temp.glob("*.npy")]
# Set copy of the stems for O(1) membership tests inside the loop.
existing_stems = set(existing_clahe)

# Count the files that really still need processing; a plain subtraction of
# the two lengths is wrong when the temp directory contains stems that are
# not part of the current file list.
remaining = sum(1 for fn in nofindings_png_filenames
                if fn.stem not in existing_stems)
print(f"pre-processing {remaining} of {len(nofindings_png_filenames)}")

# iterate over png files, reading and processing
skipped = 0
for png_filename in nofindings_png_filenames:
    if png_filename.stem in existing_stems:
        skipped += 1
        print(f"skipping {skipped}...", end = '\r')
        continue

    # read the png image
    original_img = imageio.imread(png_filename)

    # process the image: grayscale -> CLAHE -> resize -> whiten
    img = img2grayscale(original_img)
    img = filter_img(img)
    img = resize_img(img, sz)
    img = whiten_img(img)
    add_processed_img(png_filename.stem, original_img, img, remaining)

    # and save the npy file (np.save appends the .npy extension)
    np.save(clahe_temp / png_filename.stem, img)

print('done')