# Center-Surround Preprocessing
To assist in training, we pre-process the images: each one is resized to a common resolution, passed through a CLAHE filter, and whitened.

First, we import the support routines.  We also get the variables that set the original data location and the location of the processed data.

In [1]:
from common.image_preprocessing import data_all, data_temp
from common.image_preprocessing import preprocess_images, temp_from_original
from common.image_preprocessing import read_imageio, read_dcm
from common.image_ops import img2grayscale, whiten_img, resize_img, clahe_img
from common.image_ops import center_surround, channelized_center_surround
from common.filter_banks import make_gauss_kernels, make_dog_kernels
from pathlib import PurePath

image_processing: DATA_ALL=G:\DataAll; DATA_TEMP=E:\Data\zebtrastack_temp


## CXR8 Processing
The CXR8 dataset is a set of PNG radiographs whose filenames indicate the subject and the follow-up number (e.g. `00000005_001.png`).

In [2]:
# Target image size handed as `sz` to the preprocessing functions below;
# it also names the "<sz>x<sz>" output folder.
sz = 128
cxr8_original_path = data_all / 'NIH_Cxr8' / 'by_class' / 'no_finding'
# Path.glob yields entries in arbitrary, platform-dependent order. Sorting
# keeps the processing order -- and any subset sliced off this list for a
# quick trial run -- reproducible across machines and runs.
cxr8_png_filenames = sorted(cxr8_original_path.glob('*.png'))
cxr8_png_filenames[:3]

[WindowsPath('G:/DataAll/NIH_Cxr8/by_class/no_finding/00000002_000.png'),
 WindowsPath('G:/DataAll/NIH_Cxr8/by_class/no_finding/00000005_000.png'),
 WindowsPath('G:/DataAll/NIH_Cxr8/by_class/no_finding/00000005_001.png')]

In [None]:
# Relative output folder "<sz>x<sz>/clahe_processed", shared by the
# center-surround runs in this notebook.
temp_relative_path = PurePath(f"{sz}x{sz}", 'clahe_processed')
cxr8_temp = temp_from_original(cxr8_original_path, temp_relative_path)

In [None]:
%matplotlib inline
preprocess_images(cxr8_png_filenames, read_imageio, 
                  lambda img: \
                      center_surround(img, sz=sz), 
                  cxr8_temp)

## CXR8 Channelized

In [3]:
# Relative output folder "<sz>x<sz>/channelized_clahe_processed" for the
# channelized variant of the CXR8 processing.
channelized_temp_relative_path = PurePath(
    f"{sz}x{sz}", 'channelized_clahe_processed')
channelized_cxr8_temp = temp_from_original(
    cxr8_original_path, channelized_temp_relative_path)

In [4]:
%matplotlib inline
dog_kernels = make_dog_kernels()
preprocess_images(cxr8_png_filenames, read_imageio, 
                  lambda img: \
                      channelized_center_surround(img, 
                                                  dog_kernels, 
                                                  sz=sz),
                  channelized_cxr8_temp,
                  show_in_notebook=False)

E:\Data\zebtrastack_temp\NIH_Cxr8\128x128\channelized_clahe_processed\00030805_000 (1024, 1024, 4) uint8 (1, 128, 128, 4)       

## LIDC Processing
LIDC is a collection of CT series, which we also process using center-surround.

In [None]:
lidc_original_path = data_all / 'TCIA-LIDC' / 'LIDC-IDRI'
# The recursive glob walks the directory tree in arbitrary order; sorting
# makes the processing order (and any subset sliced off for a trial run)
# reproducible.
lidc_dcm_filenames = sorted(lidc_original_path.glob('**/*.dcm'))
# Reuses `temp_relative_path` from the CXR8 section, so that cell must have
# been run first.
lidc_temp = temp_from_original(lidc_original_path, temp_relative_path)

In [None]:
%matplotlib inline
preprocess_images(lidc_dcm_filenames, read_dcm, 
                  lambda img:center_surround(img, sz=sz), lidc_temp)

## eevorg processing

In [None]:
# Unlike the other datasets, the eevorg originals live under `data_temp`,
# so the output folder is built directly instead of via temp_from_original.
eevorg_original_path = data_temp / 'eevorg' / 'original_png'
# Path.glob yields entries in arbitrary order; sort so the processing order
# (and any subset sliced off for a trial run) is reproducible.
eevorg_png_filenames = sorted(eevorg_original_path.glob('*.png'))
eevorg_temp = data_temp / 'eevorg' / temp_relative_path
print(eevorg_original_path, eevorg_temp)

In [None]:
import numpy as np
import math
def calc_entropy(img):
    """Return the Shannon entropy (in nats) of an image's intensity histogram.

    The pixel values are binned with ``np.histogram``'s defaults (10
    equal-width bins spanning the data range), the counts are normalised to
    probabilities, and ``-sum(p * log(p + 1e-6))`` is returned.  The small
    offset keeps the logarithm finite for empty bins.

    Parameters
    ----------
    img : array_like
        Image data; it is flattened by ``np.histogram``.

    Returns
    -------
    float
        The histogram entropy.  An empty image has no samples and yields
        ``0.0`` (rather than NaN from a division by zero).
    """
    counts, _ = np.histogram(img)
    total = counts.sum()
    if total == 0:
        # No pixels at all: normalising would divide by zero and propagate NaN.
        return 0.0
    probs = counts / total
    return float(-np.sum(probs * np.log(probs + 1e-6)))

In [None]:
%matplotlib inline
preprocess_images(eevorg_png_filenames, read_imageio, 
                  lambda img:center_surround(img, sz=sz), eevorg_temp,
                  select_img=lambda img:calc_entropy(img) > 0.1)