# Pre-processing images as center-surround
### a.k.a. retinal ganglion cell processing

The processing pipeline involves converting to grayscale, CLAHE filtering, rescaling, and finally whitening.

First, define the functions for the preprocessing pipeline.

## Preparing the CovidNet dataset
Now import the CovidNet data and pre-process by:
* converting to grayscale (some CovidNet images are RGB)
* applying a small-window CLAHE filter
* resizing to a standard size
* whitening

In [1]:
def img2grayscale(img):
    """Return a single-channel version of ``img``.

    Arrays with more than two dimensions are converted with
    ``skimage.color.rgb2gray`` and then ``skimage.util.img_as_ubyte``;
    arrays with two or fewer dimensions are returned unchanged.
    """
    from skimage.color import rgb2gray
    from skimage.util import img_as_ubyte

    # Nothing to convert when there is no channel axis.
    if len(img.shape) <= 2:
        return img
    return img_as_ubyte(rgb2gray(img))

import cv2
# Shared CLAHE operator (clip limit 2.0, 8x8 tile grid) applied by filter_img.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))

def filter_img(img):
    """Run ``img`` through the module-level ``clahe`` operator and return the result."""
    return clahe.apply(img)

# Side length, in pixels, of the square images produced by the pipeline.
sz = 128
def resize_img(img, sz=128):
    """Resize ``img`` to ``sz`` x ``sz`` using ``skimage.transform.resize``."""
    from skimage.transform import resize

    target_shape = (sz, sz)
    return resize(img, target_shape)

def whiten_img(img):
    """Linearly rescale ``img`` so that its values span the range [0, 1].

    The minimum is subtracted and the result is divided by the value range
    (max - min).  A constant image has a zero range; it maps to all zeros
    instead of dividing by zero and producing NaNs.

    Args:
        img: Array-like of numeric pixel values; must be non-empty.

    Returns:
        A floating-point ``numpy.ndarray`` with the same shape as ``img``.

    Raises:
        ValueError: If ``img`` is empty (raised by ``numpy.min``).
    """
    import numpy as np

    img = np.asarray(img)
    lowest = np.min(img)
    width = np.max(img) - lowest
    shifted = img - lowest
    # Guard the degenerate flat image: dividing by 1 keeps the zeros (and the
    # floating-point result) without triggering a 0/0 warning.
    return shifted / (width if width != 0 else 1)

We will use standard code for showing thumbnails as we progress.

In [2]:
# %load show_original_decoded.py
import matplotlib.pyplot as plt
def show_grayscale(rows, columns, at, pixel_array, sz):
    """Draw one grayscale thumbnail into a cell of a subplot grid.

    Args:
        rows: Number of rows in the subplot grid.
        columns: Number of columns in the subplot grid.
        at: Index of the subplot to draw into, as passed to ``plt.subplot``.
        pixel_array: Array holding ``sz * sz`` pixel values.
        sz: Side length of the square image.
    """
    # A bare `import scipy` does not guarantee that the `ndimage` submodule
    # is loaded, so import the function explicitly.
    from scipy.ndimage import zoom

    ax = plt.subplot(rows, columns, at)
    # Upsample 4x with an order-5 spline for a smoother thumbnail.  The
    # zoomed array is already (sz*4, sz*4), so no further reshape is needed.
    interp_array = zoom(pixel_array.reshape(sz, sz), 4.0, order=5)
    plt.imshow(interp_array, cmap='gray')
    # Hide both axes so only the image is visible.
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

def show_original_decoded(original, decoded, sz, n=10):
    """Plot the first ``n`` originals above their decoded counterparts.

    Builds a two-row figure: the top row holds ``original[0]`` through
    ``original[n-1]`` and the bottom row the matching ``decoded`` entries,
    each drawn with ``show_grayscale``.  The call blocks in ``plt.show``.
    """
    plt.figure(figsize=(n*2, 4))
    for column in range(n):
        top_slot = column + 1
        bottom_slot = top_slot + n
        show_grayscale(2, n, top_slot, original[column], sz)
        show_grayscale(2, n, bottom_slot, decoded[column], sz)
    plt.show(block=True)

Define helpers for keeping track of the original and processed images, stored in dictionaries keyed by the original file stem.

In [3]:
# Original images as read from disk, keyed by file stem.
original_imgs = {}
# Pre-processed images, keyed by the same file stem.
processed_imgs = {} 

def show_thumbnail_progress():
    """Print and display thumbnails for ten recently added images.

    Takes the keys at positions ``[-11:-1]`` of ``original_imgs`` (up to ten
    entries preceding the newest one), prints them, and shows each
    grayscale, resized original above its processed counterpart.
    """
    keys = list(original_imgs)[-11:-1]
    print(keys)
    originals = [resize_img(img2grayscale(original_imgs[key]), sz) for key in keys]
    processed = [processed_imgs[key] for key in keys]
    show_original_decoded(originals, processed, sz)

def add_processed_img(name, original_img, processed_img, all_count):
    """Record an image pair under ``name`` and report progress.

    Stores both images in the module-level dictionaries, prints a
    carriage-return-terminated "<done> of <all_count>" line, and on every
    100th stored image clears the cell output and shows recent thumbnails.

    NOTE(review): nothing in this function evicts entries, so both
    dictionaries keep every image passed in.
    """
    original_imgs[name] = original_img
    processed_imgs[name] = processed_img

    done = len(processed_imgs)
    print(f'{done} of {all_count}', end = '\r')
    if done % 100 != 0:
        return

    # Only refresh the notebook display on every 100th image.
    from IPython.display import clear_output
    clear_output(True)
    if done > 10:
        show_thumbnail_progress()

Scan the data in its original location and compare it with the temp directory to see how many images are left to process.

In [5]:
import os
from pathlib import Path
# Collect the "no finding" PNG files of the NIH_Cxr8 dataset under $DATA_ALL.
chest_root = Path(os.environ['DATA_ALL']) / 'NIH_Cxr8'
nofindings_png_filenames = list(chest_root.glob('by_class/no_finding/*.png'))
print(f"{len(nofindings_png_filenames)} original files")

60361 original files


In [7]:
import imageio
import numpy as np

# Directory holding the already-processed images, one .npy file per stem.
clahe_temp = Path(os.environ['DATA_TEMP']) / 'chest-nihcc' / '128x128' / 'clahe_processed'
# Create it up front so that np.save below cannot fail on a missing directory.
clahe_temp.mkdir(parents=True, exist_ok=True)
existing_clahe = [fn.stem for fn in clahe_temp.glob("*.npy")]
# Set copy for O(1) membership tests; scanning the list for every file would
# make the loop below quadratic in the number of images.
existing_stems = set(existing_clahe)

# Count only source files that still lack an output, so stray .npy files in
# the temp directory cannot skew (or make negative) the reported total.
remaining = sum(1 for fn in nofindings_png_filenames
                if fn.stem not in existing_stems)
print(f"pre-processing {remaining} of {len(nofindings_png_filenames)}")

# iterate over png files, reading and processing
skipped = 0
for png_filename in nofindings_png_filenames:
    if png_filename.stem in existing_stems:
        skipped += 1
        print(f"skipping {skipped}...", end = '\r')
        continue

    # read the png image
    original_img = imageio.imread(png_filename)

    # process the image: grayscale -> CLAHE -> resize -> whiten
    img = img2grayscale(original_img)
    img = filter_img(img)
    img = resize_img(img, sz)
    img = whiten_img(img)
    add_processed_img(png_filename.stem, original_img, img, remaining)

    # and save the npy file (np.save appends the .npy extension)
    np.save(clahe_temp / png_filename.stem, img)

print('done')

pre-processing 0 of 60361


skipping 1...skipping 2...skipping 3...skipping 4...skipping 5...skipping 6...skipping 7...skipping 8...skipping 9...skipping 10...skipping 11...skipping 12...skipping 13...skipping 14...skipping 15...skipping 16...skipping 17...skipping 18...skipping 19...skipping 20...skipping 21...skipping 22...skipping 23...skipping 24...skipping 25...skipping 26...skipping 27...skipping 28...skipping 29...skipping 30...skipping 31...skipping 32...skipping 33...skipping 34...skipping 35...skipping 36...skipping 37...skipping 38...skipping 39...skipping 40...skipping 41...skipping 42...skipping 43...skipping 44...skipping 45...skipping 46...skipping 47...skipping 48...skipping 49...skipping 50...skipping 51...skipping 52...skipping 53...skipping 54...skipping 55...skipping 56...skipping 57...skipping 58...skipping 59...skipping 60...skipping 61...skipping 62...skipping 63...skipping 64...skipping 65...skipping 66...skipping 67...skip

skipping 2941...skipping 2942...skipping 2943...skipping 2944...skipping 2945...skipping 2946...skipping 2947...skipping 2948...skipping 2949...skipping 2950...skipping 2951...skipping 2952...skipping 2953...skipping 2954...skipping 2955...skipping 2956...skipping 2957...skipping 2958...skipping 2959...skipping 2960...skipping 2961...skipping 2962...skipping 2963...skipping 2964...skipping 2965...skipping 2966...skipping 2967...skipping 2968...skipping 2969...skipping 2970...skipping 2971...skipping 2972...skipping 2973...skipping 2974...skipping 2975...skipping 2976...skipping 2977...skipping 2978...skipping 2979...skipping 2980...skipping 2981...skipping 2982...skipping 2983...skipping 2984...skipping 2985...skipping 2986...skipping 2987...skipping 2988...skipping 2989...skipping 2990...skipping 2991...skipping 2992...skipping 2993...skipping 2994...skipping 2995...skipping 2996...skipping 2997...skipping 2998...skipping 2999

skipping 3866...skipping 3867...skipping 3868...skipping 3869...skipping 3870...skipping 3871...skipping 3872...skipping 3873...skipping 3874...skipping 3875...skipping 3876...skipping 3877...skipping 3878...skipping 3879...skipping 3880...skipping 3881...skipping 3882...skipping 3883...skipping 3884...skipping 3885...skipping 3886...skipping 3887...skipping 3888...skipping 3889...skipping 3890...skipping 3891...skipping 3892...skipping 3893...skipping 3894...skipping 3895...skipping 3896...skipping 3897...skipping 3898...skipping 3899...skipping 3900...skipping 3901...skipping 3902...skipping 3903...skipping 3904...skipping 3905...skipping 3906...skipping 3907...skipping 3908...skipping 3909...skipping 3910...skipping 3911...skipping 3912...skipping 3913...skipping 3914...skipping 3915...skipping 3916...skipping 3917...skipping 3918...skipping 3919...skipping 3920...skipping 3921...skipping 3922...skipping 3923...skipping 3924.

skipping 4682...skipping 4683...skipping 4684...skipping 4685...skipping 4686...skipping 4687...skipping 4688...skipping 4689...skipping 4690...skipping 4691...skipping 4692...skipping 4693...skipping 4694...skipping 4695...skipping 4696...skipping 4697...skipping 4698...skipping 4699...skipping 4700...skipping 4701...skipping 4702...skipping 4703...skipping 4704...skipping 4705...skipping 4706...skipping 4707...skipping 4708...skipping 4709...skipping 4710...skipping 4711...skipping 4712...skipping 4713...skipping 4714...skipping 4715...skipping 4716...skipping 4717...skipping 4718...skipping 4719...skipping 4720...skipping 4721...skipping 4722...skipping 4723...skipping 4724...skipping 4725...skipping 4726...skipping 4727...skipping 4728...skipping 4729...skipping 4730...skipping 4731...skipping 4732...skipping 4733...skipping 4734...skipping 4735...skipping 4736...skipping 4737...skipping 4738...skipping 4739...skipping 4740.

skipping 5691...skipping 5692...skipping 5693...skipping 5694...skipping 5695...skipping 5696...skipping 5697...skipping 5698...skipping 5699...skipping 5700...skipping 5701...skipping 5702...skipping 5703...skipping 5704...skipping 5705...skipping 5706...skipping 5707...skipping 5708...skipping 5709...skipping 5710...skipping 5711...skipping 5712...skipping 5713...skipping 5714...skipping 5715...skipping 5716...skipping 5717...skipping 5718...skipping 5719...skipping 5720...skipping 5721...skipping 5722...skipping 5723...skipping 5724...skipping 5725...skipping 5726...skipping 5727...skipping 5728...skipping 5729...skipping 5730...skipping 5731...skipping 5732...skipping 5733...skipping 5734...skipping 5735...skipping 5736...skipping 5737...skipping 5738...skipping 5739...skipping 5740...skipping 5741...skipping 5742...skipping 5743...skipping 5744...skipping 5745...skipping 5746...skipping 5747...skipping 5748...skipping 5749

skipping 6588...skipping 6589...skipping 6590...skipping 6591...skipping 6592...skipping 6593...skipping 6594...skipping 6595...skipping 6596...skipping 6597...skipping 6598...skipping 6599...skipping 6600...skipping 6601...skipping 6602...skipping 6603...skipping 6604...skipping 6605...skipping 6606...skipping 6607...skipping 6608...skipping 6609...skipping 6610...skipping 6611...skipping 6612...skipping 6613...skipping 6614...skipping 6615...skipping 6616...skipping 6617...skipping 6618...skipping 6619...skipping 6620...skipping 6621...skipping 6622...skipping 6623...skipping 6624...skipping 6625...skipping 6626...skipping 6627...skipping 6628...skipping 6629...skipping 6630...skipping 6631...skipping 6632...skipping 6633...skipping 6634...skipping 6635...skipping 6636...skipping 6637...skipping 6638...skipping 6639...skipping 6640...skipping 6641...skipping 6642...skipping 6643...skipping 6644...skipping 6645...skipping 6646

skipping 7336...skipping 7337...skipping 7338...skipping 7339...skipping 7340...skipping 7341...skipping 7342...skipping 7343...skipping 7344...skipping 7345...skipping 7346...skipping 7347...skipping 7348...skipping 7349...skipping 7350...skipping 7351...skipping 7352...skipping 7353...skipping 7354...skipping 7355...skipping 7356...skipping 7357...skipping 7358...skipping 7359...skipping 7360...skipping 7361...skipping 7362...skipping 7363...skipping 7364...skipping 7365...skipping 7366...skipping 7367...skipping 7368...skipping 7369...skipping 7370...skipping 7371...skipping 7372...skipping 7373...skipping 7374...skipping 7375...skipping 7376...skipping 7377...skipping 7378...skipping 7379...skipping 7380...skipping 7381...skipping 7382...skipping 7383...skipping 7384...skipping 7385...skipping 7386...skipping 7387...skipping 7388...skipping 7389...skipping 7390...skipping 7391...skipping 7392...skipping 7393...skipping 7394.

skipping 8064...skipping 8065...skipping 8066...skipping 8067...skipping 8068...skipping 8069...skipping 8070...skipping 8071...skipping 8072...skipping 8073...skipping 8074...skipping 8075...skipping 8076...skipping 8077...skipping 8078...skipping 8079...skipping 8080...skipping 8081...skipping 8082...skipping 8083...skipping 8084...skipping 8085...skipping 8086...skipping 8087...skipping 8088...skipping 8089...skipping 8090...skipping 8091...skipping 8092...skipping 8093...skipping 8094...skipping 8095...skipping 8096...skipping 8097...skipping 8098...skipping 8099...skipping 8100...skipping 8101...skipping 8102...skipping 8103...skipping 8104...skipping 8105...skipping 8106...skipping 8107...skipping 8108...skipping 8109...skipping 8110...skipping 8111...skipping 8112...skipping 8113...skipping 8114...skipping 8115...skipping 8116...skipping 8117...skipping 8118...skipping 8119...skipping 8120...skipping 8121...skipping 8122.

skipping 8846...skipping 8847...skipping 8848...skipping 8849...skipping 8850...skipping 8851...skipping 8852...skipping 8853...skipping 8854...skipping 8855...skipping 8856...skipping 8857...skipping 8858...skipping 8859...skipping 8860...skipping 8861...skipping 8862...skipping 8863...skipping 8864...skipping 8865...skipping 8866...skipping 8867...skipping 8868...skipping 8869...skipping 8870...skipping 8871...skipping 8872...skipping 8873...skipping 8874...skipping 8875...skipping 8876...skipping 8877...skipping 8878...skipping 8879...skipping 8880...skipping 8881...skipping 8882...skipping 8883...skipping 8884...skipping 8885...skipping 8886...skipping 8887...skipping 8888...skipping 8889...skipping 8890...skipping 8891...skipping 8892...skipping 8893...skipping 8894...skipping 8895...skipping 8896...skipping 8897...skipping 8898...skipping 8899...skipping 8900...skipping 8901...skipping 8902...skipping 8903...skipping 8904.

skipping 9509...skipping 9510...skipping 9511...skipping 9512...skipping 9513...skipping 9514...skipping 9515...skipping 9516...skipping 9517...skipping 9518...skipping 9519...skipping 9520...skipping 9521...skipping 9522...skipping 9523...skipping 9524...skipping 9525...skipping 9526...skipping 9527...skipping 9528...skipping 9529...skipping 9530...skipping 9531...skipping 9532...skipping 9533...skipping 9534...skipping 9535...skipping 9536...skipping 9537...skipping 9538...skipping 9539...skipping 9540...skipping 9541...skipping 9542...skipping 9543...skipping 9544...skipping 9545...skipping 9546...skipping 9547...skipping 9548...skipping 9549...skipping 9550...skipping 9551...skipping 9552...skipping 9553...skipping 9554...skipping 9555...skipping 9556...skipping 9557...skipping 9558...skipping 9559...skipping 9560...skipping 9561...skipping 9562...skipping 9563...skipping 9564...skipping 9565...skipping 9566...skipping 9567.

skipping 10349...skipping 10350...skipping 10351...skipping 10352...skipping 10353...skipping 10354...skipping 10355...skipping 10356...skipping 10357...skipping 10358...skipping 10359...skipping 10360...skipping 10361...skipping 10362...skipping 10363...skipping 10364...skipping 10365...skipping 10366...skipping 10367...skipping 10368...skipping 10369...skipping 10370...skipping 10371...skipping 10372...skipping 10373...skipping 10374...skipping 10375...skipping 10376...skipping 10377...skipping 10378...skipping 10379...skipping 10380...skipping 10381...skipping 10382...skipping 10383...skipping 10384...skipping 10385...skipping 10386...skipping 10387...skipping 10388...skipping 10389...skipping 10390...skipping 10391...skipping 10392...skipping 10393...skipping 10394...skipping 10395...skipping 10396...skipping 10397...skipping 10398...skipping 10399...skipping 10400...skipping 10401...skipping 10402...skipping 10403...skipping 

skipping 11102...skipping 11103...skipping 11104...skipping 11105...skipping 11106...skipping 11107...skipping 11108...skipping 11109...skipping 11110...skipping 11111...skipping 11112...skipping 11113...skipping 11114...skipping 11115...skipping 11116...skipping 11117...skipping 11118...skipping 11119...skipping 11120...skipping 11121...skipping 11122...skipping 11123...skipping 11124...skipping 11125...skipping 11126...skipping 11127...skipping 11128...skipping 11129...skipping 11130...skipping 11131...skipping 11132...skipping 11133...skipping 11134...skipping 11135...skipping 11136...skipping 11137...skipping 11138...skipping 11139...skipping 11140...skipping 11141...skipping 11142...skipping 11143...skipping 11144...skipping 11145...skipping 11146...skipping 11147...skipping 11148...skipping 11149...skipping 11150...skipping 11151...skipping 11152...skipping 11153...skipping 11154...skipping 11155...skipping 11156...skipping 

skipping 11843...skipping 11844...skipping 11845...skipping 11846...skipping 11847...skipping 11848...skipping 11849...skipping 11850...skipping 11851...skipping 11852...skipping 11853...skipping 11854...skipping 11855...skipping 11856...skipping 11857...skipping 11858...skipping 11859...skipping 11860...skipping 11861...skipping 11862...skipping 11863...skipping 11864...skipping 11865...skipping 11866...skipping 11867...skipping 11868...skipping 11869...skipping 11870...skipping 11871...skipping 11872...skipping 11873...skipping 11874...skipping 11875...skipping 11876...skipping 11877...skipping 11878...skipping 11879...skipping 11880...skipping 11881...skipping 11882...skipping 11883...skipping 11884...skipping 11885...skipping 11886...skipping 11887...skipping 11888...skipping 11889...skipping 11890...skipping 11891...skipping 11892...skipping 11893...skipping 11894...skipping 11895...skipping 11896...skipping 11897...skipping 1

skipping 12611...skipping 12612...skipping 12613...skipping 12614...skipping 12615...skipping 12616...skipping 12617...skipping 12618...skipping 12619...skipping 12620...skipping 12621...skipping 12622...skipping 12623...skipping 12624...skipping 12625...skipping 12626...skipping 12627...skipping 12628...skipping 12629...skipping 12630...skipping 12631...skipping 12632...skipping 12633...skipping 12634...skipping 12635...skipping 12636...skipping 12637...skipping 12638...skipping 12639...skipping 12640...skipping 12641...skipping 12642...skipping 12643...skipping 12644...skipping 12645...skipping 12646...skipping 12647...skipping 12648...skipping 12649...skipping 12650...skipping 12651...skipping 12652...skipping 12653...skipping 12654...skipping 12655...skipping 12656...skipping 12657...skipping 12658...skipping 12659...skipping 12660...skipping 12661...skipping 12662...skipping 12663...skipping 12664...skipping 12665...skipping 1

skipping 13178...skipping 13179...skipping 13180...skipping 13181...skipping 13182...skipping 13183...skipping 13184...skipping 13185...skipping 13186...skipping 13187...skipping 13188...skipping 13189...skipping 13190...skipping 13191...skipping 13192...skipping 13193...skipping 13194...skipping 13195...skipping 13196...skipping 13197...skipping 13198...skipping 13199...skipping 13200...skipping 13201...skipping 13202...skipping 13203...skipping 13204...skipping 13205...skipping 13206...skipping 13207...skipping 13208...skipping 13209...skipping 13210...skipping 13211...skipping 13212...skipping 13213...skipping 13214...skipping 13215...skipping 13216...skipping 13217...skipping 13218...skipping 13219...skipping 13220...skipping 13221...skipping 13222...skipping 13223...skipping 13224...skipping 13225...skipping 13226...skipping 13227...skipping 13228...skipping 13229...skipping 13230...skipping 13231...skipping 13232...skipping 

skipping 13810...skipping 13811...skipping 13812...skipping 13813...skipping 13814...skipping 13815...skipping 13816...skipping 13817...skipping 13818...skipping 13819...skipping 13820...skipping 13821...skipping 13822...skipping 13823...skipping 13824...skipping 13825...skipping 13826...skipping 13827...skipping 13828...skipping 13829...skipping 13830...skipping 13831...skipping 13832...skipping 13833...skipping 13834...skipping 13835...skipping 13836...skipping 13837...skipping 13838...skipping 13839...skipping 13840...skipping 13841...skipping 13842...skipping 13843...skipping 13844...skipping 13845...skipping 13846...skipping 13847...skipping 13848...skipping 13849...skipping 13850...skipping 13851...skipping 13852...skipping 13853...skipping 13854...skipping 13855...skipping 13856...skipping 13857...skipping 13858...skipping 13859...skipping 13860...skipping 13861...skipping 13862...skipping 13863...skipping 13864...skipping 

skipping 14296...skipping 14297...skipping 14298...skipping 14299...skipping 14300...skipping 14301...skipping 14302...skipping 14303...skipping 14304...skipping 14305...skipping 14306...skipping 14307...skipping 14308...skipping 14309...skipping 14310...skipping 14311...skipping 14312...skipping 14313...skipping 14314...skipping 14315...skipping 14316...skipping 14317...skipping 14318...skipping 14319...skipping 14320...skipping 14321...skipping 14322...skipping 14323...skipping 14324...skipping 14325...skipping 14326...skipping 14327...skipping 14328...skipping 14329...skipping 14330...skipping 14331...skipping 14332...skipping 14333...skipping 14334...skipping 14335...skipping 14336...skipping 14337...skipping 14338...skipping 14339...skipping 14340...skipping 14341...skipping 14342...skipping 14343...skipping 14344...skipping 14345...skipping 14346...skipping 14347...skipping 14348...skipping 14349...skipping 14350...skipping 

skipping 14930...skipping 14931...skipping 14932...skipping 14933...skipping 14934...skipping 14935...skipping 14936...skipping 14937...skipping 14938...skipping 14939...skipping 14940...skipping 14941...skipping 14942...skipping 14943...skipping 14944...skipping 14945...skipping 14946...skipping 14947...skipping 14948...skipping 14949...skipping 14950...skipping 14951...skipping 14952...skipping 14953...skipping 14954...skipping 14955...skipping 14956...skipping 14957...skipping 14958...skipping 14959...skipping 14960...skipping 14961...skipping 14962...skipping 14963...skipping 14964...skipping 14965...skipping 14966...skipping 14967...skipping 14968...skipping 14969...skipping 14970...skipping 14971...skipping 14972...skipping 14973...skipping 14974...skipping 14975...skipping 14976...skipping 14977...skipping 14978...skipping 14979...skipping 14980...skipping 14981...skipping 14982...skipping 14983...skipping 14984...skipping 

skipping 15788...skipping 15789...skipping 15790...skipping 15791...skipping 15792...skipping 15793...skipping 15794...skipping 15795...skipping 15796...skipping 15797...skipping 15798...skipping 15799...skipping 15800...skipping 15801...skipping 15802...skipping 15803...skipping 15804...skipping 15805...skipping 15806...skipping 15807...skipping 15808...skipping 15809...skipping 15810...skipping 15811...skipping 15812...skipping 15813...skipping 15814...skipping 15815...skipping 15816...skipping 15817...skipping 15818...skipping 15819...skipping 15820...skipping 15821...skipping 15822...skipping 15823...skipping 15824...skipping 15825...skipping 15826...skipping 15827...skipping 15828...skipping 15829...skipping 15830...skipping 15831...skipping 15832...skipping 15833...skipping 15834...skipping 15835...skipping 15836...skipping 15837...skipping 15838...skipping 15839...skipping 15840...skipping 15841...skipping 15842...skipping 

doneping 60361...
