In [1]:
#default_exp preprocess

In [2]:
#export
from rsna_retro.imports import *
from rsna_retro.metadata import *

In [3]:
# Make CUDA device index 3 the current device for this notebook session.
# NOTE(review): the index is hard-coded — adjust it for the machine running the notebook.
torch.cuda.set_device(3)

In [4]:
#export

def fix_pxrepr(dcm):
    """Repair, in place, a DICOM whose unsigned pixel values have wrapped around.

    Files with `PixelRepresentation != 0` or `RescaleIntercept < -100` are left untouched.
    For every other file the pixel values are shifted up by 1000, anything at or above
    4096 is wrapped back into range, the raw `PixelData` is rewritten and
    `RescaleIntercept` is set to -1000.

    Returns `dcm` in both cases.

    NOTE(review): this function used to consist of the guard only, so it never modified
    anything. The correction below is the one published with the fastai RSNA DICOM
    notebooks — confirm it matches the intended preprocessing.
    """
    if dcm.PixelRepresentation != 0 or dcm.RescaleIntercept<-100: return dcm
    px_mode = 4096  # presumably 12-bit stored values — TODO confirm
    x = dcm.pixel_array + 1000
    x[x>=px_mode] = x[x>=px_mode] - px_mode
    dcm.PixelData = x.tobytes()
    dcm.RescaleIntercept = -1000
    return dcm
    
def dcm_tfm(fn):
    """Read the DICOM at `fn` and return its `scaled_px`.

    The file is read with `fn.dcmread()` and passed through `fix_pxrepr`. If either step
    raises, the filename and error are printed and `SkipItemException` is raised instead.
    Images that are not 512x512 are zoomed to that size before `scaled_px` is read.
    """
    try:
        dcm = fn.dcmread()
        fix_pxrepr(dcm)
    except Exception as err:
        print(fn,err)
        raise SkipItemException
    is_512 = dcm.Rows == 512 and dcm.Columns == 512
    if not is_512:
        dcm.zoom_to((512,512))
    return dcm.scaled_px

def save_file(o, dest, ext, tiff):
    """Save one `(filename, pixels)` pair under `dest`.

    o:    2-tuple of the source file name and the pixel object to save
    dest: output directory (joined with the file name using `/`)
    ext:  suffix that replaces the file name's own suffix, e.g. '.jpg'
    tiff: if truthy save with `save_tif16` (uncompressed), otherwise with `save_jpg`
    """
    name, pixels = o
    out_path = dest/Path(name).with_suffix(f'{ext}')
    if tiff:
        pixels.save_tif16(out_path, compress=False)
        return
    # "appian windows"; the earlier choice was
    # (dicom_windows.brain, dicom_windows.subdural, dicom_windows.abdomen_soft).
    # NOTE(review): the first pair reads like (width, level) while the other two read like
    # (level, width) — confirm the order `save_jpg` expects.
    appian_wins = [(80,40),(80,200),(40,380)]
    pixels.save_jpg(out_path, appian_wins)

def _squeeze_keep_batch(t):
    "Drop every size-1 dim of `t` except the leading (batch) dim."
    return t.reshape(t.shape[0], *[d for d in t.shape[1:] if d != 1])

def process_batch(pxs, fnames, dest, tiff, crop, resize, n_workers=4):
    """Optionally resize and/or crop a batch of pixel tensors, then save every item under `dest`.

    pxs:       batch of pixel tensors, indexed along dim 0 in step with `fnames`
    fnames:    file names, one per item of `pxs`
    dest:      output directory handed to `save_file`
    tiff:      save '.tif' files if truthy, '.jpg' files otherwise
    crop:      crop each image to the bounding box of `mask_from_blur((80,40))`, output size 256
    resize:    resize to 256 with `AffineCoordTfm` before any cropping
    n_workers: number of workers `parallel` uses for saving

    Cropping and resizing run on the current CUDA device; with neither flag set the batch is
    saved as received.
    """
    ext = '.tif' if tiff else '.jpg'
    on_gpu = crop or resize
    if on_gpu: pxs = TensorImage(pxs.cuda())
    if resize:
        tfm = AffineCoordTfm(size=256)
        # A channel dim is added for the transform and dropped again afterwards. A bare
        # `squeeze()` would also drop the batch dim of a one-item batch, so dim 0 is kept.
        pxs = _squeeze_keep_batch(tfm(pxs.unsqueeze(1)))
    if crop:
        masks = pxs.mask_from_blur((80,40))
        bbs = mask2bbox(masks)
        pxs = crop_resize(pxs, bbs, 256)
    # Keeping dim 0 here ensures `zip(fnames, pxs)` pairs each name with a whole image even
    # when the batch holds a single item.
    if on_gpu: pxs = _squeeze_keep_batch(pxs.cpu())
    parallel(save_file, zip(fnames, pxs), n_workers=n_workers, progress=False, dest=dest, ext=ext, tiff=tiff)
    

In [5]:
#export
def filename(o):
    "Base name of path `o` with its directory part and final extension removed."
    stem, _ = os.path.splitext(os.path.basename(o))
    return stem

In [6]:
#export
def process_dataset(resize=False, test=False, tiff=False, crop=False, trial=False, bs=128, n_workers=8):
    """Convert the DICOMs listed in the metadata dataframe to image files, batch by batch.

    resize:    resize images to 256 and append '256' to the output folder name
    test:      use `df_tst`/`path_tst` instead of `df_comb`/`path_trn`
    tiff:      write TIFF files instead of JPEG
    crop:      crop each image (see `process_batch`)
    trial:     stop after the third batch
    bs:        batch size of the data loader
    n_workers: number of workers used when saving each batch

    The output folder is created under `path` and named '<prefix><tif|jpg>[256]', where the
    prefix is 'tst_' when `test` is set (taking precedence over `crop`), else 'crop_' or
    'nocrop_'.
    """
    df = df_tst if test else df_comb
    path_dcm = path_tst if test else path_trn
    fns = [path_dcm/f'{filename(o)}.dcm' for o in df.fname.values]
    # Keep only DICOMs that are present on disk: a missing file can only fail inside
    # `dcm_tfm` and be skipped, so filtering on `not f.exists()` would leave nothing to convert.
    fns = [f for f in fns if f.exists()]
    dest_fn = ('tst_'  if test else
               'crop_' if crop else
               'nocrop_')
    dest_fn += 'tif' if tiff else 'jpg'
    if resize: dest_fn += '256'
    dest = path/dest_fn
    dest.mkdir(exist_ok=True)

    # Each item yields (pixels from `dcm_tfm`, base file name).
    dsrc = DataSource(fns, [[dcm_tfm],[os.path.basename]])
    dl = TfmdDL(dsrc, bs=bs, num_workers=1)

    for i,b in enumerate(progress_bar(dl)):
        process_batch(*b, dest=dest, tiff=tiff, crop=crop, resize=resize, n_workers=n_workers)
        if trial and i==2: break

In [7]:
# Non-test set (`df_comb`), resized to 256, saved as JPEGs in the `nocrop_jpg256` folder.
process_dataset(resize=True)

In [8]:
# Test set (`df_tst`), no resize, saved as JPEGs in the `tst_jpg` folder.
process_dataset(test=True)

In [9]:
# Non-test set (`df_comb`), no resize, saved as JPEGs in the `nocrop_jpg` folder.
process_dataset()

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [10]:
# Test set (`df_tst`), resized to 256, saved as JPEGs in the `tst_jpg256` folder.
process_dataset(test=True, resize=True)

## Creating a script

In [None]:
#export
@call_parse
def preprocess_script(
    resize:Param("Resize to 256px"   , bool)=False,
    test  :Param("Process test set"  , bool)=False,
    tiff  :Param("Save TIFF format"  , bool)=False,
    crop  :Param("Crop to brain area", bool)=False,
    trial :Param("Just do 3 batches" , bool)=False,
    bs    :Param("Batch size"        , int)=256,
    n_workers:Param("Number of workers", int)=8,
):
    "Command-line entry point: print the parsed options, then run `process_dataset` with them."
    # `trial` stops `process_dataset` once batch index 2 has been processed, i.e. after three batches.
    # The batch-size default here (256) differs from `process_dataset`'s own default (128).
    print('resize,test,tiff,crop,trial,bs,n_workers')
    print(resize,test,tiff,crop,trial,bs,n_workers)
    process_dataset(resize=resize, test=test, tiff=tiff, crop=crop, trial=trial, bs=bs, n_workers=n_workers)

To run this script (add flags such as `--resize` or `--test` as needed):
`python -m rsna_retro.preprocess`

In [1]:
#hide
# Run nbdev's notebook-to-module export; the output below lists each notebook it converted.
from nbdev.export import notebook2script
notebook2script()

Converted 01_data_01_metadata_stage2.ipynb.
Converted 01_data_02_preprocess_windows.ipynb.
Converted 02_train_01_train.ipynb.
Converted 04_orig_replace_ashaw_refactor.ipynb.
Converted 04_replace_ashaw_refactor.ipynb.
Converted 04b_orig_replace_ashaw_refactor.ipynb.
Converted 10_qure.ipynb.
Converted 12_merge.ipynb.
Converted 14_xgboost.ipynb.
Converted 16_slice_e2e-shallow.ipynb.
Converted 16b_orig_slice_e2e-shallow.ipynb.
This cell doesn't have an export destination and was ignored:
e
Converted 17_slice_model-deep.ipynb.
Converted 21_cleanup-nocrop2.ipynb.
Converted 26_submit_final.ipynb.
Converted 27_ensemble_tabular_nn.ipynb.
Converted 99_index.ipynb.
Converted cleanup-combine-qure.ipynb.
Converted delete_03b_cleanup-tif.ipynb.
Converted submit.ipynb.
Converted walkthru.ipynb.
Converted x00_tcia-ct-segm-prep.ipynb.
Converted x00_tcia-ct-segm-train.ipynb.
