In [1]:
import os
import sys
import glob
import shutil
from ipywidgets import interact
from copy import deepcopy
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Make the directory two levels up importable (presumably the repository root
# that holds the `lung_cancer` package -- confirm against the project layout).
# The membership check keeps the cell idempotent: re-running it no longer
# stacks duplicate entries onto sys.path.
if '../..' not in sys.path:
    sys.path.append('../..')

In [None]:
from lung_cancer import CTImagesMaskedBatch as CTIMB
from lung_cancer.dataset import *

### Directories for the index and the dump

In [None]:
# Both locations sit under the same data volume, so build them from one root.
_MRT_ROOT = '/notebooks/data/MRT/'

# Glob pattern for the raw LUNA scan headers (*.mhd inside the s* subfolders).
DIR_LUNA = _MRT_ROOT + 'luna/s*/*.mhd'
# Folder used as the dump destination of the preprocessing pipeline.
DIR_DUMP = _MRT_ROOT + 'luna_nodules_high_res_64/'

In [None]:
# Peek into the folder of one example scan (the folder name looks like a
# series UID -- TODO confirm) to see which files it contains.
sample_scan_dir = '/notebooks/data/MRT/luna_leshas/1.3.6.1.4.1.14519.5.2.1.6279.6001.171667800241622018839592854574/'
os.listdir(sample_scan_dir)

In [None]:
# DANGER: removes a previous dump directory, if there is one.
# The removal is recursive and cannot be undone -- double-check DIR_DUMP
# before executing this cell.

dump_already_there = os.path.exists(DIR_DUMP)
if dump_already_there:
    shutil.rmtree(DIR_DUMP)

### Create dataset index and Dataset

In [None]:
# Index covering every file matched by the DIR_LUNA glob (the whole LUNA set).
ind = FilesIndex(
    no_ext=True,  # presumably strips file extensions from item ids -- confirm in FilesIndex docs
    path=DIR_LUNA,
)

In [None]:
# Sanity check: how many items the index picked up from the DIR_LUNA glob.
len(ind.index)

In [None]:
# Dataset over the full index; batches are built with the CTIMB batch class.
ds = Dataset(
    batch_class=CTIMB,
    index=ind,
)

In [None]:
# Shares handed to cv_split. The first share (1%) is presumably the train part
# that the preprocessing below runs on -- confirm the share order in the
# Dataset docs.
# Original author's note: if preprocess and dump is needed (presumably for the
# whole dataset rather than a small subset), do not cv_split.
split_shares = [0.01, 0.8]
ds.cv_split(split_shares)

### Run preprocessing on the train split ($\approx$ 15 scans)

In [None]:
# Lazy pipeline definition: this cell only describes the steps,
# the actual work happens when the pipeline is run.
args_resize = {'n_workers': 6, 'shape': (128, 256, 256)}

workflow = (
    ds.train.pipeline()
    .load(fmt='raw')         # read the raw scans of the train part
    .resize(**args_resize)   # resize to the target shape using 6 workers
    .dump(dst=DIR_DUMP)      # store the result under DIR_DUMP
)

In [None]:
# Execute the pipeline defined above: BATCH_SIZE items per batch, no shuffling.
BATCH_SIZE = 8
workflow.run(shuffle=False, batch_size=BATCH_SIZE)

### Load dumped scans, build masks

In [None]:
# Load the annotations table from the LUNA CSVFILES folder and show
# its first two rows as a quick look at the columns.
annotations_csv = '/notebooks/data/MRT/luna/CSVFILES/annotations.csv'
nodules = pd.read_csv(annotations_csv)
nodules.head(n=2)

In [None]:
# Index over the entries of the dump folder (dirs=True -- presumably one
# sub-directory per dumped scan; confirm in the FilesIndex docs).
# os.path.join keeps the glob pattern valid whether or not DIR_DUMP ends with
# a path separator; plain string concatenation silently produced a wrong
# pattern ('.../dump*') when the trailing slash was missing.
ind_dumped = FilesIndex(path=os.path.join(DIR_DUMP, '*'), dirs=True)

In [None]:
# Wrap only the first four dumped items into a batch for a quick check.
first_ids = ind_dumped.index[0:4]
batch_dumped = CTIMB(ind_dumped.create_subset(first_ids))

In [None]:
# Read the batch items from disk; fmt='blosc' is presumably the format that
# the dump step wrote -- confirm against the dump() implementation.
batch_dumped.load(fmt='blosc')

In [None]:
# Hand the annotations frame to the batch (presumably it keeps the rows that
# belong to the scans in this batch -- confirm in fetch_nodules_info).
batch_dumped.fetch_nodules_info(nodules)

In [None]:
# Build the masks that are read back later via get_mask() (presumably from
# the nodule info fetched in the previous step -- confirm in create_mask).
batch_dumped.create_mask()

### Visual check: scan slices next to their masks

In [None]:
def plot_arr_slices(height, *arrays, clim=(-1200, 300)):
    """Show one slice of each given array side by side in grayscale.

    Args:
        height: relative position along the first axis, nominally in [0, 1].
            The slice index is ``int(depth * height)``, clamped to the valid
            range so that ``height == 1.0`` shows the last slice instead of
            raising an IndexError.
        *arrays: one or more arrays; ``arr[k]`` must be a 2-D image.
        clim: colour limits applied to arrays whose value range exceeds 2.0;
            arrays with a narrower range are drawn with limits ``(0, 1)``.

    Raises:
        ValueError: if no arrays are passed.
    """
    if not arrays:
        raise ValueError('plot_arr_slices needs at least one array to display')

    # squeeze=False always returns a 2-D grid of axes; without it a single
    # array yields a bare Axes object and indexing it fails.
    _, axes = plt.subplots(1, len(arrays), squeeze=False,
                           figsize=(14, len(arrays) * 8))

    for ax, arr in zip(axes[0], arrays):
        depth = arr.shape[0]
        # clamp into [0, depth - 1] so boundary heights stay in range
        n_slice = min(max(int(depth * height), 0), depth - 1)

        # wide value range -> use the caller's limits; otherwise use (0, 1)
        wide_range = np.max(arr) - np.min(arr) > 2.0
        limits = clim if wide_range else (0, 1)

        ax.imshow(arr[n_slice], cmap=plt.cm.gray, clim=limits)
    plt.show()

In [None]:
# Position within batch_dumped of the item shown by the interactive viewer.
n_pat = 0

In [None]:
# Interactive viewer: the slider picks the relative height of the slice, and
# the selected item is drawn next to its mask.
interact(
    lambda height: plot_arr_slices(
        height,
        batch_dumped[n_pat],
        batch_dumped.get_mask(n_pat),
    ),
    height=(0.01, 0.99, 0.01),  # (min, max, step) of the slider
)

In [None]:
# Print the documentation of CTIMB.unify_spacing for reference.
help(CTIMB.unify_spacing)