In [2]:
import os
import sys
import glob
import shutil
import pandas as pd
import numpy as np
from ipywidgets import interact
from copy import deepcopy
import matplotlib.pyplot as plt
print('OK')

OK


In [3]:
# Make the directory two levels up importable (presumably where the
# `lung_cancer` package lives — the path is relative to the notebook's CWD).
# Guarded so that re-running this cell does not append a duplicate entry.
if '../..' not in sys.path:
    sys.path.append('../..')

In [4]:
from lung_cancer.dataset import FilesIndex, Dataset
from lung_cancer import CTImagesMaskedBatch as CTIMB

### Build a histogram of nodule positions

* Dataset: Luna, unified spacing

In [5]:
# Root folder of the unified-spacing dataset (absolute path; adjust per machine).
DIR_USP = '/data/unified_spacing/'

# Index built from every directory matching DIR_USP/*, then a dataset over
# that index which yields CTImagesMaskedBatch batches.
bloscix = FilesIndex(dirs=True, path=os.path.join(DIR_USP, '*'))
bloscset = Dataset(batch_class=CTIMB, index=bloscix)

In [6]:
# Peek at the files stored for the first item of the index.
os.listdir(os.path.join(DIR_USP, bloscix.indices[0]))

['images.blk', 'spacing.cpkl', 'images_shape.cpkl', 'origin.cpkl']

* Nodules CSV database

In [7]:
# Load the nodule annotations table; it is handed to `fetch_nodules_info`
# in the pipelines below. Absolute path — adjust per machine.
nodules = pd.read_csv('/notebooks/data/MRT/luna/CSVFILES/annotations.csv')

* Empty histo

In [24]:
# Hard-coded volume extent along each of the three axes; the histogram
# covers the half-open box [0, SHAPE[k]) split into 4 bins per axis.
SHAPE = [384, 448, 448]
ranges = [(0, extent) for extent in SHAPE]

# Histogram of an empty (0 x 3) point set: all-zero counts plus bin edges.
# Wrapped in a list so that the [counts, edges] container is mutable.
histo = list(np.histogramdd(np.zeros((0, 3)), range=ranges, bins=4))

* Update the histogram using all the info from the dataset

In [25]:
# Histogram-filling pipeline, assembled step by step:
#   1. load only 'spacing' and 'origin' from the blosc storage,
#   2. attach nodule info from the annotations table (images_loaded=False),
#   3. feed the result into `histo`.
ppl = bloscset.p.load(fmt='blosc', src_blosc=['spacing', 'origin'])
ppl = ppl.fetch_nodules_info(nodules, images_loaded=False)
ppl = ppl.update_nodules_histo(histo)

In [27]:
# Run the pipeline for one epoch with 100 items per batch.
# NOTE(review): `histo` is presumably updated in place, so re-running this cell
# without first re-creating the empty histogram would double-count — confirm.
ppl.run(n_epochs=1, batch_size=100)

<lung_cancer.dataset.dataset.pipeline.Pipeline at 0x7efd7d4adb00>

### Use the histo to sample nodules

In [78]:
# Size of each sampled crop along the three axes.
NOD_SHAPE = (64, 128, 128)

# Sampling pipeline: load scans with metadata, attach nodule info, build
# masks, then sample crops using the positional histogram computed above.
pplnods = bloscset.p.load(fmt='blosc', src_blosc=['images', 'spacing', 'origin'])
pplnods = pplnods.fetch_nodules_info(nodules).create_mask()
pplnods = pplnods.sample_nodules(
    batch_size=10,
    nodule_size=NOD_SHAPE,
    share=0.5,
    histo=histo,
    variance=(81, 300, 300),
)

In [79]:
# Batch generator over the sampling pipeline, requested with batch_size=6.
gennods = pplnods.gen_batch(batch_size=6)

In [80]:
# Pull one batch for inspection.
# NOTE: every execution of this cell advances the generator by one batch.
nods = next(gennods)

### Plots

In [81]:
def plot_arr_slices(height, *arrays, clim=(-1200, 300)):
    """Show the same relative slice of several 3D arrays side by side.

    Parameters
    ----------
    height : float
        Relative position along axis 0. Slice ``int(depth * height)`` of each
        array is drawn, clamped to the last slice so ``height == 1.0`` is valid.
    *arrays : arrays exposing ``.shape`` and integer indexing along axis 0
        Volumes to display, one panel each, left to right.
    clim : pair of numbers
        Colour limits applied to arrays whose value range (max - min) exceeds
        2.0; arrays with a narrower range are drawn with limits ``(0, 1)``.

    Raises
    ------
    ValueError
        If no arrays are passed.
    """
    if not arrays:
        raise ValueError('plot_arr_slices needs at least one array to plot')

    # squeeze=False keeps `axes` two-dimensional even for a single panel;
    # with the default, one panel yields a bare Axes that cannot be indexed.
    # NOTE(review): the figure height grows with the panel count although the
    # panels share one row — kept as in the original, confirm it is intended.
    fig, axes = plt.subplots(1, len(arrays), figsize=(14, len(arrays)*8),
                             squeeze=False)

    for i, arr in enumerate(arrays):
        depth = arr.shape[0]
        # Clamp the upper bound only: height == 1.0 would otherwise index one
        # past the last slice.
        n_slice = min(int(depth * height), depth - 1)

        # Wide value range -> caller-supplied limits; narrow range -> unit
        # limits.
        if np.max(arr) - np.min(arr) > 2.0:
            limits = clim
        else:
            limits = (0, 1)

        axes[0, i].imshow(arr[n_slice], cmap=plt.cm.gray, clim=limits)
    plt.show()

In [82]:
# Shape of the batch's image array. Presumably the sampled crops are stacked
# along axis 0 (10 crops x depth 64 = 640) — TODO confirm.
nods.images.shape

(640, 128, 128)

In [125]:
# Position (within the batch index) of the crop to display.
nodnum = 2

In [126]:
# Index label of the selected crop, as stored in the sampled batch.
nodix = nods.indices[nodnum]

In [127]:
# Interactive slice viewer for the selected crop: the slider sets the relative
# depth, the panels show the image followed by the mask.
# NOTE(review): 'masks' is requested twice, so the last two panels are
# identical — confirm whether a different component was intended.
interact(
    lambda height: plot_arr_slices(
        height,
        *(nods.get(nodix, component)
          for component in ('images', 'masks', 'masks')),
        clim=[0, 255]),
    height=(0.01, 0.99, 0.01))

<function __main__.<lambda>>

### Get only cancerous nodules

In [138]:
# Build a batch directly (no pipeline) from five hand-picked items of the
# index: positions 100, 110, 120, 130 and 140.
bch = CTIMB(bloscix.create_subset(bloscix.indices[[100, 110, 120, 130, 140]]))

In [139]:
# Load images together with spacing and origin from the blosc storage.
# NOTE: `bch` is rebound to the result, so this cell expects a freshly
# constructed batch as its input.
bch = bch.load(fmt='blosc', src_blosc=['images', 'spacing', 'origin'])

In [140]:
# Attach nodule info from the annotations table, then build the masks.
bch = bch.fetch_nodules_info(nodules).create_mask()

In [166]:
# Sample crops of size NOD_SHAPE from the loaded batch with all_cancerous=True.
# NOTE(review): per the section title this should yield only crops that contain
# nodules — confirm against the sample_nodules implementation.
only_cancer = bch.sample_nodules(nodule_size=NOD_SHAPE, all_cancerous=True,
                                 variance=(49, 196, 196))

In [188]:
# Position (within the `only_cancer` batch index) of the crop to display.
nodnum = 5

In [189]:
# Index label of the selected crop in the `only_cancer` batch.
nodix = only_cancer.indices[nodnum]

In [190]:
# Interactive slice viewer for the selected crop of the `only_cancer` batch:
# the slider sets the relative depth, the panels show the image and the mask.
# NOTE(review): 'masks' is requested twice, so the last two panels are
# identical — confirm whether a different component was intended.
interact(
    lambda height: plot_arr_slices(
        height,
        *(only_cancer.get(nodix, component)
          for component in ('images', 'masks', 'masks')),
        clim=[0, 255]),
    height=(0.01, 0.99, 0.01))

<function __main__.<lambda>>