In [2]:
from subprocess import CalledProcessError
import h5py

from collections import OrderedDict
from pathlib import Path

import numpy as np
import pandas as pd

from batchgenerators.augmentations.utils import pad_nd_image
from nnunet.preprocessing.preprocessing import PreprocessorFor2D
from tqdm import tqdm

# from brats.preprocessing.hdbet_wrapper import hd_bet

import nibabel as nib
from nibabel.processing import conform

In [3]:
# --- Locations on disk -------------------------------------------------------
# Directory checked for existence before the dataset is built; the sample scan
# loaded further down lives here.
PREP_DATA_DIR = Path('/home/jupyter/data/AIBL/AIBL_dissertacao_prep')
# CSV whose `path` column lists the images to process.
SPLIT_CSV_FPATH = Path('/home/jupyter/data/AIBL/csv_dataset_dissertacao.csv')
# HDF5 file that receives the extracted slices and their labels.
DATASET_FPATH = Path('/home/jupyter/data/AIBL/AIBL_slices_fix_2mm_split.hdf5')

# Scratch directory (relative to the working directory) for intermediate files.
tmpdir = Path('.tmpdir')

# --- Options -----------------------------------------------------------------
# When True the smaller per-scan target shape is selected downstream.
DOWNSIZE = True

In [4]:
def load_preprocess_for_nnunet(
        img_fpath,
        tmpdir='.tmpdir',
        patch_size=[192, 160],
        input_shape_must_be_divisible_by=[32, 32],
        normalization_schemes=OrderedDict([(0, 'nonCT')]),
        use_mask_for_norm=OrderedDict([(0, True)]),
        transpose_forward=[0, 1, 2],
        intensity_properties=None,
        target_spacing=[1., 1., 1.],
    ):
    """Skull-strip an image, run the nnU-Net 2D preprocessing and pad the result.

    Steps: `hd_bet` (fast mode) -> `PreprocessorFor2D.preprocess_test_case`
    -> keep a fixed window along the first spatial axis -> `pad_nd_image`.

    Parameters
    ----------
    img_fpath : path-like
        Image handed to `hd_bet`.
    tmpdir : str
        Directory handed to `hd_bet` as its second argument.
    patch_size : sequence of int
        Forwarded to `pad_nd_image` as `new_shape`.
    input_shape_must_be_divisible_by : sequence of int
        Forwarded to `pad_nd_image` as `shape_must_be_divisible_by`.
    normalization_schemes, use_mask_for_norm, transpose_forward, intensity_properties
        Forwarded unchanged to the `PreprocessorFor2D` constructor.
    target_spacing : sequence of float
        Forwarded to `preprocess_test_case`; its first entry also scales the
        slice window below.

    Returns
    -------
    The zero-padded array returned by `pad_nd_image` (the slicer is discarded).

    Notes
    -----
    The list / OrderedDict defaults are shared between calls; they are only
    read here, never mutated.
    """
    # NOTE(review): `hd_bet` must be in scope when this runs; the import at the
    # top of the notebook (`brats.preprocessing.hdbet_wrapper`) is commented out.
    brain_img_fpath, _ = hd_bet(img_fpath, tmpdir, mode='fast')

    prep = PreprocessorFor2D(normalization_schemes, use_mask_for_norm,
                             transpose_forward, intensity_properties)
    brain, _, prop = prep.preprocess_test_case([str(brain_img_fpath)], target_spacing)

    # get only slices with meaningful brain info
    # 35 and 115 are presumably offsets in mm along the first spatial axis
    # (they are divided by the spacing) -- TODO confirm.
    crop_lb = prop['crop_bbox'][0][0]
    # Shift the window into the cropped array's coordinates. Clamp at 0: a
    # negative bound would be read by the slice as "count from the end" and
    # silently select the wrong slices.
    lb = max(int(35 / target_spacing[0]) - crop_lb, 0)
    ub = max(int(115 / target_spacing[0]) - crop_lb, 0)
    brain = brain[:, lb:ub]

    padded_brain, _ = pad_nd_image(
        brain,
        new_shape=patch_size,
        mode='constant',
        kwargs={'constant_values': 0},
        return_slicer=True,
        shape_must_be_divisible_by=input_shape_must_be_divisible_by,
    )

    return padded_brain

In [20]:
# Make sure the scratch directory is there before anything is written to it.
tmpdir.mkdir(exist_ok=True)

# Shape expected for one preprocessed scan; the smaller one applies when
# DOWNSIZE is set.
target_shape = (40, 96, 96) if DOWNSIZE else (80, 192, 160)

# split data
df = pd.read_csv(SPLIT_CSV_FPATH)
# The files are addressed with a trailing `.gz` added to the CSV paths.
gz_paths = df['path'] + '.gz'
df['path'] = gz_paths
test_fpaths = gz_paths.unique()

In [21]:
import os
import nibabel as nib
import numpy as np
# Sanity check: open one scan from the prep directory to confirm nibabel can read it.
nib.load("/home/jupyter/data/AIBL/AIBL_dissertacao_prep/I1075847__80.nii.gz")

<nibabel.nifti1.Nifti1Image at 0x7fafd783f9d0>

In [24]:
# Scratch check: everything before the first '.nii' in the path string.
# NOTE(review): relies on `img_fpath` left over from another cell being a str.
img_fpath.partition('.nii')[0]

'/home/jupyter/data/AIBL/AIBL_dissertacao_prep/I450389__71'

In [31]:
# Scratch check: final path component of `img_fpath`.
# NOTE(review): `.name` needs a pathlib.Path, so this depends on `img_fpath`
# having been rebound to a Path by another cell -- hidden kernel state.
img_fpath.name

'I450389__71.nii.gz'

In [29]:
# Dry run of the per-image steps on the first path only (the loop breaks after
# one iteration); nothing is written to disk.
for img_fpath in tqdm(test_fpaths):
    # The label is the integer after the last '__' in the file name.
    age = int(img_fpath.split('.nii.gz')[0].split('__')[-1])

    img = nib.load(img_fpath)
    # Resample to 2 mm voxels on a grid with half as many voxels per axis.
    dsz_img = conform(img, out_shape=tuple(np.array(img.shape) // 2), voxel_size=(2., 2., 2.))
    # Join only the file name: `tmpdir / <absolute path>` would discard tmpdir
    # and point back at the source image.
    img_fpath = tmpdir / Path(img_fpath).name
    break
print(dsz_img)

  0%|          | 0/724 [00:00<?, ?it/s]

<class 'nibabel.nifti1.Nifti1Image'>
data shape (120, 120, 77)
affine: 
[[   2.    0.   -0. -119.]
 [   0.    2.   -0. -128.]
 [   0.    0.    2.  -67.]
 [   0.    0.    0.    1.]]
metadata:
<class 'nibabel.nifti1.Nifti1Header'> object, endian='<'
sizeof_hdr      : 348
data_type       : b''
db_name         : b''
extents         : 0
session_error   : 0
regular         : b'r'
dim_info        : 0
dim             : [  3 120 120  77   1   1   1   1]
intent_p1       : 0.0
intent_p2       : 0.0
intent_p3       : 0.0
intent_code     : none
datatype        : float32
bitpix          : 32
slice_start     : 0
pixdim          : [1. 2. 2. 2. 1. 1. 1. 1.]
vox_offset      : 0.0
scl_slope       : nan
scl_inter       : nan
slice_end       : 0
slice_code      : unknown
xyzt_units      : 2
cal_max         : 0.0
cal_min         : 0.0
slice_duration  : 0.0
toffset         : 0.0
glmax           : 0
glmin           : 0
descrip         : b''
aux_file        : b''
qform_code      : unknown
sform_code      : ali




In [None]:
if __name__ == '__main__':
    # Build (or resume building) the HDF5 dataset: for every image listed in the
    # CSV, downsample it, preprocess it and append its slices and label to the
    # `test` group of DATASET_FPATH.
    assert PREP_DATA_DIR.exists(), f"`{PREP_DATA_DIR}` doesn't exist"
    assert DATASET_FPATH.parent.exists(), f"`{DATASET_FPATH.parent}` doesn't exist"

    tmpdir.mkdir(exist_ok=True)

    # (rows appended per scan, slice height, slice width)
    target_shape = (40, 96, 96) if DOWNSIZE else (80, 192, 160)

    # split data
    df = pd.read_csv(SPLIT_CSV_FPATH)
    # The age parsing below splits on '.nii.gz', and the exploratory cell
    # appends '.gz' to the CSV paths; do the same here, but only when the
    # suffix is missing so already-complete paths keep working.
    test_fpaths = [
        fpath if fpath.endswith('.gz') else fpath + '.gz'
        for fpath in df.path.unique()
    ]

    i_test = 0

    # create dataset
    if DATASET_FPATH.exists():
        # check if there's any progress already
        with h5py.File(DATASET_FPATH, 'r') as h:
            # overwrite the last image just to be sure
            n_test = max((h['test']['y'].shape[0] // target_shape[0]) - 1, 0)

        # Resume at the first image that is not safely stored. Slicing from
        # `n_test - 1` would restart one image too early relative to `i_test`
        # and, for n_test == 0, would keep only the very last path.
        # NOTE(review): images skipped by update_dataset take no rows, so after
        # a run with skips the stored count no longer equals the position in
        # `test_fpaths` and resuming may reprocess some images.
        test_fpaths = test_fpaths[n_test:]
        i_test = n_test * target_shape[0]
    else:
        with h5py.File(DATASET_FPATH, 'w') as h:

            test = h.create_group('test')
            # One row per 2-D slice; grows along axis 0 as images are appended.
            X_test = test.create_dataset(
                'X',
                (0, target_shape[1], target_shape[2]),
                maxshape=(None, target_shape[1], target_shape[2]),
                dtype='float32',
                chunks=(1, target_shape[1], target_shape[2]),
                compression='gzip',
            )
            # One label per slice row.
            y_test = test.create_dataset(
                'y',
                (0,),
                maxshape=(None,),
                dtype='uint8',
            )

    def update_dataset(imgs_fpaths, i, ds_name):
        """Append the preprocessed slices of each image to group `ds_name`.

        Parameters
        ----------
        imgs_fpaths : iterable of str or Path
            Images to process; the label is the integer after the last '__'
            in the file name (before '.nii.gz').
        i : int
            Row of `X` / `y` at which the first image is written.
        ds_name : str
            Name of the HDF5 group holding the `X` and `y` datasets.

        Images whose preprocessing raises CalledProcessError, or whose result
        does not have `target_shape`, are skipped and occupy no rows.
        """
        n_rows = target_shape[0]
        for img_fpath in tqdm(imgs_fpaths):
            age = int(str(img_fpath).split('.nii.gz')[0].split('__')[-1])

            img = nib.load(str(img_fpath))
            # Resample to 2 mm voxels on a grid with half as many voxels per axis.
            dsz_img = conform(img, out_shape=tuple(np.array(img.shape) // 2), voxel_size=(2., 2., 2.))
            # Paths from the CSV are plain strings; wrap in Path to get the
            # file name for the temporary copy.
            tmp_fpath = tmpdir / Path(img_fpath).name
            nib.save(dsz_img, str(tmp_fpath))

            try:
                brain = load_preprocess_for_nnunet(tmp_fpath, patch_size=[96, 80], target_spacing=[2., 2., 2.], tmpdir=str(tmpdir.resolve()))
            except CalledProcessError:
                print("WARNING!")
                # tmp_fpath.unlink()
                continue

            # brain[0] is the first entry along the leading axis (presumably
            # the single modality channel -- TODO confirm).
            if brain[0].shape == target_shape:
                with h5py.File(DATASET_FPATH, 'r+') as h:
                    X = h[ds_name]['X']
                    y = h[ds_name]['y']

                    X.resize(i + n_rows, axis=0)
                    y.resize(i + n_rows, axis=0)

                    X[i:i + n_rows] = brain[0]
                    y[i:i + n_rows] = age
                i += n_rows
            else:
                # Unexpected shape: drop the temporary copy and move on.
                tmp_fpath.unlink()


    print('Working on test images')
    update_dataset(test_fpaths, i_test, 'test')