Purpose: create a database containing T1W images and disease region masks for model training (or testing).

Each image (volume) is normalized by three standard deviations of its intensity distribution.

Input:
- path to the folder containing the images;
- path to the folder containing the masks;
- list of scan ids;
- path to the folder in which to store the generated database.

Output:
- a database file in the hdf5 format, stored in the specified folder;
- a text file describing the data, the normalization and the size of the resulting arrays,
  also stored in the specified folder.

In [None]:
import os

import nibabel as nib
import numpy as np
import tables

from db_modify_array import *

In [None]:
## Name of the database to generate
db_name = 'example_database'

## Scan ids (list of integers) whose images and masks go into the database
scans = [1012, 1013]

## Folders holding the input data: T1W images and disease region masks
image_path = ''
mask_path = ''

## Folder that receives the generated database and its info file.
## The folder string is concatenated directly with the file name, so it has
## to end with a path separator (or be empty for the current directory).
store_path = ''

## Text file '<store_path><db_name>_info.txt' that will receive the description
## of the database (if needed, modify the description written to it later on).
## The handle stays open here; it is written to and closed once the database is built.
file = open(''.join([store_path, db_name, '_info.txt']), mode = 'w')

In [None]:
## Build the database: loop over the scan ids and append the slices of every scan
## to two extendable arrays ('Images' and 'Masks') stored in a single hdf5 file,
## then write a short description of the result to the info text file.
##
## Uses names defined in the previous cells: image_path, mask_path, store_path,
## db_name, scans, the already opened text file handle `file`, and modify_array
## (imported from db_modify_array).
##
## Both the database file and the info text file are closed even when a scan
## fails to load or to be appended, so no open handles are left behind.
## Raises ValueError if `scans` is empty (there would be nothing to store).

## The database and its arrays are created on the first scan, once the slice
## size and data type are known; until then the handles stay None
db = None
image_storage = None
mask_storage = None

try:
    ## Fail early with a clear message instead of an undefined-name error later on
    if not scans:
        raise ValueError('No scan ids provided: `scans` is empty, nothing to store.')

    ## --------------------------------------------------------- Loop over scan ids in the list
    for scan in scans:

        ## Load current nifti image and retrieve numpy array, [H,W,N];
        ## modify array dimensions, dimensions order and datatype.
        ## Expected output of modify_array (defined in db_modify_array, not shown here):
        ## 4D numpy array of size [N,1,H,W], data type float32
        image = nib.load(image_path + str(scan) + '_T1W.nii.gz').get_fdata()
        image = modify_array(image)

        ## Same processing for the nifti mask of the current scan
        mask = nib.load(mask_path + str(scan) + '_disease_region.nii.gz').get_fdata()
        mask = modify_array(mask)

        print('Current scan: ', scan, image.shape, mask.shape)

        ## Normalize image intensities by 3 standard deviations of the intensity
        ## distribution of the whole volume.
        ## NOTE(review): a constant volume has std == 0 and would yield inf/NaN
        ## values here - confirm that such scans cannot occur in the input.
        std = np.std(image)
        normalized_image = image / (3 * std)

        ## First scan: create the database and its two extendable arrays
        if db is None:
            ## Size of a single slice (all dimensions but the first) and data type
            dims = tuple(normalized_image.shape[1:])
            dtype = normalized_image.dtype

            ## Compression library and level used for both arrays
            filters = tables.Filters(complevel = 6, complib = 'zlib')

            db = tables.open_file(store_path + db_name, mode = 'w')

            ## Storage for images: extendable along the first dimension,
            ## one slice per chunk
            image_storage = db.create_earray('/',
                                             name = 'Images',
                                             atom = tables.Atom.from_dtype(dtype),
                                             filters = filters,
                                             shape = (0,) + dims,
                                             chunkshape = (1,) + dims
                                            )

            ## Storage for masks: same layout and data type as the image storage
            mask_storage = db.create_earray('/',
                                            name = 'Masks',
                                            atom = tables.Atom.from_dtype(dtype),
                                            filters = filters,
                                            shape = (0,) + dims,
                                            chunkshape = (1,) + dims
                                           )

        ## Store current image and mask to the database
        image_storage.append(normalized_image)
        mask_storage.append(mask)
    #### ------------------------------------------------------------ End loop over scan ids

    ## Size of the resulting image array; its first dimension is the number of
    ## stored image slices
    dims = image_storage.shape

    ## Write information to the txt file
    info = '''
Following scans were used to create the database: %s.
Database contains T1W images and corresponding masks of disease region, each stored into
a 4D numpy array of size: %s
Each image (volume) was normalized by 3 standard deviation of its intensity distribution.
'''%(scans, dims)

    file.write(info)

finally:
    ## Always release both files, whether or not the database was completed
    try:
        file.close()
    finally:
        if db is not None:
            db.close()