In [48]:
import tensorflow as tf
import pydicom as dicom
import glob
import matplotlib.pyplot as plt
import os
import nibabel as nib
from skimage.io import imsave
import numpy as np
from tensorflow.data.experimental import sample_from_datasets

In [9]:
def get_folders(data_root=r'C:\Users\Joe Krinke\Desktop\BraTS 2019 Data\MICCAI_BraTS_2019_Data_Training'):
    """
    Returns the names of all the case folders in the HGG and LGG directories.
    inputs:
        data_root (string): Directory that contains the 'HGG' and 'LGG'
            sub-directories. Defaults to the original hard-coded location, so
            calling get_folders() with no arguments lists the same directories as before.
    outputs:
        (HGG_Folders, LGG_Folders): Tuple of two lists of folder names. Each
            list is sorted alphabetically and contains only sub-directories.
    """
    def list_case_folders(grade):
        grade_dir = os.path.join(data_root, grade)
        # os.listdir returns entries in arbitrary order and also reports plain
        # files; sort for a reproducible case order and keep directories only
        # so stray files are never mistaken for a case folder.
        return sorted(
            entry for entry in os.listdir(grade_dir)
            if os.path.isdir(os.path.join(grade_dir, entry))
        )

    HGG_Folders = list_case_folders('HGG')
    LGG_Folders = list_case_folders('LGG')
    return (HGG_Folders, LGG_Folders)

In [10]:
def load_nii_data(filename):
    """
    Load in an nii file and return an array containing the pixel data.
    inputs:
        filename (string): The name of the nii file you want to load.
    outputs:
        array (uint8 numpy array): A numpy array of the given image. Data that
            already lies within [0, 255] is cast directly. Data with values
            outside that range is min-max rescaled to [0, 255] (per file)
            before the cast, so intensities are not wrapped around.
    """
    image = nib.load(filename)
    array = image.get_fdata()
    if array.size:
        lowest = array.min()
        highest = array.max()
        # Casting floats outside [0, 255] to uint8 wraps around / is undefined,
        # which silently corrupts the intensities. Rescale those volumes instead.
        if lowest < 0 or highest > 255:
            span = highest - lowest
            if span > 0:
                array = np.rint((array - lowest) / span * 255.0)
            else:
                # Constant out-of-range volume: nothing to scale, map it to zero.
                array = np.zeros_like(array)
    array = array.astype(np.uint8)
    return(array)

In [44]:
def LGG_data_generator():
    """
    Create dataset of LGG data.
    inputs:
        None. The data location is the hard-coded `path` below and the case
        folders come from get_folders().
    outputs:
        Yields (image, mask) tuples of 2D slices taken along the third axis of
        each volume. Each mask slice appears with its corresponding image
        slice, so masks appear multiple times (once for each image type:
        flair, t1, t2, t1ce). Every image slice is yielded exactly once.
    """
    path = r'C:/Users/Joe Krinke/Desktop/BraTS 2019 Data/MICCAI_BraTS_2019_Data_Training/'
    image_types = ['flair', 't1', 't2', 't1ce']
    _, LGG_Folders = get_folders()

    for folder in LGG_Folders:
        # Every file of a case shares the prefix <path>LGG/<case>/<case>_
        case_prefix = path + 'LGG/' + folder + '/' + folder + '_'
        mask = load_nii_data(case_prefix + 'seg' + '.nii.gz')
        for modality in image_types:
            data = load_nii_data(case_prefix + modality + '.nii.gz')
            for j in range(data.shape[2]):
                # One yield per slice. The previous if/if/if-else chain let
                # 'flair' and 't1' fall through to the final else and emitted
                # those slices twice.
                yield (data[:, :, j], mask[:, :, j])

def HGG_data_generator():
    """
    Create dataset of HGG data.
    inputs:
        None. The data location is the hard-coded `path` below and the case
        folders come from get_folders().
    outputs:
        Yields (image, mask) tuples of 2D slices taken along the third axis of
        each volume. Each mask slice appears with its corresponding image
        slice, so masks appear multiple times (once for each image type:
        flair, t1, t2, t1ce). Every image slice is yielded exactly once.
    """
    path = r'C:/Users/Joe Krinke/Desktop/BraTS 2019 Data/MICCAI_BraTS_2019_Data_Training/'
    image_types = ['flair', 't1', 't2', 't1ce']
    HGG_Folders, _ = get_folders()

    for folder in HGG_Folders:
        # Every file of a case shares the prefix <path>HGG/<case>/<case>_
        case_prefix = path + 'HGG/' + folder + '/' + folder + '_'
        mask = load_nii_data(case_prefix + 'seg' + '.nii.gz')
        for modality in image_types:
            data = load_nii_data(case_prefix + modality + '.nii.gz')
            for j in range(data.shape[2]):
                # One yield per slice. The previous if/if/if-else chain let
                # 'flair' and 't1' fall through to the final else and emitted
                # those slices twice.
                yield (data[:, :, j], mask[:, :, j])

In [49]:
# Build one tf.data pipeline per tumour grade. Both generators emit
# (image, mask) pairs declared here as 240x240 uint8 tensors, so the two
# datasets share the same element spec.
HGG_dataset, LGG_dataset = (
    tf.data.Dataset.from_generator(
        slice_generator,
        output_types=(tf.uint8, tf.uint8),
        output_shapes=(tf.TensorShape([240, 240]), tf.TensorShape([240, 240])),
    )
    for slice_generator in (HGG_data_generator, LGG_data_generator)
)

In [50]:
# Combine the two grade-specific datasets into one master dataset by sampling
# elements from them; the fixed seed keeps the sampling order repeatable.
full_dataset = tf.data.experimental.sample_from_datasets(
    [HGG_dataset, LGG_dataset],
    seed=123,
)