In [1]:
import os, sys, random, glob
import nibabel as nib
import numpy as np
from openpyxl import load_workbook
import pandas as pd
from itertools import islice

In [5]:
# Read the case spreadsheet into a DataFrame: the first row supplies the
# column names and the first cell of every following row becomes the index.
wb = load_workbook('TCIA_LGG_cases_159.xlsx')
ws = wb.active

data = ws.values                     # iterator over rows of cell values
header = next(data)                  # first row = column headings
cols = header[1:]                    # drop the heading of the index column

data = list(data)                    # remaining rows, materialised once
idx = [row[0] for row in data]       # first cell of each row -> index label
data = (row[1:] for row in data)     # the rest of each row -> column values

df = pd.DataFrame(data, index=idx, columns=cols)
df

Unnamed: 0,1p/19q,Grade,Type
LGG-210,n/n,2,Oligoastrocytoma
LGG-216,d/d,2,Oligoastrocytoma
LGG-219,n/n,3,Astrocytoma
LGG-220,d/d,2,Oligodendroglioma
LGG-223,d/d,3,Oligodendroglioma
LGG-225,d/d,2,Oligoastrocytoma
LGG-229,d/d,2,Oligodendroglioma
LGG-233,d/d,2,Oligodendroglioma
LGG-240,n/n,3,Astrocytoma
LGG-254,d/d,3,Oligodendroglioma


In [33]:
def lgg_generator ( dataPath, workbookFilename, batchSize=16, seed=1234 ):
    """
    A generator returning T1 slices as the 'X', segmentations as the 'Y' and
    1p/19q status as 'status'.  Only slices with segmentation data are returned.

    Basic algorithm is
      1. read the 1p/19q column of the workbook into a case -> status lookup
      2. find all sub-directories of `dataPath`
      3. randomly shuffle the order
      4. load the image and segmentation
      5. find segmented slices
      6. fill batchX, batchY and batchStatus appropriately
      7. yield copies of the three batch arrays when full
      8. continue forever

    Parameters
    ----------
    dataPath : str
        Directory with one sub-directory per case.  The sub-directory name is
        the case identifier looked up in the workbook; it must contain exactly
        one '*T1*.nii.gz' and one '*Segmentation.nii.gz' file.
    workbookFilename : str
        Workbook whose first row is a header containing a '1p/19q' column and
        whose first column holds the case identifiers.
    batchSize : int
        Number of slices per yielded batch (must be >= 1).
    seed : int
        Seed of the private random generator that shuffles the case order.
        The process-wide `random` state is left untouched.

    Yields
    ------
    (batchX, batchY, batchStatus)
        Arrays of shape (batchSize, 1, 256, 256), (batchSize, 1, 256, 256) and
        (batchSize, 1).  Status is 1 when the workbook says 'd/d', else 0.
        Every yield hands out fresh arrays, so earlier batches stay valid.

    Raises
    ------
    ValueError
        If `batchSize` < 1 or the workbook has no '1p/19q' header column.
    RuntimeError
        If a complete pass over `dataPath` finds no usable slice (the
        generator would otherwise spin forever without yielding).

    Because this is a generator, these errors surface on the first `next()`.
    """
    if batchSize < 1:
        raise ValueError ( "batchSize must be >= 1, got {}".format ( batchSize ) )

    # Read the status straight from the workbook instead of depending on a
    # DataFrame that happens to exist in the notebook's global namespace.
    rows = iter ( load_workbook ( workbookFilename ).active.values )
    header = next ( rows, None )
    if header is None or '1p/19q' not in header:
        raise ValueError ( "workbook {} has no '1p/19q' column".format ( workbookFilename ) )
    statusColumn = header.index ( '1p/19q' )
    deletionStatus = { row[0]: row[statusColumn]
                       for row in rows if row and row[0] is not None }

    # Private RNG: same shuffle sequence for a given seed, without reseeding
    # the global `random` module behind the caller's back.
    rng = random.Random ( seed )

    batchIdx = 0
    batchX = np.zeros ( ( batchSize, 1, 256, 256 ) )
    batchY = np.zeros ( ( batchSize, 1, 256, 256 ) )
    batchStatus = np.zeros ( ( batchSize, 1 ) )
    # run forever:
    while True:
        # glob order is filesystem dependent; sort first so that the seed
        # alone determines the shuffled order.
        caseDirs = sorted ( glob.glob ( os.path.join ( dataPath, '*' ) ) )
        rng.shuffle ( caseDirs )
        slicesThisPass = 0
        for caseDir in caseDirs:
            imageGlob = glob.glob ( os.path.join ( caseDir, '*T1*.nii.gz' ) )
            segmentationGlob = glob.glob ( os.path.join ( caseDir, '*Segmentation.nii.gz' ) )

            if len(imageGlob) != 1 or len(segmentationGlob) != 1:
                print ( "skipping {}".format(caseDir))
                continue

            # Look the case up before touching the (large) image files.
            case = os.path.basename ( caseDir )
            if case not in deletionStatus:
                print ( "skipping {}, no entry for it in {}".format ( caseDir, workbookFilename ) )
                continue
            status = 1 if deletionStatus[case] == 'd/d' else 0

            imageFilename = imageGlob[0]
            segmentationFilename = segmentationGlob[0]

            # np.asanyarray(img.dataobj) is nibabel's documented replacement
            # for the removed img.get_data().
            imageData = np.squeeze ( np.asanyarray ( nib.load ( imageFilename ).dataobj ) )
            segmentationData = np.squeeze ( np.asanyarray ( nib.load ( segmentationFilename ).dataobj ) )

            # A few of the images are 512x512, so downsample in-plane by 2
            if ( imageData.ndim == 3 and segmentationData.ndim == 3
                 and imageData.shape[0] == 512 and segmentationData.shape[0] == 512 ):
                imageData = imageData[::2,::2,:]
                segmentationData = segmentationData[::2,::2,:]

            if imageData.ndim != 3 or imageData.shape[:2] != ( 256, 256 ):
                print ( "skipping {}, data is wrong shape.  Expected 256x256, got {}".format ( imageFilename, imageData.shape ))
                continue
            if segmentationData.shape != imageData.shape:
                print ( "skipping {}, segmentation shape {} does not match image shape {}".format (
                    segmentationFilename, segmentationData.shape, imageData.shape ))
                continue

            # Slices that still contain segmentation after any downsampling.
            sliceIndex = np.nonzero ( np.any ( segmentationData != 0, axis=(0,1) ) )[0]

            for s in sliceIndex:
                batchX[batchIdx,0,:,:] = imageData[:,:,s]
                batchY[batchIdx,0,:,:] = segmentationData[:,:,s]
                batchStatus[batchIdx,0] = status
                batchIdx += 1
                slicesThisPass += 1
                if batchIdx == batchSize:
                    batchIdx = 0
                    # Yield copies: the working buffers are overwritten by
                    # the next batch, which would silently corrupt any batch
                    # the consumer is still holding (e.g. in a prefetch queue).
                    yield batchX.copy(), batchY.copy(), batchStatus.copy()

        if slicesThisPass == 0:
            raise RuntimeError ( "no usable segmented slices found under {}".format ( dataPath ) )
  


In [None]:
def lgg_status_generator ( dataPath, workbookFilename, batchSize=16, seed=1234 ):
    """
    A generator returning T1 slices as the 'X', and 1p/19q deletion status as the 'Y'.
    Only slices with segmentation data are returned.

    Calls lgg_generator with the same arguments and drops the segmentation
    from every batch it produces.

    Parameters
    ----------
    dataPath, workbookFilename, batchSize, seed
        Forwarded unchanged to lgg_generator.

    Yields
    ------
    (x, status)
        The first and third element of each lgg_generator batch.
    """
    # Iterate with `for` rather than `while True: next(gen)`: inside a
    # generator, a StopIteration escaping from next() is converted into a
    # RuntimeError (PEP 479), whereas the for loop simply ends.
    for x, _segmentation, status in lgg_generator ( dataPath, workbookFilename, batchSize=batchSize, seed=seed ):
        yield x, status
