In [35]:
import gzip
import os
import shutil
import tempfile
from collections import Counter

import numpy as np
import pandas as pd

import SimpleITK as sitk

In [66]:
def im_path_to_arr(im_path):
    """Read the image file at ``im_path`` with SimpleITK and return it as an array."""
    image = sitk.ReadImage(im_path)
    return sitk.GetArrayFromImage(image)


def j(path, fname):
    """Shorthand for ``os.path.join``: return ``fname`` joined onto ``path``."""
    joined = os.path.join(path, fname)
    return joined


def _gzipped_image_shape(gz_path):
    """Return the array shape of the gzip-compressed image stored at ``gz_path``.

    The archive is inflated into a throwaway scratch directory rather than
    next to the original, so the dataset folder is never written to, and the
    scratch copy is removed even when reading the image fails.
    """
    scratch_dir = tempfile.mkdtemp()
    try:
        # 'edema.nii.gz' -> 'edema.nii': keep the inner extension for the reader
        nii_name = os.path.splitext(os.path.basename(gz_path))[0]
        nii_path = os.path.join(scratch_dir, nii_name)
        with gzip.open(gz_path, 'rb') as fgz, open(nii_path, 'wb') as f:
            # stream the payload instead of holding the whole file in memory
            shutil.copyfileobj(fgz, f)
        return im_path_to_arr(nii_path).shape
    finally:
        shutil.rmtree(scratch_dir, ignore_errors=True)


def create_labels(directory):
    """Return the array shape shared by the label masks found in ``directory``.

    The masks looked for are ``edema.nii.gz``, ``necrosis.nii.gz`` and
    ``active.nii.gz``; any subset of them may be present.

    Parameters
    ----------
    directory : str
        Patient folder to scan.

    Returns
    -------
    tuple
        The single shape common to every mask that is present.

    Raises
    ------
    AssertionError
        If the folder holds none of the masks, or the masks that are present
        do not all have the same shape.
    """
    # hardcoded for Rembrandt dataset
    mask_names = ('edema', 'necrosis', 'active')
    # list the folder once instead of once per mask
    present = set(os.listdir(directory))
    shapes = {
        _gzipped_image_shape(j(directory, name + '.nii.gz'))
        for name in mask_names
        if name + '.nii.gz' in present
    }
    if len(shapes) != 1:
        # same exception type as the bare assert this replaces, but with
        # enough context to identify the offending folder
        raise AssertionError(
            '%s: expected the label masks to share exactly one shape, found %d: %s'
            % (directory, len(shapes), sorted(shapes))
        )
    return shapes.pop()

def create_data(directory):
    """Return the array shape shared by the four image files in ``directory``.

    Reads the flair, t1-post, t1-pre and t2 ``*_bcorr_brain.nii`` files and
    asserts that all of them have the same shape before returning it.
    """
    # read in the same order as before: flair, t1-post, t1-pre, t2
    image_files = (
        'flair_t1_bcorr_brain.nii',
        't1-post_pre_bcorr_brain.nii',
        't1-pre_bcorr_brain.nii',
        't2_t1_bcorr_brain.nii',
    )
    shapes = {im_path_to_arr(j(directory, fname)).shape for fname in image_files}

    assert len(shapes) == 1
    return shapes.pop()

In [55]:
data_dir = '/labs/gevaertlab/data/tumor_segmentation/REMBRANDTVerified/'

# every sub-directory of data_dir is one patient folder to classify
patients = [
    entry_path
    for entry_path in (os.path.join(data_dir, entry) for entry in os.listdir(data_dir))
    if os.path.isdir(entry_path)
]

# full path of every file inside those patient folders, patient by patient
all_files_path = [
    os.path.join(patient_dir, fname)
    for patient_dir in patients
    for fname in os.listdir(patient_dir)
]

In [58]:
# Tally which label masks each patient folder provides. The mask file names
# are checked directly: create_labels returns a shape (not the label names)
# and raises for a folder without any mask, so it cannot be used here.
label_names = ('active', 'edema', 'necrosis')  # fixed order -> stable tuples
all_labels = []
for ex_pat in patients:
    present = set(os.listdir(ex_pat))
    all_labels.append(
        tuple(name for name in label_names if name + '.nii.gz' in present)
    )
Counter(all_labels)

Counter({(): 1,
         ('active', 'edema'): 9,
         ('active', 'necrosis'): 1,
         ('active', 'necrosis', 'edema'): 31,
         ('edema',): 14,
         ('necrosis', 'edema'): 3})

In [67]:
# For every patient, check that the label masks and the image data agree on
# shape, then tally the shapes seen across the dataset.
all_shapes = []
for ex_pat in patients:
    if 'HF0899' in ex_pat:
        # this patient has no tumor, needs to be removed
        continue
    mask_shape = create_labels(ex_pat)
    data_shape = create_data(ex_pat)
    assert mask_shape == data_shape
    all_shapes.append(data_shape)
Counter(all_shapes)

Counter({(30, 256, 256): 1,
         (34, 512, 512): 1,
         (48, 256, 256): 1,
         (50, 256, 256): 4,
         (51, 256, 256): 3,
         (52, 256, 256): 2,
         (53, 256, 256): 3,
         (54, 256, 256): 9,
         (55, 256, 256): 5,
         (56, 256, 256): 11,
         (57, 256, 256): 4,
         (58, 256, 256): 6,
         (59, 256, 256): 2,
         (60, 256, 256): 5,
         (64, 256, 256): 1})