In [6]:
import os
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
from random import sample
import utils

In [195]:
if __name__ == '__main__':
    data_dir = '/mnt/qb/baumgartner/cschmidt77_data/acdc_challenge/train'
    preproc_dir = '/home/baumgartner/cschmidt77/devel/ralis/acdc/preproc_data/'
    dS_ratio = 0.03
    dT_ratio = 0.09
    dR_ratio = 0.11
    dV_ratio = 0.77
    dS, dT, dR, dV = create_splits('/mnt/qb/baumgartner/cschmidt77_data/acdc_challenge/train', dS_ratio, dT_ratio, dR_ratio)
    data_splits = create_splits_array(dS, dT, dR, dV)
    npy_file = create_ref_data_splits_file(data_dir, preproc_dir, data_splits, '2D', (256, 256), (1.36719, 1.36719),force_overwrite=True)

In [161]:
def create_splits(data_dir, dS_ratio, dT_ratio, dR_ratio):  
    '''divides patients in four data splits with given ratio'''
    '''returns four splits with patient ids'''
    #create numpy array with patients
    patients = np.asarray([int(folder[-3:]) for folder in os.listdir(data_dir)])
    # random shuffle patients data set and create splits
    np.random.shuffle(patients)
    # define indices for np.split function
    dS_ind = int(dS_ratio*len(patients))
    dT_ind = dS_ind+int(dT_ratio*len(patients))
    dR_ind = dT_ind+int(dR_ratio*len(patients))
    #splits the random set in four splits, where dV is the rest
    dS, dT, dR, dV = np.split(patients, [dS_ind, dT_ind, dR_ind])
    return dS, dT, dR, dV

In [192]:
def create_splits_array(dS, dT, dR, dV):
    keys = ['d_S', 'd_T', 'd_R', 'd_V']
    values = [dS, dT, dR, dV]
    data_splits = np.array(dict(zip(keys, values)))
    return data_splits

In [194]:
def create_ref_data_splits_file(data_dir,
              preproc_dir, data_splits,
              mode,
              size,
              target_resolution,
              force_overwrite):
    '''Creates a file, where data splits will be defined '''
    # create folder for processed data with config naming
    size_str = '_'.join([str(s) for s in size])
    res_str = '_'.join([str(res) for res in target_resolution])

    data_file_name = 'data_%s_size_%s_res_%s_al_splits.npy' % (mode, size_str, res_str)
    data_file_path = os.path.join(preproc_dir, data_file_name)
    #print(data_file_path)
    
    # create new npy file to save data
    npy_file = np.save(data_file_name, data_splits)  
    return npy_file

In [5]:
print(os.listdir(data_dir))

['patient001', 'patient002', 'patient003', 'patient004', 'patient005', 'patient006', 'patient007', 'patient008', 'patient009', 'patient010', 'patient011', 'patient012', 'patient013', 'patient014', 'patient015', 'patient016', 'patient017', 'patient018', 'patient019', 'patient020', 'patient021', 'patient022', 'patient023', 'patient024', 'patient025', 'patient026', 'patient027', 'patient028', 'patient029', 'patient030', 'patient031', 'patient032', 'patient033', 'patient034', 'patient035', 'patient036', 'patient037', 'patient038', 'patient039', 'patient040', 'patient041', 'patient042', 'patient043', 'patient044', 'patient045', 'patient046', 'patient047', 'patient048', 'patient049', 'patient050', 'patient051', 'patient052', 'patient053', 'patient054', 'patient055', 'patient056', 'patient057', 'patient058', 'patient059', 'patient060', 'patient061', 'patient062', 'patient063', 'patient064', 'patient065', 'patient066', 'patient067', 'patient068', 'patient069', 'patient070', 'patient071', 'pati

In [83]:
###################################deprecated
def create_splits_slowly(data_dir, dS_ratio, dT_ratio, dR_ratio, dV_ratio):
    '''create data splits depending on ratios'''
    patients = []
    #iterate over all folders in data directory (patient001, patient002,..., patient100)
    for pat_folder in os.listdir(data_dir):
        #patient folder path
        folder_path = os.path.join(data_dir, pat_folder)        
        if os.path.isdir(folder_path):
            #add patient names to list
            patients.append(pat_folder)
    
    num_patients = len(patients)
    num_dS = int(dS_ratio*num_patients)
    num_dT = int(dT_ratio*num_patients)
    num_dR = int(dR_ratio*num_patients)
    num_dV = num_patients - (num_dS+num_dT+num_dR)

    #sample without replacement from patients and delete the already sampled     
    dS = np.random.choice(patients, num_dS, replace=False)
    for elem in dS:
        patients.remove(elem)
    
    dT = np.random.choice(patients, num_dT, replace=False)
    for elem in dT:
        patients.remove(elem)
  
    dR = np.random.choice(patients, num_dR, replace=False)
    for elem in dR:
        patients.remove(elem)
  
    dV = np.random.choice(patients, num_dV, replace=False)
    return dS, dT, dR, dV

In [85]:

dS, dT, dR, dV = create_splits_slowly('/mnt/qb/baumgartner/cschmidt77_data/acdc_challenge/train', dS_ratio, dT_ratio, dR_ratio, dV_ratio)

In [157]:
'''
Idea for how to prepare data:
- Iterate over patient folder and store image 4D arrays (.nii.gz files) together with Info.cfg to one patient
- corresponding Info.cfg for each image file?
- all images belonging to one patient also in one data split? 
'''

In [158]:
dS, dT, dR, dV = create_splits('/mnt/qb/baumgartner/cschmidt77_data/acdc_challenge/train', dS_ratio, dT_ratio, dR_ratio)

In [196]:
#print(subdir): '/mnt/qb/baumgartner/cschmidt77_data/acdc_challenge/train'
#print(dirs): 'patient001', 'patient002',...
#print(files):  Info.cfg, patient001_4d.nii.gz, patient001_frame01.nii.gz
#patient001_frame01_gt.nii.gz, patient001_frame12.nii.gz, patient001_frame12_gt.nii.gz
# Attention! 2 times 'Info.cfg'
def prepare_data(data_dir):
    #dictionary with infos from info file for each patient
    info = {}
    # contains all info dictionaries
    infos_list = []
    
    diag_list = []
    for subdir, dirs, files in os.walk(data_dir): 
        for file in files:
            #print(os.path.join(subdir, file))  
            #iterate over info files and save infos in dictionary
            if 'Info.cfg' in file:
                for line in open(os.path.join(subdir, 'Info.cfg')):
                    label, value = line.split(':')
                    # to avoid storing empty labels
                    if label:
                        info[label] = value.rstrip('\n').lstrip(' ')
                infos_list.append(info)
                
                
                info = {}
            else:
                patient_id = dirs.lstrip('patient')
                print(patient_id)        
                
    return infos_list

In [197]:
prepare_data(data_dir)

AttributeError: 'list' object has no attribute 'lstrip'