In [1]:
import os
import sys

In [4]:
def search_file_in_folder_list(folder_list, file_name):
    """
    Find a file by its exact name in a list of folders.

    The folders are probed in the given order and the first one that
    contains a regular file called ``file_name`` wins.

    :param folder_list: a folder, or a list of folders, to search
    :param file_name: name of the file inside a folder (matched exactly,
        not as a substring)
    :return: the full path ``os.path.join(folder, file_name)`` of the first match
    :raises ValueError: if none of the folders contains the file
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(folder_list, str):
        folder_list = [folder_list]

    for folder in folder_list:
        full_file_name = os.path.join(folder, file_name)
        if os.path.isfile(full_file_name):
            return full_file_name

    # Also reached for an empty folder list.
    raise ValueError('file not exist: {0:}'.format(file_name))

def write_data_list(output_file, data_folder_list, subject_list, img_postfix='_Image', label_postfix='_Label'):
    """
    Collect the image/label file pair of every listed subject and write the pairs to a text file.

    Each file in ``subject_list`` is read line by line; every non-blank line is
    taken as a subject prefix. For each subject the files
    ``<subject_prefix><img_postfix>.nii.gz`` and ``<subject_prefix><label_postfix>.nii.gz``
    are looked up in ``data_folder_list``.

    :param output_file: path of the text file to (over)write; it receives one
        ``<image path>,<label path>`` line per subject
    :param data_folder_list: a directory, or a list of directories, to search
    :param subject_list: a text file, or a list of text files, holding one subject prefix per line
    :param img_postfix: postfix for image filenames
    :param label_postfix: postfix for label filenames
    :return: list of dicts ``{'img': <image path>, 'seg': <label path>}``, in reading order
    :raises ValueError: raised by ``search_file_in_folder_list`` when an image or
        label file is not found in any of the folders
    """
    if isinstance(data_folder_list, str):
        data_folder_list = [data_folder_list]
    if isinstance(subject_list, str):
        subject_list = [subject_list]

    full_list = []
    for scan_list in subject_list:
        with open(scan_list) as f:
            for line in f:
                subject = line.strip()
                # Blank lines (e.g. a trailing newline at the end of the list)
                # are not subjects; skipping them avoids a bogus lookup of
                # "<postfix>.nii.gz".
                if not subject:
                    continue
                image_basename = "{}{}.nii.gz".format(subject, img_postfix)
                label_basename = "{}{}.nii.gz".format(subject, label_postfix)
                # The search raises if a file is missing, so every path
                # returned here is known to exist.
                image_filename = search_file_in_folder_list(data_folder_list, image_basename)
                label_filename = search_file_in_folder_list(data_folder_list, label_basename)
                full_list.append({'img': image_filename, 'seg': label_filename})

    # The output is only opened once all lookups succeeded; the context
    # manager closes it even if a write fails.
    with open(output_file, "w") as of:
        of.writelines("{},{}\n".format(entry['img'], entry['seg']) for entry in full_list)

    return full_list
    
# Folders that are searched for the image and label volumes.
data_root = [
    "/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset/GroupA",
    "/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset/GroupB1",
    "/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset/GroupB2",
    "/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset_extension/GroupC",
    "/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset_extension/GroupD",
    "/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset_extension/GroupE",
    "/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset_extension/GroupF",
]

# Text files holding the subject IDs: one file for training, two for validation.
list_root = "/mnt/data/mranzini/Desktop/GIFT-Surg/Retraining_with_expanded_dataset/config/file_names"
training_list = os.path.join(list_root, "list_train_files.txt")
validation_list = [
    os.path.join(list_root, list_name)
    for list_name in ("list_validation_h_files.txt", "list_validation_p_files.txt")
]

# Destination files for the generated "<image>,<label>" lists.
output_file_train = "/mnt/data/mranzini/Desktop/GIFT-Surg/FBS_Monai/train_monaifbs/list_training_files_monaifbs_dynUnet.txt"
output_file_valid = "/mnt/data/mranzini/Desktop/GIFT-Surg/FBS_Monai/train_monaifbs/list_validation_files_monaifbs_dynUnet.txt"

# Build the training list, then show its size and its first and last entries.
train_files = write_data_list(
    output_file_train,
    data_root,
    training_list,
    img_postfix='_Image',
    label_postfix='_Label',
)
print(len(train_files))
print(train_files[0])
print(train_files[-1])

# Same for the validation list, which merges both validation subject files.
val_files = write_data_list(
    output_file_valid,
    data_root,
    validation_list,
    img_postfix='_Image',
    label_postfix='_Label',
)
print(len(val_files))
print(val_files[0])
print(val_files[-1])

316
{'img': '/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset/GroupA/a01_02_Image.nii.gz', 'seg': '/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset/GroupA/a01_02_Label.nii.gz'}
{'img': '/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset_extension/GroupE/E18_02_Image.nii.gz', 'seg': '/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset_extension/GroupE/E18_02_Label.nii.gz'}
50
{'img': '/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset/GroupA/a04_02_Image.nii.gz', 'seg': '/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset/GroupA/a04_02_Label.nii.gz'}
{'img': '/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset_extension/GroupE/E11_08_Image.nii.gz', 'seg': '/mnt/data/mranzini/Desktop/GIFT-Surg/Data/NeuroImage_dataset_extension/GroupE/E11_08_Label.nii.gz'}
