In [None]:
'''
1. Make folder download/ACDC
2. Download dataset .zip file into download/ACDC
3. Make folder download/ACDC/database/data
4. Extract archive.zip into download/ACDC
    archive.zip
        |- database
            |- training
                |-patientxxx
                    |-*.nii
            |- testing
                |-patientxxx
                    |-*.nii
5. Move all patient folders into download/ACDC/database/data
6. Move all 3D image files (*.nii, excluding *_gt.nii and *_4d.nii) into download/ACDC/image
7. Move all ground-truth files (*_gt.nii) into download/ACDC/label
8. Rename the 3D images of each patient so that the frame numbers are consecutive
    patient001
        |- patient001_frame01.nii => patient001_frame01.nii
        |- patient001_frame14.nii => patient001_frame02.nii
        |- patient001_frame01_gt.nii => patient001_frame01_gt.nii
        |- patient001_frame14_gt.nii => patient001_frame02_gt.nii
9. Convert each .nii file into .h5 files (read with the SimpleITK library, written with h5py)
    * Train: split each frame into slices
        patient003_frame01.nii => |- patient003_frame01_slice_1.h5
                                  |- patient003_frame01_slice_2.h5
                                  |- ...
                                  |- patient003_frame01_slice_9.h5
                                  |- patient003_frame01_slice_10.h5
    * Val, Test: keep each frame as a whole volume
        patient001_frame01.nii => patient001_frame01.h5
10. Move folder download/ACDC/data into data/ACDC

'''

This code runs on Linux only.

In [None]:
# Create the working directory and enter it. Every relative path used below
# (archive.zip, database/, image/, label/) is resolved from download/ACDC.
%mkdir download
%mkdir download/ACDC
%cd download/ACDC

# Download dataset ACDC
# NOTE(review): the argument is presumably a Google Drive file id; the
# following step expects the download to be saved as archive.zip.
!gdown 1sB0geMpsve2Sz3HCbpIpx-JqiPdCHGTA

# Unzip
# -q keeps unzip quiet; the steps below expect the archive to unpack into
# database/training and database/testing.
!unzip -q archive.zip

# Move all patient folders to one folder "data"
%mkdir database/data
!mv database/training/* database/data
!mv database/testing/* database/data

# Filter image and label to two folder
# image/ receives the renamed 3D volumes, label/ the matching *_gt.nii masks.
%mkdir image
%mkdir label
import os
import glob
# Flatten the per-patient folders into ./image and ./label, renumbering the
# frames of each patient consecutively (frame01, frame02, ...) in sorted order.
import shutil  # local import: this cell is a script and only it needs shutil

data_root = './database/data'

# Keep directories only. This drops MANDATORY_CITATION.md (and any other stray
# file) without raising ValueError when the citation file is not present.
patients = sorted(
    entry for entry in os.listdir(data_root)
    if os.path.isdir(os.path.join(data_root, entry))
)

for patient in patients:
    patient_dir = os.path.join(data_root, patient)
    files = os.listdir(patient_dir)

    # Ground-truth masks, and the 3D volumes that are neither masks nor the
    # 4D sequence. Both lists are sorted so that index i of one pairs with
    # index i of the other.
    masks = sorted(file for file in files if '_gt.nii' in file)
    imgs = sorted(
        file for file in files
        if '.nii' in file and file not in masks and '_4d' not in file
    )

    # Fail with an explicit message instead of an IndexError half-way through
    # the moves when a volume has no mask to pair with.
    if len(masks) < len(imgs):
        raise ValueError(
            f'{patient}: found {len(imgs)} image volume(s) '
            f'but only {len(masks)} mask(s)'
        )

    for idx, (img_name, mask_name) in enumerate(zip(imgs, masks), start=1):
        new_frame = '%s_frame%02d.nii' % (patient, idx)
        new_gt_frame = '%s_frame%02d_gt.nii' % (patient, idx)
        print(f'Old: {img_name} => New: {new_frame}')
        print(f'Old: {mask_name} => New: {new_gt_frame}')

        # shutil.move raises on failure, whereas os.system('mv ...') ignored
        # it, and no shell is involved so file names need no quoting.
        shutil.move(os.path.join(patient_dir, img_name),
                    os.path.join('./image', new_frame))
        shutil.move(os.path.join(patient_dir, mask_name),
                    os.path.join('./label', new_gt_frame))

In [None]:
# Install lib for processing data
# NOTE(review): the version is not pinned, and h5py (imported below) is not
# installed here; presumably it is already available in the environment.
%pip install SimpleITK

# Processing data
# Output folder for the generated .h5 files, created relative to the current
# directory (download/ACDC, entered by the %cd in the earlier cell).
%mkdir data

import glob
import os

import h5py
import numpy as np
import SimpleITK as sitk

def _read_case_list(list_path):
    """Return the entries of a split list file, one per non-blank line.

    Each line is stripped of surrounding whitespace (including a trailing
    '\\r' from CRLF files) and blank lines are dropped. An empty entry must
    never reach the path selection below: it is matched as a substring, and
    the empty string is a substring of every path.
    """
    with open(list_path, 'r') as list_file:
        stripped = (line.strip() for line in list_file)
        return [entry for entry in stripped if entry]


# Case identifiers of each split, read from the project's data folder.
train = _read_case_list('../../data/ACDC/train.list')
val = _read_case_list('../../data/ACDC/val.list')
test = _read_case_list('../../data/ACDC/test.list')

# Running total of slices written; incremented by the conversion loop below.
slice_num = 0

# Note: despite the name, this holds the image volumes found under ./image.
mask_path = glob.glob("./image/*.nii")
mask_path.sort()

# Volumes whose path contains a training entry, grouped by entry in list order.
train_path = [
    volume
    for case_id in train
    for volume in mask_path
    if case_id in volume
]

# Same selection for the validation and test entries taken together.
val_test_path = [
    volume
    for case_id in val + test
    for volume in mask_path
    if case_id in volume
]

def _load_case(image_file):
    """Load one image volume together with its ground-truth mask.

    Parameters
    ----------
    image_file : str
        Path of a volume under ./image. The mask path is derived from it by
        replacing "image" with "label" and ".nii" with "_gt.nii".

    Returns
    -------
    tuple or None
        (item, image, mask): item is the file name without its extension,
        image is the min-max normalised array cast to float32, and mask is
        the label array as read. None when the mask file does not exist, in
        which case the caller skips the volume.
    """
    msk_path = image_file.replace("image", "label").replace(".nii", "_gt.nii")
    if not os.path.exists(msk_path):
        return None
    print(msk_path)

    image = sitk.GetArrayFromImage(sitk.ReadImage(image_file))
    mask = sitk.GetArrayFromImage(sitk.ReadImage(msk_path))

    lowest = image.min()
    value_range = image.max() - lowest
    if value_range != 0:
        image = (image - lowest) / value_range
    else:
        # Constant volume: the plain formula would divide 0 by 0 and fill the
        # array with NaN, so store zeros instead.
        image = np.zeros(image.shape)
    print(image.shape)
    image = image.astype(np.float32)

    item = image_file.split("/")[-1].split(".")[0]
    if image.shape != mask.shape:
        # Reported only; the volume is still written, as before.
        print("Error")
    print(item)
    return item, image, mask


def _write_h5(out_path, image, mask):
    """Write image and mask as gzip-compressed datasets 'image' and 'label'."""
    # The context manager closes the file even if a dataset write fails.
    with h5py.File(out_path, 'w') as h5_file:
        h5_file.create_dataset('image', data=image, compression="gzip")
        h5_file.create_dataset('label', data=mask, compression="gzip")


# Training volumes: one .h5 file per index along the first array axis,
# numbered from 1.
for case in train_path:
    loaded = _load_case(case)
    if loaded is None:
        continue
    item, image, mask = loaded
    for slice_ind in range(image.shape[0]):
        _write_h5('./data/{}_slice_{}.h5'.format(item, slice_ind + 1),
                  image[slice_ind], mask[slice_ind])
        slice_num += 1

# Validation and test volumes: one .h5 file per volume, not split.
for case in val_test_path:
    loaded = _load_case(case)
    if loaded is None:
        continue
    item, image, mask = loaded
    _write_h5('./data/{}.h5'.format(item), image, mask)

print("Converted all ACDC volumes to 2D slices")
print("Total {} slices".format(slice_num))

# Move folder data to data/ACDC/
# ../../ climbs back out of download/ACDC, the directory entered by the
# earlier %cd, so the target is the data/ACDC folder the .list files were
# read from.
!mv data ../../data/ACDC

In [1]:
from zipfile import ZipFile 
import os 
  
def get_all_file_paths(directory):
    """Collect the path of every file found under ``directory``.

    The tree is walked recursively with ``os.walk``. Each returned path is
    the walked folder joined with the file name, so every path starts with
    ``directory`` exactly as it was passed in. Folders themselves are not
    listed; only the files they contain are.

    Returns a list of path strings, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(directory)
        for name in names
    ]

# Folder whose contents are archived, relative to the current directory.
directory = './data/ACDC'

# Gather every file below that folder, sub-folders included.
file_paths = get_all_file_paths(directory)

# printing the list of all files to be zipped
print('Following files will be zipped:')
for file_name in file_paths:
    print(file_name)

# writing files to a zipfile
# The handle is named `archive`, not `zip`: at notebook scope a `zip` binding
# would replace the built-in zip() for every cell executed afterwards.
with ZipFile('ACDC.zip', 'w') as archive:
    # writing each file one by one, stored under the path it was found at
    for file_path in file_paths:
        archive.write(file_path)

print('All files zipped successfully!')

Following files will be zipped:
./data/ACDC/all_slices.list
./data/ACDC/val.list
./data/ACDC/train.list
./data/ACDC/train_slices.list
./data/ACDC/test.list
./data/ACDC/data/patient038_frame02_slice_7.h5
./data/ACDC/data/patient006_frame02_slice_7.h5
./data/ACDC/data/patient060_frame02_slice_3.h5
./data/ACDC/data/patient071_frame02_slice_4.h5
./data/ACDC/data/patient021_frame01_slice_2.h5
./data/ACDC/data/patient047_frame01_slice_6.h5
./data/ACDC/data/patient056_frame01_slice_1.h5
./data/ACDC/data/patient086_frame01_slice_7.h5
./data/ACDC/data/patient079_frame01_slice_6.h5
./data/ACDC/data/patient035_frame01_slice_11.h5
./data/ACDC/data/patient034_frame01_slice_8.h5
./data/ACDC/data/patient061_frame01_slice_5.h5
./data/ACDC/data/patient016_frame01_slice_6.h5
./data/ACDC/data/patient070_frame01_slice_2.h5
./data/ACDC/data/patient021_frame01_slice_10.h5
./data/ACDC/data/patient069_frame02_slice_7.h5
./data/ACDC/data/patient096_frame02_slice_6.h5
./data/ACDC/data/patient020_frame02_slice_4