## ACDC Preprocess Not Normalized!
1. Randomly split the original training set into 80 training and 20 validation patients, and keep the 50 test patients for testing
2. Extract the ED and ES frame pair and the corresponding segmentations from each sequence
3. Crop every slice to 128 x 128, centred on the myocardium centroid of the middle ED slice
4. No normalization is applied at this point!!!

In [None]:
import numpy as np

def centre_crop(img, size, centre):
    """Crop a ``size`` x ``size`` window out of a 2-D image around ``centre``.

    The window is clipped to the image borders; wherever it would extend past
    the image the output is left at zero, so the requested centre always ends
    up at index ``(size // 2, size // 2)`` of the result.

    Args:
        img: 2-D array to crop (any dtype that can be assigned into a float
            array, e.g. intensities or a boolean mask).
        size: Side length of the square output.
        centre: ``(row, col)`` position in ``img`` to centre the crop on. Any
            integer-like pair is accepted (tuple, list, NumPy array).

    Returns:
        A new ``(size, size)`` float array (as created by ``np.zeros``).
    """
    half = size // 2

    # Work with plain Python ints: if ``centre`` carries a narrow NumPy dtype
    # (the ACDC pipeline passes uint8), mixing it with ``img.shape`` would do
    # the arithmetic below in that dtype and overflow for larger images.
    row, col = int(centre[0]), int(centre[1])

    # How far the window may extend in each direction without leaving img.
    up = min(half, row)
    down = min(half, img.shape[0] - row)
    left = min(half, col)
    right = min(half, img.shape[1] - col)

    img_new = np.zeros((size, size))
    img_new[half - up:half + down, half - left:half + right] = \
        img[row - up:row + down, col - left:col + right]
    return img_new

In [5]:
import random
import os

# Place your training and testing data in the following directories
training_dir = '../../Dataset/ACDC/database/training/'
testing_dir = '../../Dataset/ACDC/database/testing/'

# Fixed seed so that re-running this cell reproduces the same train/val split.
# Otherwise patients could move between the train and val folders that were
# already written to disk by an earlier run.
SPLIT_SEED = 0

# Keep only sub-folders (the preprocessing opens <patient>/Info.cfg, so stray
# files in the database folder would make it fail) and sort them, because
# os.listdir returns entries in arbitrary order.
patient_list = sorted(
    name for name in os.listdir(training_dir)
    if os.path.isdir(os.path.join(training_dir, name))
)
# A private generator leaves the global `random` state untouched.
random.Random(SPLIT_SEED).shuffle(patient_list)

# First 80 shuffled patients for training, the remainder for validation.
train_list = patient_list[:80]
val_list = patient_list[80:]

test_list = sorted(
    name for name in os.listdir(testing_dir)
    if os.path.isdir(os.path.join(testing_dir, name))
)

In [14]:
import os
import numpy as np
from scipy.io import loadmat, savemat
import nibabel as nib
from scipy import ndimage



def preprocess_ACDC(data_path, dest_path, name_list):
    """Write cropped ED/ES slice pairs of every listed ACDC patient as .mat files.

    For each patient folder ``data_path/<name>`` the ED and ES frame numbers
    are read from the first two lines of ``Info.cfg``. The two frames and
    their ``_gt`` segmentations are loaded, the voxels labelled 2 are kept as
    a binary mask, and every slice is cropped to 128 x 128 around one common
    centre: the centroid of the ED mask on the middle slice. One file
    ``<name>_slice_<z>.mat`` is written per slice with the keys ``im_ED``,
    ``im_ES``, ``myo_ED`` and ``myo_ES``. Intensities are not normalized.

    Args:
        data_path: Directory containing one sub-folder per patient.
        dest_path: Existing directory the .mat files are written to.
        name_list: Patient folder names to process.
    """
    crop_size = 128

    for patient in name_list:
        file_dir = os.path.join(data_path, patient)

        # Info.cfg is expected to hold "<key>: <frame>" for ED on its first
        # line and for ES on its second line.
        with open(os.path.join(file_dir, 'Info.cfg')) as f:
            line1 = f.readline()
            line2 = f.readline()

        ED_idx = '{:02d}'.format(int(line1.split(':')[1]))
        ES_idx = '{:02d}'.format(int(line2.split(':')[1]))  # type str

        frame_prefix = os.path.join(file_dir, patient + '_frame')

        im_ED = nib.load(frame_prefix + ED_idx + '.nii.gz').get_fdata()
        im_ES = nib.load(frame_prefix + ES_idx + '.nii.gz').get_fdata()

        seg_ED = nib.load(frame_prefix + ED_idx + '_gt.nii.gz').get_fdata()
        seg_ES = nib.load(frame_prefix + ES_idx + '_gt.nii.gz').get_fdata()

        myo_ED = (seg_ED == 2)
        myo_ES = (seg_ES == 2)

        # Crop centre: centroid of the ED mask on the middle slice.
        numOfSlices = im_ED.shape[-1]
        reference_mask = myo_ED[:, :, numOfSlices // 2]
        if not reference_mask.any():
            # center_of_mass of an empty mask is NaN; fall back to the
            # in-plane footprint of the mask over the whole volume.
            reference_mask = myo_ED.any(axis=-1)

        if reference_mask.any():
            centroid = np.round(ndimage.center_of_mass(reference_mask))
            # Plain ints instead of uint8: uint8 silently wraps around for
            # coordinates above 255, which shifts the crop on large images.
            center_ED = (int(centroid[0]), int(centroid[1]))
        else:
            # No labelled voxel at all: crop around the image centre.
            center_ED = (im_ED.shape[0] // 2, im_ED.shape[1] // 2)

        for z_idx in range(numOfSlices):
            # The ED centre is reused for ES and for the masks so that all
            # four crops of a slice stay spatially aligned.
            sample = {
                'im_ED': centre_crop(im_ED[:, :, z_idx], size=crop_size, centre=center_ED),
                'im_ES': centre_crop(im_ES[:, :, z_idx], size=crop_size, centre=center_ED),
                'myo_ED': centre_crop(myo_ED[:, :, z_idx], size=crop_size, centre=center_ED),
                'myo_ES': centre_crop(myo_ES[:, :, z_idx], size=crop_size, centre=center_ED),
            }

            file_name = patient + '_slice_' + str(z_idx) + '.mat'

            savemat(os.path.join(dest_path, file_name), sample)



In [15]:
# Destination folder for the cropped ACDC training slices.
train_dest_dir = '../../Dataset/ACDC/train'
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without the check-then-create race of testing os.path.exists first.
os.makedirs(train_dest_dir, exist_ok=True)
preprocess_ACDC(training_dir, train_dest_dir, train_list)

In [16]:
# Destination folder for the cropped ACDC validation slices; the validation
# patients are read from the same source folder as the training patients.
val_dest_dir = '../../Dataset/ACDC/val'
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without the check-then-create race of testing os.path.exists first.
os.makedirs(val_dest_dir, exist_ok=True)
preprocess_ACDC(training_dir, val_dest_dir, val_list)

In [17]:
# Destination folder for the cropped ACDC test slices.
test_dest_dir = '../../Dataset/ACDC/test'
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without the check-then-create race of testing os.path.exists first.
os.makedirs(test_dest_dir, exist_ok=True)
preprocess_ACDC(testing_dir, test_dest_dir, test_list)

## CAMUS Preprocess Not Normalized!
1. Randomly split the original 500 patients into 60% training, 20% validation and 20% testing
2. Extract the ED/ES images and their segmentations for the 2CH and 4CH views
3. Resize each image to 128 x 128
4. No normalization is applied at this point!!!

In [18]:
import random
import os

# Place your downloaded data in the following directory
data_dir = '../../Dataset/CAMUS/database_nifti/'

# Fixed seed so that re-running this cell reproduces the same split.
# Otherwise patients could move between the train/val/test folders that were
# already written to disk by an earlier run.
SPLIT_SEED = 0

# Keep only sub-folders (the preprocessing reads files inside
# <data_dir>/<patient>/) and sort them, because os.listdir returns entries
# in arbitrary order.
patient_list = sorted(
    name for name in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, name))
)
# A private generator leaves the global `random` state untouched.
random.Random(SPLIT_SEED).shuffle(patient_list)

# 60% / 20% / 20% split. Integer arithmetic gives exactly 300 / 100 / 100 for
# 500 patients and stays proportional for any other count.
num_patients = len(patient_list)
train_end = num_patients * 3 // 5
val_end = num_patients * 4 // 5

train_list = patient_list[:train_end]
val_list = patient_list[train_end:val_end]
test_list = patient_list[val_end:]

In [24]:
import numpy as np
import nibabel as nib
from skimage.transform import resize

def preprocess_CAMUS(data_path, dest_path, name_list):
    """Write resized ED/ES pairs of every listed CAMUS patient as .mat files.

    For each patient folder ``data_path/<name>`` the ED and ES images of the
    2CH and 4CH views and their ``_gt`` segmentations are loaded. Images are
    resized to 128 x 128; the voxels labelled 2 are taken as a binary mask,
    resized the same way and rounded. Two files are written per patient,
    ``<name>_2CH.mat`` and ``<name>_4CH.mat``, each with the keys ``im_ED``,
    ``im_ES``, ``myo_ED`` and ``myo_ES``.

    Args:
        data_path: Directory containing one sub-folder per patient.
        dest_path: Existing directory the .mat files are written to.
        name_list: Patient folder names to process.
    """
    out_shape = (128, 128)
    views = ('2CH', '4CH')
    phases = ('ED', 'ES')

    def read_volume(folder, stem):
        # Load one NIfTI file of the patient as a floating point array.
        return nib.load(os.path.join(folder, stem + '.nii.gz')).get_fdata()

    for patient in name_list:
        folder_dir = os.path.join(data_path, patient)

        # Read everything up front (all images, then all segmentations) so a
        # missing file is detected before anything is written for the patient.
        images = {
            (view, phase): read_volume(folder_dir, patient + '_' + view + '_' + phase)
            for view in views
            for phase in phases
        }
        labels = {
            (view, phase): read_volume(folder_dir, patient + '_' + view + '_' + phase + '_gt')
            for view in views
            for phase in phases
        }

        resized_images = {key: resize(volume, out_shape) for key, volume in images.items()}

        # NOTE(review): the mask is handed to resize as a boolean array, so the
        # interpolation applied here is whatever the installed skimage uses for
        # boolean input - confirm this matches the intended mask resampling.
        resized_masks = {
            key: np.round(resize(volume == 2, out_shape))
            for key, volume in labels.items()
        }

        # One output file per view, written 2CH first and then 4CH.
        pending = []
        for view in views:
            sample = {}
            for phase in phases:
                sample['im_' + phase] = resized_images[view, phase]
            for phase in phases:
                sample['myo_' + phase] = resized_masks[view, phase]
            pending.append((patient + '_' + view + '.mat', sample))

        for file_name, sample in pending:
            savemat(os.path.join(dest_path, file_name), sample)

In [25]:
# Destination folder for the resized CAMUS training samples.
# NOTE(review): this path climbs three levels ('../../../') while data_dir
# uses two ('../../') - confirm the output is meant to land in a different
# Dataset tree than the input.
train_dest_dir = '../../../Dataset/CAMUS/train'
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without the check-then-create race of testing os.path.exists first.
os.makedirs(train_dest_dir, exist_ok=True)
preprocess_CAMUS(data_dir, train_dest_dir, train_list)

In [None]:
# Destination folder for the resized CAMUS validation samples.
# NOTE(review): this path climbs three levels ('../../../') while data_dir
# uses two ('../../') - confirm the output is meant to land in a different
# Dataset tree than the input.
val_dest_dir = '../../../Dataset/CAMUS/val'
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without the check-then-create race of testing os.path.exists first.
os.makedirs(val_dest_dir, exist_ok=True)
preprocess_CAMUS(data_dir, val_dest_dir, val_list)

In [None]:
# Destination folder for the resized CAMUS test samples.
# NOTE(review): this path climbs three levels ('../../../') while data_dir
# uses two ('../../') - confirm the output is meant to land in a different
# Dataset tree than the input.
test_dest_dir = '../../../Dataset/CAMUS/test'
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without the check-then-create race of testing os.path.exists first.
os.makedirs(test_dest_dir, exist_ok=True)
preprocess_CAMUS(data_dir, test_dest_dir, test_list)