# Pre-processing

In [1]:
import sys, os, glob, shutil
sys.path.append('../')

from MRIsegm.utils import get_slice, get_slice_info, get_rois, make_mask, mask_slice
from MRIsegm.processing import denoise_slice

from PIL import Image
from tqdm.notebook import trange



## Loading Data

In [2]:
# Root folder holding one sub-folder per patient.
# NOTE: machine-specific absolute path; adjust it when running elsewhere.
src = '/Users/giuseppefilitto/Pazienti_anonym_sorted'

# Directory listing without the macOS Finder metadata entry.
patients = [entry for entry in os.listdir(src) if entry != '.DS_Store']

# Patients that are left out of the dataset altogether.
bad_patients = [ 'BO9', 'BO17', 'BO28', 'BO36', 'BO37', 'BO39', 'BO40', 'BO54', 'BO72', 'BO77', 'BO86']

# BO38 is excluded here as well: it is exported by a dedicated cell further
# down because its image/ROI folders use different names.
excluded_patients = set(bad_patients) | {'BO38'}

# Sorted so that the processing order is the same on every run (a plain
# set difference yields an arbitrary order), and no error is raised when
# BO38 is not present in the source folder.
good_patients = sorted(set(patients) - excluded_patients)
print("Number of good patients:",len(good_patients))

Number of good patients: 36


## Saving Training and Validation images

### Training

In [3]:
# Export every annotated layer of each regular patient as an image/mask PNG pair.

alpha = 5  # denoising parameter forwarded to denoise_slice
img_dst = '../data/training/img/img'
mask_dst = '../data/training/mask/img'

# Create the output folders up front so a fresh checkout does not fail on save.
for out_dir in (img_dst, mask_dst):
    os.makedirs(out_dir, exist_ok=True)

last_index = len(good_patients) - 1

# Iterating over trange advances and closes the progress bar automatically.
for z in trange(len(good_patients), desc="Progress"):
    patient = good_patients[z]

    #! slices
    # The T2 series folder is not named consistently across patients: use the
    # first candidate that exists, falling back to the last one otherwise.
    candidates = [os.path.join(src, patient, name) for name in ('T2', 'T2AX', 'T25mm', 't2DEF')]
    slice_path = next((path for path in candidates if os.path.isdir(path)), candidates[-1])

    # Running log on a single line: "patient: A, B, ..., Z."
    prefix = 'patient: ' if z == 0 else ''
    print(f'{prefix}{patient}', end='.' if z == last_index else ', ', flush=True )

    slice_raw = get_slice(dir_path=slice_path)

    # Named `slices` rather than `slice` so the builtin is not shadowed
    # for the rest of the kernel session.
    slices = denoise_slice(slice_raw, alpha = alpha)

    #! ROIS
    roi_path = os.path.join(src, patient, 'T2ROI')
    roi = get_rois(roi_path=roi_path)

    slice_of_masks = mask_slice(slice=slices, rois=roi)

    # Layer indices that carry at least one ROI; the stored position is
    # shifted by one to index the stack (presumably 1-based - TODO confirm).
    # Duplicates are collapsed so each layer is written once.
    positions = sorted({roi[j].get('position') - 1 for j in range(len(roi))})

    for i in positions:

        # Image.save overwrites an existing file, so no prior removal is needed.
        img = Image.fromarray(slices[i, :, :])
        img.save(os.path.join(img_dst, patient + '_layer_' + str(i) + '.png'))

        mask = Image.fromarray(slice_of_masks[i, :, :])
        mask.save(os.path.join(mask_dst, patient + '_mask_' + str(i) + '.png'))

Progress:   0%|          | 0/36 [00:00<?, ?it/s]

patient: BO75, BO56, BO78, BO90, BO74, BO52, BO64, BO66, BO18, BO68, BO51, BO48, BO2, BO43, BO31, BO85, BO47, BO42, BO45, BO26, BO49, BO61, BO1, BO11, BO16, BO71, BO63, BO82, BO35, BO60, BO44, BO32, BO76, BO29, BO33, BO50.

In [4]:
# Export the patients whose acquisition is split over two image folders,
# each with its own ROI folder.
special_patients = ['BO38']

# Image folders and the ROI folders that annotate them, index-aligned.
folders = ['T2AXAlta', 'T2AXBassa']
roi_folders = ['T2ROIalta', 'T2ROIbassa']

alpha = 5  # denoising parameter forwarded to denoise_slice
img_dst = '../data/training/img/img'
mask_dst = '../data/training/mask/img'

# Create the output folders up front so a fresh checkout does not fail on save.
for out_dir in (img_dst, mask_dst):
    os.makedirs(out_dir, exist_ok=True)

for patient in special_patients:

    print(f'patient: {patient}', end=' ', flush=True )

    for k, (folder, roi_folder) in enumerate(zip(folders, roi_folders)):

        slice_path = os.path.join(src, patient, folder)

        if k == len(folders) - 1:
            print(f'{folder}', end='.', flush=True )
        else:
            print(f'folder: {folder}', end=', ', flush=True )

        slice_raw = get_slice(dir_path=slice_path)

        # Named `slices` rather than `slice` so the builtin is not shadowed
        # for the rest of the kernel session.
        slices = denoise_slice(slice_raw, alpha = alpha)

        #! ROIS
        roi_path = os.path.join(src, patient, roi_folder)
        roi = get_rois(roi_path=roi_path)

        slice_of_masks = mask_slice(slice=slices, rois=roi)

        # Layer indices that carry at least one ROI; the stored position is
        # shifted by one to index the stack (presumably 1-based - TODO confirm).
        positions = sorted({roi[j].get('position') - 1 for j in range(len(roi))})

        for i in positions:

            # Image files are tagged with the image folder name, mask files
            # with the ROI folder name. Image.save overwrites existing files.
            img = Image.fromarray(slices[i, :, :])
            filename = patient + '_' + folder + '_layer_' + str(i) + '.png'
            img.save(os.path.join(img_dst, filename))

            mask = Image.fromarray(slice_of_masks[i, :, :])
            filename = patient + '_' + roi_folder + '_mask_' + str(i) + '.png'
            mask.save(os.path.join(mask_dst, filename))



patient: BO38 folder: T2AXAlta, folder: T2AXBassa.

In [5]:
# Collect the exported training files. glob returns paths in arbitrary
# order, so the listings are sorted to make the split below reproducible.
train_dir = '../data/training/img/img'
train_files = sorted(glob.glob(train_dir + '/*.png'))

mask_dir = '../data/training/mask/img'
mask_files = sorted(glob.glob(mask_dir + '/*.png'))

# ! validation
validation_dir = '../data/validation/img/img'
mask_validation_dir = '../data/validation/mask/img'

# Number of leading entries of train_files moved to the validation set.
n_validation = 42

# Fail before any file is moved if the requested split cannot be satisfied.
if n_validation > len(train_files):
    raise ValueError(
        f"n_validation ({n_validation}) exceeds the number of training images ({len(train_files)})"
    )

print("before validation split:", len(train_files))
print("validation images:", n_validation)
print("Training images after split:", len(train_files) - n_validation)

before validation split: 486
validation images: 42
Training images after split: 444


### Validation

In [6]:
# Move the first n_validation training images, together with their masks,
# into the validation folders.

mask_dir = '../data/training/mask/img'

# Masks of the split-acquisition patient are tagged with the ROI folder name
# instead of the image folder name (e.g. 'BO38_T2AXAlta_layer_3.png' pairs
# with 'BO38_T2ROIalta_mask_3.png'), so a plain '_layer_' -> '_mask_'
# substitution does not find them.
roi_tag_by_folder = {'T2AXAlta': 'T2ROIalta', 'T2AXBassa': 'T2ROIbassa'}

if n_validation > len(train_files):
    raise ValueError(
        f"n_validation ({n_validation}) exceeds the number of training images ({len(train_files)})"
    )

# Make sure the destination folders exist before moving anything.
for out_dir in (validation_dir, mask_validation_dir):
    os.makedirs(out_dir, exist_ok=True)

for img in train_files[:n_validation]:

    img_name = os.path.basename(img)

    # Resolve the matching mask first so an image is never moved without it.
    mask_file = img_name.replace('_layer_', '_mask_')
    mask = os.path.join(mask_dir, mask_file)
    if not os.path.isfile(mask):
        for folder_tag, roi_tag in roi_tag_by_folder.items():
            mask_file = mask_file.replace(f'_{folder_tag}_', f'_{roi_tag}_')
        mask = os.path.join(mask_dir, mask_file)
    if not os.path.isfile(mask):
        raise FileNotFoundError(f"No mask found in {mask_dir} for image {img}")

    # Drop any stale copy left by a previous run, then move the image.
    validation_img = os.path.join(validation_dir, img_name)
    if os.path.isfile(validation_img):
        os.remove(validation_img)
    shutil.move(img, validation_img)

    # Same for the mask.
    mask_validation = os.path.join(mask_validation_dir, mask_file)
    if os.path.isfile(mask_validation):
        os.remove(mask_validation)
    shutil.move(mask, mask_validation)

### Info

In [7]:
# Re-scan both image folders and report how many files each one now holds.
train_files = glob.glob(f'{train_dir}/*.png')
valid_files = glob.glob(f'{validation_dir}/*.png')

for label, files in (("Training images:", train_files), ("Validation images:", valid_files)):
    print(label, len(files))


Training images: 444
Validation images: 42
