# 2D Image Segmentation of MRI scans

Steps:
1. Get dataset
1. Preprocess 2D images
    1. do a
    1. do b
1. Split dataset into train/val(/test) set
1. Setup a model
1. Set training and model parameters
1. Train model
1. Validate result


## Get dataset


In [45]:
import os
import random
import re
from PIL import Image
from pathlib import Path

DATASET = "/Amsterdam_GE3T"
DATA_PATH = 'data_2d' + DATASET
SCAN_TYPE = 'T1'
FRAME_PATH = DATA_PATH
MASK_PATH = DATA_PATH

# Data path
# data_2d/<dataset>/<scan_id>/
# data_2d/<dataset>/<scan_id>/wmh_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/orig/3DT1_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/orig/T1_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/orig/FLAIR_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/pre/3DT1_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/pre/T1_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/pre/FLAIR_<slice_id>.tif


# Create folders to hold images and masks

folders = ['train_frames', 'train_masks', 'val_frames', 'val_masks', 'test_frames', 'test_masks']


# for folder in folders:
#       os.makedirs(DATA_PATH + folder)


# Get all frames and masks, sort them, shuffle them to generate data sets.

''' Recursively find all Images from a filepath matching the description 

    @return tuple (frames, masks) with list of all frames an accompanying mask
'''
def getAllImages(dataPath, scanType = "T1", preprocessed=True):
    if preprocessed:
        imageType = 'pre'
    else:
        imageType = 'orig'
    
    frameExpression = '**/{}/{}*.tif'.format(imageType, scanType)
    maskExpression = '**/wmh_*.tif' 
    a =  Path(DATA_PATH).glob(frameExpression)
    b =  Path(DATA_PATH).glob(maskExpression)

    while True:
        yield next(a), next(b)
    
    
    
# all_frames = os.listdir(FRAME_PATH)
# for frame in Path(DATA_PATH).glob('**/pre/{}*.tif'.format(SCAN_TYPE)):
#     print(frame)

# all_masks = os.listdir(MASK_PATH)

In [54]:
a = getAllImages(DATA_PATH)
all_frames = [str(x) for (x,_) in getAllImages(DATA_PATH)]
all_masks = [str(x) for (_,x) in getAllImages(DATA_PATH)]

  
  This is separate from the ipykernel package so we can avoid doing imports until


In [55]:
print(len(all_frames))
print(len(all_masks))
for i in range(10):
    print(all_frames[i])
    print(all_masks[i])
    print()

   

1020
1020
data_2d/Amsterdam_GE3T/100/pre/T1_34.tif
data_2d/Amsterdam_GE3T/100/wmh_35.tif

data_2d/Amsterdam_GE3T/100/pre/T1_16.tif
data_2d/Amsterdam_GE3T/100/wmh_16.tif

data_2d/Amsterdam_GE3T/100/pre/T1_17.tif
data_2d/Amsterdam_GE3T/100/wmh_17.tif

data_2d/Amsterdam_GE3T/100/pre/T1_18.tif
data_2d/Amsterdam_GE3T/100/wmh_18.tif

data_2d/Amsterdam_GE3T/100/pre/T1_19.tif
data_2d/Amsterdam_GE3T/100/wmh_19.tif

data_2d/Amsterdam_GE3T/100/pre/T1_20.tif
data_2d/Amsterdam_GE3T/100/wmh_20.tif

data_2d/Amsterdam_GE3T/100/pre/T1_21.tif
data_2d/Amsterdam_GE3T/100/wmh_21.tif

data_2d/Amsterdam_GE3T/100/pre/T1_22.tif
data_2d/Amsterdam_GE3T/100/wmh_22.tif

data_2d/Amsterdam_GE3T/100/pre/T1_23.tif
data_2d/Amsterdam_GE3T/100/wmh_23.tif

data_2d/Amsterdam_GE3T/100/pre/T1_24.tif
data_2d/Amsterdam_GE3T/100/wmh_24.tif



In [57]:
all_frames.sort(key=lambda var:[int(x) if x.isdigit() else x 
                                for x in re.findall(r'[^0-9]|[0-9]+', var)])
all_masks.sort(key=lambda var:[int(x) if x.isdigit() else x 
                               for x in re.findall(r'[^0-9]|[0-9]+', var)])
for i in range(10):
    print(all_frames[i])
    print(all_masks[i])
    print()

    
random.seed(230)
random.shuffle(all_frames)


# Generate train, val, and test sets for frames

train_split = int(0.7*len(all_frames))
val_split = int(0.9 * len(all_frames))

train_frames = all_frames[:train_split]
val_frames = all_frames[train_split:val_split]
test_frames = all_frames[val_split:]


# Generate corresponding mask lists for masks

train_masks = [f for f in all_masks if f in train_frames]
val_masks = [f for f in all_masks if f in val_frames]
test_masks = [f for f in all_masks if f in test_frames]

data_2d/Amsterdam_GE3T/100/pre/T1_16.tif
data_2d/Amsterdam_GE3T/100/.ipynb_checkpoints/wmh_16-checkpoint.tif

data_2d/Amsterdam_GE3T/100/pre/T1_17.tif
data_2d/Amsterdam_GE3T/100/wmh_16.tif

data_2d/Amsterdam_GE3T/100/pre/T1_18.tif
data_2d/Amsterdam_GE3T/100/wmh_17.tif

data_2d/Amsterdam_GE3T/100/pre/T1_19.tif
data_2d/Amsterdam_GE3T/100/wmh_18.tif

data_2d/Amsterdam_GE3T/100/pre/T1_20.tif
data_2d/Amsterdam_GE3T/100/wmh_19.tif

data_2d/Amsterdam_GE3T/100/pre/T1_21.tif
data_2d/Amsterdam_GE3T/100/wmh_20.tif

data_2d/Amsterdam_GE3T/100/pre/T1_22.tif
data_2d/Amsterdam_GE3T/100/wmh_21.tif

data_2d/Amsterdam_GE3T/100/pre/T1_23.tif
data_2d/Amsterdam_GE3T/100/wmh_22.tif

data_2d/Amsterdam_GE3T/100/pre/T1_24.tif
data_2d/Amsterdam_GE3T/100/wmh_23.tif

data_2d/Amsterdam_GE3T/100/pre/T1_25.tif
data_2d/Amsterdam_GE3T/100/wmh_24.tif



In [58]:
#Add train, val, test frames and masks to relevant folders


def add_frames(dir_name, image):
  
    img = Image.open(FRAME_PATH+image)
    img.save(DATA_PATH+'/{}'.format(dir_name)+'/'+image)
  
  

def add_masks(dir_name, image):  
    img = Image.open(MASK_PATH+image)
    img.save(DATA_PATH+'/{}'.format(dir_name)+'/'+image)


  
  
frame_folders = [(train_frames, 'train_frames'), (val_frames, 'val_frames'), 
                 (test_frames, 'test_frames')]

mask_folders = [(train_masks, 'train_masks'), (val_masks, 'val_masks'), 
                (test_masks, 'test_masks')]

# Add frames

for folder in frame_folders:
    array = folder[0]
    name = [folder[1]] * len(array)

    list(map(add_frames, name, array))
         
    
# Add masks

for folder in mask_folders:
    array = folder[0]
    name = [folder[1]] * len(array)
    
    list(map(add_masks, name, array))

FileNotFoundError: [Errno 2] No such file or directory: 'data_2d/Amsterdam_GE3Tdata_2d/Amsterdam_GE3T/107/pre/T1_53.tif'