# 2D Image Segmentation of MRI scans

Steps:
1. Get dataset
1. Preprocess 2D images
    1. do a
    1. do b
1. Split dataset into train/val(/test) set
1. Setup a model
1. Set training and model parameters
1. Train model
1. Validate result


## Get dataset


In [18]:
!pip3 install natsort

Collecting natsort
  Downloading https://files.pythonhosted.org/packages/e7/13/a66bfa0ebf00e17778ca0319d081be686a33384d1f612fc8e0fc542ac5d8/natsort-6.0.0-py2.py3-none-any.whl
Installing collected packages: natsort
Successfully installed natsort-6.0.0


In [19]:
import os
import random
import re
from PIL import Image
from pathlib import Path

DATASET = "/Amsterdam_GE3T"
DATA_PATH = 'data_2d' + DATASET
SCAN_TYPE = 'T1'
FRAME_PATH = DATA_PATH
MASK_PATH = DATA_PATH

# Data path
# data_2d/<dataset>/<scan_id>/
# data_2d/<dataset>/<scan_id>/wmh_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/orig/3DT1_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/orig/T1_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/orig/FLAIR_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/pre/3DT1_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/pre/T1_<slice_id>.tif
# data_2d/<dataset>/<scan_id>/pre/FLAIR_<slice_id>.tif


# Create folders to hold images and masks

folders = ['train_frames', 'train_masks', 'val_frames', 'val_masks', 'test_frames', 'test_masks']


# for folder in folders:
#       os.makedirs(DATA_PATH + folder)


# Get all frames and masks, sort them, shuffle them to generate data sets.

''' Recursively find all Images from a filepath matching the description 

    @return tuple (frames, masks) with list of all frames an accompanying mask
'''
def getAllImages(dataPath, scanType = "T1", preprocessed=True):
    if preprocessed:
        imageType = 'pre'
    else:
        imageType = 'orig'
    
    frameExpression = '**/{}/{}*.tiff'.format(imageType, scanType)
    maskExpression = '**/wmh_*.tiff' 
    a =  Path(DATA_PATH).glob(frameExpression)
    b =  Path(DATA_PATH).glob(maskExpression)

    while True:
        yield next(a), next(b)
    
    
    
# all_frames = os.listdir(FRAME_PATH)
# for frame in Path(DATA_PATH).glob('**/pre/{}*.tif'.format(SCAN_TYPE)):
#     print(frame)

# all_masks = os.listdir(MASK_PATH)

In [34]:
a = getAllImages(DATA_PATH)
all_frames = [str(x) for (x,_) in getAllImages(DATA_PATH) if 'checkpoint' not in str(x) ]

all_masks = [str(x) for (_,x) in getAllImages(DATA_PATH) if 'checkpoint' not in str(x)] 

from natsort import natsorted, ns
all_frames = natsorted(all_frames, alg=ns.IGNORECASE)
all_masks = natsorted(all_masks, alg=ns.IGNORECASE)

  
  after removing the cwd from sys.path.


In [54]:
print(len(all_frames))
print(len(all_masks))
arr = [random.randint(0,len(all_frames)) for _ in range(10)]
for i in arr:
    print('Random entry in data:')
    print(f'frame {all_frames[i]}, mask  {all_masks[i]}')


   

1020
1020
Random entry in data:
frame data_2d/Amsterdam_GE3T/105/pre/T1_45.tiff, mask  data_2d/Amsterdam_GE3T/105/wmh_45.tiff
Random entry in data:
frame data_2d/Amsterdam_GE3T/126/pre/T1_28.tiff, mask  data_2d/Amsterdam_GE3T/126/wmh_28.tiff
Random entry in data:
frame data_2d/Amsterdam_GE3T/132/pre/T1_39.tiff, mask  data_2d/Amsterdam_GE3T/132/wmh_39.tiff
Random entry in data:
frame data_2d/Amsterdam_GE3T/100/pre/T1_22.tiff, mask  data_2d/Amsterdam_GE3T/100/wmh_22.tiff
Random entry in data:
frame data_2d/Amsterdam_GE3T/116/pre/T1_28.tiff, mask  data_2d/Amsterdam_GE3T/116/wmh_28.tiff
Random entry in data:
frame data_2d/Amsterdam_GE3T/126/pre/T1_25.tiff, mask  data_2d/Amsterdam_GE3T/126/wmh_25.tiff
Random entry in data:
frame data_2d/Amsterdam_GE3T/103/pre/T1_26.tiff, mask  data_2d/Amsterdam_GE3T/103/wmh_26.tiff
Random entry in data:
frame data_2d/Amsterdam_GE3T/115/pre/T1_16.tiff, mask  data_2d/Amsterdam_GE3T/115/wmh_16.tiff
Random entry in data:
frame data_2d/Amsterdam_GE3T/108/pre/T1_

In [58]:
# all_frames.sort(key=lambda var:[int(x) if x.isdigit() else x 
#                                 for x in re.findall(r'[^0-9]|[0-9]+', var)])
# all_masks.sort(key=lambda var:[int(x) if x.isdigit() else x 
#                                for x in re.findall(r'[^0-9]|[0-9]+', var)])
# for i in range(10):
#     print(all_frames[i])
#     print(all_masks[i])
#     print()

# Get all indices     
idxs = [x for x in range(len(all_frames))]


10

## Split dataset into train, val and testset

In [63]:
random.seed(230)
random.shuffle(idxs)

# Generate train, val, and test sets for frames

train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1


train_split = int(0.7*len(all_frames))
val_split = int(0.9 * len(all_frames))

train_idxs = idxs[:train_split]
val_idxs = idxs[train_split:val_split]
test_idxs = idxs[val_split:]

In [74]:
# Get corresponding frames
train_frames = [all_frames[idx] for idx in train_idxs]
val_frames = [all_frames[idx] for idx in val_idxs]
test_frames = [all_frames[idx] for idx in test_idxs]

# Get corresponding masks
train_masks = [all_masks[idx] for idx in train_idxs]
val_masks = [all_masks[idx] for idx in val_idxs]
test_masks = [all_masks[idx] for idx in test_idxs]


In [75]:
x= 100
print(idxs[x])
print(all_frames[idxs[x]])
print(train_frames[x])
print(all_masks[idxs[x]])
print(train_masks[x])

715
data_2d/Amsterdam_GE3T/115/pre/T1_17.tiff
data_2d/Amsterdam_GE3T/115/pre/T1_17.tiff
data_2d/Amsterdam_GE3T/115/wmh_17.tiff
data_2d/Amsterdam_GE3T/115/wmh_17.tiff


In [58]:
#Add train, val, test frames and masks to relevant folders


def add_frames(dir_name, image):
  
    img = Image.open(FRAME_PATH+image)
    img.save(DATA_PATH+'/{}'.format(dir_name)+'/'+image)
  
  

def add_masks(dir_name, image):  
    img = Image.open(MASK_PATH+image)
    img.save(DATA_PATH+'/{}'.format(dir_name)+'/'+image)


  
  
frame_folders = [(train_frames, 'train_frames'), (val_frames, 'val_frames'), 
                 (test_frames, 'test_frames')]

mask_folders = [(train_masks, 'train_masks'), (val_masks, 'val_masks'), 
                (test_masks, 'test_masks')]

# Add frames

for folder in frame_folders:
    array = folder[0]
    name = [folder[1]] * len(array)

    list(map(add_frames, name, array))
         
    
# Add masks

for folder in mask_folders:
    array = folder[0]
    name = [folder[1]] * len(array)
    
    list(map(add_masks, name, array))

FileNotFoundError: [Errno 2] No such file or directory: 'data_2d/Amsterdam_GE3Tdata_2d/Amsterdam_GE3T/107/pre/T1_53.tif'