This notebook will prepare the training data
for each input scan, get all of the images and prepare a label for them
for all input scans, stack them together so that we can feed into the model

In [18]:
#read nii files
import os
import numpy as np
import nibabel as nib

import pandas as pd
from matplotlib import pyplot as plt
from skimage.transform import resize

In [19]:
label_file_name = 'summary_label.csv'
nifti_folder = 'Nifti_Files'
new_size = (224, 224)
#the following 2 parameters can not be 1 at the same time
normalize_img = 0
#if 1, normalize to 0~255
#if 0, don't normalize, use raw numbers
three_channels = 1
#if 1, create 3 channels instead of the original 1 channel
#if 0, use the original 1 channle
#for the 3 channels, we use the following window:
#brain window: (l = 40, w = 80), i.e. 0~80 (everything <0 is 0, everything >80 is 255)
#bone window: (l = 500, w = 3000), i.e. -1000~2000
#subdural window: (l = 175, w = 50), i.e. 150~200
#l is the center of the luminance and w is the width
channel_param = ((0, 80), (-1000, 2000), (150, 200))

In [23]:
def create_channel(img, channel_min, channel_max):
    #this function will create channeled data based on the original single values data
    
    img_channel = img.copy()
    img_channel[img_channel <= channel_min] = 0
    img_channel[img_channel >= channel_max] = channel_max
    img_channel = 255 * (img_channel-np.amin(img_channel))/(np.amax(img_channel) - np.amin(img_channel))
    return img_channel

    

In [30]:
#we will define a function, given a numpy array of scan numbers, 
#it will prepare the image data and label for us
def prepare_image_and_label(scan_list, filename, label_file_name=label_file_name, nifti_folder=nifti_folder,
                           new_size=new_size, normalize_img=normalize_img, three_channels=three_channels):
    #input:
    #scan_list: a list of scan numbers, e.g. range(0, 3)
    #file name: a string, will be the name of the saved numpy array for both images and labels
    #label_file_name, the name of the label file
    #nifti_folder: the folder of all of the nifti files
    #new_size: the size of the resized images
    #normalize_img: whether normalize to 0~1 or keep the original range
    
    #get a list of all of the nifti file names
    #we will need to change the names here!
    scan_fn_list = []
    for i in scan_list:
        scan_fn_list.append(nifti_folder + '/CQ500-CT-' + str(i) + '/CQ500-CT-' + str(i) + '.nii.gz')

    #load in the label file
    labels = pd.read_csv(label_file_name)

    #use the scan name as indes
    labels.set_index('name', inplace=True)
    
    if 'img_input' in locals():
        del img_input

    if 'label_input' in locals():
        del label_input

    for i in range(0, len(scan_list)):
        #for each scan read in the nifti file
        scan_fn = scan_fn_list[i]
        img = nib.load(scan_fn)
        img_data = img.get_data()

        #make sure to clear the processed img from previous scans
        if 'scan_imgs' in locals():
            del scan_imgs
        
        if three_channels == 1:
            #4d
            scan_imgs=np.empty((new_size[0], new_size[1], len(channel_param), img_data.shape[-1]))
        else:
            #3d
            scan_imgs=np.empty((new_size[0], new_size[1], img_data.shape[-1]))
                

        #for each slice in the scan, resize it to the new size
        for j in range(0, img_data.shape[-1]):
            #resize
            one_img_resize = resize(img_data[:, :, j], new_size)

            if normalize_img == 1:
                #normalize to 0~255
                img_processed = 255 * (one_img_resize-np.amin(one_img_resize))/(np.amax(one_img_resize) - np.amin(one_img_resize))      
                scan_imgs[:,:,j] = img_processed
                
            else:
                if three_channels == 1:
                    #be careful! now each image will be 3D and we will be dealing with 4D data
                    #when they are stacked together
                
                    #first create an empty numpy array for the channeled ones
                    img_processed=np.empty((new_size[0], new_size[1], len(channel_param)))
            
                    for c in range(0, len(channel_param)):
                        img_processed[:, :, c] = create_channel(one_img_resize, channel_param[c][0], channel_param[c][1])
                        
                    scan_imgs[:,:,:,j] = img_processed
                else:
                    img_processed = one_img_resize
                    scan_imgs[:,:,j] = img_processed
            
        #also, pull out the corresponding label for each scan 
        #create a numpy array for all of the slices
        current_label = np.zeros((img_data.shape[-1], 1)) + labels['ICH']['CQ500-CT-'+str(i)]
        
        #finally, stack the images from multiple scans into a single numpy array
        if 'img_input' in locals():
            img_input = np.concatenate((img_input, scan_imgs), axis=-1)
        else:
            img_input = scan_imgs.copy()

        if 'label_input' in locals():
            label_input = np.concatenate((label_input, current_label), axis=0)
        else:
            label_input = current_label.copy()
    
    
    #save the prepared image and label input with a certain name 'filename'
    np.save(filename + '_img', img_input)
    np.save(filename + '_label', label_input)
    
    return img_input, label_input

In [31]:
img_input, label_input = prepare_image_and_label([1,2], 'train')

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


In [33]:
img_input = np.load('train_img.npy')
img_input.shape
label_input

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]])

The following are the original non-function code

In [114]:
#get a list of all of the nifti file names
#we will need to change the names here!
scan_list=range(0, 3)
scan_fn_list = []
for i in scan_list:
    scan_fn_list.append(nifti_folder + '/nifti_test' + str(i) + '.nii')
    
#load in the label file
labels = pd.read_csv(label_file_name)

#use the scan name as indes
labels.set_index('name', inplace=True)

In [115]:
if 'img_input' in locals():
    del img_input

if 'label_input' in locals():
    del label_input
    
for i in scan_list:
    #for each scan read in the nifti file
    scan_fn = scan_fn_list[i]
    img = nib.load(scan_fn)
    img_data = img.get_data()
    
    #create an empty numpy array for the processed images
    img_processed=np.empty((new_size[0], new_size[1], img_data.shape[-1]))
    
    #for each slice in the scan, resize it to the new size
    for i in range(0, img_data.shape[-1]):
        #resize
        one_img_resize = resize(img_data[:, :, i], new_size)
        
        if normalize_img == 1:
            #normalize to 0~1
            one_img_resize_norm = (one_img_resize-np.amin(one_img_resize))/(np.amax(one_img_resize) - np.amin(one_img_resize))

        
        #then stack back to the 36 images list
        img_processed[:,:,i] = one_img_resize
        
    
    #also, pull out the corresponding label for each scan 
    #create a numpy array for all of the slices
    current_label = np.zeros((img_data.shape[-1], 1)) + labels['ICH']['CQ500-CT-'+str(i)]
    
    #finally, stack the images from multiple scans into a single numpy array
    if 'img_input' in locals():
        img_input = np.dstack((img_input, img_processed))
    else:
        img_input = img_processed.copy()
        
    if 'label_input' in locals():
        label_input = np.concatenate((label_input, current_label), axis=0)
    else:
        label_input = current_label.copy()
        

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
