# Transfer Learning - VGG16 Model


- Alzheimer's Disease Neuroimaging Initiative (ADNI) dataset
- Three classes:
    - Normal control
    - Mild cognitive impairment
    - Alzheimer's disease
- 6 hospitals

* Note: the `root` paths to the AD and CN class folders must be set before running; only these two classes are loaded in this notebook.

In [1]:
import numpy as np
import tensorflow as tf
import os
import nibabel as nib
from scipy.ndimage import zoom
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
# Utility functions
def zoom_out(image, width=128, height=128):
    """Resample the first two axes of a 3-D array to ``width`` x ``height``.

    Axis 0 is scaled to ``width`` and axis 1 to ``height``; the third axis
    keeps its size (zoom factor 1). Interpolation settings are the defaults
    of ``scipy.ndimage.zoom``.
    """
    rows, cols = image.shape[0], image.shape[1]
    factors = (width / rows, height / cols, 1)
    return zoom(image, factors)

def read_nifti_file(filepath):
    """Load a NIfTI file and return its voxel data with a trailing axis.

    The data comes from nibabel's ``get_fdata()``. A 3-D array gets a
    singleton fourth axis appended; arrays of any other rank are returned
    unchanged.
    """
    data = nib.load(filepath).get_fdata()
    return data[:, :, :, np.newaxis] if data.ndim == 3 else data

def normalize(volume):
    """Min-max rescale ``volume`` to the range [0, 255] and cast to float32.

    Parameters
    ----------
    volume : numpy.ndarray
        Array of intensities (any shape).

    Returns
    -------
    numpy.ndarray
        float32 array of the same shape. A constant volume (max == min)
        maps to all zeros instead of the NaNs a 0/0 division would give.
    """
    lowest = volume.min()
    span = volume.max() - lowest
    if span == 0:
        # Nothing to stretch: avoid dividing by zero on blank/constant scans.
        return np.zeros(volume.shape, dtype="float32")
    return (255 * (volume - lowest) / span).astype("float32")

def process_scan(path,nslices, H, W):
    """Load a scan and return its central slices as multi-channel images.

    The volume is read with ``read_nifti_file`` and rescaled with
    ``normalize``. Each slice along axis 2 is then stacked with its two
    neighbouring slices on the last axis, and the ``nslices`` central
    stacked slices are resized to ``H`` x ``W`` with ``zoom_out``.

    Parameters
    ----------
    path : str
        Path of the NIfTI file.
    nslices : int
        Number of central slices to extract.
    H, W : int
        Target size of axes 0 and 1 of every slice.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(nslices, H, W, C)`` where ``C`` is three
        times the size of the scan's last axis (3 for a plain 3-D scan).

    Raises
    ------
    ValueError
        If the scan has fewer than ``nslices`` usable slices.
    """
    volume = normalize(read_nifti_file(path))

    # Build the channels from each slice and its two neighbours along the
    # slice axis (axis 2, the axis indexed below). Shifting along axis 0
    # instead would only stack in-plane-shifted copies of the same slice.
    volume = np.concatenate(
        (volume[:, :, :-2], volume[:, :, 1:-1], volume[:, :, 2:]), axis=-1
    )

    depth = volume.shape[2]
    if nslices > depth:
        # A negative start index would silently wrap around to the far end.
        raise ValueError(
            f"{path}: requested {nslices} slices but only {depth} are available"
        )

    first = (depth - nslices) // 2  # start of the centred window
    volume_slices = np.zeros((nslices, H, W, volume.shape[3]), dtype=np.float32)
    for ii in range(nslices):
        volume_slices[ii] = zoom_out(volume[:, :, first + ii, :], H, W)
    return volume_slices

In [3]:
# Listing AD and NC subjects/volumes
def list_nifti_files(root):
    """Return the paths of all NIfTI files found under ``root``.

    The tree is walked recursively. Files whose name starts with ``.`` are
    skipped, and only names ending in ``.nii`` or ``.nii.gz`` are kept, so
    sidecar files such as ``scan.nii.json`` are not mistaken for volumes.
    Directories and files are visited in sorted order because ``os.walk``
    otherwise follows the file system's arbitrary order, which would make
    the later seeded train/val/test split differ between machines.

    A missing ``root`` yields an empty list (``os.walk`` ignores the error).
    """
    found = []
    for dirpath, subdirs, files in os.walk(root):
        subdirs.sort()  # in place, so os.walk descends in this order
        for name in sorted(files):
            if name.startswith('.'):
                continue
            if name.endswith(('.nii', '.nii.gz')):
                found.append(os.path.join(dirpath, name))
    return found


ad_paths = list_nifti_files('/path/to/AD')
nc_paths = list_nifti_files('/path/to/CN')

print(len(ad_paths))
print(len(nc_paths))

594
1129


In [4]:
# Extraction settings
nslices = 10  # number of slices taken from each volume
H = W = 256  # size every slice is resized to
class_names = ["AD", "NC"]

# One entry per scan file: AD scans first (label 0), then NC scans (label 1),
# i.e. each label is the index of its class in class_names.
all_paths = [*ad_paths, *nc_paths]
y_subjects = [0 for _ in ad_paths] + [1 for _ in nc_paths]

In [5]:
# Split train/val/test sets
# Step 1: hold out 20% of the scans as the test set, stratified by label.
(paths_train, paths_test,
 y_subjects_train, y_subjects_test) = train_test_split(
    all_paths,
    y_subjects,
    test_size=0.2,
    random_state=0,
    stratify=y_subjects,
)

# Step 2: hold out 20% of the remaining scans as the validation set,
# again stratified by label.
(paths_train, paths_val,
 y_subjects_train, y_subjects_val) = train_test_split(
    paths_train,
    y_subjects_train,
    test_size=0.2,
    random_state=0,
    stratify=y_subjects_train,
)

hospitals = ['GE_15', 'GE_3', 'Philips_15', 'Philips_3' ,'Siemens_15', 'Siemens_3']


def hospital_index(scan_path):
    """Return the position in ``hospitals`` of the folder named in ``scan_path``.

    The path is split on both ``/`` and ``\\`` and the first component that
    equals one of the ``hospitals`` entries decides the result, so the
    lookup does not depend on the OS separator or on how deep the data
    folder sits on a particular machine.

    Raises
    ------
    ValueError
        If no component of ``scan_path`` is a known hospital; silently
        falling back to index 0 would mislabel the scan.
    """
    for part in scan_path.replace("\\", "/").split("/"):
        if part in hospitals:
            return hospitals.index(part)
    raise ValueError(f"no hospital folder found in path: {scan_path}")


# Per-slice hospital labels: one index per scan, repeated nslices times so
# they line up with the slices extracted from each scan.
# paths_train / paths_val / paths_test are deliberately left untouched: they
# must still hold the file paths when the volumes are loaded afterwards.
center_train = np.repeat(
    np.array([hospital_index(p) for p in paths_train], dtype=int), nslices)
center_val = np.repeat(
    np.array([hospital_index(p) for p in paths_val], dtype=int), nslices)
center_test = np.repeat(
    np.array([hospital_index(p) for p in paths_test], dtype=int), nslices)

In [6]:
# Training set: nslices images per scan, each label repeated to match.
Y_train = np.repeat(y_subjects_train, nslices)
# The fixed channel count of 3 must match what process_scan returns.
X_train = np.zeros((nslices * len(paths_train), H, W, 3), dtype=np.float32)

for scan_idx, scan_path in enumerate(paths_train):
    first_row = scan_idx * nslices
    X_train[first_row:first_row + nslices] = process_scan(scan_path, nslices, H, W)

In [7]:
# Validation set: nslices images per scan, each label repeated to match.
Y_val = np.repeat(y_subjects_val, nslices)
# The fixed channel count of 3 must match what process_scan returns.
X_val = np.zeros((nslices * len(paths_val), H, W, 3), dtype=np.float32)

for scan_idx, scan_path in enumerate(paths_val):
    first_row = scan_idx * nslices
    X_val[first_row:first_row + nslices] = process_scan(scan_path, nslices, H, W)

In [None]:
# Test set: nslices images per scan, each label repeated to match.
Y_test = np.repeat(y_subjects_test, nslices)
# The fixed channel count of 3 must match what process_scan returns.
X_test = np.zeros((nslices * len(paths_test), H, W, 3), dtype=np.float32)

for scan_idx, scan_path in enumerate(paths_test):
    first_row = scan_idx * nslices
    X_test[first_row:first_row + nslices] = process_scan(scan_path, nslices, H, W)

# Quick sanity check of the intensity range of the loaded data.
print(X_test.min(),X_test.max())

In [None]:
# One-hot encoding
# Turn the integer labels of every split into 2-column one-hot matrices.
Y_train_oh, Y_val_oh, Y_test_oh = (
    tf.keras.utils.to_categorical(labels, 2)
    for labels in (Y_train, Y_val, Y_test)
)

In [None]:
root = './adni/'
# np.save does not create missing folders, so make sure the target exists.
os.makedirs(root, exist_ok=True)

# NOTE: np.save appends ".npy" to any name that does not already end in it,
# so these are written as e.g. "X_train_ax.npz.npy" in plain .npy format,
# not as .npz archives. The names are kept unchanged so that existing
# code loading these files keeps working.
np.save(root + "X_train_ax.npz",X_train)
np.save(root + "X_val_ax.npz",X_val)
np.save(root + "X_test_ax.npz",X_test)

# Integer class labels, one per slice (the one-hot versions are not saved).
np.save(root + "Y_train_ax.npz",Y_train)
np.save(root + "Y_val_ax.npz",Y_val)
np.save(root + "Y_test_ax.npz",Y_test)

# Hospital index of every slice.
np.save(root + "center_train",center_train)
np.save(root + "center_val",center_val)
np.save(root + "center_test",center_test)