# All settings and configurations

In [None]:
import numpy as np

# Central settings dictionary shared by every cell below.
p = dict(
    # roots of the Caffe builds that load_caffe() can choose from
    caffe_root='/home/xubiker/dev/caffe_official/',
    C3D_root='/home/xubiker/dev/C3D-v1.1/',
    caffe_dev_root='/home/xubiker/dev/caffe/',
    caffe_dev_modified_root='/home/xubiker/dev/caffe_modified/',
    # element type for volume data
    DTYPE=np.float32,
    # dataset locations and file extensions
    ADNI2_DIR='/home/xubiker/ADNI2/dataset/',
    ADNI2_MODIFIED_DIR='/home/xubiker/ADNI2_modified/dataset/',
    DATA_EXT='.nii',
    TF_DATA_DIR='/home/xubiker/dev/alzheimer/alzheimer_tf/data',
    TF_DATA_EXT='.tfrecords',
    # preprocessing options (presumably fractions per axis -- TODO confirm)
    DATA_SPLIT_PRC=0.6,
    DO_CROP=True,
    CROP_PRC=(0.05, 0.05, 0.05),
    SHIFT_PRC=(0, 0, -0.05),
    # LMDB generation options
    LMDB_SHUFFLE=True,
    LMDB_SHUFFLE_SAME=True,
    LMDB_PREFIX='alz',
)

# Input volume dimensions depend on whether cropping is enabled.
if p['DO_CROP']:
    p['IDIMS'] = [109, 131, 109]
else:
    p['IDIMS'] = [121, 145, 121]

# Number of voxels in one volume (product of the three dimensions).
p['IDIMSFLT'] = int(np.prod(p['IDIMS']))

# Integer class codes used as labels.
label_code = dict(AD=2, MCI=1, NC=0)

In [None]:
# function to load caffe
def load_caffe(caffe_version):
    """
    Put the python bindings directory of the chosen Caffe build on sys.path.

    The function only extends ``sys.path`` (once per directory); it does not
    import caffe itself, so call ``import caffe`` afterwards.

    caffe_version:
    0 - official caffe,
    1 - C3D from facebook,
    2 - dev caffe,
    3 - modified dev caffe

    Raises KeyError if caffe_version is not one of the codes above.
    """
    import os
    import sys
    root = {
        0: p['caffe_root'],
        1: p['C3D_root'],
        2: p['caffe_dev_root'],
        3: p['caffe_dev_modified_root']
    }[caffe_version]
    # os.path.join copes with roots that already end in a separator, where
    # plain concatenation produced ".../caffe//python".
    pcr = os.path.join(root, "python")
    if pcr not in sys.path:
        sys.path.append(pcr)

# Load caffe

In [None]:
# Version code 3 selects the modified dev build (see load_caffe's docstring).
# The import has to come after the call, which extends sys.path.
load_caffe(caffe_version=3)
import caffe
print('caffe {} loaded'.format(caffe.__version__))

# Main code

In [None]:
from pylab import *
%matplotlib inline

Create LMDB from ADNI2 **[uncomment to use]**, then generate the train/validation/test lists (this part of the cell runs as is)

In [None]:
# augmentation = {'n': 1, 'max_shift': 2, 'max_blur': 7}
# create_lmdb_from_adni2(p['ADNI2_MODIFIED_DIR'], p['LMDB_PREFIX'], augm_params = None, crop_prc=p['CROP_PRC'], shift_prc=p['SHIFT_PRC'])
# validate_lmdb_from_adni2(p['LMDB_PREFIX'], preview_only=False)

# NOTE(review): generate_lists_from_adni2 is neither defined nor imported in the
# visible cells of this notebook, so this cell presumably raises NameError on a
# fresh kernel -- confirm where the function is supposed to come from.
# Upper bounds handed to the list generator; the units of 'shift' and 'blur'
# are defined by generate_lists_from_adni2 -- TODO confirm.
max_augm = {'shift': 2, 'blur': 1.2}
# Returns three collections: training, validation and test lists.
(train_lists, valid_lists, test_lists) = generate_lists_from_adni2(adni_root=p['ADNI2_MODIFIED_DIR'], max_augm_params=max_augm, augm_factor=5, shuffle_data=True, debug=True)


Calculate mean images for training databases and store them in binaryproto format **[uncomment to use]**

In [None]:
# mean_MRI = calc_lmdb_mean(p['LMDB_PREFIX'] + '_MRI_train', reshape_4D=True)
# mean_MD = calc_lmdb_mean(p['LMDB_PREFIX'] + '_MD_train', reshape_4D=True)
# array_to_proto(mean_MRI, p['LMDB_PREFIX'] + '_MRI_mean.binaryproto')
# array_to_proto(mean_MD, p['LMDB_PREFIX'] + '_MD_mean.binaryproto')

### Net parameters

In [None]:
# File names of the generated prototxt files and of the mean images.
# All of them are derived from the LMDB prefix and are relative paths.
train_net_path     = '{}_train.prototxt'.format(p['LMDB_PREFIX'])
test_net_path      = '{}_test.prototxt'.format(p['LMDB_PREFIX'])
solver_config_path = '{}_solver.prototxt'.format(p['LMDB_PREFIX'])
mean_MRI           = '{}_MRI_mean.binaryproto'.format(p['LMDB_PREFIX'])
mean_MD            = '{}_MD_mean.binaryproto'.format(p['LMDB_PREFIX'])

### Functions to create custom net and solver

In [None]:
def custom_net_straight(mri_lmdb, batch_size):
    """
    Build the single-input network definition.

    Layout: LMDB data layer -> reshape to [batch_size, 1] + p['IDIMS'] ->
    four (convolution, 2x2 max pooling, ReLU) groups -> one fully connected
    layer with ReLU -> 3-way score -> softmax loss.

    mri_lmdb:   source path of the LMDB read by the data layer.
    batch_size: batch size of the data layer; it is also written into the
                reshape layer, so the prototxt is tied to this value.

    Reads the module-level `p['IDIMS']` and `mean_MRI` (mean file path).
    Returns the NetParameter message produced by NetSpec.to_proto().
    """

    import caffe
    from caffe import layers as L, params as P

    n = caffe.NetSpec()

    sc = 1.0 / 255.0

    pool_type = P.Pooling.MAX #P.Pooling.AVE
    filler_type = dict(type='gaussian')#dict(type='xavier')
    # Fully connected layers need an explicit filler: without one Caffe falls
    # back to a constant 0 filler, and with two consecutive all-zero layers the
    # weight gradients stay zero, so only the last bias would ever train.
    fc_filler_type = dict(type='xavier')

    # --- data layers ---
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=mri_lmdb, transform_param={'scale':sc, 'mean_file':mean_MRI}, ntop=2)
    n.resh1  = L.Reshape(n.data, reshape_param={'shape':{'dim': [batch_size, 1]+p['IDIMS']}})

    # --- first group of convolutional layers ---
    n.conv1 = L.Convolution(n.resh1, kernel_size=7, num_output=8, weight_filler=filler_type)
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=pool_type)
    n.relu1 = L.ReLU(n.pool1)

    # --- second group of convolutional layers ---
    n.conv2 = L.Convolution(n.relu1, kernel_size=5, num_output=16, weight_filler=filler_type)
    n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=2, pool=pool_type)
    n.relu2 = L.ReLU(n.pool2)

    # --- third group of convolutional layers ---
    n.conv3 = L.Convolution(n.relu2, kernel_size=3, num_output=32, weight_filler=filler_type)
    n.pool3 = L.Pooling(n.conv3, kernel_size=2, stride=2, pool=pool_type)
    n.relu3 = L.ReLU(n.pool3)

    # --- fourth group of convolutional layers ---
    n.conv4 = L.Convolution(n.relu3, kernel_size=3, num_output=64, weight_filler=filler_type)
    n.pool4 = L.Pooling(n.conv4, kernel_size=2, stride=2, pool=pool_type)
    n.relu4 = L.ReLU(n.pool4)

    # --- first fully connected layer ---
    n.fc1      = L.InnerProduct(n.relu4, num_output=64, weight_filler=fc_filler_type)
    n.fcrelu1  = L.ReLU(n.fc1)

#     # --- second fully connected layer ---
#     n.fc2      = L.InnerProduct(n.fcrelu1, num_output=16, weight_filler=fc_filler_type)
#     n.fcrelu2  = L.ReLU(n.fc2)

    # --- 3-way classification score and loss ---
    n.score    = L.InnerProduct(n.fcrelu1, num_output=3, weight_filler=fc_filler_type)
    n.loss     = L.SoftmaxWithLoss(n.score, n.label)

    return n.to_proto()

In [None]:
def custom_net_siamese(mri_lmdb, md_lmdb, batch_size):
    """
    Build the two-input (MRI + MD) network definition.

    Each modality has its own data layer, reshape layer and four
    (convolution, 2x2 max pooling, ReLU) groups. No shared parameter names are
    set, so the two branches carry separate weights. The branch outputs are
    concatenated and fed through two fully connected layers with ReLU, a
    3-way score layer and a softmax loss.

    mri_lmdb:   source path of the LMDB with MRI volumes (also provides labels).
    md_lmdb:    source path of the LMDB with MD volumes (labels are not read).
    batch_size: batch size of both data layers; it is also written into the
                reshape layers, so the prototxt is tied to this value.

    Reads the module-level `p['IDIMS']`, `mean_MRI` and `mean_MD`.
    Returns the NetParameter message produced by NetSpec.to_proto().
    """

    import caffe
    from caffe import layers as L, params as P

    n = caffe.NetSpec()

    sc = 1.0 / 255.0

    ## indicate engines if needed
    ## engine for convolution: engine=P.Convolution.CAFFE, P.Convolution.CUDNN
    ## engine for pooling: engine=P.Pooling.CAFFE, P.Pooling.CUDNN

    pool_type = P.Pooling.MAX #P.Pooling.AVE
    filler_type = dict(type='xavier')

    def _conv_group(bottom, index, modality, kernel_size, num_output):
        """Add conv/pool/relu layers named '<kind><index>_<modality>' on top of `bottom`; return the ReLU top."""
        suffix = '{}_{}'.format(index, modality)
        conv = L.Convolution(bottom, kernel_size=kernel_size, num_output=num_output, weight_filler=filler_type)
        pool = L.Pooling(conv, kernel_size=2, stride=2, pool=pool_type)
        relu = L.ReLU(pool)
        # Register in conv, pool, relu order so the layer order is unchanged.
        setattr(n, 'conv' + suffix, conv)
        setattr(n, 'pool' + suffix, pool)
        setattr(n, 'relu' + suffix, relu)
        return relu

    # --- data layers ---
    n.data_MRI, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=mri_lmdb, transform_param={'scale':sc, 'mean_file':mean_MRI}, ntop=2)
    n.data_MD = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=md_lmdb, transform_param={'scale':sc, 'mean_file':mean_MD}, ntop=1)
    n.resh1_MRI  = L.Reshape(n.data_MRI, reshape_param={'shape':{'dim': [batch_size, 1]+p['IDIMS']}})
    n.resh1_MD   = L.Reshape(n.data_MD, reshape_param={'shape':{'dim': [batch_size, 1]+p['IDIMS']}})

    # --- four groups of convolutional layers per modality ---
    # (kernel_size, num_output) of groups 1..4; MRI is added before MD in each
    # group, which keeps the original layer order.
    group_params = [(7, 8), (5, 16), (3, 32), (3, 64)]
    branch_top = {'MRI': n.resh1_MRI, 'MD': n.resh1_MD}
    for index, (kernel_size, num_output) in enumerate(group_params, start=1):
        for modality in ('MRI', 'MD'):
            branch_top[modality] = _conv_group(branch_top[modality], index, modality, kernel_size, num_output)

    # --- concatenation layer ---
    n.join = L.Concat(branch_top['MRI'], branch_top['MD'])

    # Fully connected layers need an explicit filler: without one Caffe falls
    # back to a constant 0 filler, and consecutive all-zero layers receive
    # zero weight gradients, so only the last bias would ever train.

    # --- first fully connected layer ---
    n.fc1      = L.InnerProduct(n.join, num_output=64, weight_filler=filler_type)
    n.fcrelu1  = L.ReLU(n.fc1)

    # --- second fully connected layer ---
    n.fc2      = L.InnerProduct(n.fcrelu1, num_output=16, weight_filler=filler_type)
    n.fcrelu2  = L.ReLU(n.fc2)

    # --- 3-way classification score and loss ---
    n.score    = L.InnerProduct(n.fcrelu2, num_output=3, weight_filler=filler_type)
    n.loss     = L.SoftmaxWithLoss(n.score, n.label)

    return n.to_proto()

In [None]:
def custom_solver(train_net_path, test_net_path):
    """
    Build the SolverParameter message used for training.

    train_net_path: path of the training net prototxt.
    test_net_path:  path of the test net prototxt (registered as the only test net).

    All hyper-parameters are fixed inside this function.
    Returns the populated caffe_pb2.SolverParameter.
    """

    import caffe
    from caffe.proto import caffe_pb2

    return caffe_pb2.SolverParameter(
        # fixed seed for reproducible experiments
        random_seed=0xCAFFE,

        # train / test nets: test every 10 training iterations on 100 batches
        train_net=train_net_path,
        test_net=[test_net_path],
        test_interval=10,
        test_iter=[100],

        # number of training iterations (net updates)
        max_iter=1000,

        # optimiser ("Adam", "Nesterov", ...)
        type="Nesterov",
        base_lr=0.1,        # initial learning rate
        momentum=0.9,
        weight_decay=5e-4,  # regularisation

        # learning rate schedule
        lr_policy='inv',
        gamma=0.1,
        power=0.5,

        # log the training loss every 10 iterations
        display=10,

        # write a snapshot of the trained net every 100 iterations
        snapshot=100,
        snapshot_prefix='alz_net',

        # train on the GPU
        solver_mode=caffe_pb2.SolverParameter.GPU,
    )

To get an idea of the architecture of the net, we can check the dimensions of the intermediate features (blobs) and parameters.

In [None]:
def check_blobs_dimensions(solver):
    """Print the name and data shape of every blob in the solver's training net."""
    print('The network is represented with the following blobs:')
    # Collect all shapes first, then print one "name - shape" line per blob.
    shapes = []
    for blob_name, blob in solver.net.blobs.items():
        shapes.append((blob_name, blob.data.shape))
    for blob_name, shape in shapes:
        print(blob_name, '-', shape)

In [None]:
def configure_net_and_write_to_prototxt(train_batch_size=7, test_batch_size=1, siamese=False):
    """
    Generate the train net, test net and solver and write them as prototxt.

    train_batch_size: batch size of the training net (default 7).
    test_batch_size:  batch size of the test net (default 1). run() in this
                      notebook scores only the first sample of every test
                      batch, so keep 1 when evaluating with it.
    siamese:          False builds custom_net_straight on the MRI databases;
                      True builds custom_net_siamese on the MRI and MD databases.

    Output goes to the module-level train_net_path, test_net_path and
    solver_config_path. Database names are derived from p['LMDB_PREFIX'].
    """
    pfx = p['LMDB_PREFIX']

    # --- create train and test nets and the solver ---
    if siamese:
        train_net = custom_net_siamese(pfx + '_MRI_train', pfx + '_MD_train', train_batch_size)
        test_net = custom_net_siamese(pfx + '_MRI_test', pfx + '_MD_test', test_batch_size)
    else:
        train_net = custom_net_straight(pfx + '_MRI_train', train_batch_size)
        test_net = custom_net_straight(pfx + '_MRI_test', test_batch_size)
    solver = custom_solver(train_net_path, test_net_path)

    # Everything is built and serialised before any file is opened, so a
    # failure while building a net cannot leave a truncated prototxt behind.
    outputs = [
        (train_net_path, str(train_net)),
        (test_net_path, str(test_net)),
        (solver_config_path, str(solver)),
    ]
    for path, text in outputs:
        with open(path, 'w') as f:
            f.write(text)

In [None]:
def run(niter=200, test_interval=10, ntest=100):
    """
    Train with the solver stored at `solver_config_path` and plot the result.

    niter:         number of single solver steps to perform.
    test_interval: evaluate the test net every `test_interval` steps
                   (including step 0).
    ntest:         number of test-net forward passes per evaluation.

    Each evaluation builds a 3x3 confusion matrix (rows: predicted label,
    columns: real label) from the first sample of every test batch, so the
    test net is expected to use batch size 1. After training, the per-step
    training loss and the test accuracy are plotted on a shared x axis.

    Uses GPU device 0 and the module-level `caffe`, `solver_config_path`
    and `check_blobs_dimensions`.
    """

    import numpy as np
    import matplotlib.pyplot as plt

    caffe.set_device(0)
    caffe.set_mode_gpu()
    solver = caffe.get_solver(solver_config_path)

    check_blobs_dimensions(solver)

    # -------------------- solve --------------------
    print('Solving...')

    # Losses are fractional: keep them as floats. The previous integer array
    # truncated every value, and the np.int alias no longer exists in recent
    # NumPy releases.
    loss = np.zeros(niter)
    acc = np.zeros(int(np.ceil(niter / test_interval)))

    test_net = solver.test_nets[0]

    # the main solver loop
    for it in range(niter):
        solver.step(1)
        loss[it] = solver.net.blobs['loss'].data
        if it % test_interval == 0:
            print('Iteration', it, 'testing...')
            confusion = np.zeros((3, 3), dtype=int)
            for test_it in range(ntest):
                test_net.forward()
                predicted_label = test_net.blobs['score'].data.argmax(1)[0]
                real_label = int(test_net.blobs['label'].data[0])
                confusion[predicted_label, real_label] += 1
            print(confusion)
            # correct predictions sit on the diagonal
            correct = np.trace(confusion)
            moment_accuracy = correct / ntest
            print('correct %d out of %d (%f)' % (correct, ntest, moment_accuracy))
            acc[it // test_interval] = moment_accuracy

    # -------------------- plot --------------------
    _, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    ax1.plot(np.arange(niter), loss)
    ax2.plot(test_interval * np.arange(len(acc)), acc, 'r')
    ax1.set_xlabel('iteration')
    ax1.set_ylabel('train loss')
    ax2.set_ylabel('test accuracy')
    # acc is empty when niter == 0; avoid indexing an empty array
    final_accuracy = acc[-1] if len(acc) else float('nan')
    ax2.set_title('Custom Test Accuracy: {:.2f}'.format(final_accuracy))

### Configure and run the net

In [None]:
# configure_net_and_write_to_prototxt()
# run()