# Imports

In [3]:
##############
# basic libs #
##############

import warnings
warnings.filterwarnings('ignore')
from __future__ import print_function
import os, dicom, sys, csv

###########
# science #
###########

import scipy as sp
import numpy as np
from scipy.stats import norm
from scipy.misc import imresize
from scipy import ndimage

#######
# ML #
######

from skimage.restoration import denoise_tv_chambolle
from keras.utils.generic_utils import Progbar
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers.core import Activation, Dense, Flatten, Dropout
from keras.optimizers import Adam
from keras.regularizers import l2
from keras import backend as K

In [7]:
%reload_ext watermark
%watermark -a "Ken Cavagnolo" -n -u -v -m -h -g -p numpy,scipy,keras,theano,scikit-learn,scikit-image

Ken Cavagnolo 
Last updated: Thu Feb 18 2016 

CPython 2.7.10
IPython 4.0.3

numpy 1.10.4
scipy 0.17.0
keras 0.3.2
theano 0.8.0.dev0
scikit-learn 0.17
scikit-image 0.11.3

compiler   : GCC 5.2.1 20151010
system     : Linux
release    : 4.2.0-23-generic
machine    : x86_64
processor  : x86_64
CPU cores  : 4
interpreter: 64bit
host name  : ubuntu
Git hash   : d789250ba37d65094a9a2958979a70b6344f7cba


# Functions

In [17]:
def crps(true, pred):
    """Return the squared difference between two arrays, averaged per element.

    The squared element-wise difference of ``true`` and ``pred`` is summed and
    divided by the number of elements in ``true``.
    """
    squared_error = (true - pred) ** 2
    return squared_error.sum() / true.size

In [18]:
def real_to_cdf(y, sigma=1e-10):
    """Turn each value in ``y`` into a Gaussian CDF sampled at 0, 1, ..., 599.

    Row ``i`` of the result is ``norm.cdf`` evaluated on the 600-point grid
    with mean ``y[i]`` and standard deviation ``sigma``. With the tiny default
    ``sigma`` each row is effectively a step function at ``y[i]``.

    Returns an array of shape ``(y.shape[0], 600)``.
    """
    n_rows = y.shape[0]
    grid = np.linspace(0, 599, 600)
    cdf = np.zeros((n_rows, 600))
    for row in range(n_rows):
        cdf[row] = norm.cdf(grid, y[row], sigma)
    return cdf

In [19]:
def preprocess(X):
    """Denoise every 2-D frame of ``X`` in place with total-variation filtering.

    ``X`` is indexed as ``X[sample, frame]``; each such frame is replaced by
    the output of ``denoise_tv_chambolle`` (weight 0.1, single channel). A
    Keras progress bar advances once per sample.

    Returns the same (mutated) array that was passed in.
    """
    progbar = Progbar(X.shape[0])
    for sample in X:
        for frame_idx in range(X.shape[1]):
            sample[frame_idx] = denoise_tv_chambolle(sample[frame_idx], weight=0.1, multichannel=False)
        progbar.add(1)
    return X

In [20]:
def rotation_augmentation(X, angle_range):
    """Return a copy of ``X`` with each sample rotated by a random whole angle.

    One angle (in degrees) is drawn per sample, uniformly from the inclusive
    range ``[-angle_range, angle_range]``, and applied to every frame
    ``X[i, j]`` of that sample so all frames of a sample stay aligned.
    Frames keep their shape (``reshape=False``) and are interpolated linearly.

    Args:
        X: array indexed as ``X[sample, frame]`` with 2-D frames.
        angle_range: non-negative integer bound on the rotation angle.

    Returns:
        A new array of the same shape as ``X``; ``X`` itself is not modified.
    """
    progbar = Progbar(X.shape[0])
    X_rot = np.copy(X)
    for i in range(len(X)):
        # randint's upper bound is exclusive; +1 makes the range symmetric and
        # keeps angle_range == 0 (no rotation) from raising ValueError.
        angle = np.random.randint(-angle_range, angle_range + 1)
        for j in range(X.shape[1]):
            X_rot[i, j] = ndimage.rotate(X[i, j], angle, reshape=False, order=1)
        progbar.add(1)
    return X_rot

In [21]:
def shift_augmentation(X, h_range, w_range):
    """Return a copy of ``X`` with each sample translated by a random offset.

    For every sample a vertical and a horizontal fraction are drawn uniformly
    from ``[-h_range, h_range)`` and ``[-w_range, w_range)``, scaled by the
    frame height/width and truncated to whole pixels. The same offset is
    applied to every frame ``X[i, j]`` of the sample using nearest-neighbour
    ``ndimage.shift``.

    Returns a new array; ``X`` itself is not modified.
    """
    progbar = Progbar(X.shape[0])
    shifted = np.copy(X)
    frame_dims = X.shape[2:]
    for sample_idx in range(len(X)):
        # Draw order (height first, then width) matters for RNG reproducibility.
        h_fraction = np.random.rand() * h_range * 2. - h_range
        w_fraction = np.random.rand() * w_range * 2. - w_range
        offset = (int(h_fraction * frame_dims[0]), int(w_fraction * frame_dims[1]))
        for frame_idx in range(X.shape[1]):
            shifted[sample_idx, frame_idx] = ndimage.shift(X[sample_idx, frame_idx], offset, order=0)
        progbar.add(1)
    return shifted

# Pre-process

In [4]:
def crop_resize(img):
    """Centre-crop ``img`` to a square and resize it to the global ``img_shape``.

    If the image has fewer rows than columns it is transposed first, so the
    longer edge always runs along axis 0. The square crop uses the shorter
    edge length and is centred; the result is passed through ``imresize``.
    """
    if img.shape[0] < img.shape[1]:
        img = img.T
    side = min(img.shape[:2])
    top = int((img.shape[0] - side) / 2)
    left = int((img.shape[1] - side) / 2)
    square = img[top: top + side, left: left + side]
    return imresize(square, img_shape)

In [5]:
def _fit_frame_count(frames, n_frames=30):
    """Repeat ``frames`` from its start, or truncate it, to ``n_frames`` items.

    An empty list is returned unchanged; a non-empty list is extended in place
    before the (possibly truncated) result is returned.
    """
    if not frames:
        return frames
    source = 0
    while len(frames) < n_frames:
        frames.append(frames[source])
        source += 1
    return frames[:n_frames]


def load_images(from_dir, verbose=True):
    """Load every ``.dcm`` file found in "sax" directories below ``from_dir``.

    Each directory whose path contains ``"sax"`` and holds at least one
    ``.dcm`` file becomes one slice: its images are read in sorted filename
    order, scaled by their maximum, optionally passed through
    ``crop_resize`` (when the module-level ``img_resize`` flag is true), and
    then repeated/truncated to exactly 30 frames. The study id of a slice is
    the third-from-last component of the directory path.

    Directories and files are visited in sorted order so the frame order
    inside a slice and the order of the returned ids are reproducible.

    Args:
        from_dir: root directory to walk (a trailing ``/`` is added if missing).
        verbose: when true, print a progress line every 1000 images.

    Returns:
        ``(ids, study_to_images)`` where ``ids`` lists study ids in traversal
        order and ``study_to_images`` maps each id to ``np.array`` of that
        study's slices (one 30-frame list per slice).
    """
    print('-'*50)
    print('Loading all DICOM images from {0}...'.format(from_dir))
    print('-'*50)

    ids = []  # study ids, in the order first encountered
    study_to_slices = dict()  # study id -> list of 30-frame slices
    total = 0

    from_dir = from_dir if from_dir.endswith('/') else from_dir + '/'
    for subdir, dirs, files in os.walk(from_dir):
        dirs.sort()  # in-place sort makes os.walk descend deterministically
        subdir = subdir.replace('\\', '/')  # windows path fix
        if "sax" not in subdir:
            continue

        # Sorted so frames are not left in arbitrary filesystem order.
        dcm_files = sorted(f for f in files if f.endswith('.dcm'))
        if not dcm_files:
            continue

        # Only resolved for directories that actually hold images, so a
        # shallow root such as 'train/' no longer raises IndexError.
        study_id = subdir.split('/')[-3]

        frames = []
        for f in dcm_files:
            image = dicom.read_file(os.path.join(subdir, f))
            image = image.pixel_array.astype(float)
            peak = np.max(image)
            if peak > 0:  # an all-zero image would otherwise become NaN
                image /= peak  # scale to [0,1]
            if img_resize:
                image = crop_resize(image)
            frames.append(image)
            if verbose and total % 1000 == 0:
                print('Images processed {0}'.format(total))
            total += 1

        if study_id not in study_to_slices:
            ids.append(study_id)
            study_to_slices[study_id] = []
        study_to_slices[study_id].append(_fit_frame_count(frames))

    print('-'*50)
    print('All DICOM in {0} images loaded.'.format(from_dir))
    print('-'*50)

    study_to_images = dict()
    for study_id in ids:
        study_to_images[study_id] = np.array(study_to_slices[study_id])

    return ids, study_to_images

In [6]:
def map_studies_results():
    """Read ``data/train.csv`` into a dict keyed by study id.

    The first line is treated as a header and skipped, and blank lines are
    ignored. Every remaining row must hold exactly three comma-separated
    fields: an id followed by two numeric values.

    Returns:
        dict mapping the id string to ``[float(second_field), float(third_field)]``
        in file column order.

    Raises:
        ValueError: if a non-blank row does not have three fields or a value
            is not numeric.
    """
    id_to_results = dict()
    with open('data/train.csv') as train_csv:
        next(train_csv, None)  # skip the header row (no-op for an empty file)
        for line in train_csv:
            line = line.strip()
            if not line:
                continue  # tolerate trailing/blank lines
            study_id, first_value, second_value = line.split(',')
            id_to_results[study_id] = [float(first_value), float(second_value)]
    return id_to_results

In [7]:
def write_train_npy():
    """Build the training arrays from ``data/train`` and save them as ``.npy``.

    Every slice of every loaded study becomes one row of ``X`` (stored as
    ``uint8``); the study's targets from ``map_studies_results`` are repeated
    once per slice in ``y``. Output goes to ``data/X_train.npy`` and
    ``data/y_train.npy``.
    """
    rule = '-'*50
    print(rule)
    print('Writing training data to .npy file...')
    print(rule)

    study_ids, images = load_images('data/train')  # ids plus id -> slices mapping
    studies_to_results = map_studies_results()  # id -> target values

    X, y = [], []
    for study_id in study_ids:
        study = images[study_id]
        outputs = studies_to_results[study_id]
        n_slices = study.shape[0]
        X.extend(study[i, :, :, :] for i in range(n_slices))
        y.extend([outputs] * n_slices)

    X = np.array(X, dtype=np.uint8)
    y = np.array(y)
    np.save('data/X_train.npy', X)
    np.save('data/y_train.npy', y)
    print('Done.')

In [8]:
def write_validation_npy():
    """Build the validation arrays from ``data/validate`` and save them as ``.npy``.

    Every slice of every loaded study becomes one row of ``X`` (stored as
    ``uint8``) and the owning study id is recorded at the same position.
    Output goes to ``data/X_validate.npy`` and ``data/ids_validate.npy``.
    """
    rule = '-'*50
    print(rule)
    print('Writing validation data to .npy file...')
    print(rule)

    ids, images = load_images('data/validate')

    study_ids, X = [], []
    for study_id in ids:
        study = images[study_id]
        n_slices = study.shape[0]
        study_ids.extend([study_id] * n_slices)
        X.extend(study[i, :, :, :] for i in range(n_slices))

    X = np.array(X, dtype=np.uint8)
    np.save('data/X_validate.npy', X)
    np.save('data/ids_validate.npy', study_ids)
    print('Done.')

In [10]:
# Module-level settings read at call time by load_images() and crop_resize().
img_resize = True  # when true, load_images() sends every frame through crop_resize()
img_shape = (64, 64)  # target size handed to imresize() inside crop_resize()

# Read the DICOM trees under data/ and cache them as .npy arrays.
write_train_npy()
write_validation_npy()

--------------------------------------------------
Writing training data to .npy file...
--------------------------------------------------
--------------------------------------------------
Loading all DICOM images from data/train...
--------------------------------------------------
--------------------------------------------------
All DICOM in data/train/ images loaded.
--------------------------------------------------
Done.
--------------------------------------------------
Writing validation data to .npy file...
--------------------------------------------------
--------------------------------------------------
Loading all DICOM images from data/validate...
--------------------------------------------------
--------------------------------------------------
All DICOM in data/validate/ images loaded.
--------------------------------------------------
Done.


# Model

In [12]:
def root_mean_squared_error(y_true, y_pred):
    """Keras loss: square root of the mean squared error over the last axis."""
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error, axis=-1)
    return K.sqrt(mean_squared_error)

In [13]:
def center_normalize(x):
    """Subtract the mean of ``x`` and divide by its standard deviation.

    Both statistics are taken over the whole tensor via the Keras backend.
    """
    centered = x - K.mean(x)
    return centered / K.std(x)

In [14]:
def get_model():
    """Build and compile the convolutional regression network.

    Layout: an input normalisation layer for ``(30, 64, 64)`` inputs, two
    conv blocks (64 then 96 filters, 3x3) with zero padding, one conv block
    (128 filters, 2x2) without it, each followed by 2x2 max pooling and 25%
    dropout, then a 1024-unit L2-regularised dense layer with 50% dropout and
    a single linear output. Compiled with Adam (lr 1e-4) and the RMSE loss.
    """
    model = Sequential()
    model.add(Activation(activation=center_normalize, input_shape=(30, 64, 64)))

    def add_conv_relu(nb_filter, kernel, border_mode):
        # One square convolution followed by its ReLU.
        model.add(Convolution2D(nb_filter, kernel, kernel, border_mode=border_mode))
        model.add(Activation('relu'))

    def add_pool_dropout():
        # Down-sample by two and regularise.
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Dropout(0.25))

    # First two blocks: 'same' conv, 'valid' conv, then re-pad before pooling.
    for nb_filter in (64, 96):
        add_conv_relu(nb_filter, 3, 'same')
        add_conv_relu(nb_filter, 3, 'valid')
        model.add(ZeroPadding2D(padding=(1, 1)))
        add_pool_dropout()

    # Third block: two 'same' 2x2 convs, no extra padding.
    add_conv_relu(128, 2, 'same')
    add_conv_relu(128, 2, 'same')
    add_pool_dropout()

    # Regression head.
    model.add(Flatten())
    model.add(Dense(1024, W_regularizer=l2(1e-3)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))

    model.compile(optimizer=Adam(lr=0.0001), loss=root_mean_squared_error)
    return model

# Training

In [22]:
def load_train_data():
    """Load the cached training arrays, rescale ``X`` and shuffle both in unison.

    ``X`` is converted to ``float32`` and divided by 255. A random seed is
    drawn and re-applied before shuffling each array, so ``X`` and ``y``
    receive the same permutation and rows stay paired.

    Returns:
        ``(X, y)`` after shuffling.
    """
    X = np.load('data/X_train.npy')
    y = np.load('data/y_train.npy')
    X = X.astype(np.float32)
    X /= 255

    seed = np.random.randint(1, 10e6)
    for array in (X, y):
        # Re-seeding before each shuffle yields the identical permutation.
        np.random.seed(seed)
        np.random.shuffle(array)
    return X, y

In [23]:
def split_data(X, y, split_ratio=0.2):
    """Split ``X`` and ``y`` into a training part and a held-out part.

    The first ``int(len(X) * split_ratio)`` rows form the held-out set and
    the remainder the training set; no shuffling is done here.

    Args:
        X: 4-D array of samples.
        y: 2-D array of targets with one row per sample.
        split_ratio: fraction of rows to hold out.

    Returns:
        ``(X_train, y_train, X_test, y_test)``.
    """
    # Slice bounds must be integers; a float index raises on current NumPy.
    split = int(X.shape[0] * split_ratio)
    X_test = X[:split, :, :, :]
    y_test = y[:split, :]
    X_train = X[split:, :, :, :]
    y_train = y[split:, :]
    return X_train, y_train, X_test, y_test

In [24]:
def train():
    """Train the systole and diastole models and persist their weights.

    Steps:
      1. Build two identical networks with ``get_model()``.
      2. Load, denoise and split the cached training data (first 20% of the
         shuffled rows are held out, see ``split_data``).
      3. For ``nb_iter`` iterations: re-augment the training images
         (rotation, then shift), fit each model for ``epochs_per_iter``
         epochs, optionally report CRPS on both splits, and save weights.

    Side effects (all in the current working directory):
      * ``weights_systole.hdf5`` / ``weights_diastole.hdf5`` are overwritten
        every iteration.
      * ``weights_*_best.hdf5`` are overwritten whenever the model's
        validation loss reaches a new minimum.
      * ``val_loss.txt`` is rewritten every iteration with the two best
        validation losses (systole on the first line, diastole on the second).
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    # evaluate CRPS every `calc_crps` iterations; 0 or less disables it
    calc_crps = 1

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        # augmentation always starts from the un-augmented X_train, so each
        # iteration sees a fresh random rotation + shift of the same images
        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        # the systole model is fitted on target column 0
        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True, nb_epoch=epochs_per_iter,
                                         batch_size=batch_size, validation_data=(X_test, y_test[:, 0]))

        # the diastole model is fitted on target column 1
        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True, nb_epoch=epochs_per_iter,
                                           batch_size=batch_size, validation_data=(X_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            # predictions are made on the un-augmented training images
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            # column 0 targets first, then column 1, matching the prediction order below
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

In [None]:
# Run the full training loop; writes weight files and val_loss.txt to the working directory.
train()

# Submit

In [26]:
def load_validation_data():
    """Load the cached validation images and their study ids.

    The image array is converted to ``float32`` and divided by 255.

    Returns:
        ``(X, ids)`` as read from ``data/X_validate.npy`` and
        ``data/ids_validate.npy``.
    """
    raw_images = np.load('data/X_validate.npy')
    ids = np.load('data/ids_validate.npy')

    X = raw_images.astype(np.float32)
    X /= 255
    return X, ids

In [27]:
def accumulate_study_results(ids, prob):
    """Average the rows of ``prob`` that belong to the same study.

    ``ids[i]`` is converted with ``int()`` and used as the key for row
    ``prob[i]``. Rows are summed in ``float32`` and divided by the number of
    rows seen for that key.

    Returns:
        dict mapping each integer study id to a ``(1, prob.shape[1])``
        ``float32`` array holding the mean row.
    """
    totals = {}
    counts = {}
    for i, row in enumerate(prob):
        key = int(ids[i])
        if key not in counts:
            counts[key] = 0.
            totals[key] = np.zeros((1, prob.shape[1]), dtype=np.float32)
        counts[key] += 1
        totals[key] += row
    for key, count in counts.items():
        totals[key][:] /= count
    return totals

In [28]:
def submission():
    """Predict on the validation set and write ``submission.csv``.

    Loads the best weights saved by ``train()`` and the two validation losses
    from ``val_loss.txt`` (systole on the first line, diastole on the second),
    which are used as the standard deviation when turning each prediction
    into a CDF. Per-slice CDFs are averaged per study and written in the row
    order of ``data/sample_submission_validate.csv``.

    Rows whose study id has no prediction are reported with a ``Miss``
    message and written with the id only.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading models weights...')
    model_systole.load_weights('weights_systole_best.hdf5')
    model_diastole.load_weights('weights_diastole_best.hdf5')

    # load val losses to use as sigmas for CDF
    with open('val_loss.txt', mode='r') as f:
        val_loss_systole = float(f.readline())
        val_loss_diastole = float(f.readline())

    print('Loading validation data...')
    X, ids = load_validation_data()

    print('Pre-processing images...')
    X = preprocess(X)

    batch_size = 32
    print('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Context managers close both files even if a row fails to parse.
    with open('data/sample_submission_validate.csv') as template, open('submission.csv', 'w') as f:
        fi = csv.reader(template)
        fo = csv.writer(f, lineterminator='\n')
        fo.writerow(next(fi))  # copy the header; next() works on Python 2 and 3
        for line in fi:
            idx = line[0]
            # row ids look like '<study>_<target>'
            key, target = idx.split('_')
            key = int(key)
            out = [idx]
            if key in sub_systole:
                if target == 'Diastole':
                    out.extend(list(sub_diastole[key][0]))
                else:
                    out.extend(list(sub_systole[key][0]))
            else:
                print('Miss {0}'.format(idx))
            fo.writerow(out)

    print('Done.')

In [None]:
# Generate submission.csv from the best saved weights; requires train() to have run first.
submission()

# AWS GPU Compute

* [AWS console](https://console.aws.amazon.com)
* [A how-to guide here](http://markus.com/install-theano-on-aws)
* [Some optimizations](http://techblog.netflix.com/2014/02/distributed-neural-networks-with-gpus.html)
* [64-bit cuda Ubuntu repo](http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64)

ID: i-481b01cd

ZN: us-east-1b

DI: i-481b01cd:/dev/sda1

```shell
# access instance
ssh -i ~/.ssh/aws.pem ubuntu@[DNS]

# update instance
sudo apt-get update
sudo apt-get -y dist-upgrade
sudo apt-get install -y gcc g++ gfortran build-essential git wget linux-image-generic libopenblas-dev python-dev python-pip python-nose emacs24 tcsh lynx zip libfreetype6-dev libxft-dev libblas-dev liblapack-dev libatlas-base-dev reptyr screen libhdf5-dev

# install cuda to make GPU accel possible
sudo wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_7.5-18_amd64.deb
sudo dpkg -i cuda-repo-ubuntu1404_7.5-18_amd64.deb
sudo apt-get update
sudo apt-get install -y cuda

# install cudnn; speeds process by ~20%
wget https://s3.amazonaws.com/emolson/pydata/cudnn-7.0-linux-x64-v3.0-prod.tgz
tar -xzvf cudnn-7.0-linux-x64-v3.0-prod.tgz
sudo cp cuda/lib64/* /usr/local/cuda/lib64
sudo cp cuda/include/* /usr/local/cuda/include

# get cuda into cshrc
echo -e "setenv CUDA_HOME /usr/local/cuda" >> ~/.cshrc
echo -e 'set path=($path $CUDA_HOME/bin)' >> ~/.cshrc
echo -e 'setenv LD_LIBRARY_PATH $CUDA_HOME/lib64' >> ~/.cshrc

# reboot and test
sudo reboot
ssh -i ~/.ssh/aws.pem ubuntu@[DNS]
cuda-install-samples-7.5.sh ~/
cd NVIDIA_CUDA-7.5_Samples/1_Utilities/deviceQuery; make; ./deviceQuery

# install bleeding edge theano and deps
sudo pip install cython h5py pyyaml ipython pydicom
sudo pip install --upgrade git+git://github.com/Theano/Theano.git
sudo pip install keras

# opt theanorc
emacs ~/.theanorc

[global]
floatX = float32
device = gpu
allow_gc = False
mode = FAST_RUN
optimizer_including = cudnn

[lib]
cnmem = 0.9

[nvcc]
fastmath = True

[cuda]
root = /usr/local/cuda

# set cookies for lynx
emacs ~/.lynxrc 

SET_COOKIES:TRUE
ACCEPT_ALL_COOKIES:TRUE
PERSISTENT_COOKIES:TRUE
COOKIE_FILE:~/.lynx_cookies
COOKIE_SAVE_FILE:~/.lynx_cookies
```

**The above improvements cut compute time from 3.6 min per iteration to 1.9 min per iteration, roughly a 2x speed-up.** A script for testing the GPU is below.

In [None]:
# Check that Theano is actually running on the GPU.
# Copy this code to test.py and run it.
from theano import function, config, shared, sandbox
import theano.tensor as T
import numpy
import time

vlen = 10 * 30 * 768  # length of the test vector
iters = 1000  # number of timed evaluations

rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], T.exp(x))
print(f.maker.fgraph.toposort())

# Time repeated evaluation of exp(x).
t0 = time.time()
for i in range(iters):
    r = f()
t1 = time.time()
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))

# A plain Elemwise op in the compiled graph means the work stayed on the CPU.
ran_on_cpu = numpy.any([isinstance(node.op, T.Elemwise) for node in f.maker.fgraph.toposort()])
if ran_on_cpu:
    print('Used the cpu')
else:
    print('Used the gpu')

To get data off Kaggle to AWS:

```shell
lynx
lynx -cfg=~/.lynxrc www.kaggle.com
#<navigate to login, check 'Remember me' and exit>
mv ~/.lynx_cookies ~/cookies
wget -c --load-cookies=/home/ubuntu/cookies https://www.kaggle.com/c/second-annual-data-science-bowl/download/validate.zip
wget -c --load-cookies=/home/ubuntu/cookies https://www.kaggle.com/c/second-annual-data-science-bowl/download/train.zip
wget -c --load-cookies=/home/ubuntu/cookies https://www.kaggle.com/c/second-annual-data-science-bowl/download/train.csv.zip
wget -c --load-cookies=/home/ubuntu/cookies https://www.kaggle.com/c/second-annual-data-science-bowl/download/sample_submission_validate.csv.zip
```

Run the ml pipeline:
```shell
screen -S theano
tcsh
cd kaggle_comp/
python data.py
python train.py
python submission.py
```

Forgot to start <code>screen</code> before running the above? Re-attach the running process with <code>reptyr</code>:
```shell
ps aux | grep python # note the pid
reptyr <pid>
```

If <code>reptyr</code> throws an operation denied error, run the commands below and then retry:
```shell
sudo emacs -nw /etc/sysctl.d/10-ptrace.conf
# set kernel.yama.ptrace_scope = 0
sudo sysctl -p /etc/sysctl.d/10-ptrace.conf
```