# Model Building Template

Copy this template to build and evaluate models.

This assumes that you've already saved processed data to disk.

In [None]:
def load_data(data_dir: str) -> typing.Dict[str, np.ndarray]:
    """Load every ``.npy`` file in a directory, keyed by patient id.

    The patient id is the file name without its ``.npy`` suffix.
    Directory entries that are not ``.npy`` files (sub-directories,
    hidden files, stray text files, ...) are skipped instead of being
    handed to ``np.load``.

    Args:
        data_dir: Directory containing one ``<patient_id>.npy`` file per
            patient.

    Returns:
        A dictionary mapping each patient id to the array stored in the
        corresponding file.
    """
    data_dict = {}
    # Sorted so the dictionary order does not depend on the file system.
    for path in sorted(pathlib.Path(data_dir).iterdir()):
        if path.suffix != '.npy' or not path.is_file():
            continue
        # ``stem`` removes only the final suffix, so ids that themselves
        # contain dots are preserved.
        data_dict[path.stem] = np.load(path)
    return data_dict

In [None]:
# LOAD YOUR PROCESSED DATA
# Replace the `...` placeholder with a dict that maps patient ids to pixel
# arrays, e.g. the value returned by load_data(...) defined above.
processed_dict = ...

In [None]:
# Replace the placeholder with the path to the processed labels CSV.
# The CSV needs a 'patient_id' column; it becomes the index used to look
# up each patient's label row.
labels_df = pd.read_csv('<PATH/TO/PROCESSED/LABELS>',
                        index_col='patient_id')

## Preprocessing: Part III

The loaded data is a dictionary; we need to convert it to a NumPy
array of the right dimensions before training.

We also need to apply some final transformations (normalization, shuffling, etc.)

See https://keras.io/utils/ and https://keras.io/preprocessing/image/#imagedatagenerator-class.

In [None]:
import random

In [None]:
def as_numpy_arrays(data: typing.Dict[str, np.ndarray],
                    labels: pd.DataFrame
                    ) -> typing.Tuple[np.ndarray, np.ndarray]:
    """Stack per-patient arrays and their labels in a shared random order.

    Args:
        data: Maps patient ids to pixel arrays. All arrays must have the
            same shape so that they can be stacked.
        labels: Label table indexed by patient id; it must contain a row
            for every key of ``data``.

    Returns:
        A tuple ``(X, y)`` in which ``X[i]`` is the pixel array and
        ``y[i]`` the label row of the same patient.

    Raises:
        ValueError: If ``data`` is empty.
        KeyError: If a patient id has no row in ``labels``.
    """
    if not data:
        # np.stack would fail anyway; fail early with a clearer message.
        raise ValueError('data must contain at least one patient')
    shuffled_ids = list(data)
    # Shuffle the ids once and index both sources with the same order so
    # that samples and labels stay aligned.
    random.shuffle(shuffled_ids)
    X = np.stack([data[id_] for id_ in shuffled_ids])
    y = np.stack([labels.loc[id_] for id_ in shuffled_ids])
    return X, y

In [None]:
# Here's an example
# Stack the per-patient arrays and labels into aligned, shuffled arrays.
X_train, y_train = as_numpy_arrays(processed_dict, labels_df)
# One-hot encode the labels into 2 classes, matching the
# 'categorical_crossentropy' loss the model is compiled with below.
y_train = np_utils.to_categorical(y_train, 2)
print(X_train.shape, y_train.shape)

# Normalization and augmentation settings; see the ImageDataGenerator
# documentation linked above for the meaning of each option.
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# The featurewise_* options rely on dataset statistics computed by fit().
# NOTE(review): datagen is not referenced again in the visible part of this
# template (model.fit below receives arrays directly) -- confirm intended.
datagen.fit(X_train)

## Model Definition

Put your model definition below. Don't forget to update
the [architecture spreadsheet](https://docs.google.com/spreadsheets/u/1/d/1frBnIXvA8B2b0im9cGipa8ndfpU5VhaZYpFTw-gQ35Q/edit#gid=0).

In [None]:
from keras import applications
from keras import optimizers
from keras import models, layers, objectives
from keras import backend as K

In [1]:
def sensitivity(y_true, y_pred):
    """Keras metric: detected positives divided by actual positives.

    Tracked alongside accuracy because the model needs high
    sensitivity as well as good accuracy.

    NOTE(review): both sums run over every element of the tensors, so
    with one-hot targets (as produced by ``to_categorical`` in this
    template) each class column counts as "positive" -- confirm that
    this is the intended definition.
    """
    # Clip to [0, 1] and round so each element counts as 0 or 1.
    actual = K.round(K.clip(y_true, 0, 1))
    detected = K.round(K.clip(y_true * y_pred, 0, 1))
    positives_total = K.sum(actual)
    hits_total = K.sum(detected)
    # epsilon guards against division by zero when a batch has no positives.
    return hits_total / (positives_total + K.epsilon())

In [None]:
model = models.Sequential()
# IMPLEMENT YOUR MODEL HERE
# (add your layers to `model` before the compile call below)
# Compile with plain SGD; the loss expects one-hot labels, and the custom
# `sensitivity` metric defined above is reported next to accuracy.
model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy', sensitivity])

In [None]:
# Print the layer-by-layer summary to sanity-check the architecture.
model.summary()

In [None]:
# Training hyperparameters, passed to model.fit() in the next cell.
epochs = 100      # number of passes over the training data
batch_size = 32   # samples per gradient update

In [None]:
# Train on the arrays prepared in the preprocessing section (the template
# never defines bare `X` / `y`). validation_split holds out the last 20%
# of the samples for validation.
history = model.fit(X_train, y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_split=0.2)

## Model Evaluation

This contains code to evaluate the effectiveness of your model.

In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt 

In [None]:
# Keras records accuracy under 'acc' in older releases and 'accuracy' in
# newer ones; use whichever key this History object actually contains.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.figure()
plt.plot(history.history[acc_key], 'orange', label='Training accuracy')
plt.plot(history.history['val_' + acc_key], 'blue', label='Validation accuracy')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch, drawn on one figure.
plt.figure()
for series_key, line_color, legend_text in (
        ('loss', 'red', 'Training loss'),
        ('val_loss', 'green', 'Validation loss')):
    plt.plot(history.history[series_key], line_color, label=legend_text)
plt.legend()
plt.show()

The code below plots an image and its label.
- TODO: Plot multiple at once.
- TODO: Configure to work with both 4D and 5D input X

In [None]:
def plot_Xy(X, y, i):
    """Show sample ``i`` of ``X`` titled with its predicted and actual class.

    Args:
        X: Batch of volumes indexed as ``X[sample, :, :, slice, channel]``;
            slices 20-39 of channel 0 are passed through ``mip`` and shown.
        y: Labels aligned with ``X``: either one-hot rows or a single
            class value per sample.
        i: Index of the sample to plot.

    Uses the module-level ``model`` for the prediction. ``mip`` is defined
    elsewhere -- presumably a maximum-intensity projection; confirm.
    """
    plt.imshow(mip(X[i, :, :, 20:40, 0]))
    # For one-hot rows, column 0 is only the class-0 indicator, so reading
    # y[i, 0] would show the inverted label for two classes. Use argmax for
    # one-hot rows and the value itself for scalar labels.
    label = np.asarray(y[i])
    actual = label.argmax() if label.size > 1 else label.reshape(-1)[0]
    plt.title(f'predicted: {model.predict_classes(X[i:i+1])} actual: {actual}')

In [None]:
# Step through every training sample from last to first; press Enter to
# advance. The range follows the data instead of a hard-coded sample count.
for i in reversed(range(len(X_train))):
    plot_Xy(X_train, y_train, i)
    plt.show()
    input()

- TODO: Activation visualization
- TODO: Weight visualization
- TODO: Tensorboard

## Saving your Model

In [None]:
# Uncomment to save your model
# model.save('<PATH/TO/MODEL>.hdf5' )