# Imports

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import math
import os

from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.initializations import get_fans, normal
from keras.layers import Convolution2D, Dense, Dropout, Flatten, Input, MaxPooling2D, merge
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras.utils.np_utils import to_categorical
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import tensorflow as tf

# from breast_cancer import input_data

# Default size used for every matplotlib figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (10, 6)

# Settings

In [None]:
# os.environ['CUDA_VISIBLE_DEVICES'] = ""  #0"
# c = tf.ConfigProto()
# c.gpu_options.visible_device_list="0"
# sess = tf.Session(config=c)
# K.set_session(sess)

In [None]:
# Input image geometry: the model input is declared as (SIZE, SIZE, CHANNELS).
SIZE = 256
CHANNELS = 3
FEATURES = SIZE * SIZE * CHANNELS  # number of values in one image
CLASSES = 3  # width of the final softmax layer
# Values substituted into the train / val image directory names
# (presumably the sampling percentage of the data set -- TODO confirm).
p = 0.01
val_p = 0.01
batch_size = 32  # 1/2 per GPU
num_gpus = 2  # each batch is split into this many micro-batches
use_caching = False  # not referenced in the visible part of this notebook

In [None]:
# Experiment name; it becomes the last component of the experiment directory
# path and encodes the `p` and `batch_size` settings.
EXPERIMENT = "lenet-{p}%-multi-gpu-{batch_size}-batch-size-sanity".format(p=p, batch_size=batch_size)
# EXPERIMENT = "lenet-{p}%-multi-gpu-{batch_size}-batch-size".format(p=p, batch_size=batch_size)

# Setup experiment directory

In [None]:
def get_run_dir(path):
  """Create and return a new, uniquely numbered directory for this training run.

  Runs are numbered 0, 1, 2, ... inside `path`.  The first candidate index is
  the number of entries already present in `path`.  If that name is already
  taken (for example because an earlier run directory was deleted and left a
  gap in the numbering), the index is incremented until an unused name is
  found, instead of failing with FileExistsError.

  Args:
    path: Parent directory that holds all runs; created if it is missing.

  Returns:
    Path of the newly created run directory.
  """
  os.makedirs(path, exist_ok=True)
  run = len(os.listdir(path))  # first candidate: run 0, 1, 2, ...
  while True:
    run_dir = os.path.join(path, str(run))
    try:
      os.makedirs(run_dir)
    except FileExistsError:
      run += 1  # name already in use; try the next index
    else:
      return run_dir

def get_experiment_dir(experiment):
  """Create a fresh run directory under experiments/keras/<experiment>.

  Returns the path of the run directory created by `get_run_dir`.
  """
  return get_run_dir(os.path.join("experiments", "keras", experiment))

# Create the directory for this run; checkpoints, TensorBoard logs and the
# final saved model are all written underneath it.
exp_dir = get_experiment_dir(EXPERIMENT)
print(exp_dir)

# Create train & val data generators

In [None]:
# Directories the train / val images are read from: images/<stage>/<p>.
tr_save_dir = "images/{stage}/{p}".format(stage="train", p=p)
val_save_dir = "images/{stage}/{p}".format(stage="val", p=val_p)
print(tr_save_dir, val_save_dir)

In [None]:
# Directory-backed batch generators built with default ImageDataGenerator
# settings; each yields batches of `batch_size` examples.
# NOTE(review): image size and label encoding rely on flow_from_directory
# defaults -- confirm they match SIZE and the categorical cross-entropy loss.
train_datagen = ImageDataGenerator()
val_datagen = ImageDataGenerator()
train_generator_orig = train_datagen.flow_from_directory(tr_save_dir, batch_size=batch_size)
val_generator_orig = val_datagen.flow_from_directory(val_save_dir, batch_size=batch_size)

In [None]:
# def preprocess_input(x):
#   """
#   Preprocesses a tensor encoding a batch of images.

#   Adapted from keras/applications/imagenet_utils.py

#   # Arguments
#       x: input Numpy tensor, 4D of shape (N, H, W, C).
#   # Returns
#       Preprocessed tensor.
#   """
#   # Zero-center by subtracting mean pixel value per channel
#   # based on means from a 50%, evenly-distributed sample.
#   x[:, :, :, 0] -= 193.84669495
#   x[:, :, :, 1] -= 144.05839539
#   x[:, :, :, 2] -= 180.20623779
#   # 'RGB'->'BGR' 
#   #x = x[:, :, :, ::-1]
#   x /= 255
#   return x

def preprocess_input(x):
    """Rescale pixel values from [0, 255] to [-1, 1].

    Floating-point arrays are modified in place and the same object is
    returned.  Non-float input (e.g. uint8 images) cannot hold the scaled
    values and would make the in-place division raise, so it is first
    converted to a float32 copy; the caller's array is left untouched in
    that case.

    Args:
        x: NumPy array (or array-like) of pixel values.

    Returns:
        The rescaled array: `x` itself for float arrays, otherwise a new
        float32 array.
    """
    x = np.asarray(x)  # no copy when x is already an ndarray
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(np.float32)
    x /= 255.
    x -= 0.5
    x *= 2.
    return x

# Multi-GPU exploitation
def split(x, num_splits):
  """Divide a batch into `num_splits` chunks of identical length.

  The batch is cut with `np.array_split`, then every chunk is truncated to
  `len(x) // num_splits` rows, so a few examples are dropped whenever the
  batch size is not a multiple of `num_splits`.
  """
  chunk_len = x.shape[0] // num_splits
  chunks = []
  for piece in np.array_split(x, num_splits):
    chunks.append(piece[:chunk_len])
  return chunks

def gen_preprocessed_batch(batch_generator):
  """Wrap a batch generator for multi-GPU use.

  For every (xs, ys) batch, the inputs are preprocessed and then both inputs
  and labels are split into `num_gpus` equal-sized pieces.
  """
  for xs, ys in batch_generator:
    x_pieces = split(preprocess_input(xs), num_gpus)
    y_pieces = split(ys, num_gpus)
    yield x_pieces, y_pieces

In [None]:
# Create train & val preprocessed generators
# Each yielded batch is preprocessed and split into `num_gpus` pieces, one
# per model input.
train_generator = gen_preprocessed_batch(train_generator_orig)
val_generator = gen_preprocessed_batch(val_generator_orig)

## Get number of samples

In [None]:
# Training / validation sample counts as reported by the directory iterators.
tc = train_generator_orig.nb_sample
vc = val_generator_orig.nb_sample
print(tc, vc)

## Generate class weights for training

In [None]:
# Count the training examples per class label.
class_counts = np.bincount(train_generator_orig.classes)
# Weight each class by (rarest class count / its own count), so the rarest
# class gets weight 1 and more frequent classes get proportionally less.
class_weights = {
    label: weight
    for label, weight in zip(range(CLASSES), min(class_counts) / class_counts)
}
print(class_counts)
print(class_weights)

## Fetch random image

In [None]:
def show_random_image(save_dir):
  """Plot one randomly chosen image from a random class folder of `save_dir`.

  The class folder is one of "1", "2" or "3"; the chosen file path is printed
  before the image is displayed.
  """
  label = np.random.randint(1, 4)  # upper bound is exclusive: 1, 2 or 3
  folder = os.path.join(save_dir, str(label))
  candidates = os.listdir(folder)
  chosen = candidates[np.random.randint(0, len(candidates))]
  image_path = os.path.join(folder, chosen)
  print(image_path)
  plt.imshow(Image.open(image_path))

In [None]:
# show_random_image(tr_save_dir)

# Keras

## LeNet-like Model

In [None]:
# Setup training callbacks
# Careful, TensorBoard callback could OOM with large validation set
# TODO: Add input images to TensorBoard output (maybe as a separate callback)
# TODO: Monitor size of input queues with callbacks
# Checkpoint file name template inside the run directory; the {val_loss} and
# {epoch} fields are left unformatted here (presumably filled in by
# ModelCheckpoint at save time -- confirm against the Keras docs).
model_filename = os.path.join(exp_dir, "{val_loss:.2f}-{epoch:02d}.hdf5")
checkpointer = ModelCheckpoint(model_filename)
# TensorBoard logs go to the same run directory; graph writing is disabled.
tensorboard = TensorBoard(log_dir=exp_dir, write_graph=False)  #, histogram_freq=1, write_images=True)
callbacks = [checkpointer, tensorboard]

In [None]:
# Custom final dense layer initializer
def my_init(shape, name=None, dim_ordering='tf'):
  """Initialize weights from a Gaussian with scale sqrt(1 / fan_in)."""
  fan_in, _ = get_fans(shape, dim_ordering=dim_ordering)  # fan_out is not needed
  scale = np.sqrt(1. / fan_in)
  return normal(shape, scale, name=name)

In [None]:
K.clear_session()

# L2 regularization ("weight decay") strength for every conv / dense kernel.
# The regularizers are passed to the layer constructors on purpose: Keras 1.x
# registers a layer's regularizer when the layer is built, so assigning
# `layer.W_regularizer` on an already-built layer never reaches the loss.
# Each layer gets its own regularizer instance.
weight_decay = 1e-4

# Create a "LeNet"-like model: three conv + max-pool stages followed by a
# dense classifier with dropout.
f = 3  # conv kernel size (f x f)
inputs = Input(shape=(SIZE,SIZE,CHANNELS))
x = Convolution2D(32, f, f, init="he_normal", border_mode="same", activation="relu",
                  W_regularizer=l2(weight_decay))(inputs)
x = MaxPooling2D()(x)
x = Convolution2D(32, f, f, init="he_normal", border_mode="same", activation="relu",
                  W_regularizer=l2(weight_decay))(x)
x = MaxPooling2D()(x)
x = Convolution2D(32, f, f, init="he_normal", border_mode="same", activation="relu",
                  W_regularizer=l2(weight_decay))(x)
x = MaxPooling2D()(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(256, init="he_normal", activation="relu",
          W_regularizer=l2(weight_decay))(x)
x = Dropout(0.5)(x)
preds = Dense(CLASSES, init=my_init, activation="softmax",
              W_regularizer=l2(weight_decay))(x)
lenet = Model(input=inputs, output=preds, name="lenet")

# Multi-GPU exploitation via a linear combination of GPU loss functions.
# The same `lenet` model (shared weights) is applied to one input per GPU.
with tf.device("/gpu:0"):
  x0 = Input(shape=(SIZE,SIZE,CHANNELS))  # first split of batch
  out0 = lenet(x0)  # run first split on shared model
with tf.device("/gpu:1"):
  x1 = Input(shape=(SIZE,SIZE,CHANNELS))  # second split of batch
  out1 = lenet(x1)  # run second split on shared model
model = Model(input=[x0, x1], output=[out0, out1])  # data-parallel model

# Compile model.
# Every output gets loss weight 1/num_gpus, so the total loss is the mean of
# the per-GPU losses.
# optim = SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=True)
model.compile(optimizer="rmsprop", loss="categorical_crossentropy",
              loss_weights=[1/num_gpus]*num_gpus, metrics=['accuracy'])

In [None]:
# Print the layer summary of the data-parallel (two-input) wrapper model.
model.summary()

In [None]:
# from IPython.display import SVG
# from keras.utils.visualize_util import model_to_dot
# SVG(model_to_dot(lenet).create(prog='dot', format='svg'))

In [None]:
# for i, layer in enumerate(lenet.layers):
#   print(i, layer.name)

In [None]:
# Train the data-parallel model.
# Note: Multi-GPU exploitation for data parallelism splits mini-batches
# into a set of micro-batches to be run in parallel on each GPU, but
# Keras will view the set of micro-batches as a single batch with
# multiple sources of inputs (i.e. Keras will view a set of examples
# being run in parallel as a single example with multiple sources of
# inputs), so the effective number of samples will be divided by the
# number of GPUs.
# GPU speedup:
#   1000s/epoch -> 620s/epoch (1.6x) for batch_size=32
# num workers + queue size experiments (100 batches):
#   231s w/ 1 worker + 10 q-size; 117s w/ 2 workers + 10 q-size
#   77s w/ 4 workers + 10 q-size; 79 w/ 8 workers + 10 q-size
#   75s w/ 4 workers + 4 q-size; 77s w/ 4 workers + 8 q-size;
#   76s w/ 4 workers + 16 q-size; 76s w/ 8 workers + 16 q-size;
# Samples per epoch = (micro-batch size per GPU) * (number of batches needed
# to cover the data set once).  With Python 3 true division these are floats.
train_samples = batch_size / num_gpus * math.ceil(tc/batch_size)
val_samples = batch_size / num_gpus * math.ceil(vc/batch_size)
epochs = 20
# NOTE(review): `model` has two outputs while `class_weights` is a flat
# {class: weight} dict -- confirm Keras applies it to both outputs rather
# than expecting per-output weights.
model.fit_generator(train_generator, samples_per_epoch=train_samples,
                    validation_data=val_generator, nb_val_samples=val_samples,
                    nb_epoch=epochs, class_weight=class_weights, callbacks=callbacks,
                    max_q_size=8, nb_worker=4, pickle_safe=True)

In [None]:
# Evaluate model on validation set
raw_metrics = model.evaluate_generator(val_generator, val_samples=val_samples,
                                       max_q_size=8, nb_worker=4, pickle_safe=True)
labeled_metrics = list(zip(model.metrics_names, raw_metrics))
# The entry named exactly "loss" is the model's total loss.  compile() already
# combined the per-GPU losses with loss_weights of 1/num_gpus each, so it is
# already a mean and must not be divided by num_gpus a second time.
losses = [v for k,v in labeled_metrics if k == "loss"]
# Accuracy is reported once per output (one per GPU), so it is averaged here.
accuracies = [v for k,v in labeled_metrics if k.endswith("acc")]
loss = sum(losses) / len(losses)
acc = sum(accuracies) / num_gpus
metrics = {"loss": loss, "acc": acc}
print(labeled_metrics)
print(metrics)

In [None]:
# Save model
# The file name embeds the accuracy and loss stored in `metrics`, each
# formatted with 5 significant digits, and is written into the run directory.
filename = "{acc:.5}_acc_{loss:.5}_loss_model.hdf5".format(**metrics)
model.save(os.path.join(exp_dir, filename))

In [None]:
# Stop processes cleanly
# (otherwise, zombie processes will persist and hold onto GPU memory)
import multiprocessing as mp
# The loop variable is deliberately not called `p`: that name is the
# notebook-level setting used to build EXPERIMENT and the image directories,
# and overwriting it would break those cells when they are re-run.
for proc in mp.active_children():
  proc.terminate()
  # Reap the terminated worker so it does not linger as a zombie; the timeout
  # keeps the cell from hanging if a worker does not exit.
  proc.join(timeout=5)
# Display whatever is still alive (expected: an empty list).
mp.active_children()