In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import math
import multiprocessing as mp
import os
import queue
import shutil
import threading
import time

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

plt.rcParams['figure.figsize'] = (10, 6)

# Read in train & val data

In [None]:
# Load the train and validation DataFrames from Parquet.
# NOTE(review): `sqlContext` is not created anywhere in this notebook; presumably
# it is injected by the PySpark kernel/shell -- confirm before running elsewhere.

# Alternative inputs (smaller sampled datasets, judging by the file names):
# train_df = sqlContext.read.load("data/train_0.01_sample_grayscale_64.parquet")
# val_df = sqlContext.read.load("data/val_0.01_sample_grayscale_64.parquet")

# train_df = sqlContext.read.load("data/train_0.01_sample_64.parquet")
# val_df = sqlContext.read.load("data/val_0.01_sample_64.parquet")

# Full datasets (currently active).
train_df = sqlContext.read.load("data/train.parquet")
val_df = sqlContext.read.load("data/val.parquet")
train_df, val_df

In [None]:
# Row counts of the train (`tc`) and validation (`vc`) DataFrames.
# `vc` is reused later as the number of validation samples to evaluate.
tc = train_df.count()
vc = val_df.count()
tc, vc

## Sample

## TODO: Switch to Spark 2.0 once [SPARK-18281](https://issues.apache.org/jira/browse/SPARK-18281) is fixed to take advantage of faster `toLocalIterator()` on `DataFrames`.

In [None]:
# Dataset geometry, derived from the first training row.
CHANNELS = 3  # channels per image
FEATURES = train_df.first().sample.toArray().shape[0]  # length of one flattened `sample` vector
SIZE = int(math.sqrt(FEATURES/CHANNELS))  # image side length; assumes square CHANNELS x SIZE x SIZE images
CLASSES = 3  # number of output classes
FEATURES, SIZE

# Create batch generator

In [None]:
def gen_batch(rdd, batch_size=32):
  """
  RDD data generator.

  Cycles through the rows of `rdd` forever, yielding one batch at a time and
  restarting the local iterator whenever the data is exhausted. A batch may
  therefore wrap around from the end of the data back to the beginning.

  Args:
    rdd: A PySpark RDD whose rows expose a `sample` vector (with `toArray()`)
      and a `tumor_score` label.
    batch_size: Size of batches to return.

  Yields:
    Tuples `(x_batch, y_batch)` of `np.uint8` arrays holding `batch_size`
    feature vectors and their labels.

  Raises:
    ValueError: If `rdd` produces no rows at all.
  """
  rdd.cache()
  iterator = rdd.toLocalIterator()
  while True:
    features = []
    labels = []
    for _ in range(batch_size):
      try:
        row = next(iterator)
      except StopIteration:
        # End of the data: restart from the beginning to keep cycling.
        iterator = rdd.toLocalIterator()
        try:
          row = next(iterator)
        except StopIteration:
          # Letting StopIteration escape a generator body is reported as an
          # opaque RuntimeError (PEP 479); fail with a clear message instead.
          raise ValueError("gen_batch: cannot build batches from an empty RDD") from None
      # `toArray()` always returns the dense vector, consistent with how
      # FEATURES and `extract_data` read `sample`; `.values` would only hold
      # the non-zero entries if the vector were sparse.
      features.append(row.sample.toArray())
      labels.append(row.tumor_score)
    x_batch = np.array(features).astype(np.uint8)
    y_batch = np.array(labels).astype(np.uint8)
    yield x_batch, y_batch

In [None]:
class QueueingGenerator(threading.Thread):
  """Thread that turns a generator into an asynchronous, background queueing generator.

  Items are pulled from the wrapped generator in a daemon thread and buffered
  in a bounded queue; the consumer retrieves them with `next()` or a `for` loop.

  See http://stackoverflow.com/questions/7323664/python-generator-pre-fetch
  and https://github.com/justheuristic/prefetch_generator.
  """

  # Unique end-of-stream marker, so a `None` produced by the wrapped generator
  # is passed through as a regular item instead of ending the iteration.
  _SENTINEL = object()

  def __init__(self, generator, capacity):
    """Start prefetching from `generator` in a background thread.

    Args:
      generator: An iterator/generator object to consume.
      capacity: Maximum number of items buffered in the queue.
    """
    threading.Thread.__init__(self)
    self.queue = queue.Queue(capacity)
    self.generator = generator
    self.daemon = True
    # Do not call this attribute `_stop`: `threading.Thread` uses that name for
    # a private method on some CPython versions, and shadowing it breaks
    # `join()` / `is_alive()`.
    self._stop_event = threading.Event()
    self._exhausted = False
    # Start last: `run()` reads the attributes set above.
    self.start()

  def run(self):
    """Producer loop: move items from the generator into the queue."""
    try:
      for item in self.generator:
        if self._stop_event.is_set():
          break
        self.queue.put(item)
    finally:
      # Always signal end-of-stream, even if the generator raised, so that the
      # consumer is never left blocked on an empty queue.
      self.queue.put(self._SENTINEL)

  def __iter__(self):
    return self

  def __next__(self):
    """Return the next buffered item, blocking until one is available.

    Raises:
      StopIteration: Once the producer has finished (and on every later call).
    """
    if self._exhausted:
      raise StopIteration
    item = self.queue.get()
    if item is self._SENTINEL:
      self._exhausted = True
      raise StopIteration
    return item

  def stop(self):
    """Ask the producer thread to stop after the item it is currently handling."""
    self._stop_event.set()

In [None]:
def fill_partition_num_queue(partition_num_queue, rdd):
  """Endlessly enqueue the partition indices of `rdd`, in order, round after round.

  Args:
    partition_num_queue: Queue-like object receiving partition indices via `put`.
    rdd: Object exposing `getNumPartitions()`.

  This function never returns; it is meant to run in a background worker.
  """
  num_partitions = rdd.getNumPartitions()
  while True:
    index = 0
    while index < num_partitions:
      partition_num_queue.put(index)
      index += 1

def fill_partition_queue(partition_queue, partition_num_queue, rdd):
  """Endlessly fetch partitions of `rdd` and enqueue their rows.

  Takes a partition index from `partition_num_queue`, runs a Spark job that
  returns that partition's contents unchanged, and puts the result on
  `partition_queue`.

  Args:
    partition_queue: Queue-like object receiving one result per partition.
    partition_num_queue: Queue-like object supplying partition indices.
    rdd: RDD to read from; its `context.runJob` performs the fetch.

  This function never returns; it is meant to run in a background worker.
  """
  def _identity(iterator):
    return iterator

  while True:
    index = partition_num_queue.get()
    partition_rows = rdd.context.runJob(rdd, _identity, [index])
    partition_queue.put(partition_rows)

def fill_row_queue(row_queue, partition_queue):
  """Endlessly unpack fetched partitions into individual rows.

  Args:
    row_queue: Queue-like object receiving one row per `put`.
    partition_queue: Queue-like object supplying iterables of rows.

  This function never returns; it is meant to run in a background worker.
  """
  while True:
    partition_rows = partition_queue.get()
    for record in partition_rows:
      row_queue.put(record)

# TODO: Maybe use this for TensorFlow feeding directly.
# def fill_batch_queue(batch_queue, row_queue, batch_size=32):
#   while True:
#     features = []
#     labels = []
#     for i in range(batch_size):
#       # Generate batch
#       row = row_queue.get()
#       features.append(row.sample.values)
#       labels.append(row.tumor_score)
#     x_batch = np.array(features).astype(np.uint8)
#     y_batch = np.array(labels).astype(np.uint8)
#     batch_queue.put((x_batch, y_batch))

# def gen_batch1(batch_queue):
#   while True:
#     x_batch, y_batch = batch_queue.get()
#     yield x_batch, y_batch
    
def gen_batch2(row_queue, batch_size=32):
  """Queue-backed batch generator.

  Blocks on `row_queue` until `batch_size` rows have been received, then
  yields them as a batch; repeats forever.

  Args:
    row_queue: Queue-like object whose `get()` returns rows exposing a
      `sample` vector (with `toArray()`) and a `tumor_score` label.
    batch_size: Size of batches to return.

  Yields:
    Tuples `(x_batch, y_batch)` of `np.uint8` arrays.
  """
  while True:
    features = []
    labels = []
    for _ in range(batch_size):
      row = row_queue.get()
      # `toArray()` always returns the dense vector, consistent with how
      # FEATURES and `extract_data` read `sample`; `.values` would only hold
      # the non-zero entries if the vector were sparse.
      features.append(row.sample.toArray())
      labels.append(row.tumor_score)
    x_batch = np.array(features).astype(np.uint8)
    y_batch = np.array(labels).astype(np.uint8)
    yield x_batch, y_batch

# TODO: Clean this up

In [None]:
# Multi-stage input pipeline feeding `row_queue`:
#   partition indices -> fetched partitions -> individual rows.
# Each stage communicates through a bounded multiprocessing queue.
partition_num_queue = mp.Queue(1000)
partition_queue = mp.Queue(100)
row_queue = mp.Queue(1000)
# batch_queue = mp.Queue(1000)

# Number of workers per stage.
num_partition_threads = 20
num_row_processes = 4
# num_batch_processes = 2

rdd = train_df.rdd

# One process cycles over partition indices; partitions are fetched by threads
# (not processes); rows are unpacked by separate processes. All are daemons.
# NOTE(review): `rdd` is handed to a child process here; whether a Spark RDD
# handle is usable from another process depends on how it is started -- confirm.
partition_num_process = mp.Process(target=fill_partition_num_queue, args=(partition_num_queue, rdd), daemon=True)
# partition_processes = [mp.Process(target=fill_partition_queue, args=(partition_queue, partition_num_queue, rdd), daemon=True) for _ in range(num_partition_processes)]
partition_threads = [threading.Thread(target=fill_partition_queue, args=(partition_queue, partition_num_queue, rdd), daemon=True) for _ in range(num_partition_threads)]
row_processes = [mp.Process(target=fill_row_queue, args=(row_queue, partition_queue), daemon=True) for _ in range(num_row_processes)]
# batch_processes = [mp.Process(target=fill_batch_queue, args=(batch_queue, row_queue), daemon=True) for _ in range(num_batch_processes)]

ps = [partition_num_process] + row_processes #+ batch_processes

# Start every worker. Nothing in this notebook stops them again, so re-running
# this cell starts an additional set of workers.
for p in partition_threads + ps:
  p.start()

In [None]:
# Current fill level of each pipeline queue (useful to spot the bottleneck stage).
partition_num_queue.qsize(), partition_queue.qsize(), row_queue.qsize()#, batch_queue.qsize()

# Get validation data

In [None]:
# Display the validation DataFrame loaded above.
val_df

In [None]:
# Take just three validation rows for a quick look; `.collect()` would pull every row.
rows = val_df.take(3)  #.collect()

In [None]:
# Stack the sampled rows into NumPy arrays: uint8 features and int32 labels.
# NOTE(review): `extract_data` casts labels to uint8 instead -- confirm which
# dtype is intended.
x = np.array([row.sample.toArray().astype(np.uint8) for row in rows])
y = np.array([row.tumor_score for row in rows]).astype(np.int32)

In [None]:
# Check dtypes and shapes of the sampled arrays.
x.dtype, x.shape, y.dtype, y.shape

In [None]:
# Shape of the label array as built above.
y.shape

In [None]:
# Shape of the label array after appending a trailing axis of length 1.
np.expand_dims(y, -1).shape

In [None]:
def extract_data(df):
  """
  Collect a PySpark DataFrame and convert it into NumPy arrays.

  Args:
    df: DataFrame whose rows expose a `sample` vector (with `toArray()`) and
      a `tumor_score` label. All rows are collected at once.

  Returns:
    A tuple `(x, y)`: `x` stacks each row's `sample` cast to `np.uint8`, and
    `y` holds the `tumor_score` values cast to `np.uint8`.
  """
  records = df.collect()
  samples = []
  for record in records:
    samples.append(record.sample.toArray().astype(np.uint8))
  scores = []
  for record in records:
    scores.append(record.tumor_score)
  return np.array(samples), np.array(scores).astype(np.uint8)

In [None]:
# Convert the validation sample into NumPy arrays.
# NOTE(review): `val_df_sample` is not defined in any cell visible in this
# notebook, so this raises NameError on a fresh kernel -- define it (e.g. as a
# subset of `val_df`) before this cell.
x_val, y_val = extract_data(val_df_sample)

In [None]:
# Check dtypes and shapes of the extracted validation arrays.
x_val.dtype, x_val.shape, y_val.dtype, y_val.shape

# Create convnet model graph
Create network:
  conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> conv3 -> relu3 -> pool3 -> affine1 -> relu4 -> dropout -> affine2 -> softmax

In [None]:
# 1. Add TensorBoard summaries and track.
# 2. Plug into larger dataset.
# 3. Run on cluster.
# 4. Explore saving to TFRecord format, then reading from files shared on DFS (gfs).

In [None]:
# Build the convnet graph from scratch (drops any previously built graph).
tf.reset_default_graph()

# Hyperparams & Settings
classes = CLASSES
features = FEATURES
C = CHANNELS  # Number of input channels (dimensionality of input depth)
Hin = SIZE  # Input height
Win = SIZE  # Input width
Hf = 3  # conv filter height
Wf = 3  # conv filter width
Hfp = 2  # pool filter height
Wfp = 2  # pool filter width
stride = 1  # conv stride
pstride = 2  # pool stride
pad = 1  # For same dimensions, (Hf - stride) / 2 (informational; the ops below use padding="SAME")
F1 = 32  # num conv filters in conv1
F2 = 32  # num conv filters in conv2
F3 = 32  # num conv filters in conv3
N1 = 512  # num nodes in affine1
lr = 5e-4 # learning rate
num_pools = 3  # number of pooling layers between the input and affine1

# Inputs
with tf.name_scope("input") as scope:
  x = tf.placeholder(tf.uint8, [None, features], name="x")
  x_scaled = x / 255 * 2 - 1  # scale between [-1,1]
  # Flat rows are laid out channel-first; convert to the NHWC layout used by the conv ops.
  x_image = tf.transpose(tf.reshape(x_scaled, [-1, C, Hin, Win]), perm=[0,2,3,1])  # shape (N,H,W,C)
  y_ = tf.placeholder(tf.uint8, [None,], name="y_")
  # Labels are shifted by -1 to become 0-based class indices. The one-hot rows
  # are float32 so that they share the logits' dtype in the loss below.
  y_one_hot = tf.one_hot(tf.cast(y_, tf.int32) - 1, classes, dtype=tf.float32)  # or use sparse cross entropy
  tf.summary.image("x", x_image)
  tf.summary.histogram("x", x)
  tf.summary.histogram("x_scaled", x_scaled)
  tf.summary.histogram("y", y_)

# Conv layer 1: conv1 -> relu1 -> pool1
with tf.name_scope("conv1") as scope:
  # Weights are drawn from a normal distribution scaled by sqrt(2 / fan_in).
  W = tf.Variable(tf.random_normal([Hf, Wf, C, F1]) * np.sqrt(2.0/(Hf*Wf*C)), name="W")
  b = tf.Variable(tf.zeros([F1]), name="b")
  conv = tf.nn.conv2d(x_image, W, [1,stride,stride,1], padding="SAME") + b
  relu = tf.nn.relu(conv)
  pool = tf.nn.max_pool(relu, ksize=[1,Hfp,Wfp,1], strides=[1,pstride,pstride,1], padding="SAME")
  tf.summary.image("conv1", tf.transpose(W, [3,0,1,2]), max_outputs=F1)  # transpose to [N,H,W,C]

# Conv layer 2: conv2 -> relu2 -> pool2
with tf.name_scope("conv2") as scope:
  W = tf.Variable(tf.random_normal([Hf, Wf, F1, F2]) * np.sqrt(2.0/(Hf*Wf*F1)), name="W")
  b = tf.Variable(tf.zeros([F2]), name="b")
  conv = tf.nn.conv2d(pool, W, [1,stride,stride,1], padding="SAME") + b
  relu = tf.nn.relu(conv)
  pool = tf.nn.max_pool(relu, ksize=[1,Hfp,Wfp,1], strides=[1,pstride,pstride,1], padding="SAME")

# Conv layer 3: conv3 -> relu3 -> pool3
with tf.name_scope("conv3") as scope:
  W = tf.Variable(tf.random_normal([Hf, Wf, F2, F3]) * np.sqrt(2.0/(Hf*Wf*F2)), name="W")
  b = tf.Variable(tf.zeros([F3]), name="b")
  conv = tf.nn.conv2d(pool, W, [1,stride,stride,1], padding="SAME") + b
  relu = tf.nn.relu(conv)
  pool = tf.nn.max_pool(relu, ksize=[1,Hfp,Wfp,1], strides=[1,pstride,pstride,1], padding="SAME")

# Affine layer 1:  affine1 -> relu4 -> dropout
with tf.name_scope("affine1") as scope:
  # Each SAME-padded pool with stride `pstride` maps a spatial size n to
  # ceil(n / pstride), so the flattened size must use ceil at every stage.
  # (Plain division by 2**3 is only correct when SIZE is a multiple of 8.)
  Hout, Wout = Hin, Win
  for pool_idx in range(num_pools):
    Hout = math.ceil(Hout / pstride)
    Wout = math.ceil(Wout / pstride)
  D = F3 * Hout * Wout
  W = tf.Variable(tf.random_normal([D,N1]) * np.sqrt(2.0/D), name="W")
  b = tf.Variable(tf.zeros([N1]), name="b")
  affine = tf.matmul(tf.reshape(pool, [-1,D]), W) + b
  relu = tf.nn.relu(affine)
  # Fed at run time: < 1 while training, 1 to disable dropout for evaluation.
  keep_prob = tf.placeholder(tf.float32, name="keep_prob")
  dropout = tf.nn.dropout(relu, keep_prob)

# Affine layer 2:  affine2 -> softmax
with tf.name_scope("affine2") as scope:
  W = tf.Variable(tf.random_normal([N1,classes]) * np.sqrt(2.0/N1), name="W")
  b = tf.Variable(tf.zeros([classes]), name="b")
  logits = tf.matmul(dropout, W) + b
  probs = tf.nn.softmax(logits)
  tf.summary.histogram("logits", logits)
  tf.summary.histogram("probs", probs)

# Loss
with tf.name_scope("loss") as scope:
  # Keyword arguments make the (logits, labels) roles explicit and cannot be
  # silently swapped.
  cross_entropy_loss = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_one_hot))
  tf.summary.scalar("loss", cross_entropy_loss)

# Train
#train_step = tf.train.GradientDescentOptimizer(lr).minimize(cross_entropy_loss)
#train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy_loss)
train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy_loss)

# Eval metrics
with tf.name_scope("eval") as scope:
  correct_pred = tf.equal(tf.argmax(logits,1), tf.argmax(y_one_hot,1))
  accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
  tf.summary.scalar("accuracy", accuracy)

In [None]:
%%time
# Run `tensorboard --logdir=tf_logs --host=localhost --debug --reload_interval 5`
# Note: look at stderr for device placement logs
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
  # Summaries
  log_dir = "tf_logs"
  summary_op = tf.summary.merge_all()
  train_writer = tf.train.SummaryWriter(log_dir + "/train", sess.graph)
  val_writer = tf.train.SummaryWriter(log_dir + "/val")
  
  # Data Gen
  batch_size = 64
  capacity = 1000
  train_generator = gen_batch(train_df.rdd, batch_size)
  # ~20% speedup by using threaded queueing generator
  train_generator = QueueingGenerator(train_generator, capacity)
  # TODO: Setup batch loop over val dataset
  x_val, y_val = extract_data(val_df_sample)

  # Checkpoint saver
  saver = tf.train.Saver(tf.global_variables())

  # Train
  sess.run(tf.global_variables_initializer())
  #steps = 100
  epochs = 1
  steps = round(epochs * train_df.count() / batch_size)
  steps = 200
  for i in range(steps):
    xs, ys = next(train_generator)
    _ = sess.run([train_step], feed_dict={x: xs, y_:ys, keep_prob:0.5})
    if i % 100 == 0:
      # train stats
      summary, train_loss, train_acc = sess.run([summary_op, cross_entropy_loss, accuracy], feed_dict={x: xs, y_:ys, keep_prob:0.5})
      train_writer.add_summary(summary, i)
      # val stats
      summary, val_loss, val_acc = sess.run([summary_op, cross_entropy_loss, accuracy], feed_dict={x: x_val, y_:y_val, keep_prob:1})
      val_writer.add_summary(summary, i)
      print("Iter: {}, \t Train Accuracy, Loss: {:.4f},{:.4f} \t Val Accuracy, Loss: {:.4f},{:.4f}".format(i, train_acc, train_loss, val_acc, val_loss))
      #print("Iter: {}, \t Train Accuracy: {:.4f}".format(i, train_acc))
      if i % 1000 == 0 or (i+1) == steps:
        # Save model periodically
        # TODO: Only save best `n` models.
        checkpoint_path = os.path.join("models", "{}.ckpt".format(val_acc))
        saver.save(sess, checkpoint_path, global_step=i)
  train_writer.flush()  # Make sure everything is written before exiting
  val_writer.flush()  # Make sure everything is written before exiting

# Keras

## VGG16

1. Setup VGG16 pretrained model with new input & output layers.
2. Train new output layers (all others frozen).
3. Fine tune additional layers.
4. Profit.

In [None]:
# Look into labels being set as float64 instead of float32

In [None]:
# Clear out any existing Keras logs and model checkpoints
# WARNING: this recursively deletes both directories and everything in them.
log_dir = os.path.join("tf_logs", "keras", "vgg")
model_dir = os.path.join("models", "keras", "vgg")
for path in [log_dir, model_dir]:
  if os.path.exists(path):
    #os.rmdir(path)  # fails if directory is not empty
    shutil.rmtree(path)

# Reset any current Keras session
# (imported here rather than with the other imports at the top of the notebook)
import keras.backend as K
K.clear_session()  # reset TensorFlow session for iterative work

In [None]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.layers import Dense, Flatten, Input, Permute, Reshape
from keras.models import Model
from keras.utils.np_utils import to_categorical

In [None]:
# TODO: Clean these up
def to_categorical(y, classes):
  """One-hot encode integer class indices as an int32 matrix.

  Replaces `keras.utils.np_utils.to_categorical` to avoid its cast to float64.

  Args:
    y: Sequence/array of integer class indices, one per sample.
    classes: Number of classes (width of the result).

  Returns:
    An `(len(y), classes)` array of `np.int32` with a single 1 per row.
  """
  num_rows = len(y)
  one_hot = np.zeros((num_rows, classes), dtype=np.int32)
  row_index = np.arange(num_rows)
  one_hot[row_index, y] = 1
  #one_hot = np.eye(classes)[y]
  return one_hot

def gen_preprocessed_batch(batch_generator):
  """Wrap a raw batch generator with model-specific preprocessing.

  For every `(xs, ys)` batch: reshapes the flat features to (N,C,H,W), moves
  the channel axis last to (N,H,W,C), casts to float32 and applies the
  currently imported `preprocess_input`; labels are shifted by -1 and one-hot
  encoded over `CLASSES` classes.

  Args:
    batch_generator: Iterable yielding `(features, labels)` batches.

  Yields:
    Tuples `(preprocessed_images, one_hot_labels)`.
  """
  for raw_x, raw_y in batch_generator:
    images = raw_x.reshape((-1,CHANNELS,SIZE,SIZE))  # shape (N,C,H,W)
    images = images.transpose((0,2,3,1))  # shape (N,H,W,C)
    images = images.astype(np.float32)
    processed = preprocess_input(images)
    targets = to_categorical(raw_y-1, CLASSES)
    yield processed, targets

In [None]:
# Create train and validation iterators.
# Each one cycles over its dataset forever, yielding preprocessed batches.
# NOTE(review): `val_df_sample` is not defined in any cell visible in this
# notebook -- define it before running this cell.
batch_size = 32
train_generator = gen_preprocessed_batch(gen_batch(train_df.rdd, batch_size))
val_generator = gen_preprocessed_batch(gen_batch(val_df.rdd, batch_size))
val_sample_generator = gen_preprocessed_batch(gen_batch(val_df_sample.rdd, batch_size))

## ~20% speedup by using threaded queueing generator
# capacity = 1000
# train_generator = QueueingGenerator(train_generator, capacity)
# val_generator = QueueingGenerator(val_generator, capacity)

In [None]:
# Pull one batch to sanity-check shapes and dtypes.
# The generator yields only (images, one-hot labels), so the original 1-based
# labels are recovered from the one-hot rows instead of unpacking a third value.
x, y = next(train_generator)
y_orig = y.argmax(axis=1) + 1
x.shape, x.dtype, y.shape, y.dtype, y_orig.shape, y_orig.dtype

In [None]:
# Spot-check two samples: one-hot row next to the original label.
# Index 20 requires a batch of at least 21 rows.
y[1], y_orig[1], y[20], y_orig[20]

In [None]:
%%time
# inputs = Input(shape=(FEATURES,))  # shape (N,C*H*W)
# inputs_reshaped = Reshape((C,SIZE,SIZE))(inputs)  # shape (N,C,H,W)
# inputs_transposed = Permute((2,3,1))(inputs_reshaped)  # shape (N,H,W,C)

# VGG16 without the fully-connected layers at the end
# base_model = VGG16(weights="imagenet", include_top=False, 
#                    input_tensor=inputs_transposed, input_shape=(SIZE,SIZE,C))
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(SIZE,SIZE,CHANNELS))

# New fully-connected layers for breast cancer problem
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation="relu")(x)
predictions = Dense(CLASSES, activation="softmax")(x)

# Create overall model
# model = Model(input=inputs, output=predictions)
model = Model(input=base_model.input, output=predictions)

# Freeze all layers except new ones
for layer in base_model.layers:
  layer.trainable = False

# Compile model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])

# Setup training callbacks
log_dir = os.path.join("tf_logs", "keras", "vgg")
model_dir = os.path.join("models", "keras", "vgg")
model_filename = os.path.join(model_dir, "{val_loss:.2f}-{epoch:02d}.hdf5")
for path in [log_dir, model_dir]:
  if not os.path.exists(path):
    os.mkdir(path)
tensorboard = TensorBoard(log_dir=log_dir)  #, histogram_freq=1, write_images=True)
checkpointer = ModelCheckpoint(model_filename)
callbacks = [tensorboard, checkpointer]

# Train these new layers at the end
train_samples = math.ceil(train_df.count()/batch_size) * batch_size  #10000
val_samples = math.ceil(val_df.count()/batch_size) * batch_size  #2000
val_sample_samples = math.ceil(val_sample_df.count()/batch_size) * batch_size  #2000
epochs = 1
model.fit_generator(train_generator, samples_per_epoch=train_samples, nb_epoch=epochs,
                    validation_data=val_generator, nb_val_samples=val_samples,
                    max_q_size=10000, # vary the queue size
                    callbacks=callbacks)

In [None]:
# Number of validation-sample rows, rounded up to a whole number of batches.
# NOTE(review): `val_df_sample` is not defined in any cell visible in this
# notebook -- define it before running this cell.
val_sample_samples = math.ceil(val_df_sample.count()/batch_size) * batch_size  #2000
val_sample_samples, val_df_sample.count()

In [None]:
# Evaluate the trained model on the validation sample; the values returned
# correspond to `model.metrics_names`.
model.evaluate_generator(val_sample_generator, val_samples=val_sample_samples, max_q_size=10000)

In [None]:
# Names of the values returned by the evaluation above.
model.metrics_names

# TODO: Try caching RDD in generator function.

---

## ResNet50

1. Read in 256x256x3 dataframe.
2. Create random crops to 224x224x3.
3. Setup ResNet50 pretrained model with new input & output layers.
4. Train new output layers (all others frozen).
5. Fine tune additional layers.
6. Profit.

In [None]:
# Directories for the ResNet50 TensorBoard logs and model checkpoints.
log_dir = os.path.join("tf_logs", "keras", "resnet50")
model_dir = os.path.join("models", "keras", "resnet50")
for path in (log_dir, model_dir):
  if os.path.exists(path):
    continue
  # makedirs (unlike mkdir) also creates any missing intermediate directories.
  os.makedirs(path)

In [None]:
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.layers import Dense, Flatten, Input, Permute, Reshape
from keras.models import Model
from keras.utils.np_utils import to_categorical

In [None]:
# TODO: Clean these up
def to_categorical(y, classes):
  """One-hot encode integer class indices as an int32 matrix.

  Replaces `keras.utils.np_utils.to_categorical` to avoid its cast to float64.

  Args:
    y: Sequence/array of integer class indices, one per sample.
    classes: Number of classes (width of the result).

  Returns:
    An `(len(y), classes)` array of `np.int32` with a single 1 per row.
  """
  num_rows = len(y)
  one_hot = np.zeros((num_rows, classes), dtype=np.int32)
  row_index = np.arange(num_rows)
  one_hot[row_index, y] = 1
  #one_hot = np.eye(classes)[y]
  return one_hot

def gen_preprocessed_batch(batch_generator):
  """Wrap a raw batch generator with model-specific preprocessing.

  For every `(xs, ys)` batch: reshapes the flat features to (N,C,H,W), moves
  the channel axis last to (N,H,W,C), casts to float32 and applies the
  currently imported `preprocess_input`; labels are shifted by -1 and one-hot
  encoded over `CLASSES` classes.

  Args:
    batch_generator: Iterable yielding `(features, labels)` batches.

  Yields:
    Tuples `(preprocessed_images, one_hot_labels)`.
  """
  for raw_x, raw_y in batch_generator:
    images = raw_x.reshape((-1,CHANNELS,SIZE,SIZE))  # shape (N,C,H,W)
    images = images.transpose((0,2,3,1))  # shape (N,H,W,C)
    images = images.astype(np.float32)
    processed = preprocess_input(images)
    targets = to_categorical(raw_y-1, CLASSES)
    yield processed, targets

In [None]:
# Create train and validation iterators.
# Training batches come from the background row queue; validation batches are
# read from the validation RDD.
batch_size = 32
# train_generator = gen_preprocessed_batch(gen_batch2(batch_queue))
# Pass `batch_size` explicitly so that the training batches follow the setting
# above rather than silently relying on gen_batch2's default.
train_generator = gen_preprocessed_batch(gen_batch2(row_queue, batch_size))
val_generator = gen_preprocessed_batch(gen_batch(val_df.rdd, batch_size))
# val_sample_generator = gen_preprocessed_batch(gen_batch2(val_df_sample.rdd, batch_size))

## ~20% speedup by using threaded queueing generator
# capacity = 1000
# train_generator = QueueingGenerator(train_generator, capacity)
# val_generator = QueueingGenerator(val_generator, capacity)

In [None]:
%%time
# Build a ResNet50-based classifier and train only its new output layers.

# Alternative (disabled): accept flat feature vectors and reshape inside the model.
# inputs = Input(shape=(FEATURES,))  # shape (N,C*H*W)
# inputs_reshaped = Reshape((C,SIZE,SIZE))(inputs)  # shape (N,C,H,W)
# inputs_transposed = Permute((2,3,1))(inputs_reshaped)  # shape (N,H,W,C)

# ResNet50 without the fully-connected layers at the end
# (the disabled lines below are left over from the VGG16 version of this cell)
# base_model = VGG16(weights="imagenet", include_top=False, 
#                    input_tensor=inputs_transposed, input_shape=(SIZE,SIZE,C))
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(SIZE,SIZE,CHANNELS))

# New fully-connected layers for breast cancer problem
x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
predictions = Dense(CLASSES, activation="softmax")(x)

# Create overall model
# model = Model(input=inputs, output=predictions)
model = Model(input=base_model.input, output=predictions)

# Freeze all layers except new ones
for layer in base_model.layers:
  layer.trainable = False

# Compile model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])

# Setup training callbacks
# Only TensorBoard logging is active; model checkpointing is disabled here.
# `log_dir` comes from the directory setup cell of this section.
# model_filename = os.path.join(model_dir, "{val_loss:.2f}-{epoch:02d}.hdf5")
tensorboard = TensorBoard(log_dir=log_dir)  #, histogram_freq=1, write_images=True)
# checkpointer = ModelCheckpoint(model_filename)
callbacks = [tensorboard] #, checkpointer]

# Train these new layers at the end
# TODO: Update count logic
# Sample counts are hard-coded instead of being derived from the dataset size.
train_samples = 10000 #math.ceil(tc/batch_size) * batch_size  #10000
val_samples = 2000 #vc  #2000 #math.ceil(val_df.count()/batch_size) * batch_size  #2000
epochs = 5
# Validation during training is disabled (arguments commented out), so
# `val_samples` is currently unused by this call.
model.fit_generator(train_generator, samples_per_epoch=train_samples, nb_epoch=epochs,
#                     validation_data=val_generator, nb_val_samples=val_samples,
                    max_q_size=10000, # vary the queue size
                    callbacks=callbacks,
                    nb_worker=4, pickle_safe=True)

In [None]:
# TODO: Cleanup input queues: Use processes with Keras, replace batch_queue with Keras,
# increase partition queue size, add function to create generators for any df, etc.

In [None]:
# TODO: Add queueing val generator back to training, maybe with less threads (although it will block).

In [None]:
# TODO: Add `stop` method/events to Threads/Processes

In [None]:
# TODO: Monitor size of input queues with callbacks

In [None]:
# TODO: Expand the size of the Dense layer, and perhaps replace Flatten with GlobalAveragePooling2D

In [None]:
# TODO: Determine what the class ratio is in the first 10000 train samples using `limit(10000)`; do the same with val

In [None]:
# Evaluate on `vc` validation samples (the validation row count computed near
# the top of the notebook), drawing batches from `val_generator`.
# val_generator = gen_preprocessed_batch(gen_batch(val_df.rdd, batch_size))
model.evaluate_generator(val_generator, val_samples=vc, max_q_size=10000)

In [None]:
# Names of the values returned by the evaluation above.
model.metrics_names