In [1]:
import numpy as np
import os
from datetime import datetime
from time import time
import tensorflow as tf

In [2]:
data_dir = "data/"
svhn_train_data   = np.load(data_dir+"svhn_color_train.npy")
svhn_train_labels = np.load(data_dir+"svhn_train_labels.npy")
svhn_test_data    = np.load(data_dir+"svhn_color_test.npy")
svhn_test_labels  = np.load(data_dir+"svhn_test_labels.npy")

print svhn_train_data.shape, svhn_train_labels.shape
print svhn_test_data.shape, svhn_test_labels.shape

print svhn_train_data.max(), svhn_train_data.min(), svhn_train_data.mean(), svhn_train_data.std()
print svhn_test_data.max(), svhn_test_data.min(), svhn_test_data.mean(), svhn_test_data.std()

(604388, 32, 32, 3) (604388, 10)
(26032, 32, 32, 3) (26032, 10)
1.0 0.0 0.435832011484 0.198233065675
1.0 0.0 0.45796537714 0.225005359202


## Model Wrappers

In [3]:
# Since we are using ReLU activations, we are initializing to sqrt(2/fan_in)
# as per https://arxiv.org/pdf/1502.01852v1.pdf
def weight_variable(shape, name):
  """Return the weight variable named ``"w_" + name`` with the given shape.

  The variable is obtained through ``tf.get_variable`` and uses a
  variance-scaling initializer configured with ``factor=2.0``,
  ``mode='FAN_IN'`` and ``uniform=False``.

  Args:
    shape: shape passed on to ``tf.get_variable``.
    name: suffix appended to ``"w_"`` to form the variable name.

  Returns:
    Whatever ``tf.get_variable`` returns for that name/shape/initializer.
  """
  fan_in_initializer = tf.contrib.layers.variance_scaling_initializer(
      factor=2.0, mode='FAN_IN', uniform=False)
  return tf.get_variable("w_" + name, shape=shape, initializer=fan_in_initializer)


def bias_variable(shape, default_bias=0.0):
  """Return a new ``tf.Variable`` initialised from a constant tensor.

  Args:
    shape: shape of the constant used as the initial value.
    default_bias: value every entry of the constant is set to (0.0 by default).
  """
  return tf.Variable(tf.constant(default_bias, shape=shape))


# Convolutional layer wrapper
def conv_layer(layer_name, input_tensor, receptive_field, channels_in, channels_out,
              padding='SAME', stride=1, act=tf.nn.relu, decay=0.0,
              pool=True, pooler=tf.nn.max_pool, pool_size=2, pool_stride=2, pool_padding='SAME',
              batch_norm=False, batch_norm_after=False, training=True):
  """Build one convolution stage: conv + bias -> [batch norm] -> act -> [pool] -> [batch norm].

  Args:
    layer_name: name scope for the layer; also used to name the weight variable.
    input_tensor: tensor fed to ``tf.nn.conv2d``.
    receptive_field: height and width of the (square) convolution kernel.
    channels_in: number of input channels of the kernel.
    channels_out: number of output channels of the kernel (and bias length).
    padding: padding mode passed to ``tf.nn.conv2d``.
    stride: convolution stride used for both spatial dimensions.
    act: activation callable, invoked as ``act(tensor, name='activation')``.
    decay: when > 0, ``decay * l2_loss(weights)`` is added to the 'losses' collection.
    pool: whether to apply ``pooler`` after the activation.
    pooler: pooling callable taking ``ksize``, ``strides`` and ``padding`` keywords.
    pool_size: pooling window size for both spatial dimensions.
    pool_stride: pooling stride for both spatial dimensions.
    pool_padding: padding mode passed to ``pooler``.
    batch_norm: apply batch normalization to the pre-activation.
    batch_norm_after: apply batch normalization to the (optionally pooled) output.
    training: ``training`` argument forwarded to ``tf.layers.batch_normalization``.

  Returns:
    The output tensor of the last enabled stage.
  """
  kernel_shape = [receptive_field, receptive_field, channels_in, channels_out]
  conv_strides = [1, stride, stride, 1]

  with tf.name_scope(layer_name):
    with tf.name_scope('weights'):
      kernel = weight_variable(kernel_shape, layer_name)

      # Optional L2 penalty on the kernel, collected for the total loss.
      if decay > 0:
        tf.add_to_collection(
            'losses',
            tf.multiply(tf.nn.l2_loss(kernel), decay, name='weight_decay'))

    with tf.name_scope('biases'):
      offsets = bias_variable([channels_out])

    with tf.name_scope('W_conv_x_plus_b'):
      convolved = tf.nn.conv2d(input_tensor, kernel,
                               strides=conv_strides,
                               padding=padding)
      pre_activation = convolved + offsets

    # Normalise before the non-linearity only when requested.
    to_activate = pre_activation
    if batch_norm:
      with tf.name_scope('batchnorm'):
        to_activate = tf.layers.batch_normalization(pre_activation, training=training)
    layer_output = act(to_activate, name='activation')

    if pool:
      layer_output = pooler(layer_output,
                            ksize=[1, pool_size, pool_size, 1],
                            strides=[1, pool_stride, pool_stride, 1],
                            padding=pool_padding)

    if batch_norm_after:
      with tf.name_scope('batchnorm_after'):
        layer_output = tf.layers.batch_normalization(layer_output, training=training)

  return layer_output

# Fully Connected layer wrapper    
def dense_layer(layer_name, input_tensor, input_dim, output_dim, act=tf.nn.relu, decay=0.0,
               batch_norm=False, batch_norm_after=False, training=True):
  """Build one fully connected stage: matmul + bias -> [batch norm] -> act -> [batch norm].

  Args:
    layer_name: name scope for the layer; also used to name the weight variable.
    input_tensor: 2-D tensor multiplied by the ``[input_dim, output_dim]`` weights.
    input_dim: number of rows of the weight matrix.
    output_dim: number of columns of the weight matrix (and bias length).
    act: activation callable, invoked as ``act(tensor, name='activation')``.
    decay: when > 0, ``decay * l2_loss(weights)`` is added to the 'losses' collection.
    batch_norm: apply batch normalization to the pre-activation.
    batch_norm_after: apply batch normalization to the activation output.
    training: ``training`` argument forwarded to ``tf.layers.batch_normalization``.

  Returns:
    The activation tensor, including any requested batch normalization.
  """
  with tf.name_scope(layer_name):
    with tf.name_scope('weights'):
      weights = weight_variable([input_dim, output_dim], layer_name)

      if decay > 0:
        weight_decay = tf.multiply(tf.nn.l2_loss(weights), decay, name='weight_decay')
        tf.add_to_collection('losses', weight_decay)

    with tf.name_scope('biases'):
      biases = bias_variable([output_dim])

    with tf.name_scope('Wx_plus_b'):
      preactivate = tf.matmul(input_tensor, weights) + biases

    if batch_norm:
      with tf.name_scope('batchnorm_fc'):
        normed = tf.layers.batch_normalization(preactivate, training=training)
      activations = act(normed, name='activation')
    else:
      activations = act(preactivate, name='activation')

    if batch_norm_after:
      activations = tf.layers.batch_normalization(activations, training=training)

    # The previous version re-computed act(preactivate) here, which silently
    # discarded both batch-norm branches above. Return what was actually built.
    return activations

  
def flat_dimension(tensor):
  """Return the product of all of ``tensor``'s static dimensions except the first.

  The leading (batch) dimension is skipped, so the result is the number of
  values per example. A tensor with only a batch dimension yields 1.
  """
  per_example_dims = tensor.get_shape()[1:].as_list()
  total = 1
  for extent in per_example_dims:
    total = total * extent
  return total


# hacky next_batch, uses global preshuffled indices based on epoch
def grab_next_train_batch(batch_num, train_batch_size):
  """Return the ``(images, labels)`` training batch number ``batch_num``.

  Relies on the globals ``svhn_train_data``, ``svhn_train_labels``,
  ``batches_per_epoch`` and ``shuffled_indices`` (one pre-shuffled index
  array per epoch). The epoch is derived from how many images precede the
  requested batch, and the batch is the matching slice of that epoch's
  shuffled order; a slice that reaches the end of the order is simply shorter.

  Args:
    batch_num: zero-based batch counter (must not exceed ``batches_per_epoch``).
    train_batch_size: number of examples per batch (at most the dataset size).
  """
  total_images = len(svhn_train_data)
  assert train_batch_size <= total_images
  assert batch_num <= batches_per_epoch

  # Number of images that come before this batch, across epochs.
  images_before = batch_num * train_batch_size
  epoch_order = shuffled_indices[int(images_before / total_images)]

  first = images_before % total_images
  picked = epoch_order[first:first + train_batch_size]
  return svhn_train_data[picked], svhn_train_labels[picked]

# returns a test set in fixed increments
def grab_next_test_batch(batch_num, test_batch_size):
  """Return the ``batch_num``-th consecutive ``(images, labels)`` test slice.

  Slices the globals ``svhn_test_data`` and ``svhn_test_labels`` in their
  stored order; the final slice is shorter when the data runs out.
  """
  first = batch_num * test_batch_size
  window = slice(first, first + test_batch_size)
  return svhn_test_data[window], svhn_test_labels[window]

In [4]:
def run():
  # RESET TF GRAPH, just in case and set a fixed seed to compare models
  tf.reset_default_graph()
  tf.set_random_seed(GLOBAL_SEED)
  
  # Fill in the place holders depending on the context (training? validating? testing?)
  def feed_dict(batch_num, mode):
    if mode == 'train':
      batch_x, batch_y = grab_next_train_batch(batch_num, train_batch_size)
      keep_proba = train_keep_prob
      training_mode = True

    elif mode == 'validation_train':
      batch_x, batch_y = grab_next_train_batch(batch_num, valid_batch_size)
      keep_proba = 1.0
      training_mode = False

    elif mode == 'validation_test':
      # batch_num = 0 to keep grabbing the same batch for consistent testing
      batch_x, batch_y = grab_next_test_batch(0, valid_batch_size)
      keep_proba = 1.0
      training_mode = False

    elif mode == "test_all":
      batch_x, batch_y = grab_next_test_batch(batch_num, test_batch_size)
      keep_proba = 1.0
      training_mode = False

    batch_x = batch_x
    return {x: batch_x, y_: batch_y, 
            keep_prob: keep_proba, 
            batch_norm_train_mode: training_mode} 
  
  
  ### Place holders ###
  with tf.name_scope('test_train_variables'):
    # batch norm ON for training, OFF otherwise
    batch_norm_train_mode = tf.placeholder(tf.bool) 
    
    # keep_prob = 1.0 for testing, or set to global var  otherwise
    keep_prob = tf.placeholder(tf.float32)
    tf.add_to_collection('batch_norm_train_mode', batch_norm_train_mode)
    tf.add_to_collection('keep_prob', keep_prob)
    
    # Making sure correct placeholders are set (can check in Tensorboard)
    tf.summary.scalar('batch_norm_train_mode', tf.to_int32(batch_norm_train_mode, name='ToInt32'))
    tf.summary.scalar('dropout_keep_probability', keep_prob)

  
  with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3], name="x-input") 
    y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y-input")
    # Adding placeholders to collection for inference on saved model
    tf.add_to_collection('x', x)
    tf.add_to_collection('y_', y_)
    

  ##########################################
  ##                                      ##
  ##          Network Architecture        ##
  ##                                      ##
  ##########################################
 
  conv1 = conv_layer(layer_name='conv1', input_tensor=x, receptive_field=5, 
                     channels_in=3, channels_out=64, pool=True, pool_size=2, pool_stride=2,
                     batch_norm=True, batch_norm_after=False, training=batch_norm_train_mode)
  
  conv2 = conv_layer(layer_name='conv2', input_tensor=conv1, receptive_field=5, 
                     channels_in=64, channels_out=64, pool=False, pool_size=2, pool_stride=2,
                     batch_norm=True, batch_norm_after=False, training=batch_norm_train_mode)
  
  conv3 = conv_layer(layer_name='conv3', input_tensor=conv2, receptive_field=5, 
                     channels_in=64, channels_out=64, pool=True, pool_size=2, pool_stride=2,
                     batch_norm=True, batch_norm_after=False, training=batch_norm_train_mode)
  
  conv4 = conv_layer(layer_name='conv4', input_tensor=conv3, receptive_field=5, 
                     channels_in=64, channels_out=64, pool=False, pool_size=2, pool_stride=2,
                     batch_norm=True, batch_norm_after=False, training=batch_norm_train_mode)
  
  last_conv = conv4
  
  #with tf.name_scope('last_conv_flatten'):
  conv_reshaped = tf.reshape(last_conv, [-1, flat_dimension(last_conv)])

  fc1 = dense_layer(layer_name='fc1', input_tensor=conv_reshaped, input_dim=flat_dimension(last_conv),
                    output_dim=1024, decay=fc_decay, batch_norm=True, batch_norm_after=False, 
                    training=batch_norm_train_mode)
  dropped1 = tf.nn.dropout(fc1, keep_prob)
  
  fc2 = dense_layer(layer_name='fc2', input_tensor=dropped1, input_dim=1024,
                      output_dim=1024, decay=fc_decay, batch_norm=True, batch_norm_after=False, 
                      training=batch_norm_train_mode)
  dropped2 = tf.nn.dropout(fc2, keep_prob)
  
  last_fc = dropped2
  
  # Do not apply softmax activation yet! use the identity
  logits = dense_layer(layer_name='output', input_tensor=last_fc, input_dim=1024, output_dim=10, act=tf.identity)
  tf.add_to_collection('logits', logits) # add logits op for inference later
  
  print conv1.shape
  print conv2.shape
  print conv3.shape
  print conv4.shape
  print fc1.shape
  print fc2.shape

  ### Losses and Accuracy ###
  with tf.name_scope('accuracy'):
      correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
      accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  tf.summary.scalar('accuracy', accuracy)
  
  # Cross-Entropy Loss, there's also weight decay loss defined in the networks
  with tf.name_scope('cross_entropy'):
    diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits)
    with tf.name_scope('total'):
      cross_entropy = tf.reduce_mean(diff)
      tf.add_to_collection('losses', cross_entropy)
  tf.summary.scalar('cross_entropy', cross_entropy)
  
  # Total loss (weight decay + cross-entropy)
  total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

  with tf.name_scope('train'):
    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(learning_rate_init, global_step, decay_steps, decay_rate)
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(total_loss, global_step=global_step)
  
  # Merging all summaries
  merged_summaries = tf.summary.merge_all()

  # Batch norm needs the moving averages to be updated
  # and the default optimizer.minimize doesn't do it, so defining it explicitly here
  extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  
  init = tf.global_variables_initializer()

  # For saving the trained model in the end
  saver = tf.train.Saver()
  
  # Print location of tensorboard 
  ts = datetime.now().strftime('%Y%m%d_%H%M')
  logs_path = "logs/{}/".format(ts)
  pwd = os.getcwd()+"/"
  print "-"* 70
  print("Run the following to start tensorboard server:\n" \
        "tensorboard --logdir=/{}{}".format(pwd, logs_path))
  
  
  ##########################################
  ##                                      ##
  ##           Launch the graph           ##
  ##                                      ##
  ##########################################

  with tf.Session() as sess:
    begin_time = time() # timer
    sess.run(init) 
    train_writer = tf.summary.FileWriter(logs_path + '/train', sess.graph)
    valid_writer = tf.summary.FileWriter(logs_path + '/valid')
    
    # Training loop: epochs * batches_per_epoch
    for epoch in xrange(training_epochs):
      print "Current Learning Rate: ", sess.run(learning_rate) # keep track of decaying learning rate
      
      for batch_num in xrange(batches_per_epoch):
        # actual training
        sess.run([optimizer, extra_update_ops, cross_entropy], feed_dict=feed_dict(batch_num, mode='train'))
        
        # occasional validation *set* test reporting
        if batch_num % valid_test_every == valid_test_every - 1:
          # Record summaries and accuracy on the *test* set
          # And compare against *training* set of same size (don't use these for training)
          summary, acc = sess.run([merged_summaries, accuracy], feed_dict=feed_dict(batch_num, mode='validation_test'))
          valid_writer.add_summary(summary, epoch * batches_per_epoch + batch_num)
          
          summary = sess.run(merged_summaries, feed_dict=feed_dict(batch_num, mode='validation_train'))
          train_writer.add_summary(summary, epoch * batches_per_epoch + batch_num)

          print('Validation accuracy at epoch %s: batch %s: %s' % (epoch, batch_num, acc))

      
      ### Test on the full test set START###
      test_batches = (len(svhn_test_data) / 4000) + 1
      accuracies = []
      for test_batch_num in range(test_batches):
        acc = sess.run([accuracy], feed_dict=feed_dict(test_batch_num, mode='test_all'))
        accuracies.append(acc)
      print "-"* 70
      print "Full dataset accuracies (in batches of 4000)", accuracies, "with mean:", np.mean(accuracies)
      ### Test on the full test set END ###
    
    train_writer.close()
    valid_writer.close()
    
    end_time = time()
    elapsed_mins = (end_time - begin_time) / 60.0
    print "\nOptimization Finished! in {} minutes".format(elapsed_mins)
    
    # Save down the current model
    if not os.path.exists("models"): os.makedirs("models")
    saver.save(sess, "models/{}".format(model_name))


In [None]:
##########################################
##                                      ##
##            Global Parameters         ##
##                                      ##
##########################################

GLOBAL_SEED = 69

# Pre-compute one shuffled ordering of the training indices per epoch.
# The generator is seeded ONCE, before the loop: re-seeding on every
# iteration would make all orderings identical. The first ordering is the
# same one the per-iteration seeding produced.
NUM_PRESHUFFLED_EPOCHS = 10
np.random.seed(seed=GLOBAL_SEED)
shuffled_indices = []
for epoch in xrange(NUM_PRESHUFFLED_EPOCHS):
  idx = np.arange(len(svhn_train_data))
  np.random.shuffle(idx)
  shuffled_indices.append(idx)

# Training Parameters
train_batch_size = 200
valid_batch_size = 500 #1000
valid_test_every = 250
test_batch_size = 4000

training_epochs = 1
batches_per_epoch = int(len(svhn_train_data) / train_batch_size)


learning_rate_init = 0.002
decay_steps = batches_per_epoch
decay_rate = 0.5 # learning rate is multiplied by 0.5 (halved) every decay_steps, i.e. once per epoch

# Regularization
fc_decay = 0.0
train_keep_prob = 0.75

model_name = "color_model"

run()

(?, 16, 16, 64)
(?, 16, 16, 64)
(?, 8, 8, 64)
(?, 8, 8, 64)
(?, 1024)
(?, 1024)
----------------------------------------------------------------------
Run the following to start tensorboard server:
tensorboard --logdir=//home/ec2-user/code/cnn_svhn/logs/20170502_0617/
Current Learning Rate:  0.002


## Testing inference

In [None]:
# saver = tf.train.Saver()
# x_batch, y_batch = svhn_test.next_batch(10)

# with tf.Session() as sess:
#   saver.restore(sess, "models/{}".format(model_name))
#   print "model restored"
#   #logits

#   logits = tf.get_collection("logits")[0]
#   x = tf.get_collection("x")[0]
#   y_ = tf.get_collection("y_")[0]
#   keep_prob = tf.get_collection("keep_prob")[0]
  
#   predictions = sess.run(logits, feed_dict={x: x_batch, y_: y_batch, keep_prob: 1.0})
#   y_preds = tf.argmax(predictions, 1)
#   y_actual = tf.argmax(y_batch, 1)
  
#   ## tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
#   print sess.run(y_preds)
#   print sess.run(y_actual)