## Inspecting Data

In [1]:
import gen_input
import numpy as np
import os
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf

# Split specification handed to gen_input.read_data_sets for both files.
# NOTE(review): presumably ordered (train, test, validation), so [1., 0., 0.]
# would keep every example in the `.train` subset -- confirm against gen_input.
train_test_valid_split = [1., 0., 0.]
# Data files come from http://ufldl.stanford.edu/housenumbers/
svhn_train = gen_input.read_data_sets("data/train_32x32.mat", train_test_valid_split).train
# The test file goes through the same loader; its `.train` attribute is what
# the rest of the notebook uses as the test set.
svhn_test = gen_input.read_data_sets("data/test_32x32.mat", train_test_valid_split).train

# Data information
# Axis 2 of `images` is taken as the channel count; axis 1 is treated as the
# pixel count of a square image, so its integer square root is the side length.
input_channels = svhn_train.images.shape[2]
image_size = int(np.sqrt(svhn_train.images.shape[1]))
n_classes = 10 # total classes (0-9 digits)

# print svhn_train.images[0].shape
# print svhn_train.images.shape
# print svhn_test.images[0].shape
# print svhn_test.images.shape

# per_img_means = svhn_train.images.mean(axis=1)
# # per_img_norm = svhn_train.images - per_img_means[:, np.newaxis, :]

# for i in range(4010, 4025):
#     fig = plt.figure()
#     ax = fig.add_subplot(1,1,1)
    
#     img = svhn_train.images[i]
#     lbl = np.argmax(svhn_train.labels[i])
#     ax.set_title(lbl)
#     img = img.reshape([32,32,3])
#     imgplot = plt.imshow(img)
#     plt.show()
    
# #     img_s = per_img_norm[i]
# #     img_s = img_s.reshape([32,32,3])
# #     imgplot = plt.imshow(img_s)
# #     plt.show()

In [2]:
##########################################
##                                      ##
##            Helper Wrappers           ##
##                                      ##
##########################################

# Zero initialisation is avoided on purpose: per the original author's note,
# the network gets stuck when these variables start at 0.

# Default for weight_variable's `stddev` parameter and for the constant
# initial value used by bias_variable.
init_std = 0.04
# L2 weight-decay coefficient that run() passes to the fully connected
# layers fc1 and fc2 (layers default to decay=0, i.e. no penalty).
init_decay = 0.004

def weight_variable(shape, stddev=init_std):
  """Create a weight variable drawn from a zero-mean truncated normal.

  Args:
    shape: shape of the variable to create.
    stddev: standard deviation of the truncated normal used for the
      initial values.

  Returns:
    A `tf.Variable` holding the sampled initial values.
  """
  # stddev must be passed by keyword: the second positional parameter of
  # tf.truncated_normal is `mean`, so a positional call would shift the
  # distribution to mean=stddev and leave the spread at its default of 1.0.
  initial = tf.truncated_normal(shape, stddev=stddev)
  return tf.Variable(initial)

def bias_variable(shape, default_bias=init_std):
  """Build a bias variable whose every entry starts at `default_bias`.

  Args:
    shape: shape of the variable to create.
    default_bias: constant used to fill the initial value.

  Returns:
    A `tf.Variable` initialised to the constant tensor.
  """
  return tf.Variable(tf.constant(default_bias, shape=shape))

def variable_summaries(var):
  """Register TensorBoard summaries (mean, stddev, max, min, histogram) for `var`.

  All summary ops are created under a 'summaries' name scope; nothing is
  returned.
  """
  with tf.name_scope('summaries'):
    avg = tf.reduce_mean(var)
    tf.summary.scalar('mean', avg)

    # Only the computation lives in the nested scope; the scalar summary
    # itself is registered one level up, next to the others.
    with tf.name_scope('stddev'):
      squared_dev = tf.square(var - avg)
      spread = tf.sqrt(tf.reduce_mean(squared_dev))
    tf.summary.scalar('stddev', spread)

    for tag, reducer in (('max', tf.reduce_max), ('min', tf.reduce_min)):
      tf.summary.scalar(tag, reducer(var))

    tf.summary.histogram('histogram', var)

def convlayer(layer_name, input_tensor, receptive_field, channels_in, channels_out,
              padding='SAME', stride=1, act=tf.nn.relu, decay=0,
              pool=False, pooler=tf.nn.max_pool, pool_size=2, pool_stride=2, pool_padding='SAME'):
  """2-D convolution + bias + activation, with an optional pooling stage.

  Args:
    layer_name: name scope wrapping every op of the layer.
    input_tensor: tensor fed to `tf.nn.conv2d`.
    receptive_field: side length of the square convolution kernel.
    channels_in: number of input channels of the kernel.
    channels_out: number of output channels (also the bias length).
    padding: padding mode for the convolution.
    stride: convolution stride applied on both spatial axes.
    act: activation callable, invoked as `act(tensor, name=...)`.
    decay: when > 0, `decay * l2_loss(weights)` is added to the 'losses'
      collection.
    pool: whether to pool the activations before returning.
    pooler: pooling callable taking `ksize`, `strides` and `padding`.
    pool_size: side length of the pooling window.
    pool_stride: pooling stride on both spatial axes.
    pool_padding: padding mode for the pooling op.

  Returns:
    The pooled tensor when `pool` is true, otherwise the activations.
    Summary ops for weights, biases, pre-activations, activations and
    (if pooling) the pooled output are registered along the way.
  """
  kernel_shape = [receptive_field, receptive_field, channels_in, channels_out]
  conv_strides = [1, stride, stride, 1]

  # One enclosing scope per layer keeps the graph view grouped by layer.
  with tf.name_scope(layer_name):
    with tf.name_scope('weights'):
      kernel = weight_variable(kernel_shape)
      variable_summaries(kernel)
      if decay > 0:
        l2_penalty = tf.multiply(tf.nn.l2_loss(kernel), decay, name='weight_decay')
        tf.add_to_collection('losses', l2_penalty)

    with tf.name_scope('biases'):
      offsets = bias_variable([channels_out])
      variable_summaries(offsets)

    with tf.name_scope('W_conv_x_plus_b'):
      convolved = tf.nn.conv2d(input_tensor, kernel,
                               strides=conv_strides,
                               padding=padding)
      preactivate = convolved + offsets
      tf.summary.histogram('pre_activations', preactivate)

    activations = act(preactivate, name='activation')
    tf.summary.histogram('activations', activations)

    if not pool:
      return activations

    pooled = pooler(activations,
                    ksize=[1, pool_size, pool_size, 1],
                    strides=[1, pool_stride, pool_stride, 1],
                    padding=pool_padding)
    tf.summary.histogram('pools', pooled)
    return pooled
    
def nn_layer(layer_name, input_tensor, input_dim, output_dim, act=tf.nn.relu, decay=0):
  """Fully connected layer: `act(input_tensor @ W + b)`.

  Args:
    layer_name: name scope wrapping every op of the layer.
    input_tensor: tensor multiplied by the weight matrix.
    input_dim: number of rows of the weight matrix.
    output_dim: number of columns of the weight matrix (and bias length).
    act: activation callable, invoked as `act(tensor, name=...)`.
    decay: when > 0, `decay * l2_loss(weights)` is added to the 'losses'
      collection.

  Returns:
    The activation tensor. Summaries for weights, biases, pre-activations
    and activations are registered as a side effect.
  """
  # One enclosing scope per layer keeps the graph view grouped by layer.
  with tf.name_scope(layer_name):
    with tf.name_scope('weights'):
      w = weight_variable([input_dim, output_dim])
      if decay > 0:
        l2_penalty = tf.multiply(tf.nn.l2_loss(w), decay, name='weight_decay')
        tf.add_to_collection('losses', l2_penalty)
      variable_summaries(w)

    with tf.name_scope('biases'):
      b = bias_variable([output_dim])
      variable_summaries(b)

    with tf.name_scope('Wx_plus_b'):
      projected = tf.matmul(input_tensor, w)
      preactivate = projected + b
      tf.summary.histogram('pre_activations', preactivate)

    outputs = act(preactivate, name='activation')
    tf.summary.histogram('activations', outputs)
    return outputs

In [7]:
def run():
  """Build the SVHN convnet graph, train it, and report test accuracy.

  Reads module-level configuration defined in other notebook cells:
  `svhn_train`, `svhn_test`, `image_size`, `input_channels`, `n_classes`,
  `init_decay`, `learning_rate`, `training_epochs`, `batch_size`,
  `total_batches`, `test_every`, `test_batch_size`, `train_keep_prob`.

  Side effects: resets the default TensorFlow graph, opens (and closes) an
  interactive session, writes TensorBoard summaries under
  `logs/<timestamp>/train` and `logs/<timestamp>/test`, and prints progress.

  Raises:
    ValueError: if the internal feed builder is given an unknown mode.
  """
  ts = datetime.now().strftime('%Y%m%d_%H%M')
  # Build paths with os.path.join so no doubled separators end up in them.
  logs_path = os.path.join("logs", ts)

  # RESET TF GRAPH, just in case
  tf.reset_default_graph()

  # Input placeholders
  with tf.name_scope('input'):
    x = tf.placeholder(tf.float32, shape=[None, image_size*image_size, input_channels], name="x-input")
    y_ = tf.placeholder(tf.float32, shape=[None, n_classes], name="y-input")

  with tf.name_scope('input_reshape'):
    image_shaped_input = tf.reshape(x, [-1, image_size, image_size, input_channels])
    tf.summary.image('input', image_shaped_input, 10)

  conv1 = convlayer(layer_name='conv1', input_tensor=image_shaped_input, receptive_field=5,
                    channels_in=input_channels, channels_out=64, pool=True, pool_size=3, pool_stride=2)

  conv2 = convlayer(layer_name='conv2', input_tensor=conv1, receptive_field=5,
                    channels_in=64, channels_out=64, pool=True, pool_size=3, pool_stride=2)

  dim = 1 # Compute how many numbers we have, ignoring the batch size
  for d in conv2.get_shape()[1:].as_list():
    dim *= d

  with tf.name_scope('conv2_reshape'):
    conv_shaped = tf.reshape(conv2, [-1, dim])

  fc1 = nn_layer(layer_name='fc1', input_tensor=conv_shaped, input_dim=dim, output_dim=384, decay=init_decay)

  with tf.name_scope('dropout'):
    keep_prob = tf.placeholder(tf.float32)
    tf.summary.scalar('dropout_keep_probability', keep_prob)
    dropped = tf.nn.dropout(fc1, keep_prob)

  # Feed the dropped-out activations forward; wiring fc1 in directly would
  # leave the first dropout op dangling and unused.
  fc2 = nn_layer(layer_name='fc2', input_tensor=dropped, input_dim=384, output_dim=192, decay=init_decay)

  with tf.name_scope('dropout2'):
    dropped2 = tf.nn.dropout(fc2, keep_prob)

  # Do not apply softmax activation yet! use the identity
  # Output width follows n_classes so it always matches the label placeholder.
  y = nn_layer(layer_name='output', input_tensor=dropped2, input_dim=192, output_dim=n_classes, act=tf.identity)

  with tf.name_scope('cross_entropy'):
    # The raw formulation of cross-entropy can be numerically unstable, so
    # tf.nn.softmax_cross_entropy_with_logits is applied to the raw outputs
    # of the layer above and then averaged across the batch.
    diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
    with tf.name_scope('total'):
      cross_entropy = tf.reduce_mean(diff)
      tf.add_to_collection('losses', cross_entropy)

  tf.summary.scalar('cross_entropy', cross_entropy)

  # Cross-entropy plus every weight-decay term the layers registered.
  total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

  with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(
        total_loss)

  with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
      correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    with tf.name_scope('accuracy'):
      accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  tf.summary.scalar('accuracy', accuracy)

  # Normalize by subtracting per image, per channel means
  def normalize_batch(batch):
    per_img_ch_means = batch.mean(axis=1)
    return batch - per_img_ch_means[:, np.newaxis, :]

  def feed_dict(mode):
    """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
    if mode == 'Train':
      batch_x, batch_y = svhn_train.next_batch(batch_size)
      keep_proba = train_keep_prob
    elif mode == 'Test':
      batch_x, batch_y = svhn_test.next_batch(test_batch_size)
      keep_proba = 1.0
    elif mode == 'Train_no_drop':
      batch_x, batch_y = svhn_train.next_batch(batch_size)
      keep_proba = 1.0
    elif mode == 'Final':
      batch_x, batch_y = svhn_test.next_batch(svhn_test.num_examples)
      keep_proba = 1.0
    else:
      # Fail loudly instead of hitting an unbound local further down.
      raise ValueError("Unknown feed_dict mode: %r" % (mode,))
    batch_x = normalize_batch(batch_x)
    return {x: batch_x, y_: batch_y, keep_prob: keep_proba}

  sess = tf.InteractiveSession()
  train_writer = None
  test_writer = None
  try:
    # Merge all the summaries and write them out
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(os.path.join(logs_path, 'train'), sess.graph)
    test_writer = tf.summary.FileWriter(os.path.join(logs_path, 'test'))

    tf.global_variables_initializer().run()

    # Train the model, and also write summaries.
    # On every `test_every`-th batch: measure accuracy on a test batch and
    # write test summaries, then run one training step without dropout and
    # write training summaries. All other batches just run train_step.
    # `range` (rather than `xrange`) keeps the loop valid on Python 2 and 3.
    for epoch in range(training_epochs):
      for batch_num in range(total_batches):
        # Single step counter shared by both writers so the curves line up.
        global_step = epoch * total_batches + batch_num
        if batch_num % test_every == test_every - 1:
          # Record summaries and accuracy on the *test* set
          summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(mode='Test'))
          test_writer.add_summary(summary, global_step)
          print('Test accuracy at epoch %s - batch %s: %s' % (epoch, batch_num, acc))

          # Now proceed to produce training summary too
          summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(mode='Train_no_drop'))
          train_writer.add_summary(summary, global_step)
        else:
          sess.run([train_step], feed_dict=feed_dict(mode='Train'))

    print("\nOptimization Finished!\n")

    # Fetch the tensor directly (not a one-element list) to print a scalar.
    acc = sess.run(accuracy, feed_dict=feed_dict(mode='Final'))
    print('Accuracy at finish: %s' % (acc,))

    print("-" * 70)
    print("Run the following to start tensorboard server:\n"
          "tensorboard --logdir={}".format(os.path.abspath(logs_path)))
  finally:
    # Release writers and the session even if training was interrupted, so
    # repeated calls from the notebook do not pile up open sessions.
    for writer in (train_writer, test_writer):
      if writer is not None:
        writer.close()
    sess.close()

In [None]:
##########################################
##                                      ##
##              Parameters              ##
##                                      ##
##########################################
# Training Parameters
learning_rate = 0.001
training_epochs = 5

batch_size = 32 # 128
total_batches = int(0.1 * svhn_train.num_examples / batch_size) # batches per epoch: enough to cover 10% of the training examples

test_every = 100 # record test accuracy every 100 batches
test_batch_size = int(0.1*svhn_test.num_examples) # each test evaluation draws 10% of the test examples

# Keep probability fed to the dropout layers during training
train_keep_prob = 1.0 # no drop out

# run() reads the parameters above as module-level globals.
run()

# To Do

In [None]:
# with tf.name_scope('Optimizer'):
#     optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    
#     # Op to calculate every variable gradient
#     grads = tf.gradients(loss, tf.trainable_variables())
#     grads = list(zip(grads, tf.trainable_variables()))
    
#     # Op to update all variables according to their gradient
#     apply_grads = optimizer.apply_gradients(grads_and_vars=grads)

# # Summaries to visualize gradients
# for grad, var in grads:
#     var_name = var.name.replace(":", "_") # to suppress the pesky warning
#     tf.summary.histogram(var_name + '_gradient', grad)

In [None]:
# http://stackoverflow.com/questions/34696845/how-to-see-multiple-images-through-tf-image-summary

In [None]:
# https://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow/38325288#38325288