Deep Learning
=============

# This is an attempt to use TensorFlow shared variables
------------

The goal of this is to try to figure out how to apply the optimized weights to the validation and testing sets while utilizing shared variables. Currently the weights are not saved from the training when applying them to the validation set. 

In [4]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [5]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [6]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  dataset = dataset.reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [7]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.

In [19]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
eval_batch_size = 100

def _variable_on_cpu(name, shape, initializer):
    """Helper to create a Variable stored on CPU memory.
      Returns:
        Variable Tensor
    """
    with tf.device('/cpu:0'):
        var = tf.get_variable(name, shape, initializer=initializer)
    return var

def _variable_with_weight_decay(name, shape, stddev, wd=None):
  """Helper to create an initialized Variable with weight decay.
  Note that the Variable is initialized with a truncated normal distribution.
  A weight decay is added only if one is specified.
  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.
  Returns:
    Variable Tensor
  """
  var = _variable_on_cpu(
      name,
      shape,
      tf.truncated_normal_initializer(stddev=stddev))
  if wd is not None:
    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
  return var

def conv_relu(data, kernel_shape, bias_shape, initializer):
    weights = _variable_with_weight_decay('weights',
              shape=kernel_shape, 
              stddev=0.1)
    biases = _variable_on_cpu('biases', bias_shape, initializer)
    conv = tf.nn.conv2d(data, weights, strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(conv + biases)

def matmul(data, kernel_shape, bias_shape, initializer):
    weights = _variable_with_weight_decay('weights',
              shape=kernel_shape, 
              stddev=0.1)
    biases = _variable_on_cpu('biases', bias_shape, initializer)
    matmul = tf.matmul(data, weights)
    return matmul + biases

def model(data):
    with tf.variable_scope("conv1"):
        relu_1 = conv_relu(
            data,
            [patch_size, patch_size, num_channels, depth],
            [depth],
            tf.constant_initializer(0.0))
        pool_1 = tf.nn.max_pool(relu_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],padding='SAME')
        
    with tf.variable_scope("conv2"):
        relu_2 = conv_relu(
            pool_1,
            [patch_size, patch_size, depth, depth],
            [depth],
            tf.constant_initializer(1.0))
        pool_2 = tf.nn.max_pool(relu_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],padding='SAME')
        shape = pool_2.get_shape().as_list()
        reshape = tf.reshape(pool_2, [shape[0], shape[1] * shape[2] * shape[3]])
       
    with tf.variable_scope("matmul1"):
        relu_3 = tf.nn.relu(matmul(
            reshape,
            [image_size // 4 * image_size // 4 * depth, num_hidden], 
            [num_hidden],
            tf.constant_initializer(1.0)))
    
    with tf.variable_scope("matmul2"):
        return matmul(
            relu_3, 
            [num_hidden, num_labels],
            [num_labels],
            tf.constant_initializer(1.0))
        
graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.placeholder(
        tf.float32, shape=(eval_batch_size, image_size, image_size, num_channels))
    tf_test_dataset = tf.placeholder(
        tf.float32, shape=(eval_batch_size, image_size, image_size, num_channels))
    
    # Training computation.
    logits = model(tf_train_dataset)
    
    loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    
    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    # Predictions for the training, validation, and test data.
    with tf.variable_scope("Train") as scope:
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))

In [12]:
num_steps = 1000

def eval_in_batches(dataset, placeholder, predictor):
    pred = []
    for step in range(100):
        offset = (step * eval_batch_size) % (valid_labels.shape[0] - eval_batch_size)
        batch_data = dataset[offset:(offset + eval_batch_size), :, :, :]
        feed_dict = {placeholder : batch_data}
        batch = session.run(
          predictor, feed_dict=feed_dict)
        for x in batch:
            pred.append(x.tolist())
    return np.array(pred)

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print('Initialized')
  for step in range(num_steps+1):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 250 == 0 and step != 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        eval_in_batches(valid_dataset, tf_valid_dataset, valid_prediction), valid_labels))


Initialized
Minibatch loss at step 250: 0.745563
Minibatch accuracy: 75.0%
Validation accuracy: 10.9%
Minibatch loss at step 500: 0.314797
Minibatch accuracy: 93.8%
Validation accuracy: 10.9%
Minibatch loss at step 750: 0.702335
Minibatch accuracy: 81.2%
Validation accuracy: 10.9%
Minibatch loss at step 1000: 0.120226
Minibatch accuracy: 100.0%
Validation accuracy: 10.9%
