In [109]:

# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

pickle_file = 'notMNIST.pickle'

# NOTE(security): unpickling can execute arbitrary code, so only load a
# pickle file that comes from a trusted source.
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

# Pull the three splits out of the loaded dict into module-level names.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # drop the dict reference; the arrays stay alive via the names above

# Report the shapes as loaded, before any reshaping.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)


image_size = 28   # images are image_size x image_size pixels
num_labels = 10   # number of distinct classes / width of a one-hot label row

def reformat(dataset, labels):
  """Flatten images to rows and one-hot encode integer labels.

  Args:
    dataset: array reshapeable to (-1, image_size * image_size).
    labels: array of integer class ids, one per example.

  Returns:
    A (dataset, labels) pair, both float32: the dataset with one flattened
    image per row, and the labels as rows of length num_labels holding 1.0
    in the column equal to the class id (e.g. 1 -> [0.0, 1.0, 0.0 ...]).
  """
  flat_images = np.reshape(dataset, (-1, image_size * image_size)).astype(np.float32)
  # Broadcasting a column of class ids against the row 0..num_labels-1 yields
  # a boolean matrix with True exactly where the column index equals the id.
  class_ids = np.arange(num_labels)
  one_hot = np.equal(class_ids, np.expand_dims(labels, 1)).astype(np.float32)
  return flat_images, one_hot
# Flatten every split and one-hot encode its labels, rebinding the same names.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

# Report the shapes again, now after reformatting.
for split_name, split_data, split_labels in (
    ('Training set', train_dataset, train_labels),
    ('Validation set', valid_dataset, valid_labels),
    ('Test set', test_dataset, test_labels),
):
  print(split_name, split_data.shape, split_labels.shape)


# Optional: uncomment the two lines below to restrict training to the first
# 512 examples.
#train_dataset = train_dataset[:512, :]
#train_labels = train_labels[:512, :]

def accuracy(predictions, labels):
  """Return the percentage of rows whose argmax agrees between the two arrays.

  Args:
    predictions: 2-D array of per-class scores, one row per example.
    labels: 2-D array with the same number of rows (e.g. one-hot labels).

  Returns:
    100 * (number of rows where argmax matches) / (number of prediction rows).
  """
  predicted_classes = np.argmax(predictions, 1)
  true_classes = np.argmax(labels, 1)
  num_correct = np.sum(predicted_classes == true_classes)
  return 100.0 * num_correct / predictions.shape[0]



Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)
Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


In [113]:
"""
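Turn the logistic regression example with SGD into a neural network with four
hidden layers of rectified linear units (tf.nn.relu) of 1024, 1024, 305 and 75
nodes, trained with dropout and an exponentially decaying learning rate.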
"""

# Minibatch size; it also fixes the first dimension of the training placeholders.
batch_size = 128
# Widths of the four hidden layers, in order from the input side to the output side.
layer1_size = 1024
layer2_size = 1024
layer3_size = 305
layer4_size = 75
# note, logits_size == num_labels
# (there is no separate logits_size constant; the output layer is num_labels wide)

# Standard deviations for the truncated-normal initializer of each weight matrix.
# NOTE(review): these are approximately sqrt(2 / fan_in) for each layer, so they
# look hand-computed from the layer sizes above -- confirm, and re-derive them
# if any layer size changes.
layer1_weights_stdev = 0.0517
layer2_weights_stdev = 0.0441
layer3_weights_stdev = 0.0441
layer4_weights_stdev = 0.0809
logits_weights_stdev = 0.1632

# L2 regularization strength.
# NOTE(review): no active code in this cell reads it; the only use is a
# commented-out loss line in the graph definition.
regularization_meta = 0.03
# Second argument passed to tf.nn.dropout on the training path (the keep
# probability in the TF 1.x API).
keepprob = 0.75

graph = tf.Graph()
with graph.as_default():

  # ---- Inputs ----
  # Training minibatches are fed at run time through fixed-size placeholders;
  # the full validation and test sets are embedded in the graph as constants.
  tf_train_dataset = tf.placeholder(
      tf.float32, shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(
      tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # ---- Parameters ----
  # Weight matrices start from a truncated normal with a per-layer stddev.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [image_size * image_size, layer1_size], stddev=layer1_weights_stdev))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [layer1_size, layer2_size], stddev=layer2_weights_stdev))
  layer3_weights = tf.Variable(tf.truncated_normal(
      [layer2_size, layer3_size], stddev=layer3_weights_stdev))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [layer3_size, layer4_size], stddev=layer4_weights_stdev))
  logits_weights = tf.Variable(tf.truncated_normal(
      [layer4_size, num_labels], stddev=logits_weights_stdev))

  # Bias vectors start at zero.
  layer1_biases = tf.Variable(tf.zeros([layer1_size]))
  layer2_biases = tf.Variable(tf.zeros([layer2_size]))
  layer3_biases = tf.Variable(tf.zeros([layer3_size]))
  layer4_biases = tf.Variable(tf.zeros([layer4_size]))
  logits_biases = tf.Variable(tf.zeros([num_labels]))

  def apply_layers(dataset, use_dropout=False):
    """Build the forward pass for `dataset` and return the logits tensor.

    Every call shares the same weight/bias variables. Each hidden layer is
    relu(x . W + b); when `use_dropout` is true, tf.nn.dropout with
    `keepprob` is applied after every hidden layer. The output layer is a
    plain affine map with no activation.
    """
    hidden_layers = (
        (layer1_weights, layer1_biases),
        (layer2_weights, layer2_biases),
        (layer3_weights, layer3_biases),
        (layer4_weights, layer4_biases),
    )
    activations = dataset
    for weights, biases in hidden_layers:
      activations = tf.nn.relu(tf.matmul(activations, weights) + biases)
      if use_dropout:
        activations = tf.nn.dropout(activations, keepprob)
    return tf.matmul(activations, logits_weights) + logits_biases

  # ---- Forward passes ----
  # Only the training path uses dropout; validation and test do not.
  train_logits = apply_layers(tf_train_dataset, use_dropout=True)
  valid_logits = apply_layers(tf_valid_dataset)
  test_logits = apply_layers(tf_test_dataset)

  # ---- Loss ----
  # Mean softmax cross-entropy over the minibatch. No L2 penalty is added:
  # regularization_meta is not used by this graph.
  train_loss = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(
          labels=tf_train_labels, logits=train_logits))

  # ---- Optimizer ----
  # global_step is incremented by minimize() and drives the staircase decay:
  # the rate starts at 0.3 and is multiplied by 0.86 every 3500 steps.
  global_step = tf.Variable(0, trainable=False)
  learning_rate = tf.train.exponential_decay(
      0.3, global_step, 3500, 0.86, staircase=True)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
      train_loss, global_step=global_step)

  # ---- Predictions ----
  # Class probabilities for the training minibatch, validation and test sets.
  train_prediction = tf.nn.softmax(train_logits)
  valid_prediction = tf.nn.softmax(valid_logits)
  test_prediction = tf.nn.softmax(test_logits)

In [114]:
num_steps = 95001


with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print("Initialized")

  # The minibatch window slides through the training data and wraps around
  # so that a full batch of batch_size rows always fits.
  # Note: we could use better randomization across epochs.
  offset_modulus = train_labels.shape[0] - batch_size

  for step in range(num_steps):
    # Select the rows of the current minibatch.
    offset = (step * batch_size) % offset_modulus
    batch_rows = slice(offset, offset + batch_size)
    batch_data = train_dataset[batch_rows, :]
    batch_labels = train_labels[batch_rows, :]

    # Feed the minibatch into the placeholders and run one optimizer step,
    # fetching the loss and the training predictions from the same run.
    feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
    fetches = [optimizer, train_loss, train_prediction]
    _, loss_value, batch_predictions = session.run(fetches, feed_dict=feed_dict)

    # Report progress only every 1000 steps.
    if step % 1000 != 0:
      continue
    print("Minibatch loss at step %d: %f" % (step, loss_value))
    print("Minibatch accuracy: %.1f%%" % accuracy(batch_predictions, batch_labels))
    print("Validation accuracy: %.1f%%" % accuracy(
      valid_prediction.eval(), valid_labels))

  # Final evaluation on the held-out test set, once training has finished.
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 2.334041
Minibatch accuracy: 17.2%
Validation accuracy: 19.0%
Minibatch loss at step 1000: 0.403549
Minibatch accuracy: 89.1%
Validation accuracy: 86.9%
Minibatch loss at step 2000: 0.457989
Minibatch accuracy: 82.8%
Validation accuracy: 88.6%
Minibatch loss at step 3000: 0.324150
Minibatch accuracy: 88.3%
Validation accuracy: 89.0%
Minibatch loss at step 4000: 0.400973
Minibatch accuracy: 88.3%
Validation accuracy: 89.5%
Minibatch loss at step 5000: 0.304499
Minibatch accuracy: 91.4%
Validation accuracy: 90.0%
Minibatch loss at step 6000: 0.318201
Minibatch accuracy: 89.1%
Validation accuracy: 89.9%
Minibatch loss at step 7000: 0.222132
Minibatch accuracy: 94.5%
Validation accuracy: 90.4%
Minibatch loss at step 8000: 0.204564
Minibatch accuracy: 92.2%
Validation accuracy: 90.7%
Minibatch loss at step 9000: 0.268605
Minibatch accuracy: 89.8%
Validation accuracy: 91.1%
Minibatch loss at step 10000: 0.310920
Minibatch accuracy: 93.0%
Validation accur

Validation accuracy: 92.4%
Minibatch loss at step 90000: 0.183556
Minibatch accuracy: 94.5%
Validation accuracy: 92.5%
Minibatch loss at step 91000: 0.176357
Minibatch accuracy: 93.8%
Validation accuracy: 92.5%
Minibatch loss at step 92000: 0.065052
Minibatch accuracy: 98.4%
Validation accuracy: 92.5%
Minibatch loss at step 93000: 0.080302
Minibatch accuracy: 97.7%
Validation accuracy: 92.5%
Minibatch loss at step 94000: 0.155420
Minibatch accuracy: 96.1%
Validation accuracy: 92.4%
Minibatch loss at step 95000: 0.145496
Minibatch accuracy: 94.5%
Validation accuracy: 92.4%
Test accuracy: 97.0%
