Deep Learning
=============

Assignment 3
------------

Previously in `2_fullyconnected.ipynb`, you trained a logistic regression and a neural network model.

The goal of this assignment is to explore regularization techniques.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

First reload the data we generated in _notmist.ipynb_.

In [2]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a shape that's more adapted to the models we're going to train:
- data as a flat matrix,
- labels as float 1-hot encodings.

In [3]:
image_size = 28
num_labels = 10

def reformat(dataset, labels):
  dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
  # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


In [40]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

---
Problem 1
---------

Introduce and tune L2 regularization for both logistic and neural network models. Remember that L2 amounts to adding a penalty on the norm of the weights to the loss. In TensorFlow, you can compute the L2 loss for a tensor `t` using `nn.l2_loss(t)`. The right amount of regularization should improve your validation / test accuracy.

---

In [7]:
import numpy as np
pickle_file = 'eql_lsts.pickle'
eql_lsts = np.load(pickle_file)
apx_eql_lst = eql_lsts["apx_lst"]

In [8]:
import itertools
bad_train_ix = map(lambda x: x[0], apx_eql_lst)
good_train_ix = list(filter(lambda i: i not in bad_train_ix, 
                            range(train_dataset.shape[0])))

In [13]:
import copy
import random
num_steps = 2001
# ix list for actual SGD
def random_permutation(iterable):
  return np.random.permutation(len(iterable))
  # # fisher/yates:
  # "l - random selection from permutations(iterable)"
  # l = copy.deepcopy(iterable)
  # n = len(l)
  # for i in range(n-1):
  #   j = random.randrange(n-i)
  #   t = l[i]
  #   l[i] = l[i+j]
  #   l[i+j] = l[i]
  # return l
rand_train_ix = random_permutation(good_train_ix)

In [17]:
batch_size = 128
num_hidden = 1024*16
beta = .01
keep_prob = 0.5
graph = tf.Graph()
with graph.as_default():

  # Input data. For the training data, we use a placeholder that will be fed
  # at run time with a training minibatch.
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  weights0 = tf.Variable(
    tf.truncated_normal([image_size * image_size, num_hidden]))
  biases0 = tf.Variable(tf.zeros([num_hidden]))
  weights1 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]))
  biases1 = tf.Variable(tf.truncated_normal([num_labels]))

  # hidden
  hidden_dataset = tf.nn.relu(tf.matmul(tf_train_dataset, weights0) + biases0)


  # Training computation.
  logits = tf.matmul(hidden_dataset, weights1) + biases1
  loss = (tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))+
          beta*(tf.nn.l2_loss(weights0)+tf.nn.l2_loss(biases0)+
                tf.nn.l2_loss(weights1)+tf.nn.l2_loss(biases1)))
  
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)

  valid_hidden = tf.nn.relu(tf.matmul(tf_valid_dataset, weights0) + biases0)
  valid_prediction = tf.nn.softmax(
    tf.matmul(valid_hidden, weights1) + biases1)
  test_hidden = tf.nn.relu(tf.matmul(tf_test_dataset, weights0) + biases0)
  test_prediction = tf.nn.softmax(tf.matmul(test_hidden, weights1) + biases1)

In [15]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [16]:
offset = 0
with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    last_offset = offset
    offset = (step * batch_size) % (len(rand_train_ix) - batch_size)
    if offset < last_offset:
      rand_train_ix = random_permutation(good_train_ix)
    # Generate a minibatch.
    batch_data = train_dataset[rand_train_ix[offset:(offset + batch_size)], :]
    batch_labels = train_labels[rand_train_ix[offset:(offset + batch_size)], :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 7347.640625
Minibatch accuracy: 5.5%
Validation accuracy: 37.0%


Minibatch loss at step 500: 0.876382
Minibatch accuracy: 96.1%
Validation accuracy: 78.7%


Minibatch loss at step 1000: 0.620260
Minibatch accuracy: 92.2%
Validation accuracy: 78.6%


Minibatch loss at step 1500: 0.658640
Minibatch accuracy: 88.3%
Validation accuracy: 78.3%


Minibatch loss at step 2000: 0.555534
Minibatch accuracy: 93.8%
Validation accuracy: 76.1%
Test accuracy: 66.2%


---
Problem 2
---------
Let's demonstrate an extreme case of overfitting. Restrict your training data to just a few batches. What happens?

---

In [15]:
import numpy as np
pickle_file = 'eql_lsts.pickle'
eql_lsts = np.load(pickle_file)
apx_eql_lst = eql_lsts["apx_lst"]

In [16]:
import itertools
bad_train_ix = map(lambda x: x[0], apx_eql_lst)
good_train_ix = list(filter(lambda i: i not in bad_train_ix, 
                            range(train_dataset.shape[0])))
num_train = 800
good_train_ix = good_train_ix[:num_train]

In [17]:
import copy
import random
num_steps = 2001
# ix list for actual SGD
def random_permutation(iterable):
  return np.random.permutation(len(iterable))
  # # fisher/yates:
  # "l - random selection from permutations(iterable)"
  # l = copy.deepcopy(iterable)
  # n = len(l)
  # for i in range(n-1):
  #   j = random.randrange(n-i)
  #   t = l[i]
  #   l[i] = l[i+j]
  #   l[i+j] = l[i]
  # return l
rand_train_ix = fisher_yates_sampling(good_train_ix)

In [18]:
batch_size = 128
num_hidden = 1024
beta = .01
graph = tf.Graph()
with graph.as_default():

  # Input data. For the training data, we use a placeholder that will be fed
  # at run time with a training minibatch.
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  weights0 = tf.Variable(
    tf.truncated_normal([image_size * image_size, num_hidden]))
  biases0 = tf.Variable(tf.zeros([num_hidden]))
  weights1 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]))
  biases1 = tf.Variable(tf.truncated_normal([num_labels]))

  # hidden
  hidden_dataset = tf.nn.relu(tf.matmul(tf_train_dataset, weights0) + biases0)

  # Training computation.
  logits = tf.matmul(hidden_dataset, weights1) + biases1
  loss = (tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))+
          beta*(tf.nn.l2_loss(weights0)+tf.nn.l2_loss(biases0)+
                tf.nn.l2_loss(weights1)+tf.nn.l2_loss(biases1)))
  
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)

  valid_hidden = tf.nn.relu(tf.matmul(tf_valid_dataset, weights0) + biases0)
  valid_prediction = tf.nn.softmax(
    tf.matmul(valid_hidden, weights1) + biases1)
  test_hidden = tf.nn.relu(tf.matmul(tf_test_dataset, weights0) + biases0)
  test_prediction = tf.nn.softmax(tf.matmul(test_hidden, weights1) + biases1)

In [19]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [20]:
offset = 0
with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    last_offset = offset
    offset = (step * batch_size) % (len(rand_train_ix) - batch_size)
    if offset < last_offset:
      rand_train_ix = fisher_yates_sampling(good_train_ix)
    # Generate a minibatch.
    batch_data = train_dataset[rand_train_ix[offset:(offset + batch_size)], :]
    batch_labels = train_labels[rand_train_ix[offset:(offset + batch_size)], :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Test accuracy: 68.6%


Validation accuracy: 79.3%


Minibatch loss at step 2000: 0.490076
Minibatch accuracy: 96.9%


Validation accuracy: 80.4%


Minibatch loss at step 1500: 0.414823
Minibatch accuracy: 99.2%


Validation accuracy: 81.4%


Minibatch loss at step 1000: 0.400443
Minibatch accuracy: 100.0%


Validation accuracy: 81.8%


Minibatch loss at step 500: 0.780712
Minibatch accuracy: 97.7%


Validation accuracy: 30.3%


Initialized
Minibatch loss at step 0: 6528.056152
Minibatch accuracy: 13.3%


---
Problem 3
---------
Introduce Dropout on the hidden layer of the neural network. Remember: Dropout should only be introduced during training, not evaluation, otherwise your evaluation results would be stochastic as well. TensorFlow provides `nn.dropout()` for that, but you have to make sure it's only inserted during training.

What happens to our extreme overfitting case?

---

In [84]:
import numpy as np
pickle_file = 'eql_lsts.pickle'
eql_lsts = np.load(pickle_file)
apx_eql_lst = eql_lsts["apx_lst"]

In [85]:
import itertools
bad_train_ix = list(map(lambda x: x[0], apx_eql_lst))
good_train_ix = list(filter(lambda i: i not in bad_train_ix, 
                            range(train_dataset.shape[0])))

In [61]:
train_fraction = .01
num_train = round(train_fraction*len(good_train_ix))
actual_train_ix = good_train_ix[:num_train]

In [66]:
import copy
import random
num_steps = 1001
# ix list for actual SGD
def random_permutation(iterable):
  return np.random.permutation(len(iterable))
  # # fisher/yates:
  # "l - random selection from permutations(iterable)"
  # l = copy.deepcopy(iterable)
  # n = len(l)
  # for i in range(n-1):
  #   j = random.randrange(n-i)
  #   t = l[i]
  #   l[i] = l[i+j]
  #   l[i+j] = l[i]
  # return l
rand_train_ix = random_permutation(actual_train_ix)

In [67]:
batch_size = 128
keep_prob = .5                  # 0<keep_prob<=1
num_hidden = 1024*4
beta = .01
graph = tf.Graph()
with graph.as_default():

  # Input data. For the training data, we use a placeholder that will be fed
  # at run time with a training minibatch.
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  weights0 = tf.Variable(
    tf.truncated_normal([image_size * image_size, num_hidden]))
  biases0 = tf.Variable(tf.zeros([num_hidden]))
  weights1 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]))
  biases1 = tf.Variable(tf.truncated_normal([num_labels]))

  # hidden
  hidden_dataset = tf.nn.relu(tf.matmul(tf_train_dataset, weights0) + biases0)
  hidden_drop = tf.nn.dropout(hidden_dataset,keep_prob)*(1/keep_prob)

  # Training computation.
  logits = tf.matmul(hidden_drop, weights1) + biases1
  loss = (tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))+
          beta*(tf.nn.l2_loss(weights0)+tf.nn.l2_loss(biases0)+
                tf.nn.l2_loss(weights1)+tf.nn.l2_loss(biases1)))
  
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)

  valid_hidden = tf.nn.relu(tf.matmul(tf_valid_dataset, weights0) + biases0)
  valid_prediction = tf.nn.softmax(
    tf.matmul(valid_hidden, weights1) + biases1)
  test_hidden = tf.nn.relu(tf.matmul(tf_test_dataset, weights0) + biases0)
  test_prediction = tf.nn.softmax(tf.matmul(test_hidden, weights1) + biases1)

In [68]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [69]:
offset = 0
with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    last_offset = offset
    offset = (step * batch_size) % (len(rand_train_ix) - batch_size)
    if offset < last_offset:
      rand_train_ix = random_permutation(actual_train_ix)
    # Generate a minibatch.
    batch_data = train_dataset[rand_train_ix[offset:(offset + batch_size)], :]
    batch_labels = train_labels[rand_train_ix[offset:(offset + batch_size)], :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Test accuracy: 69.6%


Minibatch loss at step 2000: 0.485670
Minibatch accuracy: 94.5%
Validation accuracy: 80.7%


Minibatch loss at step 1500: 0.527620
Minibatch accuracy: 96.9%
Validation accuracy: 82.2%


Minibatch loss at step 1000: 1.110798
Minibatch accuracy: 95.3%
Validation accuracy: 83.2%


Minibatch loss at step 500: 98.461121
Minibatch accuracy: 99.2%
Validation accuracy: 82.6%


Validation accuracy: 35.0%


Initialized
Minibatch loss at step 0: 14592.014648
Minibatch accuracy: 12.5%


---
Problem 4
---------

Try to get the best performance you can using a multi-layer model! The best reported test accuracy using a deep network is [97.1%](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html?showComment=1391023266211#c8758720086795711595).

One avenue you can explore is to add multiple layers.

Another one is to use learning rate decay:

    global_step = tf.Variable(0)  # count the number of steps taken.
    learning_rate = tf.train.exponential_decay(0.5, global_step, ...)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
 
 ---


In [97]:
import numpy as np
pickle_file = 'eql_lsts.pickle'
eql_lsts = np.load(pickle_file)
apx_eql_lst = eql_lsts["apx_lst"]

In [98]:
import itertools
bad_train_ix = list(map(lambda x: x[0], apx_eql_lst))
good_train_ix = list(filter(lambda i: i not in bad_train_ix, 
                            range(train_dataset.shape[0])))

In [99]:
train_fraction = 1
num_train = round(train_fraction*len(good_train_ix))
actual_train_ix = good_train_ix[:num_train]

In [100]:
import copy
import random
# ix list for actual SGD
def random_permutation(iterable):
  return np.random.permutation(len(iterable))
  # # fisher/yates:
  # "l - random selection from permutations(iterable)"
  # l = copy.deepcopy(iterable)
  # n = len(l)
  # for i in range(n-1):
  #   j = random.randrange(n-i)
  #   t = l[i]
  #   l[i] = l[i+j]
  #   l[i+j] = l[i]
  # return l
rand_train_ix = random_permutation(actual_train_ix)

In [112]:
parameters = {
  'num_steps':6501,
  'batch_size':128,
  'keep_prob':.8,                  # 0<keep_prob<=1
  'learning_rate':[
    0.001,          # Base learning rate.
    128,           # Current index into the dataset (multiply by batch size).
    num_train,     # Decay steps.
    0.8           # Decay rate.
    ],
  'beta':.01,   # regularization parameter
  'num_hidden':[2^10,2^10],
  'layer_fcn':[tf.nn.relu,tf.nn.relu] ,
  'num_hidden_layers':2,
  'momentum':.9,
  'opt_fcn':tf.train.MomentumOptimizer # AdamOptimizer,MomentumOptimizer,GradientDescentOptimizer
}
assert parameters['num_hidden_layers'] == len(parameters['layer_fcn']) == len(parameters['num_hidden'])
graph = tf.Graph()
with graph.as_default():
  batch = tf.Variable(0)
  learning_rate = tf.train.exponential_decay(
    parameters['learning_rate'][0],
    parameters['learning_rate'][1]*batch,
    *parameters['learning_rate'][2:],
    staircase=True)
  # Input data. For the training data, we use a placeholder that will be fed
  # at run time with a training minibatch.
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(parameters['batch_size'], image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(parameters['batch_size'], num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables
  weights = [tf.Variable(
    tf.truncated_normal([image_size * image_size, parameters['num_hidden'][0]]))]
  biases = [tf.Variable(tf.zeros([parameters['num_hidden'][0]]))]
  hidden_dataset = tf.nn.dropout(
    parameters['layer_fcn'][0](tf.add(tf.matmul(tf_train_dataset, weights[0]), biases[0])), 
    parameters['keep_prob'])*(1/parameters['keep_prob'])

  for l in range(1,parameters['num_hidden_layers']):
    weights += [tf.Variable(tf.truncated_normal([parameters['num_hidden'][l-1], parameters['num_hidden'][l]]))]
    biases += [tf.Variable(tf.zeros([parameters['num_hidden'][l]]))]
    hidden_dataset = tf.nn.dropout(
      parameters['layer_fcn'][l](tf.add(tf.matmul(hidden_dataset, weights[l]), biases[l])),
      parameters['keep_prob'])*(1/parameters['keep_prob'])


  weights += [tf.Variable(tf.truncated_normal([parameters['num_hidden'][-1], num_labels]))]
  biases += [tf.Variable(tf.zeros([num_labels]))]
 
  # Training computation.
  logits = tf.matmul(hidden_dataset, weights[-1]) + biases[-1]
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
  for i in range(parameters['num_hidden_layers']+1):
    loss += parameters['beta']*(tf.nn.l2_loss(weights[i])+tf.nn.l2_loss(biases[i]))

  # Optimizer
  # optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  optimizer = parameters['opt_fcn'](
    learning_rate,
    parameters['momentum']).minimize(loss, global_step=batch)
  # optimizer = parameters['opt_fcn'](
  #   learning_rate=parameters['learning_rate'], 
  #   global_step=parameters['global_step']).minimize(loss)
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)

  valid_hidden = parameters['layer_fcn'][0](tf.matmul(tf_valid_dataset, weights[0]) + 
                              biases[0])
  test_hidden = parameters['layer_fcn'][0](tf.matmul(tf_test_dataset, weights[0]) + 
                             biases[0])
  for l in range(1,parameters['num_hidden_layers']):
    valid_hidden = parameters['layer_fcn'][l](tf.matmul(valid_hidden, weights[l]) +
                                biases[l])
    test_hidden = parameters['layer_fcn'][l](tf.matmul(test_hidden, weights[l]) + 
                               biases[l])

  valid_prediction = tf.nn.softmax(tf.matmul(valid_hidden, weights[-1]) + 
                                   biases[-1])
  test_prediction = tf.nn.softmax(tf.matmul(test_hidden, weights[-1]) + 
                                  biases[-1])

In [102]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [113]:
offset = 0
test_accuracy = 0
with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(parameters['num_steps']):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    last_offset = offset
    offset = (step * parameters['batch_size']) % (len(rand_train_ix) - parameters['batch_size'])
    if offset < last_offset:
      rand_train_ix = random_permutation(actual_train_ix)
    # Generate a minibatch.
    batch_data = train_dataset[rand_train_ix[offset:(offset + parameters['batch_size'])], :]
    batch_labels = train_labels[rand_train_ix[offset:(offset + parameters['batch_size'])], :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  test_accuracy = accuracy(test_prediction.eval(), test_labels)
  print("Test accuracy: %.1f%%" % test_accuracy)

Test accuracy: 25.1%


Minibatch loss at step 6500: 11.521459
Minibatch accuracy: 28.9%
Validation accuracy: 28.7%


Minibatch loss at step 6000: 12.083858
Minibatch accuracy: 21.9%
Validation accuracy: 27.6%


Minibatch loss at step 5500: 12.607504
Minibatch accuracy: 21.1%
Validation accuracy: 26.5%


Minibatch loss at step 5000: 13.112435
Minibatch accuracy: 28.1%
Validation accuracy: 25.1%


Minibatch loss at step 4500: 13.792360
Minibatch accuracy: 22.7%
Validation accuracy: 23.3%


Minibatch loss at step 4000: 14.609591
Minibatch accuracy: 18.8%
Validation accuracy: 22.4%


Minibatch loss at step 3500: 15.401945
Minibatch accuracy: 18.8%
Validation accuracy: 21.5%


Minibatch loss at step 3000: 16.365429
Minibatch accuracy: 21.1%
Validation accuracy: 20.2%


Minibatch loss at step 2500: 17.581362
Minibatch accuracy: 17.2%
Validation accuracy: 19.9%


Minibatch loss at step 2000: 18.865662
Minibatch accuracy: 14.8%
Validation accuracy: 18.9%


Minibatch loss at step 1500: 20.277245
Minibatch accuracy: 19.5%
Validation accuracy: 18.5%


Minibatch loss at step 1000: 22.221041
Minibatch accuracy: 25.0%
Validation accuracy: 17.8%


Minibatch loss at step 500: 24.418331
Minibatch accuracy: 11.7%
Validation accuracy: 17.3%


Initialized
Minibatch loss at step 0: 65.115540
Minibatch accuracy: 11.7%
Validation accuracy: 8.8%


In [114]:
with open("results.txt", "a") as myfile:
  myfile.write(str(parameters))
  myfile.write("\n"+str(test_accuracy))