In [None]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
import pickle

In [None]:
pickle_file = 'notMNIST.pickle'

# Deserialize the saved dataset bundle. Unpickling can execute arbitrary code,
# so this must only ever be pointed at a file from a trusted source.
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

# Bind every split stored in the bundle to its own notebook-level name.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # drop the container reference so the gc can reclaim it

# Report the shapes of what was loaded as a quick sanity check.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

In [None]:
image_size = 28
num_labels = 10

def reformat(dataset, labels):
  """Flatten the samples and one-hot encode the labels, both as float32.

  Args:
    dataset: array that can be reshaped to rows of image_size * image_size
      values.
    labels: 1-D array of integer class ids; ids in [0, num_labels) map to
      the matching one-hot column.

  Returns:
    A (dataset, labels) tuple: the samples as a 2-D float32 matrix with
    image_size * image_size columns, and the labels as a float32 one-hot
    matrix with num_labels columns.
  """
  flat_samples = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
  # Comparing a column of class ids against the row [0 .. num_labels) broadcasts
  # to a boolean matrix with exactly one True per in-range label,
  # e.g. 1 -> [0.0, 1.0, 0.0 ...], 2 -> [0.0, 0.0, 1.0 ...].
  class_ids = np.arange(num_labels)
  one_hot = (labels[:, None] == class_ids).astype(np.float32)
  return flat_samples, one_hot
# Flatten each split and one-hot encode its labels, rebinding the same names.
# This cell is not idempotent: it expects the freshly loaded (un-reformatted)
# arrays, so re-run the loading cell before running it a second time.
train_dataset, train_labels = reformat(dataset=train_dataset, labels=train_labels)
valid_dataset, valid_labels = reformat(dataset=valid_dataset, labels=valid_labels)
test_dataset, test_labels = reformat(dataset=test_dataset, labels=test_labels)

# Show the post-reformat shapes of every split.
print('Training set', *(arr.shape for arr in (train_dataset, train_labels)))
print('Validation set', *(arr.shape for arr in (valid_dataset, valid_labels)))
print('Test set', *(arr.shape for arr in (test_dataset, test_labels)))

In [None]:
def accuracy(predictions, labels):
  """Return the percentage of rows whose arg-max class matches the label's.

  Args:
    predictions: 2-D array of per-class scores, one row per sample.
    labels: 2-D array with the same row count; the arg-max of each row is
      taken as the true class (e.g. one-hot rows).

  Returns:
    Percentage (0-100) of rows where the predicted and true classes agree.
  """
  predicted_classes = np.argmax(predictions, 1)
  true_classes = np.argmax(labels, 1)
  num_correct = np.sum(predicted_classes == true_classes)
  return 100.0 * num_correct / predictions.shape[0]

In [None]:
# Constructing/tuning a better model to increase the accuracy
"""Techniques used in this model
1. Stochastic (minibatch) gradient descent instead of plain gradient descent for loss optimization
2. ReLU as the activation function for the hidden layers (others such as tanh might be tried later)
3. L2 for regularization
4. Dropout with regularization
5. Multiple hidden layers"""

In [None]:
# Local tunables for the two-hidden-layer network built below.
batch_size = 256
hidden_units1 = 1024
hidden_units2 = 1024

# Weight of the L2 penalty summed over the three weight matrices.
beta = 0.01

# Build a dedicated graph: input -> ReLU(1024) -> dropout -> ReLU(1024)
# -> dropout -> logits, trained with L2-regularized softmax cross-entropy.
tuned_graph = tf.Graph()
with tuned_graph.as_default():

  # Training minibatches are fed at run time (fixed batch_size rows); the
  # validation and test sets are embedded in the graph as constants.
  tf_train_dataset = tf.placeholder(tf.float32,shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Hidden layer 1.
  weights1 = tf.Variable(tf.truncated_normal([image_size * image_size, hidden_units1]))
  biases1 = tf.Variable(tf.zeros([hidden_units1]))

  hidden_output1 = tf.matmul(tf_train_dataset, weights1) + biases1
  relu_output1 = tf.nn.relu(hidden_output1)

  # Dropout keep probability is fed per run; it is shared by both dropout
  # layers and only appears on the training path.
  keep_prob = tf.placeholder("float")
  relu_output1_drop = tf.nn.dropout(relu_output1, keep_prob)

  # Hidden layer 2.
  weights2 = tf.Variable(tf.truncated_normal([hidden_units1,hidden_units2]))
  biases2 = tf.Variable(tf.zeros([hidden_units2]))

  hidden_output2 = tf.matmul(relu_output1_drop, weights2) + biases2
  relu_output2 = tf.nn.relu(hidden_output2)

  relu_output2_drop = tf.nn.dropout(relu_output2, keep_prob)

  # Output layer producing unnormalized class scores (logits).
  weights3 = tf.Variable(tf.truncated_normal([hidden_units2, num_labels]))
  biases3 = tf.Variable(tf.zeros([num_labels]))

  final_output = tf.matmul(relu_output2_drop, weights3) + biases3

  # Mean cross-entropy over the minibatch. The arguments are passed by
  # keyword because TensorFlow 1.x rejects positional arguments here (and
  # swapped their order); the keyword form is valid on older releases too.
  loss = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(logits=final_output, labels=tf_train_labels))
  # Add the L2 penalty on all weight matrices (biases are not regularized).
  # The sum is already a scalar, so no further reduction is needed.
  loss = loss + beta * ( tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(weights3))

  # Optimizer: plain gradient descent with a step-wise exponentially decayed
  # learning rate whose initial value is fed at run time.
  # NOTE(review): with staircase=True the rate only drops every 100000 steps;
  # the training cell in this notebook runs far fewer steps, so the rate
  # appears to stay constant in practice - confirm this is intended.
  global_step = tf.Variable(0)
  learning_rate_var = tf.placeholder("float")
  learning_rate = tf.train.exponential_decay(learning_rate_var, global_step, 100000, 0.95, staircase=True)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step= global_step)

  # Evaluation paths reuse the trained weights but bypass dropout, so they do
  # not depend on keep_prob.
  relu_output1_valid = tf.nn.relu(tf.matmul(tf_valid_dataset,weights1) + biases1)
  relu_output2_valid = tf.nn.relu(tf.matmul(relu_output1_valid,weights2) + biases2)

  relu_output1_test = tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1)
  relu_output2_test = tf.nn.relu(tf.matmul(relu_output1_test, weights2) + biases2)

  # Predictions for the training, validation, and test data.
  # train_prediction is computed from the dropout path, so it reflects
  # whatever keep_prob is fed for that run.
  train_prediction = tf.nn.softmax(final_output)
  valid_prediction = tf.nn.softmax(tf.matmul(relu_output2_valid, weights3) + biases3)
  test_prediction = tf.nn.softmax(tf.matmul(relu_output2_test, weights3) + biases3)

print("Graph with hidden layer created")

In [None]:
# Number of minibatch updates to run.
num_steps = 6001

# Dropout keep probability and initial learning rate fed on every step.
prob = 0.8
lr = 0.001

with tf.Session(graph=tuned_graph) as session:
  # tf.initialize_all_variables is deprecated; prefer its replacement when
  # the installed TensorFlow provides it and fall back otherwise.
  if hasattr(tf, 'global_variables_initializer'):
    tf.global_variables_initializer().run()
  else:
    tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):

    # Walk through the training set in batch_size strides, wrapping around so
    # that the slice below always contains exactly batch_size rows.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)

    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]

    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, keep_prob : prob, learning_rate_var : lr}
    # One optimization step; also fetch the loss and the minibatch predictions.
    _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)

    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      # These predictions come from the same run as the update, i.e. with
      # dropout active (keep_prob = prob).
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      # The validation/test paths use no placeholders, so no feed is needed.
      print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))