In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

# Data loading

In [6]:
# Relative path of the pickle that holds the train / validation / test splits.
pickle_file = 'data/notMNIST.pickle'

# NOTE: pickle.load can execute arbitrary code -- only open pickle files that
# come from a source you trust.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# Pull each split out of the loaded dictionary.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # drop the dictionary reference as a hint to the garbage collector

# Sanity check: report the shape of every split.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

('Training set', (10000, 28, 28), (10000,))
('Validation set', (5000, 28, 28), (5000,))
('Test set', (5000, 28, 28), (5000,))


# Data reformatting
Reformat the data into a shape suited to the logistic regression and multilayer perceptron models:
- data as a flat matrix: each image is represented by a row of 28*28 variables.
- labels as one-hot encodings: map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...] and so on

In [7]:
image_size = 28  # side length used to flatten each image into image_size**2 values
num_labels = 10  # number of classes, i.e. the width of a one-hot label row


def reformat(dataset, labels):
    """Flatten the images and one-hot encode the labels.

    Args:
        dataset: array that can be reshaped to rows of
            ``image_size * image_size`` values.
        labels: 1-D array of integer class ids.

    Returns:
        A ``(flat_images, one_hot)`` pair of float32 arrays: ``flat_images``
        has one row per image and ``one_hot`` has one row of ``num_labels``
        entries per label, with a 1.0 in the column of the label's class id.
    """
    pixels_per_image = image_size * image_size
    flat_images = dataset.reshape((-1, pixels_per_image)).astype(np.float32)

    # Broadcasting the label column against the row of class ids yields a
    # boolean matrix that is True exactly where the class id equals the label:
    # 0 -> [1.0, 0.0, 0.0 ...], 1 -> [0.0, 1.0, 0.0 ...]
    class_ids = np.arange(num_labels)
    one_hot = np.equal(class_ids, labels[:, None]).astype(np.float32)
    return flat_images, one_hot
# Replace every split with its flattened-image / one-hot-label version.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

# Report the new shapes of all three splits.
for _split_name, _split_data, _split_labels in (
        ('Training set', train_dataset, train_labels),
        ('Validation set', valid_dataset, valid_labels),
        ('Test set', test_dataset, test_labels)):
    print(_split_name, _split_data.shape, _split_labels.shape)

('Training set', (10000, 784), (10000, 10))
('Validation set', (5000, 784), (5000, 10))
('Test set', (5000, 784), (5000, 10))


# Softmax classifier
Softmax is a simple classifier to assign probabilities to an object being one of several different things.
A softmax classifier has two steps:
1. We perform a linear transformation on the image vector $x$ of flattened pixels:
$$ x_i \rightarrow W*x_i + b$$
<img src="images/linear_model.png" height="300">
2. We convert that linear score into probabilities by applying the softmax function:
$$f(x)_i = \frac{\exp(x_i)}{\sum_{k=1}^N\exp(x_k)}$$
For training, we optimize the loss function defined by the cross-entropy.
$$\text{loss} = -\sum_{i=1}^N y_i\log(\hat{y}_i)$$
where $y$ is the true distribution (one-hot encoding labels) and $\hat{y}$ is the predicted probability distribution.
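Cross-entropy is a measure of the mismatch between the true and the predicted distributions: it is small when the model assigns a high probability to the correct class.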

# TensorFlow computational graph
TensorFlow works like this:
* First you describe the computation that you want to see performed: what the inputs, the variables, and the operations look like. These get created as nodes over a computation graph. This description is all contained within the block below:

      with graph.as_default():
          ...

* Then you can run the operations on this graph as many times as you want by calling `session.run()`, providing it outputs to fetch from the graph that get returned. This runtime operation is all contained in the block below:

      with tf.Session(graph=graph) as session:
          ...


In [8]:
graph = tf.Graph()

with graph.as_default():
    # Input data.
    # Load the training, validation and test data into constants that are
    # attached to the graph. Because the whole training set is a constant,
    # every optimization step below is computed on the full training set.
    tf_train_dataset = tf.constant(train_dataset)
    tf_train_labels = tf.constant(train_labels)
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables.
    # These are the parameters that we are going to be training. The weight
    # matrix will be initialized using random values following a (truncated)
    # normal distribution. The biases get initialized to zero.
    weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    # We multiply the inputs with the weight matrix, and add biases. We compute
    # the softmax and cross-entropy (it's one operation in TensorFlow, because
    # it's very common, and it can be optimized). We take the average of this
    # cross-entropy across all training examples: that's our loss.
    logits = tf.matmul(tf_train_dataset, weights) + biases
    # Pass logits/labels by keyword: TensorFlow 1.0 changed the positional
    # order of these two parameters and rejects positional calls, whereas the
    # keyword form is accepted by both the older and the newer releases.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf_train_labels))

    # Optimizer.
    # We are going to find the minimum of this loss using gradient descent
    # with a learning rate of 0.5.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the training, validation, and test data.
    # These are not part of training, but merely here so that we can report
    # accuracy figures as we train.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [12]:
num_steps = 801  # number of iterations of the training loop below


def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class matches the label.

    Both arguments are 2-D arrays with one row per example; the class of a row
    is the index of its largest entry (argmax along axis 1).
    """
    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)
    num_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * num_correct / predictions.shape[0]

with tf.Session(graph=graph) as session:
    # One-time initialization: gives the variables the starting values that
    # were described in the graph (random weights, zero biases).
    session.run(tf.initialize_all_variables())
    print('Initialized')
    for step in range(num_steps):
        # Apply one optimizer update and fetch the loss value and the training
        # predictions, which come back as numpy arrays.
        _, l, predictions = session.run([optimizer, loss, train_prediction])

        # Only report progress every 100th step.
        if step % 100 != 0:
            continue
        print('Loss at step %d: %f' % (step, l))
        train_accuracy = accuracy(predictions, train_labels)
        print('Training accuracy: %.1f%%' % train_accuracy)
        # Fetching valid_prediction on its own recomputes all of its graph
        # dependencies with the current weights and biases.
        valid_accuracy = accuracy(session.run(valid_prediction), valid_labels)
        print('Validation accuracy: %.1f%%' % valid_accuracy)

    # Final evaluation on the test set, once training has finished.
    test_accuracy = accuracy(session.run(test_prediction), test_labels)
    print('Test accuracy: %.1f%%' % test_accuracy)

Initialized
Loss at step 0: 16.179237
Training accuracy: 13.3%
Validation accuracy: 15.9%
Loss at step 100: 2.261353
Training accuracy: 72.6%
Validation accuracy: 70.9%
Loss at step 200: 1.838964
Training accuracy: 75.2%
Validation accuracy: 73.4%
Loss at step 300: 1.600083
Training accuracy: 76.6%
Validation accuracy: 74.5%
Loss at step 400: 1.435220
Training accuracy: 77.4%
Validation accuracy: 74.7%
Loss at step 500: 1.311547
Training accuracy: 78.1%
Validation accuracy: 75.0%
Loss at step 600: 1.213903
Training accuracy: 78.8%
Validation accuracy: 75.1%
Loss at step 700: 1.134147
Training accuracy: 79.2%
Validation accuracy: 75.1%
Loss at step 800: 1.067530
Training accuracy: 79.5%
Validation accuracy: 75.4%
Test accuracy: 83.2%


# Optional: TensorBoard
TensorBoard is a visualization tool for TensorFlow's learning tasks.
You can use TensorBoard to visualize your TensorFlow graph, plot quantitative metrics about the execution of your graph, and show additional data like images that pass through it.

For further information, you could see https://www.tensorflow.org/versions/r0.7/how_tos/summaries_and_tensorboard/index.html

In [23]:
graph = tf.Graph()

with graph.as_default():
    # Input data.
    # Load the training, validation and test data into constants that are
    # attached to the graph.
    tf_train_dataset = tf.constant(train_dataset)
    tf_train_labels = tf.constant(train_labels)
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # Image summary: the flat training rows are reshaped back into
    # single-channel image_size x image_size images so they can be displayed.
    tf.image_summary("img", tf.reshape(tf_train_dataset, [len(train_dataset), image_size, image_size, 1]))

    # Variables.
    # These are the parameters that we are going to be training. The weight
    # matrix will be initialized using random values following a (truncated)
    # normal distribution. The biases get initialized to zero.
    weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    # We multiply the inputs with the weight matrix, and add biases. We compute
    # the softmax and cross-entropy (it's one operation in TensorFlow, because
    # it's very common, and it can be optimized). We take the average of this
    # cross-entropy across all training examples: that's our loss.

    # Use a name scope to organize nodes in the graph visualizer
    with tf.name_scope("softmax"):
        logits = tf.matmul(tf_train_dataset, weights) + biases
        train_prediction = tf.nn.softmax(logits)

    # Add summary ops to collect data
    tf.histogram_summary('weights', weights)
    tf.histogram_summary('biases', biases)
    tf.histogram_summary('y', train_prediction)

    with tf.name_scope("loss"):
        # Pass logits/labels by keyword: TensorFlow 1.0 changed the positional
        # order of these two parameters and rejects positional calls, whereas
        # the keyword form is accepted by both the older and newer releases.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=tf_train_labels))

    tf.scalar_summary('cross entropy', loss)

    # Optimizer.
    # We are going to find the minimum of this loss using gradient descent.
    with tf.name_scope("training"):
        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the validation and test data.
    # These are not part of training, but merely here so that we can report
    # accuracy figures as we train.
    with tf.name_scope("validation"):
        valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
        test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

    # Single op that evaluates every summary registered above in one fetch.
    merged = tf.merge_all_summaries()

In [37]:
num_steps = 801  # number of iterations of the training loop below


def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class matches the label.

    Both arguments are 2-D arrays with one row per example; the class of a row
    is the index of its largest entry (argmax along axis 1).
    """
    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)
    num_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * num_correct / predictions.shape[0]

with tf.Session(graph=graph) as session:
    # The writer stores the graph definition and the per-step summaries under
    # "softmax_logs", the directory that TensorBoard is pointed at.
    writer = tf.train.SummaryWriter("softmax_logs", session.graph.as_graph_def(add_shapes=True))
    try:
        # This is a one-time operation which ensures the parameters get
        # initialized as we described in the graph: random weights for the
        # matrix, zeros for the biases.
        tf.initialize_all_variables().run()
        print('Initialized')
        for step in range(num_steps):
            # Run the computations. We tell .run() that we want to run the
            # optimizer, and get the loss value, the training predictions and
            # the serialized summaries returned.
            _, l, predictions, summary_str = session.run([optimizer, loss, train_prediction, merged])
            writer.add_summary(summary_str, step)
            if (step % 100 == 0):
                print('Loss at step %d: %f' % (step, l))
                print('Training accuracy: %.1f%%' % accuracy(predictions, train_labels))
                # Calling .eval() on valid_prediction is basically like calling
                # run(), but just to get that one numpy array. Note that it
                # recomputes all its graph dependencies.
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
        # Keep the trained weights as a numpy array -- presumably for
        # inspection after the session has closed.
        w = weights.eval()
    finally:
        # Flush pending events and close the event file, even if training
        # fails part-way; otherwise the last summaries may never reach disk.
        writer.close()

Initialized
Loss at step 0: 18.749634
Training accuracy: 6.8%
Validation accuracy: 8.9%
Loss at step 100: 2.248372
Training accuracy: 71.5%
Validation accuracy: 70.1%
Loss at step 200: 1.806984
Training accuracy: 74.8%
Validation accuracy: 73.5%
Loss at step 300: 1.572495
Training accuracy: 76.6%
Validation accuracy: 74.6%
Loss at step 400: 1.414999
Training accuracy: 77.3%
Validation accuracy: 75.2%
Loss at step 500: 1.299050
Training accuracy: 78.1%
Validation accuracy: 75.6%
Loss at step 600: 1.208796
Training accuracy: 78.7%
Validation accuracy: 75.9%
Loss at step 700: 1.135731
Training accuracy: 79.3%
Validation accuracy: 76.1%
Loss at step 800: 1.074747
Training accuracy: 79.6%
Validation accuracy: 76.2%
Test accuracy: 82.8%


To get access to TensorBoard, we type `tensorboard --logdir=softmax_logs` and go to http://0.0.0.0:6006/ in the browser. For our softmax model, we obtain the following figures:
<img src="images/softmax_graph.png" alt="softmax computational graph" height="400">
<img src="images/softmax_loss.png" alt="loss" height="300">