In [1]:
# Launched with docker run -d -p 8888:8888 -v `pwd`:/home/jovyan/work jupyter/tensorflow-notebook start-notebook.sh --NotebookApp.token=''
# Based on playing around with https://github.com/aymericdamien/TensorFlow-Examples
import tensorflow as tf

The following paragraphs are excerpts from the TensorFlow white paper, https://arxiv.org/pdf/1603.04467.pdf:

A TensorFlow computation is described by a directed
graph, which is composed of a set of nodes. The graph
represents a dataflow computation, with extensions for
allowing some kinds of nodes to maintain and update
persistent state and for branching and looping control
structures within the graph.

In a TensorFlow graph, each node has zero or more inputs
and zero or more outputs, and represents the instantiation
of an operation. Values that flow along normal
edges in the graph (from outputs to inputs) are tensors,
arbitrary dimensionality arrays where the underlying element
type is specified or inferred at graph-construction
time.

An operation has a name and represents an abstract computation
(e.g., “matrix multiply”, or “add”). An operation
can have attributes, and all attributes must be provided
or inferred at graph-construction time in order to
instantiate a node to perform the operation.

Client programs interact with the TensorFlow system by
creating a Session. To create a computation graph, the
Session interface supports an Extend method to augment
the current graph managed by the session with additional
nodes and edges (the initial graph when a session is created
is empty). The other primary operation supported
by the session interface is Run, which takes a set of output
names that need to be computed, as well as an optional
set of tensors to be fed into the graph in place of
certain outputs of nodes. Using the arguments to Run,
the TensorFlow implementation can compute the transitive
closure of all nodes that must be executed in order
to compute the outputs that were requested, and can then
arrange to execute the appropriate nodes in an order that
respects their dependencies.

In most computations a graph is executed multiple times.
Most tensors do not survive past a single execution of the
graph. However, a Variable is a special kind of operation
that returns a handle to a persistent mutable tensor
that survives across executions of a graph. Handles to
these persistent mutable tensors can be passed to a handful
of special operations, such as Assign and AssignAdd
(equivalent to +=) that mutate the referenced tensor. For
machine learning applications of TensorFlow, the parameters
of the model are typically stored in tensors held in
variables, and are updated as part of the Run of the training
graph for the model.

A tensor in our implementation is a typed, multidimensional
array. We support a variety of tensor element
types, including signed and unsigned integers ranging
in size from 8 bits to 64 bits, IEEE float and double
types, a complex number type, and a string type (an arbitrary
byte array).

In [2]:
# Create a Constant op.
# Calling the constructor adds the op as a node to the default graph, and
# the returned value (`hello`) represents the output of that Constant op.
hello = tf.constant('Hello, TensorFlow!')

# Evaluate `hello` inside a `with` block so the session is closed (and the
# resources it owns are released) as soon as the value has been fetched.
with tf.Session() as sess:
    greeting = sess.run(hello)  # Value obtained by computing the op `hello`

# Last expression of the cell, so the notebook displays the fetched value.
greeting

'Hello, TensorFlow!'

In [3]:
# Basic constant operations.
a = tf.constant(2)
b = tf.constant(3)
with tf.Session() as sess:
    # Every print call receives one pre-formatted string, so the cell emits
    # the same text whether it runs on Python 2 or Python 3.
    print("a: %i b: %i" % (sess.run(a), sess.run(b)))
    # `a + b` and `a * b` build new ops from the two constants; run() then
    # evaluates them.
    print("Addition with constants: %i" % sess.run(a + b))
    print("Multiplication with constants: %i" % sess.run(a * b))
    # Passing a list of fetches evaluates all of them in a single run() call
    # and returns their values as a list.
    print("Multiple computations in a single run: %s" % sess.run([a, b]))

a: 2 b: 3
Addition with constants: 5
Multiplication with constants: 6
Multiple computations in a single run: [2, 3]


In [4]:
# Basic operations with placeholders as graph inputs.
# A placeholder carries no value of its own: a concrete value has to be
# supplied through `feed_dict` whenever an op that depends on it is run.
a = tf.placeholder(tf.int16)
b = tf.placeholder(tf.int16)

# Define some operations on the placeholders.
add = tf.add(a, b)
mul = tf.multiply(a, b)

# Launch the default graph.
with tf.Session() as sess:
    # Run each op, feeding concrete values for both placeholders.
    # Single-argument print calls behave identically on Python 2 and 3.
    print("Addition with placeholders: %i" % sess.run(add, feed_dict={a: 2, b: 3}))
    print("Multiplication with placeholders: %i" % sess.run(mul, feed_dict={a: 4, b: 5}))

Addition with placeholders: 5
Multiplication with placeholders: 20


In [5]:
# Define a Variable, which is a tensor that can be updated.
x = tf.Variable(0)

# y is an op that updates x (the result of running it is the updated value).
y = tf.assign(x, 1)

# Variables must be initialized inside a session before they are used.
# `init` is the op that does so; it is built once here and run below, rather
# than building a second, identical initializer op inside the session.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Single-argument print calls behave identically on Python 2 and 3.
    print("Initial value of x: %i" % sess.run(x))
    print("Value of running the operation y: %i" % sess.run(y))
    # Running y assigned 1 to x, so reading x again yields the new value.
    print("Re-computed value of x: %i" % sess.run(x))

Initial value of x: 0
Value of running the operation y: 1
Re-computed value of x: 1


In [6]:
# Matrix multiplication, adapted from the official TensorFlow tutorial.

# Create a Constant op that produces a 1x2 matrix.  The op is
# added as a node to the default graph.
#
# The value returned by the constructor represents the output
# of the Constant op.
matrix1 = tf.constant([[3., 3.]])

# Create another Constant that produces a 2x1 matrix.
matrix2 = tf.constant([[2.],[2.]])

# Create two Matmul ops that take 'matrix1' and 'matrix2' as inputs.
# 'a_product' represents matrix1 x matrix2 (1x2 times 2x1 -> 1x1) and
# 'b_product' represents matrix2 x matrix1 (2x1 times 1x2 -> 2x2).
a_product = tf.matmul(matrix1, matrix2)
b_product = tf.matmul(matrix2, matrix1)


# To run the matmul ops we call the session 'run()' method, passing the list
# ['a_product', 'b_product'].  This indicates to the call that we want to get
# the outputs of both matmul ops back.
#
# All inputs needed by the ops are run automatically by the session.  They
# typically are run in parallel.
#
# The call thus causes the execution of four ops in the graph: the two
# constants and the two matmuls.
#
# The outputs are returned in 'result' as a list of numpy `ndarray` objects,
# in the same order as the fetches.
with tf.Session() as sess:
    # Modified to compute both products instead of just one.
    result = sess.run([a_product, b_product])

# The fetched values are plain arrays, so they remain usable after the
# session has been closed.  print("") emits the blank separator line on both
# Python 2 and Python 3 (a bare `print` does nothing on Python 3).
print(result[0])
print("")
print(result[1])

[[ 12.]]

[[ 6.  6.]
 [ 6.  6.]]


In [7]:
# Load the MNIST data set from the local "MNIST_data/" directory.
# `one_hot=True` requests labels encoded as one-hot vectors.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(train_dir="MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [8]:
# Hyper-parameters
learning_rate = 0.1
training_epochs = 25
batch_size = 100
display_step = 5

# Graph inputs; the leading `None` dimension lets any number of rows be fed.
x = tf.placeholder(tf.float32, shape=[None, 784])  # N images, each 28*28 = 784 pixels
y = tf.placeholder(tf.float32, shape=[None, 10])   # N label rows, one column per digit 0-9

# Trainable parameters, every entry starting at 0.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Model: a linear map followed by a row-wise softmax.
# `b` is broadcast (numpy-style) onto every row of xW, and the softmax turns
# each row into a finite probability distribution over the 10 classes.
logits = tf.matmul(x, W) + b
pred = tf.nn.softmax(logits)

# Loss: squared distance |y - pred|^2 between the actual "distribution" `y`
# and the computed distribution `pred`.  `reduce_sum(.., axis=1)` sums within
# each row and `reduce_mean` then averages over the rows.
squared_error = tf.square(y - pred)
cost = tf.reduce_mean(tf.reduce_sum(squared_error, axis=1))

# Alternative losses that can be swapped in for the line above:
#   cross entropy between `y` and `pred`
#cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), axis=1))
#   L1 distance
#cost = tf.reduce_mean(tf.reduce_sum(tf.abs(y - pred), axis=1))

# Gradient-descent training op.  `run`ning it computes `cost` and applies
# one update to the variables the cost depends on (W and b).
sgd = tf.train.GradientDescentOptimizer(learning_rate)
optimizer = sgd.minimize(cost)

# Evaluation: a row counts as correct when the most probable predicted class
# equals the class marked in `y`; accuracy is the fraction of correct rows.
predicted_class = tf.argmax(pred, 1)
actual_class = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_class, actual_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Op that initializes all variables; it has to be run before training starts.
init = tf.global_variables_initializer()

In [9]:
# Launch the graph: train the model, then report accuracy on the test set.
with tf.Session() as sess:
    sess.run(init)

    # Number of mini-batches in one pass over the training set.  Floor
    # division yields an int on both Python 2 and Python 3, and the value is
    # computed once because it does not change between epochs.
    num_batches = mnist.train.num_examples // batch_size

    # Training cycle -- we run through the entire training set
    # `training_epochs` times.
    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for _ in range(num_batches):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Apply one optimizer step on this batch and fetch the batch
            # cost from the same run() call.
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: batch_xs, y: batch_ys})

            # Running computation of the average loss for this epoch
            avg_cost += c / num_batches

        # Display a log line every `display_step` epochs.
        # Single-argument print calls behave identically on Python 2 and 3.
        if (epoch + 1) % display_step == 0:
            print("Epoch: {:04d} cost= {:.9f}".format(epoch + 1, avg_cost))

    print("Optimization Finished!")

    # Calculate accuracy on the test set
    test_accuracy = sess.run(accuracy, feed_dict={x: mnist.test.images,
                                                  y: mnist.test.labels})
    print("Accuracy: %s" % test_accuracy)

Epoch: 0005 cost= 0.146617283
Epoch: 0010 cost= 0.130840378
Epoch: 0015 cost= 0.123749520
Epoch: 0020 cost= 0.119298661
Epoch: 0025 cost= 0.116197848
Optimization Finished!
Accuracy: 0.9267
