In [1]:
import tensorflow as tf
import numpy as np
from __future__ import division

# TensorFlow basics

In [2]:
node1 = tf.constant(3.0, tf.float32)
node2 = tf.constant(4.0) # also tf.float32 implicitly
print node1, node2

Tensor("Const:0", shape=(), dtype=float32) Tensor("Const_1:0", shape=(), dtype=float32)


In [3]:
# To actually evaluate the nodes, we must run the computational graph within a session.
# A session encapsulates the control and state of the TensorFlow runtime. (Scope?)
sess = tf.Session()
print 'Ran:', sess.run([node1, node2])# run enough of the computational graph to evaluate node1 and node2

Ran: [3.0, 4.0]


In [4]:
# We can build more complicated computations by combining Tensor nodes with operations.
# Operations are also nodes.
node3 = tf.add(node1, node2)
print "node3:", node3
print "sess.run(node3):", sess.run(node3)

node3: Tensor("Add:0", shape=(), dtype=float32)
sess.run(node3): 7.0


In [5]:
# A graph can be parameterized to accept external inputs, known as placeholders.
# A placeholder is a promise to provide a value later.
a = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)

# Equivalent Operations:
# adder_node = tf.add(a,b)
adder_node = tf.reduce_sum([a,b], axis=0) # Add corresponding rows (axis = 0) of Tensor(a | b). (axis = 1 adds cols)
# adder_node = a + b

# We can evaluate this graph with multiple inputs by using the feed_dict parameter
# to specify Tensors that provide concrete values to these placeholders.
print 'add0:', sess.run(fetches=adder_node, feed_dict={a: 3, b: 4.5})
print 'add1:', sess.run(fetches=adder_node, feed_dict={a: [1,3], b: [2, 4]})

# More complex:
add_and_triple = adder_node * 3.
print  'add2:', sess.run(fetches=add_and_triple, feed_dict={a: 3, b:4.5})

add0: 7.5
add1: [ 3.  7.]
add2: 22.5


# Model setup

In [10]:
# In ML, we want to update the model to produce a different output on the same input.
# Variables allow us to add trainable parameters to a graph (weights, biases).
# They are constructed with a type and initial value:

W = tf.Variable([.3], tf.float32) # weight (trainable)
b = tf.Variable([-.3], tf.float32) # bias (trainable)
x = tf.placeholder(tf.float32) # input
linear_model = W * x + b

# Constants are initialized when you call tf.constant, and their value can never change.
# By contrast, variables are not initialized when you call tf.Variable.
# To initialize all the variables in a TensorFlow program, you must explicitly call
# a special operation as follows:

init = tf.global_variables_initializer()
# Sets global -->Variables<-- (W, b) to defaults:
print 'init:', sess.run(init) # init.op_def == "NoOp" <-- init not a model (no computation), just global variable initializer.

# It is important to realize init is a handle to the TensorFlow sub-graph that
# initializes all the global variables. Until we call sess.run, the variables are uninitialized.

# Since x is a placeholder, we can evaluate linear_model for several values of x simultaneously as follows:
print 'Ran:', sess.run(fetches=linear_model, feed_dict={x: [1,2,3,4]})

init: None
Ran: [ 0.          0.30000001  0.60000002  0.90000004]


In [48]:
W = tf.Variable([.3], tf.float32) # weight (trainable)
b = tf.Variable([-.3], tf.float32) # bias (trainable)
x = tf.placeholder(tf.float32) # input
linear_model = W * x + b

init = tf.global_variables_initializer()
print 'init:', sess.run(init)

print 'Ran:', sess.run(fetches=linear_model, feed_dict={x: [1,2,3,4]})

init: None
Ran: [ 0.          0.30000001  0.60000002  0.90000004]


In [7]:
# To evaluate the model on training data, we need a y placeholder to provide the desired values,
# and we need to write a loss function.


# A loss function measures how far apart the current model is from the provided data.
# We'll use a standard loss model for linear regression, which sums the squares of errors.
# (error = linear_model(x) - y). We call tf.square to square that error.
# Then, we sum all the squared errors to create a single scalar that abstracts the error of all
# examples using tf.reduce_sum:

# 1. Build computational graph
y = tf.placeholder(tf.float32)
squared_deltas = tf.square(linear_model - y)
loss = tf.reduce_sum(squared_deltas)

# 2. Run the graph, providing values for all placeholders.
print 'Loss:', sess.run(loss, {x: [1,2,3,4], y: [0,-1,-2,-3]})

Loss: 23.66


In [8]:
# We could improve this manually by reassigning the values of
# W and b to the perfect values of -1 and 1.

# NOTE: fixW, fixb are -->Operations<-- themselves, nodes!!! We must first run these nodes to reassign values to W, b.
fixW = tf.assign(W, [-1.])
fixb = tf.assign(b, [1.])
print 'Unchanged:', sess.run([W, b]) # See? The values haven't changed.

# Run fix_ nodes to reassign values to W, b:
sess.run([fixW, fixb])
print 'Changed:', sess.run([W, b]) # Now after running these nodes, the values have been changed.

Unchanged: [array([ 0.30000001], dtype=float32), array([-0.30000001], dtype=float32)]
Changed: [array([-1.], dtype=float32), array([ 1.], dtype=float32)]


In [9]:
# Finally, the Variables have good enough weights:
print 'Loss:', sess.run(loss, {x:[1,2,3,4], y:[0,-1,-2,-3]})

Loss: 0.0


# Model Training

In [194]:
# TensorFlow provides optimizers that slowly change each variable in order to minimize the loss function.

# The simplest optimizer is gradient descent. It modifies each variable according to the magnitude of the
# derivative of loss with respect to that variable. TensorFlow can automatically produce derivatives given only a
# description of the model using the function tf.gradients.

# For simplicity, optimizers typically do this for you. For example:

# Create nodes in computational graph that enable training.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) # (defines how we'll optimize over loss)
train = optimizer.minimize(loss) # doesn't do anything yet, just sets up the nodes. Will need to provide inputs.
                                 # (when called, this will automatically increment global_step) (defines training goal)

# Start the training:
sess.run(init) # reset values to incorrect -->defaults<--!!!.
for i in range(1000):
    sess.run(train, {x: [1,2,3,4], y: [0,-1,-2,-3]}) # one step of computation (on all? inputs)

print sess.run([W, b])

[array([-0.9999969], dtype=float32), array([ 0.99999082], dtype=float32)]


# Complete Code

In [195]:
# Model parameters
# (dtype is passed by keyword: the second positional parameter of tf.Variable is `trainable`)
W = tf.Variable([.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)

# Model input and output
x = tf.placeholder(tf.float32)
linear_model = W * x + b
y = tf.placeholder(tf.float32)

# Loss
loss = tf.reduce_sum(tf.square(linear_model - y)) # sum of the squares

# Optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
# No global_step is passed to minimize(), so running `train` only updates W and b.
train = optimizer.minimize(loss)

# Training data
x_train = [1,2,3,4]
y_train = [0,-1,-2,-3]
train_feed = {x: x_train, y: y_train} # same feed for every step, built once

# Training loop
init = tf.global_variables_initializer()
# NOTE(review): this rebinds `sess`; a session opened by an earlier cell is not closed here.
sess = tf.Session()
sess.run(init) # set W and b to their (deliberately wrong) initial values
for i in range(1000):
    sess.run(train, train_feed)

    # Report the current parameters and the training loss every 100 steps
    if i % 100 == 0:
        curr_W, curr_b, curr_loss  = sess.run([W, b, loss], train_feed)
        print("(Iteration: %s) W: %s b: %s loss: %s" % (i, curr_W, curr_b, curr_loss))

(Iteration: 0) W: [-0.21999997] b: [-0.456] loss: 4.01814
(Iteration: 100) W: [-0.84270465] b: [ 0.53753263] loss: 0.14288
(Iteration: 200) W: [-0.95284992] b: [ 0.86137295] loss: 0.0128382
(Iteration: 300) W: [-0.98586655] b: [ 0.95844597] loss: 0.00115355
(Iteration: 400) W: [-0.99576342] b: [ 0.98754394] loss: 0.000103651
(Iteration: 500) W: [-0.99873012] b: [ 0.99626648] loss: 9.3124e-06
(Iteration: 600) W: [-0.99961936] b: [ 0.99888098] loss: 8.36456e-07
(Iteration: 700) W: [-0.99988592] b: [ 0.9996646] loss: 7.51492e-08
(Iteration: 800) W: [-0.99996579] b: [ 0.99989945] loss: 6.75391e-09
(Iteration: 900) W: [-0.99998969] b: [ 0.99996972] loss: 6.12733e-10


# Using tf.contrib.learn (Estimator)

In [168]:
# tf.contrib.learn is a high-level TensorFlow library that simplifies the mechanics of machine learning,
# including the following:
# * running training loops
# * running evaluation loops
# * managing data sets
# * managing feeding
# * providing predefined implementations of many common models

In [196]:
# Notice how much simpler the linear regression program becomes with tf.contrib.learn:

# Declare list of features. We only have one real-valued feature. There are many
# other types of columns that are more complicated and useful.
features = [tf.contrib.layers.real_valued_column(column_name="x", dimension=1)]

# An estimator is the front end to invoke training (fitting) and evaluation
# (inference). There are many predefined types like linear regression,
# logistic regression, linear classification, logistic classification, and
# many neural network classifiers and regressors. The following code
# provides an estimator that does linear regression.
estimator = tf.contrib.learn.LinearRegressor(feature_columns=features) # automatically handles W, b variables unlike^^

# TensorFlow provides many helper methods to read and set up data sets.
# Here we use `numpy_input_fn`. We have to tell the function how many batches
# of data (num_epochs) we want and how big each batch should be.
x = np.array([1., 2., 3., 4.])
y = np.array([0., -1., -2., -3.])
input_fn = tf.contrib.learn.io.numpy_input_fn(x={"x": x},
                                              y=y,
                                              batch_size=4,
                                              num_epochs=1000)

# We can invoke 1000 training steps by invoking the `fit` method and passing the
# training data set.
print '\n\nFitting estimator:\n\n'
estimator.fit(input_fn=input_fn, steps=1000)

# Here we evaluate how well our model did. In a real example, we would want
# to use a separate validation and testing data set to avoid overfitting.
print '\n\nEvaluating estimator:\n\n'
done = estimator.evaluate(input_fn=input_fn)
print '\n\nDone:\n\n', done

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': None, '_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_task_type': None, '_environment': 'local', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11ac58a90>, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_evaluation_master': '', '_keep_checkpoint_every_n_hours': 10000, '_master': ''}


Fitting estimator:


Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Create CheckpointSaverHook.

# Custom tf.contrib.learn Model (model_fn)

In [197]:
# tf.contrib.learn does not lock you into its predefined models. A custom model that is
# not built into TensorFlow can still use the high-level abstractions of tf.contrib.learn
# (data set, feeding, training, etc.).

# For illustration, we implement our own equivalent of LinearRegressor using our
# knowledge of the lower-level TensorFlow API.

# A custom model that works with tf.contrib.learn is built on tf.contrib.learn.Estimator.
# tf.contrib.learn.LinearRegressor is actually a sub-class of tf.contrib.learn.Estimator.
# Instead of sub-classing Estimator ourselves, we simply give Estimator a function,
# model_fn, that tells tf.contrib.learn how to evaluate predictions, training steps,
# and loss (mimicking the behavior of a LinearRegressor).

# The code is as follows:

# Declare the list of features; we only have one real-valued feature.
features = [tf.contrib.layers.real_valued_column(column_name="x", dimension=1)]

def model(features, labels, mode):
    '''model_fn for tf.contrib.learn.Estimator: linear regression y = W * x + b.

    cf. Closure `feval(...)` in optim.sgd input for Torch.

    Args:
        features: mapping indexed with the key 'x' to obtain the model input.
        labels: target values the predictions are compared against in the loss.
        mode: forwarded unchanged to ModelFnOps.

    Returns:
        A tf.contrib.learn.ModelFnOps bundling the predictions, the loss and
        the training op.
    '''
    # Build a linear model and predict values
    W = tf.get_variable("W", [1], dtype=tf.float64) # Gets an existing variable w/ these params or creates a new one.
    b = tf.get_variable("b", [1], dtype=tf.float64)
    y = W * features['x'] + b # predict

    # Loss sub-graph: sum of squared errors
    loss = tf.reduce_sum(tf.square(y - labels))

    # Training sub-graph
    global_step = tf.train.get_global_step() # global_step value
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    # minimize() is called without a global_step argument, so the step counter is
    # incremented manually: tf.assign_add updates `ref` by adding `value` to it.
    # tf.group creates a single op that groups both operations; when it finishes,
    # all grouped ops have finished. The grouped op has no output.
    train = tf.group(optimizer.minimize(loss),
                     tf.assign_add(ref=global_step, value=1))

    # ModelFnOps connects the subgraphs that we built to the
    # appropriate functionality. `predictions` is supplied so the same
    # model_fn can also be used for inference, not only for fit/evaluate.
    return tf.contrib.learn.ModelFnOps(
      mode=mode,
      predictions=y,
      loss=loss,
      train_op=train)

estimator = tf.contrib.learn.Estimator(model_fn=model)

# define our data set
x = np.array([1., 2., 3., 4.])
y = np.array([0., -1., -2., -3.])
input_fn = tf.contrib.learn.io.numpy_input_fn({"x": x}, y, 4, num_epochs=1000)

# train
print '\n\nFitting (train) estimator:\n\n'
estimator.fit(input_fn=input_fn, steps=1000)

# evaluate our model
print '\n\nEvaluating (predict) estimator:\n\n'
done = estimator.evaluate(input_fn=input_fn, steps=10)
print '\n\nDone:\n\n', done

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': None, '_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_task_type': None, '_environment': 'local', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11b217650>, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_evaluation_master': '', '_keep_checkpoint_every_n_hours': 10000, '_master': ''}


Fitting (train) estimator:


INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmplj2BkB/model.ckpt.
INFO:tensorflow:loss = 28.7540333712, step = 1
INFO:tensorflow:global_step/sec: 990.019
INFO:tensorflow:loss = 0.262683448884, step = 101 (0.102 sec)
INFO:tensorflow:global_step/sec: 919.43
INFO:tensorflow:loss = 0.0208926150453, step = 201 (0.108