In [5]:
# Load tensorflow package
import tensorflow as tf

a = tf.constant(5)
b = tf.constant(2)
c = tf.constant(3)


d = tf.multiply(a,b)
e = tf.add(c,b)
f = tf.subtract(d,e)

#creating  a Session and Running it.
#A session object is the part of the TF API that communicates between Python objects 
sess = tf.Session()
outs = sess.run(f)
sess.close()
print("outs = {}".format(outs))



outs = 5


# Constructing and Managing Graph

In [3]:
print(tf.get_default_graph())
g = tf.Graph()
p = tf.constant(5)
print(g)
print(a.graph is g) # To check for default graph
print(a.graph is tf.get_default_graph())

<tensorflow.python.framework.ops.Graph object at 0x7faedc4eee10>
<tensorflow.python.framework.ops.Graph object at 0x7faedc4ee8d0>
False
True


# The "with" statement

 The with statement is used to wrap the execution of a block with
methods defined by a context manager—an object that has the special
method functions .__enter__() to set up a block of code
and .__exit__() to exit the block. 
* Opening a session using the with clauser will ensure the session is automatically closed once all computations are done.

In [4]:
g1 = tf.get_default_graph()
g2 = tf.Graph()

print(g1 is tf.get_default_graph())

with g2.as_default():
    print(g1 is tf.get_default_graph())
    
print(g1 is tf.get_default_graph())


True
False
True


# Fetches

Recall, outs = sess.run(f)  we request one specific node (node f) by passing the
variable it was assigned to as an argument to the sess.run() method. This argument
is called fetches, corresponding to the elements of the graph we wish to compute.

In [5]:
with tf.Session() as sess:
    fetches = [a,b,c,d,e,f]
    outs =sess.run(fetches)
    
print("outs = {}".format(outs))
print(type(outs[0]))

outs = [5, 2, 3, 10, 5, 5]
<type 'numpy.int32'>


# Flowing Tensors

When we construct a node in the graph, like we did with tf.add(), we are actually
creating an operation instance. These operations do not produce actual values until
the graph is executed, but rather reference their to-be-computed result as a handle
that can be passed on—flow—to another node. These handles, which we can think of
as the edges in our graph, are referred to as Tensor objects, and this is where the
name TensorFlow originates from.

In [6]:
c = tf.constant(4.0)
print(c)

Tensor("Const_4:0", shape=(), dtype=float32)


# Data Types

The basic units of data that pass through a graph are numerical, Boolean, or string elements.
We can ecplicitly choose what data type we want to work with by specifying it when
 we create the Tensor object.

In [8]:
c = tf.constant(4.0, dtype=tf.float64)
print(c)
print(c.dtype)

Tensor("Const_5:0", shape=(), dtype=float64)
<dtype: 'float64'>


In [11]:
v = tf.constant("4.0", dtype=tf.string)
print(v)
print(v.dtype)

Tensor("Const_6:0", shape=(), dtype=string)
<dtype: 'string'>


# Casting

It is important to make sure the data types match throughout the graph - 
performing an operation with nonmatching data types will result in an exception.To change
the data type setting of a Tensor object, we can use the tf.cast() operation, passing the relevant 
Tensor and the new data type of interest as first and second arguments respectively.


In [17]:
x = tf.constant([1,2,3], name='x', dtype=tf.float32)
print(x.dtype)
x = tf.cast(x,tf.int64)
print(x.dtype)
x = tf.cast(x,tf.string)
print(x.dtype)

<dtype: 'float32'>
<dtype: 'int64'>
<dtype: 'string'>


# Tensor Arrays and Shapes

In [11]:
import tensorflow as tf
import numpy as np

c = tf.constant([[1,2,3],[4,5,6]])

print("Python List input: {}".format(c.get_shape()))

c = tf.constant(np.array
                ([
                    [[1,2,3], 
                    [4,5,6]], 
                    [[1,1,1], 
                    [2,2,2]]
                            ])) 
print("3d NumPy array input: {}".format(c.get_shape()))

Python List input: (2, 3)
3d NumPy array input: (2, 2, 3)


The get_shape() method returns the shape of the tensor as a tuple of integers. The number of integers corresponds to the 
number of dimensions of the tensor and each integer is the numter of array entries along that dimension.

In [27]:
sess = tf.InteractiveSession()
c = tf.linspace(0.0, 4.0, 5)
print("The content of 'c' : \n {} \n".format(c.eval()))
sess.close()

The content of 'c' : 
 [ 0.  1.  2.  3.  4.] 



The sequence generator tf.linspace(a,b,n) where n evenly spaced values from a to b.

tf.InteractiveSession() allows the replacement of tf.Session() without the need of assigning a variable to hold the session.

# Matrix Multiplication 

Say we have a Tensor storing a matrix A and another storing a vector x, and we wish to compute the matrix product of the
two:
    Ax = b
This can be achieved by tf.matmul(A,B)

# Names

Each Tensor object has an identifying name. This name is an intrinsic string name, not to be confused with the name of the
variable. As with dtype, we can use the .name atrribute to see the name of the object.

In [30]:
with tf.Graph().as_default():
    c1 = tf.constant(4, dtype=tf.float64, name = "c")
    c2 = tf.constant(4, dtype=tf.int32, name = "c")
    
print(c1.name)
print(c2.name)

c:0
c_1:0


Objects residing within the same graph cannot have the same name - TensorFlow forbids it. As a consequence, it will
automatically add an underscore/space and a number to distinguish the two. However, both objects have the same name 
when they are associated with different graphs.

# Name scopes 

In some cases when dealing with a large, complicated graph, we would like to create some node grouping to make it easier
to follow and manage. This is done by using tf.name_scope("prefix") together with the useful with clause again:
    

In [31]:
with tf.Graph().as_default():
    c1 = tf.constant(4, dtype=tf.float64, name = "c")
    with tf.name_scope("prefix_name"):
        c2 = tf.constant(4, dtype=tf.int32, name = "c")
        c3 = tf.constant(4, dtype=tf.float64, name = "c")
    
    print(c1.name)
    print(c2.name)
    print(c3.name)

c:0
prefix_name/c:0
prefix_name/c_1:0


In this example we have grouped objects contained in variable c2 and c3 under the scope prefix_name, which show up as 
a prefix in their names. This is useful for visualization of the graph structure.

# Variables, Placeholders, and Simple Optimization

TensorFlow uses special objects called Variables. Unlike other Tensor objects that are
"refilled" with data each time we run the session. Variable can maintain a fixed
state in the graph. Variables can be used as input for other operations in the graph.

Variable usages are of two stages.  First,as tf.Variable() function in order to create a Variable and define what it will be initialized with.
Second, as tf.global_variables_initializer() method for memory allocation for the Variable and sets its initial values.

In [4]:
init_val = tf.random_normal((1,5),0,1)
var = tf.Variable(init_val, name='var')
print("pre run: \n".format(var))

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    post_var = sess.run(var)
    
print("\npost run: \n{}".format(post_var))

pre run: 


post run: 
[[-0.82054317  0.61283463 -0.57423735 -0.84147424  0.30096558]]


# Placeholders

Placeholders as the name implies can be thought as empty Variables that will be filled 
with data later on. We use them by first constructing graph and only when it is 
executed feeding them with the input data.


Placeholder have optional shape argument. If a shape is not fed or passed as None,
then the placeholder can be fed with data of any size. It is common to use None, for
the dimension of a matrix that corresponds to the number of samples(usually rows),
while having the length of the features(usually columns) fixed:

In [5]:
#ph = tf.placeholder(tf.float32, shape=(None, 10))

Whenever we define a placeholder, input  values must be fed to it or else exception will be thrown.
The input data is passed to the session.run() method as a dictionary

In [7]:
#sess.run(s, feed_dict={x: X_data, w: w_data})

These input are matrix-multiplied to create a five-unit vector xw and added with a constant vector b 
filled with the value -1. Finally, the variable s takes the maximum value of that 
vector by using tf.reduce_max() operation. The word reduce is used because we are
reducing a five-unit vector to a single scalar:

In [28]:
x_data = np.random.randn(5,10)
w_data = np.random.randn(10,1)

with tf.Graph().as_default():
    x = tf.placeholder(tf.float32, shape=(5,10))
    w = tf.placeholder(tf.float32, shape=(10,1))
    b = tf.fill((5,1),-1.)
    xw = tf.matmul(x,w)
    xwb = xw + b
    s = tf.reduce_max(xwb)
    with tf.Session() as sess:
        outs = sess.run(s,feed_dict={x: x_data, w: w_data})
        
print("outs = {}".format(outs))


outs = -1.37360680103302


# Optimization

Optimization process of a simple regression model.

### Training to predict

Let's start by describing our regression model:

f(xi) = wTxi + b

yi = f(xi) + εi

f(xi)is assumed to be a linear combination of some input data xi, with a set of weight w 
and an intercept b. Our target output yi is a noisy version of f(xi) after being summed with 
Gaussian noise εi.



We need to create placeholders for our input and output data and Variable for our weights and intercept(bias):

In [30]:
x = tf.placeholder(tf.float32, shape=[None, 3])
y_true = tf.placeholder(tf.float32, shape=None)
w = tf.Variable([[0,0,0]], dtype=tf.float32,name="weights")
b = tf.Variable(0,dtype=tf.float32,name="bias")

Our predicted output y_pred is the result of matrix multiplication of our input container x
and our weight w plus a bias term b:
    

In [31]:
y_pred =tf.matmul(w,tf.transpose(x)) + b

### Definining  a loss function.

To capture the discrepancy between our model's predictions and the observed target we need a 
measure reflecting "distance." This distance is otfen called loss function or objective function. 
Then goal is to optimize the model by finding set of parameters (weights and bias ) that minimize it.

### MSE and cross entropy

MSE stands for mean squared error, which is the average squared  distance between the real target 
and the predicted model also called residuals.
In our linear regression example, we take the difference between the true target and predicted. We use
tf.square() to compute the square of the difference vector. We then averaged the result using tf.reduce_mean() function:

In [1]:
#loss = tf.reduce_mean(tf.square(y_true - y_pred))

### Cross Entropy

Cross entropy is another common loss use for categorical data. It uses the softmax classifier. Cross entropy is a measure of similarity between two distribution.

In [4]:
#loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_pred)

#loss = tf.reduce_mean(loss)


### The gradient descent optimizer

Optimization involves updating set of weights iteratively in a way that decreases the
loss over time. The most commonly used approach is gradient descent, where
the loss's gradient with respect to the set of weights.

### Sampling methods

Ideally is it makes sense to calculate the gradient for the entire set of 
samples in order to benefit from the maximum amount of available information. This method,
however, has some shortcomings which can be very slow and therefore 
intractable when the dataset requires more memory than is available.

The general applicable technique is the stochastic gradient descent(SGD)
where instead of feeding the entire dataset to the algorithm for the 
computation of each step, a subset of the data sampled sequentially.

The number of samples ranges from one sample at a time to a few hundred
, but the common sizes are between 50 - 500(commonly referred to as mini-batches).


### Gradient descent in TensorFlow

In Tensorflow the gradient descent it computed automatically. An important parameter to
set is the algorithms' learning rate, this determines how aggressive each
update iteration will be.


In [1]:
#optimizer = tf.train.GradientDescentOptimizer(learning_rate)
#train = optimizer.minimize(loss)

Example 1: In this problem we are interested in retrieving a set of 
    weights w and a bias term b, assuming our target value is a linear
    combination of some input vector x, with an additional Gaussian noise Ei added to each sample.
    

In [22]:
import numpy as np
import tensorflow as tf
#== Create data and simulate results
x_data = np.random.randn(2000,3)
w_real = [0.3,0.5,0.1]
b_real = -0.2

noise = np.random.randn(1,2000)*0.1
y_data = np.matmul(w_real, x_data.T) + b_real + noise

NUM_STEPS = 10

g = tf.Graph()
wb_ = []
with g.as_default():
    x = tf.placeholder(tf.float32, shape=[None,3])
    y_true = tf.placeholder(tf.float32,shape=None)
   #y_pred = tf.matmul(w,tf.transpose(x)) + b
    
    with tf.name_scope('inference') as scope:
        w = tf.Variable([[0,0,0]],dtype=tf.float32,name='weights')
        b = tf.Variable(0,dtype=tf.float32,name='bias')
        y_pred = tf.matmul(w,tf.transpose(x)) + b
    with tf.name_scope('loss') as scope:
        loss = tf.reduce_mean(tf.square(y_true-y_pred))
    with tf.name_scope('train') as scope:
        learning_rate = 0.5
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train = optimizer.minimize(loss) 
        
        
    # Before starting, initialize the variables. We will 'run' this first.
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for step in range(NUM_STEPS):
            sess.run(train,{x: x_data, y_true: y_data})
            if (step % 5 == 0):
                print(step, sess.run([w,b]))
                wb_.append(sess.run([w,b]))
        print(10, sess.run([w,b]))                                     

    

0 [array([[ 0.30403277,  0.49297577,  0.08719987]], dtype=float32), -0.18051079]
5 [array([[ 0.29727903,  0.4986017 ,  0.10117673]], dtype=float32), -0.20137946]
10 [array([[ 0.29727903,  0.4986017 ,  0.10117673]], dtype=float32), -0.20137946]


From the above result the estimated weights and bias are w =[0.3,0.5,0.098] and b = -0.198.


Example 2: Logistic regression. We wish to retrieve the weights and bias components in a simulated data setting, this time in a
    logistic regression framework. In this case, the linear component wTx + b is the input of a nonlinear functiion called 
    the logistic function. What is effectively does is squash the values of the linear part into the interval [0,1].

The logistic function we are using is called sigmoid function. We generate our samples by using the same set of weights and biases as in the previous example:

In [21]:
import tensorflow as tf
N = 2000
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# === Create data and simulate results ===
x_data = np.random.randn(N,3)
w_real = [0.3,0.5,0.1]
b_real = -0.2

wxb = np.matmul(w_real,x_data.T) + b_real

y_data_pre_noise = sigmoid(wxb)
y_data = np.random.binomial(1, y_data_pre_noise)
y_pred = tf.sigmoid(y_pred)
loss = y_true*tf.log(y_pred) - (1-y_true)*tf.log(1-y_pred)
loss = tf.reduce_mean(loss)
#tf.nn.sigmoid_cross_entropy_with_logits(labels=,logits=)
NUM_STEPS = 50
with tf.name_scope('loss') as scope:
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true,logits=y_pred)
    loss = tf.reduce_mean(loss)
# Before starting, initialize the variables. We will 'run' this first.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for step in range(NUM_STEPS):
        sess.run(train,{x: x_data, y_true: y_data})
        if (step % 5 == 0):
            print(step, sess.run([w,b]))
            wb_.append(sess.run([w,b]))
    print(50, sess.run([w,b]))






TypeError: Cannot interpret feed_dict key as Tensor: Tensor Tensor("Placeholder:0", shape=(?, 3), dtype=float32) is not an element of this graph.