In [1]:
import tensorflow as tf

## Start a session

In [8]:
sess = tf.InteractiveSession()

#### define some constants

In [5]:
a = tf.constant(2)
b = tf.constant(3)

#### Define an operation

In [9]:
mult = tf.mul(a,b)

### Run the graph

In [11]:
sess.run(mult)

6

## Tensors
* multi-dimensional vectors

## Variables
* used to represent parameters of the model
* variables can be updated
* in-memory buffers that contain tensors
* survive across multiple executions of a graph (instead of being wiped and rewritten each time like normal tensors)
* Requirements for variables
    * must be explicitly initialized before a graph is executed
    * can be updated with gradient methods
    * can save the state to disk and load them for later use

## Weight variables in NNs
```python
weights = tf.Variable(tf.truncated_normal([300,200],stddev=0.5,name="weights", mean=0.0))
```
* the above code could be matrix tying together 2 layers of a neural network, the 1st layer would have 300 neurons and the 2nd would have 200 neurons
* optional argument trainable=False if you don't want the weights to be able to update (maybe for some kind of static word vector)

In [12]:
# Randomly initialize weights that could tie a neural network layer together
## this would have input shape 300 dim and output shape 200 dim
weights = tf.Variable(tf.truncated_normal([300,200],stddev=0.5,name="weights", mean=0.0))

## What happens when tf.variable is called
* 3 operations added to computation graph
    1. operation producing tensor the tensor we use to initialize the variable
    2. tf.assign operation --> responsible for filling the variable with intializing tensor prior to the variable's use
    3. variable operation --> holds the current value of the variable

## Have to initialize tensorflow variables before running the graph
```python
tf.initialize_all_variables()
```
This triggers all of the tf.assign operations in the graph

# Tensorflow Operations
* abstract transformations that are applied to tensors in the computation graph
* operations can be given a name for easy reference into the computation graph
* operations consist of one or more kernels
    * kernel is a device-specific implementation (GPU vs CPU)

# Placeholder Tensors
### Control how input is passed into deep models
Can't use standard variables because they are only meant to be initialized once

placeholder tensors are populated every single time the computation graph is run

```python
x = tf.placeholder(tf.float32, name="x", shape=[None,784])
W = tf.Variable(tf.random_uniform([784,10],-1,1),name="W")
multiply = tf.matmul(x,W)
```

### Code above:
* x:
    * represents mini-batch of data 
    * 784 columns (each sample has 784 dimensions)
    * undefined rows --> means x can be initialized with any number of data samples (any size mini-batch)
* W:
    * can represent the weight matrix tying a layer of a neural network to another layer (be that layer another hidden layer or an output/softmax layer
    * input dimension of 784 
    * output dimension of 10 (can represent # of classes)
    * -1,1 represents the range that the numbers are drawn from
* placeholders need to be filled every time the computation graph (or subgraph) is run

# Sessions
* how tensorflow program interacts with computation graph
* builds the graph, initializes variables, and runs the graph
### example below, but ignore the read data thing so i dont have the data...

In [17]:
import tensorflow as tf
# from read_data import get_minibatch()


# Describing the computational graph
x = tf.placeholder(tf.float32, name="x", shape=[None,784])
W = tf.Variable(tf.truncated_normal([784,10],-1,1),name="W")
b = tf.Variable(tf.zeros([10]), name="biases")

output = tf.matmul(x,W) + b

# initialize the variables
init_op = tf.initialize_all_variables()

# define the tensorflow session
sess = tf.Session()

# performs the initialization
sess.run(init_op)

# define feed_dict, which fills the placeholders with necessary input data
## define more dictionary entries if the graph has multiple places where data will go
feed_dict = {"x": get_minibatch()}

# run the graph again but pass in the values you want inside the placeholders
sess.run(output, feed_dict=feed_dict)

SyntaxError: invalid syntax (<ipython-input-17-579ba858d8ce>, line 2)

## sess.run() used to:
* initialize variables
* put data into placeholders (input slots)
* train the network


# Navigating Variable Scopes and Sharing Variables
* complex models require a lot of re-using and sharing large sets of variables --> will want to instantiate in one place

# Code below should be used in a case where you want to be able to create a DIFFERENT network everytime you call it
* NOTE if you want to reuse or access the same network you need to define it different (will be explained below)

In [18]:
'''Defines a network with 6 variables and 3 layers'''
def my_network(input):
    # first layer
     ## takes input of dim 784, outputs tensor with dim=100
    W_1 = tf.Variable(tf.random_uniform([784,100],-1,1),name="W_1")
    b_1 = tf.Variable(tf.zeros([100]), name="biases_1")
    output_1 = tf.matmul(input,W_1) + b_1
    
    # second layer
     ## takes input of dim 100, outputs tensor with dim=50
    W_2 = tf.Variable(tf.random_uniform([100,50],-1,1),name="W_2")
    b_2 = tf.Variable(tf.zeros([50]),name="biases_2")
    output_2 = tf.matmul(output_1,W_2) + b_2 # output_1 is the input to this layer
    
    # third layer
     ## takes input of dim 50, outputs tensor with dim=10 (should be the # of classes)
    W_3 = tf.Variable(tf.random_uniform([50,10],-1,1),name="W_3")
    b_3 = tf.Variable(tf.zeros([10]),name="biases_3")
    output_3 = tf.matmul(output_2,W_3) + b_3
    
    # printing names (to show that this creates a different network each time)
    print "Printing names of weight parameters"
    print W_1.name, W_2.name, W_3.name
    print "Printing names of bias parameters"
    print b_1.name, b_2.name, b_3.name
    
    return output_3
    
    
## pass something into this function like:
i_1 = tf.placeholder(tf.float32, [1000,784],name="i_1")
my_network(i_1)

# second time you call this the name of "W_1" will actually be "W_1_1"

## Tensorflow's variable scoping mechanisms largely controlled by 2 functions
1. ```tf.get_variable(<name>,<shape>,<initializer>)``` Checks if a variable with this name exists, retrieves the variable if it does, creates it using the shape and initializer if it doesnt
2. ```tf.variable_scope(<scope_name>)```: manages the namespace and determines the scope in which ```tf.get_variable``` operates

In [19]:
def layer(input, weight_shape, bias_shape):
    weight_init = tf.random_uniform_initializer(minval=-1,maxval=1)
    bias_init = tf.constant_initializer(value=0)
    
    W = tf.get_variable("W",weight_shape, initializer=weight_init)
    b = tf.get_variable("b",bias_shape, initializer=bias_init)
    
    return tf.matmul(input,W) + b

def my_network(input):
    # define scope of layer_1
    with tf.variable_scope("layer_1"):
        output_1 = layer(input, [784, 100], [100]) # get the output of the first layer
    with tf.variable_scope("layer_2"):
        output_2 = layer(output_1, [100,50], [50]) # get the output of the 2nd layer
    with tf.variable_scope("layer_3"):
        output_3 = layer(output_2, [50,10], [10]) # get the output of the 3rd layer
        
    return output_3 # this is the final output of the model

## By default sharing the variables is not allowed, but you can turn that off by saying explicitly

## Building models in a "tower" is the way to use multiple gpus or cpus

# Logistic Regression Model in TensorFlow
* p(y=i|x) = softmax(Wx+b)
    * goal is to learn values of W and b that most effectively classify the inputs as accurately as possible

## Build the model in 4 phases:
1. Inference
    * produces probability distribution over the output classes given a minibatch
2. Loss
    * computes the value of the error function (cross-entropy loss for this example)
3. training
    * computes gradients of model's parameters and updates the model
4. evaluate
    * determines the effectiveness of the model

## 1) Inference
* given minibatch w/ 784-dimensional vectors representing MNIST images
* represent log-reg by softmax(Wx+b)

In [20]:
def inference(x):
    tf.constant_initializer(value=0)
    W = tf.get_variable("W", [784,10], initializer=init)
    b = tf.get_variable("b",[10], initializer=init)
    
    output = tf.nn.softmax(tf.matmul(x,W) + b)
    return output

## 2) Loss
* average error per data sample
* comput cross entropy loss over a minibatch

In [None]:
def loss(output, y):
    dot_product = y * tf.log(output)
    
    # Reduction along axis 0 collapses each column into a single value
    # Reduction along axis 1 collapses each row into a single value
    # Generally, reduction along axis i collapses the ith dimension of a tensor to size 1
    
    cross_entropy = -tf.reduce_sum(dot_product, reduction_indices=1)
    
    loss = tf.reduce_mean(cross_entropy)
    
    return loss

## 3) Training
* given cost incurred, compute gradients and modify parameters of model
* tensorflow gives access to built-in optimizers that use special train operations when run

In [None]:
def training(cost, global_step):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(cost, global_step=global_step)
    return train_op

## 4) Evaluate
* put everything into a single computational subgraph to evaluate model on valid or test set

In [None]:
def evaluate(output, y):
    correct_prediction = tf.equal(tf.argmax(output,1),tf.argmax(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy

# Full logistic regression in tensor flow with loggin

In [None]:
# parameters
learning_rate = 0.01
training_epochs = 1000
batch_size = 100
display_step = 1

with tf.Graph().as_default():
    
    # mnist data image of shape 28*28 = 784
    x = tf.placeholder("float", [None, 784])
    
    # 0-9 digits recognition => 10 classes
    y = tf.placeholder("float",[None, 10])
    
    output = inference(x)
    
    cost = loss(output, y)
    
    global_step = tf.Variable(0, name='global_step', trainable=False)
    
    train_op = training(cost, global_step)
    
    eval_op = evaluate(output, y)
    
    summary_op = tf.merge_all_summaries()
    
    saver = tf.train.Saver()
    
    sess = tf.Session()
    
    summary_writer = tf.train.SummaryWriter("logistic_logs/", graph_def=sess.graph_def)
    
    init_op = tf.initialize_all_variables()
    
    sess.run(init_op)
    
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(mnist.train.num_examples/batch_size)
        # loop over all batches
        for i in range(total_batch):
            minibatch_x, minbatch_y = mnist.train.next_batch(batch_size)
            # fit training using batch data
            feed_dict = {x: minibatch_x, y: minbatch_y}
            sess.run(train_op, feed_dict=feed_dict)
            avg_cost += minibatch_cost/total_batch
            # display logs per epoch step
            if epoch % display_step == 0:
                val_feed_dict = {
                    x: mnist.validation.images,
                    y: mnist.validation.labels
                }
                
                accuracy = sess.run(eval_op, feed_dict=val_feed_dict)
                print "Validation error: ", (1-accuracy)
                
                summary_str = sess.run(summary_op, feed_dict = feed_dict)
                summary_writer.add_summary(summary_str, sess.run(global_step))
                
                saver.save(sess, "logistic_logs/model-checkpoints")
                
                print "Optimization Finished!"
 
    test_feed_dict = {
         x : mnist.test.images,
         y : mnist.test.labels
    }

    accuracy = sess.run(eval_op, feed_dict=test_feed_dict)

    print "Test Accuracy:", accuracy