### Deep Learning with tensorflow

In [1]:
import tensorflow as tf

In [14]:
x1 = tf.constant(5)
x2 = tf.constant(6)
result = x1*x2  #inefficient way
result = tf.multiply(x1,x2)
'''x1_mat = tf.constant([56,6])
x2_mat = tf.constant([6,6;6,6])
result = tf.matmul(x1_mat,x2_mat)'''
result

<tf.Tensor 'Mul_18:0' shape=() dtype=int32>

In [17]:
sess = tf.Session() #creates session
print(sess.run(result))
sess.close()

30


^ No process actually ran the computation until we ran the session

In [20]:
# Actual, efficient way:
# will automatically close the session, so don't have to remember to close the session
with tf.Session() as sess:
    output = sess.run(result) # can save that as python variables to reference them
    print(output)

# so you can print the output but can't print sess.run(result)
print(output)
print(sess.run(result))

30
30


RuntimeError: Attempted to use a closed Session.

##### Basic Neural Nets

Using feed-forward Neural Network

(Basic MLP)
input > weights > hidden layer 1 (activation function) > weights > hidden layer 2 (activation function) > weights > output layer

compare output with intended output > assign a cost function or loss function

optimization function (optimizer) > minimize cost (e.g. AdamOptimizer, Stochastic Gradient Descent, AdaGrad)
"Goes backwards and manipulates the weights accordingly" - backpropagation

FFN + backprop = epoch
lowering the cost function with each epoch

<< 1 hidden layer = regular neural net.
2 or more hidden layers = deep neural net >>

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('data/',one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [59]:
"""
10 classes, 0-9

0 = [1,0,0,0,0,0,0,0,0,0]
1 = [0,1,0,0,0,0,0,0,0,0]
2 = [0,0,1,0,0,0,0,0,0,0]
etc

ONE element is 'hot' or 'on'
and the rest are 'cold' or 'off'
"""

# defining our model
# we will have 3 hidden layers

n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500 # donot have to be equal in number. It can be 500,1500,15

n_classes = 10
batch_size = 100 
# ^ gonna go through batches of 100 features & feed them through our network at a time 
# and manipulate the weights and then do another batch - by batches of a 100 images. We can do 1000 images too etc

# defining placeholder variables
x = tf.placeholder('float',[None,784]) # 784 because the images in the dataset are 28x28 = 784 values/pixels
y = tf.placeholder('float')

def neural_network_model(data):
    
    # (input_data * weights) + biases <- model for each layer
    # biases because if the inputs are something like 0, then we need something to equal it out
    hidden_1_layer = {'weights':tf.Variable(tf.random_normal([784,n_nodes_hl1])),
                      'biases':tf.Variable(tf.random_normal([n_nodes_hl1]))}
    hidden_2_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl1,n_nodes_hl2])),
                      'biases':tf.Variable(tf.random_normal([n_nodes_hl2]))}
    hidden_3_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl2,n_nodes_hl3])),
                      'biases':tf.Variable(tf.random_normal([n_nodes_hl3]))}
    output_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl3,n_classes])),
                      'biases':tf.Variable(tf.random_normal([n_classes]))}
    
    l1 = tf.add(tf.matmul(data,hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1) # relu = rectified linear, which is the activation function that we are using. Like a threshold fn
    
    l2 = tf.add(tf.matmul(l1,hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)
    
    l3 = tf.add(tf.matmul(l2,hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)
    
    output = tf.matmul(l3,output_layer['weights']) + output_layer['biases']
    
    return output
    # We have finished coding the model.
    # Now we have to tell tensorflow what to do in this model and what to do in the session

def train_neural_network(x): # x is just the input data
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
    # ^ using cross entropy with logits as our cost function here
    
    # now to minimize this cost function
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    
    # an epoch = ffn + backprop
    epochs = 10
    
    with tf.Session() as ses:
        ses.run(tf.global_variables_initializer())
        
        for e in range(epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x,epoch_y = mnist.train.next_batch(batch_size)
                _, c = ses.run([optimizer, cost], feed_dict = {x:epoch_x, y:epoch_y}) # c is cost
                # somehow, tensorflow knows it has to modify the weights so it does without us having to explicitly code it
                
                epoch_loss += c
            print('Epoch ',e,'done. Epoch loss = ',epoch_loss)
            
        correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
        accuracy = tf.reduce_mean(tf.cast(correct,'float'))
        print('Accuracy: ',accuracy.eval({x:mnist.test.images,y:mnist.test.labels}))

train_neural_network(x)

Epoch  0 done. Epoch loss =  1744421.0326385498
Epoch  1 done. Epoch loss =  399354.0344619751
Epoch  2 done. Epoch loss =  224111.1451063156
Epoch  3 done. Epoch loss =  133647.82119944692
Epoch  4 done. Epoch loss =  83260.95193886757
Epoch  5 done. Epoch loss =  51629.15432609675
Epoch  6 done. Epoch loss =  37299.55490410328
Epoch  7 done. Epoch loss =  27306.909869850148
Epoch  8 done. Epoch loss =  20420.21962215049
Epoch  9 done. Epoch loss =  16788.007174332888
Accuracy:  0.9501


95% on the mnist dataset is considered laughable. Because this is not quite the best model.
But all we did was feed through raw pixel values. We didn't help it generalize, we didn't help it have a different perspective or anything. 

Like CNN will be taking chunks of the pixels and those are the layers or whatever.
It has a little bit more of a logical approach.
Whereas, here, we're just throwing data at the neural net and telling it to figure it out.
Which it did, considering everything. Even though it isn't the industry standard for this task, it did pretty well.

# --------

In the MLP etc there is no sense of time or order of events.
E.g. Harry killed Bill.
Traditional Deep Neurals Nets will not know the difference between Harry killed Bill and Bill killed Harry

RNN, CNN have a sense of order.

RNN used more with language data
CNN used more with image data - for single frames
For successions of frames etc, combo of CNN-RNN

##### RNN

input > activation fn > output 
then the output serves as input again to the activation function
so


x1 > A() > o1

      v
x2 > A() > o2

      v
x3 > A() > o3

The activation function has a forget gate which decides what to keep from the previoud output
It also decides what to add from the input and then finally what to output

In [31]:
import numpy as np
x = np.ones((3,2,2,1)) # total 3, each has 2, then those 2 each have 2, those each have 1
np.transpose(x,(1,0,2,3)) # transpose the axes
# so now each 2 has 3 and those 3 have 2 and those 2 have 1 (cuz axes 0 and 1 got exchanged)
x

array([[[[1.],
         [1.]],

        [[1.],
         [1.]]],


       [[[1.],
         [1.]],

        [[1.],
         [1.]]],


       [[[1.],
         [1.]],

        [[1.],
         [1.]]]])

In [3]:
from tensorflow.contrib import rnn
mnist = input_data.read_data_sets('data/',one_hot=True)

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


In [4]:
hm_epochs = 3
n_classes = 10
batch_size = 128
chunk_size = 28
n_chunks = 28 # 28 chunks of 28 pixels cuz the image is 28x28
rnn_size = 128 # rather than having all those layers, we just have a single size of the rnn which is 128

x = tf.placeholder('float',[None,n_chunks,chunk_size])
y = tf.placeholder('float')

def recurrent_neural_network(data):
    
    layer = {'weights':tf.Variable(tf.random_normal([rnn_size,n_classes])),
             'biases':tf.Variable(tf.random_normal([n_classes]))}
    
    global x
    # formatting the data cuz rnn wants it in a certain format
    # just like we need to reshape the data for sklearn
    x = tf.transpose(x,[1,0,2])
    x = tf.reshape(x,[-1,chunk_size])
    x = tf.split(x,n_chunks,0)
    
    lstm_cell = rnn.BasicLSTMCell(rnn_size) # a basic lstm cell that with recur, that is of rnn size
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    
    output = tf.matmul(outputs[-1],layer['weights']) + layer['biases'] # final output * weights + biases
    
    return output

def train_neural_network(x):
    prediction = recurrent_neural_network(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))

    optimizer = tf.train.AdamOptimizer().minimize(cost)
    
    with tf.Session() as ses:
        ses.run(tf.global_variables_initializer())
        
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x,epoch_y = mnist.train.next_batch(batch_size)
                epoch_x = epoch_x.reshape((batch_size,n_chunks,chunk_size))
                
                _, c = ses.run([optimizer, cost], feed_dict = {x:epoch_x, y:epoch_y})
                # reshape images by -1 because each individual image and not the batch size
                
                epoch_loss += c
            print('Epoch ',epoch,'done. Epoch loss = ',epoch_loss)
            
        correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
        accuracy = tf.reduce_mean(tf.cast(correct,'float'))
        print('Accuracy: ',accuracy.eval({x:mnist.test.images.reshape((-1,n_chunks,chunk_size)),y:mnist.test.labels}))

train_neural_network(x)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Epoch  0 done. Epoch loss =  188.49540507793427
Epoch  1 done. Epoch loss =  55.29036856070161
Epoch  2 done. Epoch loss =  37.6534407697618
Accuracy:  0.9731
