# Task2: Many to Many Models: 3_layer
Here we predict the next pixel for each pixel in an image and calculate the cross entropy of each individual prediction against the ground truth. The loss is then summed over all pixels of an image. Note that ground truth exists only for the first 783 predictions: the prediction made at the last pixel has no following pixel in the input to compare against.

The GRU cell is used because in task one it appeared to be more stable and to give more accurate results.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import os
import pdb

In [2]:
# Load the MNIST data set from data/MNIST/ (train and test splits are used below).
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/MNIST/", one_hot=True)
# one_hot=True returns each label as a vector with a 1 at the index of its digit.

Extracting data/MNIST/train-images-idx3-ubyte.gz
Extracting data/MNIST/train-labels-idx1-ubyte.gz
Extracting data/MNIST/t10k-images-idx3-ubyte.gz
Extracting data/MNIST/t10k-labels-idx1-ubyte.gz


In [3]:
# --- Hyper-parameters -------------------------------------------------------
learning_rate = 0.001
batch_size = 100
rnn_size = 32    # number of units in the recurrent cell
n_classes = 10   # number of digits
n_chunks = 784   # number of pixels, i.e. time steps per image
chunk_size = 1   # one pixel is fed in per time step

# Log directory (not referenced elsewhere in the visible code).
logs_path = '/tmp/tensorflow_logs/example'

# --- Graph inputs -----------------------------------------------------------
# x is shaped (batch, n_chunks, chunk_size) to match the tf rnn cell input.
x = tf.placeholder(dtype='float', shape=[None, n_chunks, chunk_size], name='InputData')
y = tf.placeholder(dtype='float', name='LabelData')

# Ground truth for next-pixel prediction: flatten every image to n_chunks
# pixels and drop the first one, leaving pixels 2..end to compare against the
# predictions.
true_pixels = tf.reshape(x, [-1, n_chunks])[:, 1:]

Define the variables that will be used to transform the 32-dimensional RNN output at each pixel into a single value.

In [4]:
# Output projection: maps each rnn_size-dimensional RNN output to one value.
# Both variables start from random normal values.
weights = tf.Variable(tf.random_normal([rnn_size,1]))
# Single bias added to every projected output.
biases=tf.Variable(tf.random_normal([1]))

In [5]:
# Number of stacked recurrent layers.
n_layers = 3

# Build one GRU cell object per layer. Repeating a single instance
# (``[cell] * 3``) places the very same object in every layer, which later
# TensorFlow 1.x releases either reject or treat as weight sharing between
# layers; separate instances give each layer its own parameters everywhere.
gru_layers = [tf.nn.rnn_cell.GRUCell(rnn_size) for _ in range(n_layers)]

# Stack the layers into a single multi-layer cell.
gru_cell = tf.nn.rnn_cell.MultiRNNCell(cells=gru_layers, state_is_tuple=True)

# The outputs are a tensor of all the output states of the pixels
# (one rnn_size-dimensional vector per time step).
outputs, states = tf.nn.dynamic_rnn(cell=gru_cell, inputs=x, dtype=tf.float32)

# check the shape
print(outputs.get_shape())



(?, 784, 32)


In [6]:
# Flatten the RNN outputs to (batch*784, rnn_size) so the projection below is
# one plain matrix multiplication.
outputs = tf.reshape(outputs, [-1, rnn_size])

# Linear transformation of every RNN output to a single value.
linear = tf.matmul(outputs, weights) + biases

# Fold back to (batch_size, num_pixels) and keep only the first 783 columns:
# each output predicts the *next* pixel, and the final output has no
# ground-truth pixel to be compared with.
pixel_pred = tf.reshape(linear, [-1, n_chunks])[:, :-1]


In [7]:
# Per-pixel cross entropy. The tf function is used because it applies the
# sigmoid itself before computing the loss, which avoids numerical underflow.
pixel_xent = tf.nn.sigmoid_cross_entropy_with_logits(pixel_pred, true_pixels)

# Loss of one image = sum of the losses of its pixels.
loss = tf.reduce_sum(pixel_xent, 1)

# Total (summed, not averaged) loss over all images that were fed in.
loss_test = tf.reduce_sum(loss)

# The quantity being optimized is the mean per-image loss.
cost = tf.reduce_mean(loss)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

In [8]:
# Saver for the model variables (weights and biases), writing V2 checkpoints.
saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)

# Directory the checkpoint is stored in; created on first use.
save_MDir = 'models/Task2/gru3_layer'
if not os.path.exists(save_MDir):
    os.makedirs(save_MDir)

# Checkpoint path handed to saver.save(...).
# Alternative checkpoint name:
#save_model = os.path.join(save_MDir,'best_accuracy_second')
save_model = os.path.join(save_MDir, 'best_accuracy3')

## Binarize function

In [9]:
def binarize(images, threshold=0.1):
    """Turn pixel intensities into a 0/1 float32 mask.

    Args:
        images: NumPy array of pixel values.
        threshold: Pixels strictly greater than this value become 1.0;
            all others become 0.0.

    Returns:
        A float32 array with the same shape as ``images``.
    """
    above_threshold = images > threshold
    return above_threshold.astype("float32")

In [10]:
# Initializing the variables
init = tf.global_variables_initializer()

# Create a summary to monitor cost tensor
#tf.summary.scalar("loss", cost)
# Create a summary to monitor accuracy tensor
#tf.summary.scalar("accuracy", accuracy)
# Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()

In [11]:
# Second saver for the model variables; optimize() uses it to keep the
# checkpoint with the lowest test cost seen so far.
saver2 = tf.train.Saver()

# Directory for that "best" checkpoint; created on first use.
save2_MDir = 'models/Task2/gru3_layer/best'
if not os.path.exists(save2_MDir):
    os.makedirs(save2_MDir)

# Checkpoint path handed to saver2.save(...).
# Alternative checkpoint name:
#save_model2 = os.path.join(save2_MDir,'best_accuracy_second')
save_model2 = os.path.join(save2_MDir, 'best_accuracy_3')




### Optimizer function
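Here the main work is done. Every training batch is passed through the optimizer, the running training cost is printed during each epoch, and the test cost is evaluated and printed at regular epoch intervals.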

In [12]:
def optimize(hm_epochs, start_epoch):
    """Train the model on binarized MNIST and checkpoint the result.

    A new session is opened and all variables are initialized with ``init``,
    so every call trains from scratch. The function relies on the
    module-level graph objects (``x``, ``y``, ``cost``, ``loss_test``,
    ``optimizer``), the ``mnist`` data set and the two savers.

    Args:
        hm_epochs: Number of epochs to train for.
        start_epoch: Offset added to the epoch numbers in the printed
            messages only; no earlier checkpoint is restored.

    Side effects:
        * Prints progress at the first, middle and last batch of every epoch.
        * About five times per run, evaluates the whole test set and, when
          the test cost is the lowest seen so far, saves to ``save_model2``
          via ``saver2``.
        * Saves the final model to ``save_model`` via ``saver``.
    """
    # Integer intervals keep the ``%`` checks below exact (a float
    # ``hm_epochs / 5`` only hits zero when hm_epochs is a multiple of 5);
    # the floor of 1 avoids a modulo by zero for very short runs.
    eval_every = max(1, hm_epochs // 5)
    n_batches = mnist.train.num_examples // batch_size
    report_every = max(1, n_batches // 2)

    with tf.Session() as sess:
        sess.run(init)
        cost_list = []
        start_epoch_t = time.time()

        # The binarized test set never changes, so its feed is built once.
        test_feed = {
            x: binarize(mnist.test.images.reshape((-1, n_chunks, chunk_size))),
            y: mnist.test.labels,
        }

        # For each epoch loop over all batches and optimize the cost.
        for epoch in range(hm_epochs):
            print("-------Running Epoch:{}-------".format(epoch+1+start_epoch))
            epoch_loss = 0
            start = time.time()

            for i in range(n_batches):
                # Get the batch into the (batch, time step, feature) shape.
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                epoch_x = binarize(epoch_x).reshape((batch_size, n_chunks, chunk_size))

                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c

                # Report on the first, the middle and the last batch
                # (the last index is n_batches - 1, not n_batches).
                if i % report_every == 0 or i == n_batches - 1:
                    print("Trained {} batches with current epoch cost: {}".format(i+1,epoch_loss))
                    print("Last batch average cost: ", c)
                    print("Current run time is: {} \n".format(time.time()-start_epoch_t))

            if epoch % eval_every == 0:
                e_loss, e_cost = sess.run([loss_test, cost], feed_dict=test_feed)
                cost_list.append(e_cost)

                print('Epoch', start_epoch+epoch+1, 'completed out of:',hm_epochs+start_epoch,', Current Test total loss is :',e_loss,' with average',e_cost, ', time:', time.time()-start,'\n')
                print("Total time taken for current epoch : {:f} \n".format(time.time()-start))

                # Keep a separate checkpoint of the best test cost so far, so
                # it can be compared with the model of later epochs.
                if e_cost == min(cost_list):
                    saver2.save(sess=sess, save_path=save_model2)
                    print(cost_list)

        Final_cost_test = sess.run(cost, feed_dict=test_feed)

        saver.save(sess=sess, save_path=save_model)

        # start_epoch + hm_epochs equals the last epoch number printed above
        # and stays defined even when the loop body never ran.
        print("At final epoch: {}, the is cost {}".format(start_epoch + hm_epochs, Final_cost_test))
    print("Total time taken for run : {:f}".format(time.time()-start_epoch_t))

In [13]:
#optimize(30,0)

-------Running Epoch:1-------
Trained 1 batches with current epoch cost: 546.7645263671875
Last batch average cost:  546.765
Current run time is: 1.0379581451416016 

Trained 276 batches with current epoch cost: 55796.713287353516
Last batch average cost:  118.908
Current run time is: 218.4333508014679 

Epoch 1 completed out of: 30 , Current Test loss: 95.8565 , time: 474.20103311538696 

Total time taken for current epoch : 474.207551 

[95.856506]
-------Running Epoch:2-------
Trained 1 batches with current epoch cost: 94.27005004882812
Last batch average cost:  94.2701
Current run time is: 476.6199367046356 

Trained 276 batches with current epoch cost: 24486.080757141113
Last batch average cost:  83.1002
Current run time is: 698.0438075065613 

Epoch 2 completed out of: 30 , Current Test loss: 82.1284 , time: 482.241902589798 

Total time taken for current epoch : 482.242404 

[95.856506, 82.12841]
-------Running Epoch:3-------
Trained 1 batches with current epoch cost: 84.7767944

# Restoring model
Here the saved model is restored, and the costs computed from the restored model match the values given in the report.

In [13]:
def print_cost(rnn_size,epochs):
    """Print the training and test cost of the model held in the global ``sess``.

    The first 10000 training examples are evaluated in batches of 1000. For
    every batch whose end index still lies inside the test set, the test
    examples at the same indices are evaluated as well. The reported figures
    are the plain averages of the per-batch mean costs.

    Args:
        rnn_size: Number of GRU units; only used in the printed message.
        epochs: Number of training epochs; only used in the printed message.

    Relies on module-level ``sess``, ``cost``, ``x``, ``y``, ``mnist`` and
    ``binarize``; ``sess`` must be open and hold the variables to evaluate.
    """
    b_size = 1000
    # Only the first 10000 training examples are evaluated.
    num_train = len(mnist.train.labels[:10000, :])
    num_test = len(mnist.test.labels)
    # Ceiling division: an integer batch count that includes a partial batch.
    n_batches = -(-num_train // b_size)

    # Reshape once to (examples, 784, 1), the layout expected by ``x``.
    train_images = mnist.train.images.reshape((-1, 784, 1))
    test_images = mnist.test.images.reshape((-1, 784, 1))

    cost_train_list = []
    cost_test_list = []
    start = time.time()
    for count, i in enumerate(range(0, num_train, b_size)):
        print('Processing batch number {} of {}.'.format(count+1,n_batches))
        # The ending index for this batch is denoted j.
        j = min(i + b_size, num_train)

        # Evaluate the test examples at the same indices while they exist.
        if j <= num_test:
            test_feed = {x: binarize(test_images[i:j, :]), y: mnist.test.labels[i:j, :]}
            cost_test_list.append(sess.run(cost, feed_dict=test_feed))

        train_feed = {x: binarize(train_images[i:j, :]), y: mnist.train.labels[i:j, :]}
        cost_train_list.append(sess.run(cost, feed_dict=train_feed))

    total_cost_train = sum(cost_train_list)/len(cost_train_list)
    total_cost_test = sum(cost_test_list)/len(cost_test_list)

    print(time.time()-start)

    print('The training cost for the 3 layer {} unit GRU many to many model is {} after {} epochs \n'.format(rnn_size,total_cost_train,epochs))

    print('The test cost for the 3 layer {} unit GRU many to many model is {} after {} epochs \n'.format(rnn_size,total_cost_test,epochs))


In [14]:
# Checkpoint location to restore from.
save_MDir = 'models/Task2/gru3_layer'
save_model = os.path.join(save_MDir,'best_accuracy3')
# Initializer op for the variables; it is run in the session before restoring.
init = tf.global_variables_initializer()

In [15]:
# Open a session that stays open after this cell; print_cost reads it as the
# global ``sess``.
sess= tf.Session()
# Initialize the variables, then load the saved values from the checkpoint at
# save_model.
sess.run(init)
saver2restore = tf.train.Saver()
saver2restore.restore(sess = sess, save_path= save_model)

In [18]:
# Report the train/test cost of the restored model; the two arguments are only
# used in the printed message.
print_cost(rnn_size=32, epochs = 30)
# Close the session once the costs have been printed.
sess.close()

Processing batch number 1 of 55.0.
Processing batch number 2 of 55.0.
Processing batch number 3 of 55.0.
Processing batch number 4 of 55.0.
Processing batch number 5 of 55.0.
Processing batch number 6 of 55.0.
Processing batch number 7 of 55.0.
Processing batch number 8 of 55.0.
Processing batch number 9 of 55.0.
Processing batch number 10 of 55.0.
Processing batch number 11 of 55.0.
Processing batch number 12 of 55.0.
Processing batch number 13 of 55.0.
Processing batch number 14 of 55.0.
Processing batch number 15 of 55.0.
Processing batch number 16 of 55.0.
Processing batch number 17 of 55.0.
Processing batch number 18 of 55.0.
Processing batch number 19 of 55.0.
Processing batch number 20 of 55.0.
Processing batch number 21 of 55.0.
Processing batch number 22 of 55.0.
Processing batch number 23 of 55.0.
Processing batch number 24 of 55.0.
Processing batch number 25 of 55.0.
Processing batch number 26 of 55.0.
Processing batch number 27 of 55.0.
Processing batch number 28 of 55.0.
P

In [16]:
# Report the train/test cost of the restored model; the two arguments are only
# used in the printed message.
print_cost(rnn_size=32, epochs = 30)
# Close the session once the costs have been printed.
sess.close()

Processing batch number 1 of 10.0.
Processing batch number 2 of 10.0.
Processing batch number 3 of 10.0.
Processing batch number 4 of 10.0.
Processing batch number 5 of 10.0.
Processing batch number 6 of 10.0.
Processing batch number 7 of 10.0.
Processing batch number 8 of 10.0.
Processing batch number 9 of 10.0.
Processing batch number 10 of 10.0.
51.15917897224426
The training cost for the 3 layer 32 unit GRU many to many model is 66.33243942260742 after 30 epochs 

The test cost for the 3 layer 32 unit GRU many to many model is 66.0387451171875 after 30 epochs 

