<a href="https://colab.research.google.com/github/amitmakashir/Deep-learning/blob/master/Part1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import warnings
warnings.filterwarnings('ignore')

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

from timeit import default_timer as timer

In [0]:
# Network structure
hidden_units = [1024,1024,1024,1024,1024]     # hidden_units[0] => 1024 hidden units in the first layer 
output_nodes = 10
input_nodes = 784

no_of_train_images = len(mnist.train.images)

# Hyperparameters
learning_rate = 0.001
epochs = 100
minibatch_size = 350
num_iters = no_of_train_images // minibatch_size

In [0]:
# Create placeholders for input X and labels y
x = tf.placeholder(tf.float32, [None, 784])
y_true = tf.placeholder(tf.float32, [None, 10])


# Constructs a neural network with the specified inputs, outputs and hidden layers
# hidden_units is a list containing hidden units at every hidden layer
# the length of hidden_units list determines the no of hidden layers
# the output_nodes determine the no of classes to be predicted
# Currently use the "He" initialization for initializing the weights
def construct_network(input_nodes, output_nodes, hidden_units):
    no_of_hidden_layers = len(hidden_units)
    W = []
    b = []

    initializer = tf.contrib.layers.variance_scaling_initializer()
  
    for i in range(no_of_hidden_layers + 1):
        if i == 0:   # First layer
            w = tf.Variable(initializer([input_nodes, hidden_units[i]]), dtype=tf.float32)
            bias = tf.Variable(tf.random_normal([hidden_units[i]]), dtype=tf.float32)
      
        elif i == no_of_hidden_layers:  # Last layer
            w = tf.Variable(initializer([hidden_units[i-1], output_nodes]), dtype=tf.float32)
            bias = tf.Variable(tf.random_normal([output_nodes]), dtype=tf.float32)
      
        else:  # Hidden layers
            w = tf.Variable(initializer([hidden_units[i-1], hidden_units[i]]), dtype=tf.float32)
            bias = tf.Variable(tf.random_normal([hidden_units[i]]), dtype=tf.float32)

        W.append(w)
        b.append(bias)
  
    return W,b


# Once the network is constructed, pass the input X in the network.
# Apply suitable activations in the hidden layers.
# Don't apply any activations on the last layer in this function as this function
# could be used for classification as well as regression problems
def solve(x,W,b):
    n = len(W)
    y = []

    for i in range(n): # First layer
        if i == 0:
            y.append(tf.nn.relu(tf.matmul(x, W[i]) + b[i]))

        elif i == n-1: # Last layer
            y.append(tf.matmul(y[i-1], W[i]) + b[i])

        else: # Hidden layers
            y.append(tf.nn.relu(tf.matmul(y[i-1], W[i]) + b[i]))
  
    return y


# Return a given no of random samples from the data X and y 
def randomly_sample(X,y,no_of_samples):
    # Generate random indices for the samples
    indices = random.sample(range(0, len(X)), no_of_samples)

    samples_X = np.array([X[i] for i in indices])
    samples_label = np.array([y[i] for i in indices])

    samples_label_argmax = np.argmax(samples_label,1)

    return samples_X,samples_label_argmax


def feedForward(x,y_true,W,b):
    ## "y" is the output of the network. Pass it through the softmax function
    y = solve(x,W,b)
    y_pred = tf.nn.softmax(y[-1])

    ## Calculate the accuracy of this classifier by calculating % of correctly classified images
    correct_prediction = tf.equal(tf.argmax(y_pred,1), tf.argmax(y_true,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    return y,accuracy

        

## Main program starts from here
  
## Construct the network using the given configuration 
W,b = construct_network(input_nodes, output_nodes, hidden_units) 

## "y" is the output of the network. Pass it through the softmax function
y = solve(x,W,b)
y_pred = tf.nn.softmax(y[-1])


## Define the cost function to be optimized and the optimizer to be used
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y[-1],labels=y_true))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

### Start the session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

## Calculate the accuracy of this classifier by calculating % of correctly classified images
correct_prediction = tf.equal(tf.argmax(y_pred,1), tf.argmax(y_true,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
test_accuracy = 0

## Running the neural network
for e in range(epochs):
    if test_accuracy > 0.98:
        break
            
    for _ in range(num_iters):
        batch_xs, batch_ys = mnist.train.next_batch(minibatch_size)
        sess.run(train_step, feed_dict={x: batch_xs, y_true: batch_ys})
  
        # Calculate the test accuracy after every epoch
        test_accuracy = sess.run(accuracy, feed_dict={x: mnist.test.images, y_true: mnist.test.labels})
        if test_accuracy > 0.98:
            break

print("Accuracy on Test images: "+ str(test_accuracy))

# Weights
W = sess.run(W, feed_dict={x: batch_xs, y_true: batch_ys})

Instructions for updating:
Colocations handled automatically by placer.
Accuracy on Test images: 0.9812


In [0]:
# Use SVD to get singular values of the weight matrix. Choose only the top "D"
# values from the singular vector and create a diagonal matrix from it. Multiply 
# it to u and v to reconstruct the weight matrix again. As we only choose the "D" 
# values the reconstructed matrix would sparser than the original matrix
def compressWeight(w,D):
    '''
    u => (784,784)
    s => (784,)
    v => (1024,1024)

    After choosing "D" values from s:
    s => (20,)

    Converting s to diagonal matrix:
    s => (20,20)

    Multiplying u,s and v
    w => (784,20) * (20,20) * (20,1024)
    '''
    u,s,v =  np.linalg.svd(w,full_matrices=True)
  
    if D == "DFull":
        D = s.shape[0]
        
    w = np.matrix(u[:, :D]) * np.diag(s[:D]) * np.matrix(v[:, :D].T)

    w = sess.run(tf.convert_to_tensor(w, np.float32))
    return w 


## Q 1.5 

In [0]:
D = [10, 20, 50, 100, 200,"DFull"]

W_all = []

for d in D:
    W_new = []
    
    for i,w in enumerate(W): 
        if i == len(W)-1: # we are not compressing the last weight
            W_new.append(w)
        else:
            W_new.append(compressWeight(w,d))
    
    # Push the newly reconstructed weights to the "W_all" list
    W_all.append(W_new)
    
    start = timer()
    # Run the feedforward on these weights
    y_pred,test_accuracy = sess.run(feedForward(x,y_true,W_all[-1],b), feed_dict={x: mnist.test.images, y_true: mnist.test.labels})    
    end = timer()
    
    print("For D = ",str(d),", Test accuracy = ",str(test_accuracy),", Time taken in seconds = ",str(round(end-start,4)))    

For D =  10 , Test accuracy =  0.1028 , Time taken in seconds =  0.4126
For D =  20 , Test accuracy =  0.1028 , Time taken in seconds =  0.1813
For D =  50 , Test accuracy =  0.1135 , Time taken in seconds =  0.4301
For D =  100 , Test accuracy =  0.1135 , Time taken in seconds =  0.1791
For D =  200 , Test accuracy =  0.1135 , Time taken in seconds =  0.182
For D =  DFull , Test accuracy =  0.1135 , Time taken in seconds =  0.1827


### 1.6 (Option 1)

In [0]:
# Save the pretrained weights for the analysis
pretrained_W = W

# Hyperparameters
learning_rate = 0.001
epochs = 100
minibatch_size = 350
num_iters = no_of_train_images // minibatch_size

## Main program starts from here

# Constructs a neural network with the specified inputs, outputs and hidden layers
# hidden_units is a list containing hidden units at every hidden layer
# the length of hidden_units list determines the no of hidden layers
# the output_nodes determine the no of classes to be predicted
# Currently use the "He" initialization for initializing the weights
def construct_network_withSVD(input_nodes, output_nodes, hidden_units,pretrained_w):
    no_of_hidden_layers = len(hidden_units)
    W = []
    b = []
    D = 20
    
    initializer = tf.contrib.layers.variance_scaling_initializer()

    for i in range(no_of_hidden_layers + 1):
        if i == 0:   # First layer
            # w = tf.Variable(initializer([input_nodes, hidden_units[i]]), dtype=tf.float32)
            bias = tf.Variable(tf.random_normal([hidden_units[i]]), dtype=tf.float32)
                    
            u, s, v = np.linalg.svd(pretrained_w[i],full_matrices=True)    
            
            u = tf.Variable(u[:,:D], dtype=tf.float32)
            v_hat = tf.Variable(tf.matmul(tf.linalg.diag(s[:D]) , v[:, :D],adjoint_b=True), dtype=tf.float32)
            w = tf.matmul(u,v_hat)

        elif i == no_of_hidden_layers:  # Last layer
            w = tf.Variable(initializer([hidden_units[i-1], output_nodes]), dtype=tf.float32)
            bias = tf.Variable(tf.random_normal([output_nodes]), dtype=tf.float32)

        else:  # Hidden layers
            w = tf.Variable(initializer([hidden_units[i-1], hidden_units[i]]), dtype=tf.float32)
            bias = tf.Variable(tf.random_normal([hidden_units[i]]), dtype=tf.float32)
            
            s,u,v = tf.linalg.svd(w,full_matrices=True)
            
            u = tf.Variable(u[:,:D], dtype=tf.float32)
            v_hat = tf.Variable(tf.matmul(tf.linalg.diag(s[:D]) , v[:, :D],adjoint_b=True), dtype=tf.float32)
            w = tf.matmul(u,v_hat)
    
        W.append(w)
        b.append(bias)
    
    return W,b


# Once the network is constructed, pass the input X in the network.
# Apply suitable activations in the hidden layers.
# Don't apply any activations on the last layer in this function as this function
# could be used for classification as well as regression problems
def solve_withSVD(x,W,b):
    n = len(W)
    y = []

    for i in range(n): # First layer
        if i == 0:
            y.append(tf.nn.relu(tf.matmul(x, W[i]) + b[i]))

        elif i == n-1: # Last layer
            y.append(tf.matmul(y[i-1],W[i]) + b[i])

        else: # Hidden layers
            y.append(tf.nn.relu(tf.matmul(y[i-1], W[i]) + b[i]))
  
    return y

## Construct the network using the given configuration 
W,b = construct_network_withSVD(input_nodes, output_nodes, hidden_units,pretrained_W) 

## "y" is the output of the network. Pass it through the softmax function
y = solve_withSVD(x,W,b)
y_pred = tf.nn.softmax(y[-1])

## Define the cost function to be optimized and the optimizer to be used
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y[-1],labels=y_true))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

## Calculate the accuracy of this classifier by calculating % of correctly classified images
correct_prediction = tf.equal(tf.argmax(y_pred,1), tf.argmax(y_true,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

tf.global_variables_initializer().run()

test_accuracy = 0

## Running the neural network
for e in range(epochs):
    if test_accuracy > 0.97:
        break 
    for _ in range(num_iters):
        batch_xs, batch_ys = mnist.train.next_batch(minibatch_size)
        sess.run(train_step, feed_dict={x: batch_xs, y_true: batch_ys})
  
        # Calculate the test accuracy after every epoch
        test_accuracy = sess.run(accuracy, feed_dict={x: mnist.test.images, y_true: mnist.test.labels})
        if test_accuracy > 0.97:
            break
    
print("Accuracy on Test images: "+ str(test_accuracy))


Epoch: 0 , Acc =  0.9134
Epoch: 1 , Acc =  0.9426
Epoch: 2 , Acc =  0.9567
Epoch: 3 , Acc =  0.961
Epoch: 4 , Acc =  0.9647
Epoch: 5 , Acc =  0.9706
Accuracy on Test images: 0.9706


In [0]:
sess.close()