## Imports

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
from numpy import linalg as LA

  from ._conv import register_converters as _register_converters


In [2]:
# Create the MNIST dataset object used by every later cell; `data_dir` is given
# relative to the notebook's working directory.
from mnist import MNIST
data = MNIST(data_dir="data/MNIST/")

## Copy some of the data-dimensions for convenience.
img_size_flat is the number of pixels in an image when the image is flattened, img_size_flat = 28 * 28 = 784.

img_shape is the shape of an image in 2-D: 28 by 28 pixels (two dimensions, not the product 28 * 28).

num_classes is the number of output classes for the images, num_classes = 10

In [3]:
# Short module-level aliases for the dataset dimensions.
img_size_flat = data.img_size_flat  # pixels in one flattened image
img_shape = data.img_shape  # 2-D shape of one image
num_classes = data.num_classes  # number of output classes

## Define the placeholders for training and testing data
x is a placeholder variable whose dimensions are [None, img_size_flat] through which the images are fed to the model for training as well as testing.

y_true is the placeholder through which the correct class of each training or testing example is fed to the network.

In [4]:
# Flattened input images, one per row; `None` leaves the batch size open.
x = tf.placeholder(tf.float32, [None, img_size_flat])
# Labels with one column per class.
y_true = tf.placeholder(tf.float32, [None, num_classes])
# Class index of each label row (position of the largest entry along axis 1).
y_true_cls = tf.argmax(y_true, axis =1)

## Define the model
Our model is a multi-layer perceptron with 4 hidden layers having 1000, 1000, 500 and 200 neurons respectively. For simplicity, biases are not used in the model.

In [5]:
# Number of units in each of the four hidden layers, input side first.
hidden1 = 1000
hidden2 = 1000
hidden3 = 500
hidden4 = 200

## Variables to be optimized
w1-w5 are 5 trainable variables that will be optimized during the training.

In [6]:
# Dense network graph. Every layer is a bias-free matrix multiply; the four
# hidden layers apply ReLU. All weight matrices are initialized from a
# truncated normal with stddev 0.05.

# Layer 1: img_size_flat -> hidden1
w1 = tf.Variable(tf.truncated_normal([img_size_flat,hidden1], stddev = 0.05))
h1 = tf.nn.relu(tf.matmul(x,w1))

# Layer 2: hidden1 -> hidden2
w2 = tf.Variable(tf.truncated_normal([hidden1,hidden2],stddev = 0.05))
h2 = tf.nn.relu(tf.matmul(h1,w2))

# Layer 3: hidden2 -> hidden3
w3 = tf.Variable(tf.truncated_normal([hidden2,hidden3], stddev = 0.05))
h3 = tf.nn.relu(tf.matmul(h2,w3))

# Layer 4: hidden3 -> hidden4
w4 = tf.Variable(tf.truncated_normal([hidden3,hidden4],stddev = 0.05))
h4 = tf.nn.relu(tf.matmul(h3,w4))

# Output layer: hidden4 -> num_classes, left as raw logits (no activation).
w5 = tf.Variable(tf.truncated_normal([hidden4,num_classes], stddev = 0.05))
logits = tf.matmul(h4,w5)

# Class probabilities derived from the logits.
y_pred = tf.nn.softmax(logits)

# Predicted class index: the most probable class in each row.
y_pred_cls = tf.argmax(y_pred, axis=1)

## Calculation of model loss
To understand and quantify the performance of the model, a cross-entropy loss is defined over the output of the model and the true class values.

In [7]:
# Per-example cross-entropy, computed from the raw logits (not from y_pred).
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,labels=y_true)
# Scalar training objective: the mean of the per-example losses.
cost = tf.reduce_mean(cross_entropy)

In [8]:
# One gradient-descent step on `cost` with a fixed learning rate of 0.1.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
# Accuracy is the fraction of rows whose predicted class equals the true class.
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [9]:
# Single session shared by every later cell; it is never closed in this notebook.
session = tf.Session()
# Initialize the variables defined so far (w1..w5). Variables created in later
# cells are initialized separately where they are defined.
session.run(tf.global_variables_initializer())

In [10]:
# Number of training examples requested per optimization step (read by optimize()).
batch_size = 100

In [11]:
def optimize(num_iterations):
    """Run `num_iterations` training steps on the dense network.

    Each step asks `data.random_batch` for `batch_size` examples and runs the
    module-level `optimizer` op once on that batch through the shared
    `session`. Nothing is returned; the effect is on the graph variables.
    """
    for _step in range(num_iterations):
        # random_batch returns three values; only the first two are fed.
        images, labels, _unused = data.random_batch(batch_size=batch_size)
        session.run(optimizer, feed_dict={x: images, y_true: labels})

In [12]:
# Feed dictionary used by print_accuracy() to evaluate on the test set.
# NOTE(review): y_true_cls is itself computed from y_true via argmax, so
# feeding it explicitly overrides that op. It looks redundant provided
# data.y_test_cls agrees with data.y_test -- confirm before removing.
feed_dict_test = {x: data.x_test,
                  y_true: data.y_test,
                  y_true_cls: data.y_test_cls}

In [13]:
def print_accuracy():
    """Evaluate the current `accuracy` op on the test feed and print it.

    `accuracy` is looked up as a module-level name at call time, so after a
    later cell rebinds it (as the pruning sections do) this function reports
    the accuracy of whichever graph the name points to at that moment.
    """
    test_accuracy = session.run(accuracy, feed_dict=feed_dict_test)
    message = "Accuracy on test-set: {0:.1%}".format(test_accuracy)
    print(message)

## Performance after 100 optimization iterations

In [14]:
# No optimization iterations have been run before this cell; this performs the first 100.
optimize(num_iterations=100)

In [15]:
# Test-set accuracy of the dense network after the first 100 iterations.
print_accuracy()

Accuracy on test-set: 85.8%


## Performance after 2000 optimization iterations

In [16]:
# We have already performed 100 iterations, so 1900 more brings the total to 2000.
optimize(num_iterations=1900)

In [17]:
# Test-set accuracy of the dense network after 2000 iterations in total.
print_accuracy()

Accuracy on test-set: 97.4%


## Set the value of k, k = percentage sparsity

In [18]:
# Weight-pruning level in percent: within each hidden-layer matrix, weights
# whose magnitude is below the k1-th percentile are set to zero.
k1 = 95

In [19]:
# Pull the trained weights out of the session as NumPy arrays. The four
# hidden-layer matrices are flattened to 1-D; the output-layer matrix w5
# keeps its 2-D shape.
wt1, wt2, wt3, wt4 = [
    np.asarray(session.run(trained)).flatten() for trained in (w1, w2, w3, w4)
]
wt5 = np.asarray(session.run(w5))

In [20]:
def _zero_small_weights(flat_weights, percent):
    """Zero, in place, every entry whose magnitude is below the `percent`-th
    percentile of magnitudes in `flat_weights`; return the same array."""
    magnitudes = np.abs(flat_weights)
    cutoff = np.percentile(magnitudes, percent)
    flat_weights[magnitudes < cutoff] = 0
    return flat_weights


# Prune each hidden-layer matrix independently, then restore its 2-D shape and
# wrap it as a tensor. Note that wt1..wt4 are rebound from NumPy arrays to
# tensors here, so this cell cannot be re-run without re-running the previous one.
wt1 = tf.convert_to_tensor(
    _zero_small_weights(wt1, k1).reshape([img_size_flat, hidden1]))

wt2 = tf.convert_to_tensor(
    _zero_small_weights(wt2, k1).reshape([hidden1, hidden2]))

wt3 = tf.convert_to_tensor(
    _zero_small_weights(wt3, k1).reshape([hidden2, hidden3]))

wt4 = tf.convert_to_tensor(
    _zero_small_weights(wt4, k1).reshape([hidden3, hidden4]))

In [21]:
# Second forward graph, identical in structure to the dense one but built on
# the weight-pruned matrices. It reuses the same input placeholder `x`.
# Each wtN name is rebound from a tensor to a tf.Variable initialized from it.
wt1 = tf.Variable(wt1)
ht1 = tf.nn.relu(tf.matmul(x,wt1))

wt2 = tf.Variable(wt2)
ht2 = tf.nn.relu(tf.matmul(ht1,wt2))

wt3 = tf.Variable(wt3)
ht3 = tf.nn.relu(tf.matmul(ht2,wt3))

wt4 = tf.Variable(wt4)
ht4 = tf.nn.relu(tf.matmul(ht3,wt4))

# Output layer uses the unpruned copy of w5 taken earlier.
wt5 = tf.Variable(wt5)
logits1 = tf.matmul(ht4,wt5)

In [22]:
# Rebinds y_pred / y_pred_cls to the weight-pruned graph; the dense-model ops
# are no longer reachable through these names after this cell.
y_pred = tf.nn.softmax(logits1)

y_pred_cls = tf.argmax(y_pred, axis=1)

In [23]:
# Rebinds `accuracy`, which print_accuracy() reads at call time, to the
# weight-pruned graph.
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [24]:
# Initialize only the weight-pruned variables, one at a time, in layer order.
# The trained w1..w5 are not re-initialized.
for pruned_var in (wt1, wt2, wt3, wt4, wt5):
    session.run(pruned_var.initializer)

In [25]:
# Test-set accuracy of the weight-pruned network (`accuracy` now refers to that graph).
print_accuracy()

Accuracy on test-set: 38.4%


## Unit/Neuron Pruning

In [37]:
# Unit-pruning level in percent: within each hidden layer, units whose weight
# column has an L2 norm below the k2-th percentile are zeroed.
k2 = 95

In [38]:
# Fresh NumPy copies of the trained (unpruned) weights, kept 2-D so that whole
# columns can be addressed.
wt_1, wt_2, wt_3, wt_4, wt_5 = [
    np.asarray(session.run(trained)) for trained in (w1, w2, w3, w4, w5)
]

In [39]:
# L2 norm of every column of each hidden-layer matrix. The matrices are laid
# out [inputs, units], so this is one value per unit.
norm1, norm2, norm3, norm4 = [
    LA.norm(layer_weights, axis=0) for layer_weights in (wt_1, wt_2, wt_3, wt_4)
]

In [40]:
# Zero, in place, every column whose norm is below the k2-th percentile of
# that layer's column norms. Each layer gets its own threshold.
for unit_norms, layer_weights in (
    (norm1, wt_1),
    (norm2, wt_2),
    (norm3, wt_3),
    (norm4, wt_4),
):
    weak_units = unit_norms < np.percentile(unit_norms, k2)
    layer_weights[:, weak_units] = 0

In [41]:
# Wrap the unit-pruned arrays (and the untouched output matrix) as tensors,
# in layer order. The wt_N names are rebound from NumPy arrays to tensors.
wt_1, wt_2, wt_3, wt_4, wt_5 = [
    tf.convert_to_tensor(pruned) for pruned in (wt_1, wt_2, wt_3, wt_4, wt_5)
]

In [42]:
# Third forward graph, identical in structure to the dense one but built on
# the unit-pruned matrices. It reuses the same input placeholder `x`.
# Each wt_N name is rebound from a tensor to a tf.Variable initialized from it.
wt_1 = tf.Variable(wt_1)
ht_1 = tf.nn.relu(tf.matmul(x,wt_1))

wt_2 = tf.Variable(wt_2)
ht_2 = tf.nn.relu(tf.matmul(ht_1,wt_2))

wt_3 = tf.Variable(wt_3)
ht_3 = tf.nn.relu(tf.matmul(ht_2,wt_3))

wt_4 = tf.Variable(wt_4)
ht_4 = tf.nn.relu(tf.matmul(ht_3,wt_4))

# Output layer uses the unpruned copy of w5.
wt_5 = tf.Variable(wt_5)
logits2 = tf.matmul(ht_4,wt_5)

In [43]:
# Rebinds y_pred / y_pred_cls again, this time to the unit-pruned graph.
y_pred = tf.nn.softmax(logits2)

y_pred_cls = tf.argmax(y_pred, axis=1)

In [44]:
# Rebinds `accuracy`, which print_accuracy() reads at call time, to the
# unit-pruned graph.
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [45]:
# Initialize only the unit-pruned variables, one at a time, in layer order.
# The trained w1..w5 are not re-initialized.
for pruned_var in (wt_1, wt_2, wt_3, wt_4, wt_5):
    session.run(pruned_var.initializer)

In [46]:
# Test-set accuracy of the unit-pruned network (`accuracy` now refers to that graph).
print_accuracy()

Accuracy on test-set: 11.0%
