In [35]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys

from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf
from tqdm import tqdm
import numpy as np


In [2]:
# Settings read by the graph-definition and training cells.
FLAGS_NET = {
    # Learning rate passed to the optimizer constructor.
    'optimizer_lr': 1e-4,
    # Optimizer class (not an instance); tf.train.GradientDescentOptimizer
    # can be substituted here.
    'optimizer': tf.train.AdamOptimizer,
    # When True, the graph also builds gradient ops for W_fc1 and the
    # training cell records them.
    'get_gradients': False,
    # Appended to the names of the .npz result files.
    'results_name_suffix': '_no_dropout',
}


# Auxiliary functions

In [4]:
def conv2d(x, W):
    """Convolve `x` with kernel `W` using a stride of 1 in every dimension.

    'SAME' padding is requested from tf.nn.conv2d.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2, downsampling it by 2X."""
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides,
                          padding='SAME')


def weight_variable(shape):
    """Return a tf.Variable of `shape` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Return a tf.Variable of `shape` whose entries all start at 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


# Graph definition

In [23]:
# Import data: read the MNIST files from the current directory with one-hot labels.
mnist = input_data.read_data_sets('./', one_hot=True)
# Create the model. Input placeholder: one flattened image per row.
x = tf.placeholder(tf.float32, [None, 784]) # 784=28*28

# Placeholder for the one-hot target labels (10 classes).
y_ = tf.placeholder(tf.float32, [None, 10])

# Build the graph for the deep net

# Reshape to use within a convolutional neural net.
# Last dimension is for "features" - there is only one here, since images are
# grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolutional layer - maps one grayscale image to 32 feature maps.
W_conv1 = weight_variable([5, 5, 1, 32])
# Each kernel has a spatial resolution of 5x5; the input tensor has 1 channel;
# 32 convolutional kernels are trained.
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

# Pooling layer - downsamples by 2X.
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer -- maps 32 feature maps to 64.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

# Second pooling layer.
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer 1 -- after 2 rounds of downsampling, our 28x28 image
# is down to 7x7x64 feature maps -- maps this to 1024 features.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

# Flatten the pooled feature maps so they can be fed to the matmul below.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout - controls the complexity of the model, prevents co-adaptation of
# features. keep_prob is a placeholder, so its value is supplied through
# feed_dict on every sess.run / train_step.run call.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Map the 1024 features to 10 classes, one for each digit
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

# Unnormalised class scores (logits); softmax is applied inside the loss op.
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2


# Necessities for the training process. Defining the loss: mean softmax
# cross-entropy between the labels y_ and the logits y_conv.
cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
# And now an operation that will compute the gradients for all the weights
# to minimize the loss cross_entropy. The optimizer class and learning rate
# come from FLAGS_NET.
optimizer = FLAGS_NET['optimizer'](learning_rate=FLAGS_NET['optimizer_lr'])

train_step = optimizer.minimize(cross_entropy)
if FLAGS_NET['get_gradients']:
    # Compute gradients of the loss, restricted to W_fc1 via var_list.
    # NOTE: per the TensorFlow docs compute_gradients returns a list of
    # (gradient, variable) pairs, so element [0][0] is the gradient tensor
    # and element [0][1] is W_fc1 itself.
    var_grads = optimizer.compute_gradients(cross_entropy, var_list=[W_fc1])
# Only for results monitoring: fraction of rows whose arg-max prediction
# matches the arg-max of the one-hot label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))



Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz


In [24]:
# Saver used by the training cell to write the './model' checkpoint and by the
# prediction cell to restore it.
saver = tf.train.Saver()


# Training loop
Skip this cell if a saved checkpoint (`./model`) is already available.

In [8]:
with tf.Session() as sess:
    # Train for 201 mini-batch steps; every 100 steps evaluate on random
    # 5000-sample subsets of the train and test sets, append the metrics to the
    # history lists, rewrite the .npz result file and save a checkpoint.
    sess.run(tf.global_variables_initializer())
    train_acc_all = []
    train_loss_all = []
    test_acc_all = []
    test_loss_all = []

    # Shared prefix of the result file names written below.
    results_prefix = ('results_baseline_' +
                      FLAGS_NET['optimizer'].__name__ +
                      '_lr=' + str(FLAGS_NET['optimizer_lr']))
    results_suffix = str(FLAGS_NET['results_name_suffix'])

    if FLAGS_NET['get_gradients']:
        grads_vars_all = []
        # compute_gradients returns (gradient, variable) pairs, so the gradient
        # tensor of W_fc1 is element [0][0]. The previous code fetched [0][1],
        # which is the variable itself, and therefore measured the weight
        # change rather than the gradient change.
        grad_W_fc1 = var_grads[0][0]
        # Fixed probe set on which the gradient is evaluated before and after
        # a training step.
        grad_probe_feed = {
            x: mnist.train.images[:1000],
            y_: mnist.train.labels[:1000],
            keep_prob: 1.0}

    for i in tqdm(range(201)):
        batch = mnist.train.next_batch(32)

        # NOTE(review): when gradients are requested a second batch is drawn
        # and the one above is discarded; kept as in the original -- confirm
        # whether this is intentional.
        if FLAGS_NET['get_gradients']:
            batch = mnist.train.next_batch(32)

        if i % 100 == 0:
            # Evaluate on random subsets drawn with replacement, dropout disabled.
            indices_train = np.random.choice(np.arange(len(mnist.train.labels)), 5000, replace=True)
            train_acc_, train_loss_ = sess.run([accuracy, cross_entropy],
                feed_dict={
                    x: mnist.train.images[indices_train],
                    y_: mnist.train.labels[indices_train],
                    keep_prob: 1.0})
            indices_test = np.random.choice(np.arange(len(mnist.test.labels)), 5000, replace=True)
            test_acc_, test_loss_ = sess.run([accuracy, cross_entropy],
                feed_dict={
                    x: mnist.test.images[indices_test],
                    y_: mnist.test.labels[indices_test],
                    keep_prob: 1.0})

            train_acc_all.append(train_acc_)
            train_loss_all.append(train_loss_)
            test_acc_all.append(test_acc_)
            test_loss_all.append(test_loss_)
            print('step %d, train acc=%.4f, train loss=%.4f, test acc=%.4f, test loss=%.4f, ' %
                  (i, train_acc_, train_loss_, test_acc_, test_loss_))

            # Save the metric histories collected so far (the file is
            # overwritten at every logging step).
            np.savez(results_prefix + results_suffix,
                     train_acc=np.array(train_acc_all),
                     train_loss=np.array(train_loss_all),
                     test_acc=np.array(test_acc_all),
                     test_loss=np.array(test_loss_all)
                     )

            # Record how much the W_fc1 gradient on the probe set changes
            # across one training step.
            if FLAGS_NET['get_gradients']:
                gradients_ = sess.run(grad_W_fc1, feed_dict=grad_probe_feed)
                # NOTE(review): this extra step uses keep_prob 0.5 while the
                # regular step below uses 1.0 -- confirm this is intended.
                train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
                gradients_new_ = sess.run(grad_W_fc1, feed_dict=grad_probe_feed)
                grads_vars_all.append(np.linalg.norm(gradients_new_-gradients_))

                # Save the gradient-change history. The keyword previously
                # referenced the undefined name `grads_varss_all`, which raised
                # NameError.
                np.savez(results_prefix + '_grads_vars' + results_suffix,
                         grads_vars=np.array(grads_vars_all),
                         )
            saver.save(sess, './model')


        # Regular optimisation step on the current mini-batch (no dropout).
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})


  0%|          | 0/201 [00:00<?, ?it/s]

step 0, train acc=0.0508, train loss=4.3223, test acc=0.0502, test loss=4.2916, 


 50%|████▉     | 100/201 [00:06<00:03, 31.43it/s]

step 100, train acc=0.8650, train loss=0.4539, test acc=0.8724, test loss=0.4302, 


100%|██████████| 201/201 [00:14<00:00,  4.53it/s]

step 200, train acc=0.8906, train loss=0.3401, test acc=0.8970, test loss=0.3375, 





In [22]:
# tf.reset_default_graph()

## Prediction 

In [34]:
with tf.Session() as sess:

    # Initialise all variables, then load the values stored in './model'.
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, './model')

    # Accuracy / loss on a random 5000-sample subset (with replacement) of the
    # training set, dropout disabled.
    indices_train = np.random.choice(np.arange(len(mnist.train.labels)), 5000, replace=True)
    train_feed = {x: mnist.train.images[indices_train],
                  y_: mnist.train.labels[indices_train],
                  keep_prob: 1.0}
    train_acc_, train_loss_ = sess.run([accuracy, cross_entropy], feed_dict=train_feed)

    # Same measurement on a random subset of the test set.
    indices_test = np.random.choice(np.arange(len(mnist.test.labels)), 5000, replace=True)
    test_feed = {x: mnist.test.images[indices_test],
                 y_: mnist.test.labels[indices_test],
                 keep_prob: 1.0}
    test_acc_, test_loss_ = sess.run([accuracy, cross_entropy], feed_dict=test_feed)

    summary_format = 'train acc=%.4f, train loss=%.4f, test acc=%.4f, test loss=%.4f, '
    print(summary_format % (train_acc_, train_loss_, test_acc_, test_loss_))

    # Logits for the first 1000 test images.
    predictions = sess.run(y_conv,
                           feed_dict={x: mnist.test.images[:1000], keep_prob: 1.})

    # Print the ground-truth class next to the arg-max prediction, one line per image.
    true_classes = np.argmax(mnist.test.labels[:1000], axis=1)
    predicted_classes = np.argmax(predictions, axis=1)
    for lbl_, pred_ in zip(true_classes, predicted_classes):
        print("GT class: %d. Prediction: %d. " % (lbl_, pred_))

INFO:tensorflow:Restoring parameters from ./model
train acc=0.8902, train loss=0.3524, test acc=0.8936, test loss=0.3433, 
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4 9 6 6 5 4 0 7 4 0 1 3 1 3 4 7 2 7
 1 2 1 1 7 4 2 3 5 1 2 4 4 6 3 5 5 6 0 4 1 9 5 7 8 9 3 7 4 6 4 3 0 7 0 2 9
 1 7 3 2 9 7 7 6 2 7 8 4 7 3 6 1 3 6 9 3 1 4 1 7 6 9 6 0 5 4 9 9 2 1 9 4 8
 7 3 9 7 4 4 4 9 2 5 4 7 6 7 9 0 5 8 5 6 6 5 7 8 1 0 1 6 4 6 7 3 1 7 1 8 2
 0 2 9 9 5 5 1 5 6 0 3 4 4 6 5 4 6 5 4 5 1 4 4 7 2 3 2 7 1 8 1 8 1 8 5 0 8
 9 2 5 0 1 1 1 0 9 0 3 1 6 4 2 3 6 1 1 1 3 9 5 2 9 4 5 9 3 9 0 3 6 5 5 7 2
 2 7 1 2 8 4 1 7 3 3 8 8 7 9 2 2 4 1 5 9 8 7 2 3 0 4 4 2 4 1 9 5 7 7 2 8 2
 6 8 5 7 7 9 1 8 1 8 0 3 0 1 9 9 4 1 8 2 1 2 9 7 5 9 2 6 4 1 5 8 2 9 2 0 4
 0 0 2 8 4 7 1 2 4 0 2 7 4 3 3 0 0 3 1 9 6 5 2 5 9 2 9 3 0 4 2 0 7 1 1 2 1
 5 3 3 9 7 8 6 5 6 1 3 8 1 0 5 1 3 1 5 5 6 1 8 5 1 7 9 4 6 2 2 5 0 6 5 6 3
 7 2 0 8 8 5 4 1 1 4 0 3 3 7 6 1 6 2 1 9 2 8 6 1 9 5 2 5 4 4 2 8 3 8 2 4 5
 0 3 1 7 7 5 7 9 7 1 9 2 1 4 2 9 2 0 4 9 1 4 8 1 8 4

GT class: 9. Prediction: 9. 
GT class: 4. Prediction: 4. 
GT class: 5. Prediction: 5. 
GT class: 9. Prediction: 9. 
GT class: 3. Prediction: 3. 
GT class: 9. Prediction: 9. 
GT class: 0. Prediction: 0. 
GT class: 3. Prediction: 3. 
GT class: 6. Prediction: 8. 
GT class: 5. Prediction: 7. 
GT class: 5. Prediction: 3. 
GT class: 7. Prediction: 7. 
GT class: 2. Prediction: 2. 
GT class: 2. Prediction: 2. 
GT class: 7. Prediction: 7. 
GT class: 1. Prediction: 1. 
GT class: 2. Prediction: 2. 
GT class: 8. Prediction: 8. 
GT class: 4. Prediction: 4. 
GT class: 1. Prediction: 1. 
GT class: 7. Prediction: 7. 
GT class: 3. Prediction: 3. 
GT class: 3. Prediction: 3. 
GT class: 8. Prediction: 8. 
GT class: 8. Prediction: 8. 
GT class: 7. Prediction: 7. 
GT class: 9. Prediction: 9. 
GT class: 2. Prediction: 2. 
GT class: 2. Prediction: 2. 
GT class: 4. Prediction: 4. 
GT class: 1. Prediction: 1. 
GT class: 5. Prediction: 3. 
GT class: 9. Prediction: 3. 
GT class: 8. Prediction: 8. 
GT class: 7. P