### Questions

- Build a DNN with five hidden layers of 100 neurons each, He initialization, and the ELU activation function. 
- Using Adam optimization and early stopping, try training it on MNIST but only on digits 0 to 4, as we will use transfer learning for digits 5 to 9 in the next exercise. You will need a softmax output layer with five neurons, and as always make sure to save checkpoints at regular intervals and save the final model so you can reuse it later. 


### Data Processing

In [1]:
from tensorflow.examples.tutorials import mnist
mnist_data = mnist.input_data.read_data_sets("/tmp/data/")

  from ._conv import register_converters as _register_converters
W0819 14:04:36.623637 4760491456 deprecation.py:323] From <ipython-input-1-f648f33866b9>:2: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
W0819 14:04:36.625555 4760491456 deprecation.py:323] From /Users/devbhadurkhadka/.pyenv/versions/anaconda3-5.2.0/envs/scikit_practice/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Please write your own downloading logic.
W0819 14:04:36.627747 4760491456 deprecation.py:323] From /Users/devbhadurkhadka/.pyenv/versions/anaconda3-5.2.0/envs/scikit_practice/lib/python3.6/site-packages/tensorflow/contrib/lea

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz


W0819 14:04:46.018750 4760491456 deprecation.py:323] From /Users/devbhadurkhadka/.pyenv/versions/anaconda3-5.2.0/envs/scikit_practice/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use tf.data to implement this functionality.


Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz


W0819 14:04:49.044633 4760491456 deprecation.py:323] From /Users/devbhadurkhadka/.pyenv/versions/anaconda3-5.2.0/envs/scikit_practice/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [2]:
import numpy as np


train_indxs_upto_4 = mnist_data.train.labels<5
train_images_upto_4 = mnist_data.train.images[train_indxs_upto_4]
train_labels_upto_4 = mnist_data.train.labels[train_indxs_upto_4]

val_indxs_upto_4 = mnist_data.validation.labels<5
val_images_upto_4 = mnist_data.validation.images[val_indxs_upto_4]
val_labels_upto_4 = mnist_data.validation.labels[val_indxs_upto_4]

test_indxs_upto_4 = mnist_data.test.labels<5
test_images_upto_4 = mnist_data.test.images[test_indxs_upto_4]
test_labels_upto_4 = mnist_data.test.labels[test_indxs_upto_4]


### Function Definitions 

In [3]:
import tensorflow as tf
from tensorflow.contrib.layers import variance_scaling_initializer as he_initializer
from tensorflow.nn import sparse_softmax_cross_entropy_with_logits as softmax_xentropy
from tensorflow.layers import dense
import numpy as np


def get_leaky_relu(alpha):
    return lambda z, name=None: tf.maximum(alpha*z,z, name=name)

def get_logits(x, layer_sizes):
    
    activation = get_leaky_relu(alpha=0.3)
#     activation = tf.nn.elu
    initializer = he_initializer()
    
    with tf.name_scope("DNN"):
        layer_input = x
        for i, layer_size in enumerate(layer_sizes[2:]):
            layer_name = "input" if i==0 else \
                ("output" if i==len(layer_sizes)-1 else "hidden%d"%i)
            
            act = None if i==len(layer_sizes)-1 else activation
            
            layer_input = dense(layer_input, layer_size, name=layer_name, 
                            kernel_initializer=initializer, activation= act)
            
    return layer_input


def get_softmax_xentropy_loss(logits,y):
    with tf.name_scope("loss"):
        xentropy = softmax_xentropy(labels=y, logits=logits)
        return tf.reduce_mean(xentropy)

def get_optimizer(loss, learning_rate=0.01):
    with tf.name_scope("train"):
        optimizer =  tf.train.AdamOptimizer (learning_rate=learning_rate)
        optimizer_op = optimizer.minimize(loss)
    return optimizer_op

def get_validation_score(logits,y):
    with tf.name_scope("validation"):
        preds = tf.nn.in_top_k(logits,y,1)
        return tf.reduce_mean(tf.cast(preds, dtype=np.float32))
    
def get_batch(x,y,batch_size):
    n_batches = len(y)//batch_size + 1
    for i in range(n_batches):
        indxes = np.random.choice(len(y), size=batch_size, replace=False)
        yield x[indxes], y[indxes]


### Training

In [4]:
from my_libs.tf_checkpoint import CheckpointSaver
from my_libs.tf_graph_saver import ScalerGraphSaver

def train_model(train_ds, validation_ds, restore_checkpoint=True):
    layer_sizes = [100,100,100,100,100,5]
    learning_rate = 0.01
    n_epochs = 300
    batch_size = 50
    MAX_EPOCHS_WO_IMPROVEMENT = 100

    train_x, train_y = train_ds
    val_x, val_y = validation_ds
    
    tf.reset_default_graph()
    
    var_epoch = tf.Variable(initial_value=0,dtype=np.int16, name="epoch")
    inc_epoch = tf.compat.v1.assign_add(var_epoch,1, name="inc_epoch")
    
    
    
    
    x = tf.placeholder(shape=(None, 28*28), dtype=np.float32,name="x")
    y = tf.placeholder(shape=(None), dtype=np.int32,name="y")
    logits = get_logits(x, layer_sizes)
    
    loss_op = get_softmax_xentropy_loss(logits,y)
    optimizer = get_optimizer(loss_op, learning_rate)
    validation_score = get_validation_score(logits,y)
    
    var_best_epoch = tf.Variable((0,0), dtype=np.float32, name="best_score")
    best_epoch_holder = tf.placeholder(shape=[2], dtype=np.float32, name="best_score_holder")
    update_best_epoch = tf.assign(var_best_epoch, best_epoch_holder)
    
    with tf.Session() as sess:
        chk_saver = CheckpointSaver()
        
        epoch_start=0
        best_score = 0
        best_score_epoch = 0
        if restore_checkpoint and chk_saver.restore_checkpoint("excercise1_epoch"):
            epoch_start = var_epoch.eval(sess)
            best_score_epoch, best_score = var_best_epoch.eval(sess)
            print("restored at epoch %d"%epoch_start)
        else:
            init = tf.global_variables_initializer()
#             init = tf.variables_initializer([var_epoch])
            sess.run(init)
        
        with ScalerGraphSaver("excercise1_graph") as graph_saver:
             for epoch in range(epoch_start, n_epochs):
                    
                if best_score>0 and epoch-best_score_epoch>MAX_EPOCHS_WO_IMPROVEMENT:
                    print("No improvement in %d epoches restoring best epoch" \
                          %MAX_EPOCHS_WO_IMPROVEMENT)
                    print("epoch %d, score %f"%(best_score_epoch, best_score))
                    if chk_saver.restore_checkpoint("excercise1_best_epoch"):
                        break 
                
                for batch_x, batch_y in get_batch(train_x,train_y, batch_size):
                    feed_dict={x:batch_x,y:batch_y}
                    loss,_ = sess.run([loss_op, optimizer], feed_dict = feed_dict)
                
                
                
                if (epoch>0 and (epoch%10==0 or epoch==n_epochs-1)):
                    chk_saver.save_checkpoint("excercise1_epoch")
                    
                    graph_saver.log_summary("batch_loss", loss_op, step=epoch, 
                                            feed_dict=feed_dict)
                    graph_saver.log_summary("validation_loss", loss_op, step=epoch, 
                                            feed_dict={x:val_x,y:val_y})
                    
                    val_score = validation_score.eval(session=sess, 
                                                      feed_dict={x:val_x,y:val_y}) 
                    graph_saver.log_summary("validation_accuracy", validation_score, 
                                            step=epoch, feed_dict={x:val_x,y:val_y})
                    
                    if val_score>best_score:
                        print("best epoch %d, score %f"%(epoch, val_score))
                        best_score = val_score
                        sess.run(update_best_epoch, 
                                 feed_dict={best_epoch_holder:(epoch,val_score)})
                        best_score_epoch = epoch
                        chk_saver.save_checkpoint("excercise1_best_epoch")
                    
                      
                    
                 
                sess.run(inc_epoch)   
        
        
        
    def predict(test_x):
        with tf.Session() as sess:
            chk_saver = CheckpointSaver()
            chk_saver.restore_checkpoint("excercise1_epoch")
            pred_op = tf.math.argmax(logits, axis=1)
            
            return sess.run(pred_op, feed_dict={x:test_x})
            
            
         
    return predict
    

In [5]:
predict = train_model((train_images_upto_4,train_labels_upto_4),
                      (val_images_upto_4,val_labels_upto_4), restore_checkpoint=True)

predictions = predict(test_images_upto_4)
accuracy = np.mean(test_labels_upto_4==predictions)
print("test accuracy: %f"%accuracy)

W0819 14:05:27.398180 4760491456 deprecation.py:323] From <ipython-input-3-de97af167ece>:25: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dense instead.
W0819 14:05:28.313132 4760491456 deprecation.py:323] From /Users/devbhadurkhadka/.pyenv/versions/anaconda3-5.2.0/envs/scikit_practice/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0819 14:05:28.850306 4760491456 deprecation_wrapper.py:119] From /Volumes/Projects/Machine Learning/tensorflow_practice/my_libs/tf_checkpoint.py:8: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0819 14:05:28.851590 4760491456 deprecation_wrapper.py:119] From /Volu

restored at epoch 170
No improvement in 100 epoches restoring best epoch
epoch 70, score 0.987881
test accuracy: 0.987741
