- Create a new DNN that reuses all the pre-trained hidden layers of the previous model, freezes them, and replaces the softmax output layer with a fresh new one. 
- Train this new DNN on digits 5 to 9, using only 100 images per digit, and time how long it takes. Despite this small number of examples, can you achieve high precision? 
- Try caching the frozen layers, and train the model again: how much faster is it now? 
- Try again reusing just four hidden layers instead of five. Can you achieve a higher precision? 
- Now unfreeze the top two hidden layers and continue training: can you get the model to perform even better?

### Get Data

In [100]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np

(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype(np.float32).reshape(-1, 28*28)/255.0

train_indxes_5to9 = y_train>=5
train_x_5to9 = x_train[train_indxes_5to9]
train_y_5to9 = y_train[train_indxes_5to9]-5


def get_random_indxs(y_train, group_value, count=10):
    train_indxs = np.arange(len(y_train), dtype=np.int32)
    group_indxs = train_indxs[y_train == group_value]
    indxs = np.random.choice(group_indxs,count)
    return indxs

def get_samples_of_each_group(y_train, count=400):
    rand_indxs = np.array([], dtype=np.int32)
    for group_val in np.unique(y_train):
        rand_indxs=np.r_[rand_indxs,get_random_indxs(y_train,group_val, count)]
        
#     np.random.shuffle(rand_indxs)
    return rand_indxs

rand_indxs = get_samples_of_each_group(train_y_5to9, count=500)
train_x_5to9 = train_x_5to9[rand_indxs]
train_y_5to9 = train_y_5to9[rand_indxs]


train_x_5to9, val_x_5to9, train_y_5to9, val_y_5to9 = \
    train_test_split(train_x_5to9, train_y_5to9, test_size=.66, stratify=train_y_5to9)

val_x_5to9, test_x_5to9, val_y_5to9, test_y_5to9 = \
    train_test_split(val_x_5to9,val_y_5to9, test_size=.6, stratify=val_y_5to9)

In [104]:
np.unique(val_y_5to9, return_counts=True)

(array([0, 1, 2, 3, 4], dtype=uint8), array([132, 132, 132, 132, 132]))

### Create DNN_Classifier_Transfer
which will transfer learning of DNN_Classifier from saved file

In [133]:
from imp import reload
import my_libs.dnn
reload(my_libs.dnn)
from my_libs.dnn import get_leaky_relu
import tensorflow as tf
from my_libs.dnn import DNN_Classifier, get_optimizer_op, get_validation_score, \
    get_softmax_xentropy_loss, get_batch
from tensorflow.train import AdamOptimizer, MomentumOptimizer

class DNN_Classifier_Transfer(DNN_Classifier):
    def __init__(self, checkpoint_name, use_hidden_layers=0):
        DNN_Classifier.__init__(self)
        self._checkpoint_name = checkpoint_name
        self._batch_norm_update_ops = None
        self.use_hidden_layers = use_hidden_layers
        
    
    def _initialize_session_and_graph(self):
        DNN_Classifier._restore_graph(self,self._checkpoint_name)
        
        self.restore_n_hidden_layers()
        
         
        trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 
                                                scope="output")
        self._batch_norm_update_ops = self._graph.get_collection(tf.GraphKeys.UPDATE_OPS)
        
        for i in range(self.n_hidden_layers, self.use_hidden_layers, -1):
            layer_scope = "hidden%d"%(i)
            layer_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 
                                                scope=layer_scope)
            trainable_variables = trainable_variables+layer_variables
            
        for i in range(self.n_hidden_layers, self.use_hidden_layers, -1):
            if len(self._batch_norm_update_ops)>0:
                batch_norm_scope = "batch_normalization_%d"%(i-1)
                batch_norm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 
                                                    scope= batch_norm_scope)
                trainable_variables = trainable_variables+batch_norm_variables
            
            
        print(trainable_variables)
        
        optimizer = AdamOptimizer(0.01, name="adam2")
        self._optimizer_op = optimizer.minimize(self._loss, var_list=trainable_variables)
        
        DNN_Classifier._restore_session(self, self._checkpoint_name)
        return self._graph
    
    def fit(self, x, y, val_x, val_y):
        self._initialize_session_and_graph()
        
        with self._session.as_default() as sess:
            for epoch in range(100):
                for batch_x, batch_y in get_batch(x,y, 10):
                    ops = [self._loss, self._optimizer_op]
                    if len(self._batch_norm_update_ops)>0:
                        opts = ops+self._batch_norm_update_ops
                        
                    res = sess.run(ops, 
                            feed_dict={
                                self._x: batch_x,
                                self._y: batch_y,
                                self._is_training: True
                            })
                
                tloss = res[0]
                score, vloss = sess.run([self._validation_score, self._loss], feed_dict={
                                self._x: val_x,
                                self._y: val_y,
                                self._is_training: False
                            })
                print("epoch: %d, train loss: %f, score: %f, loss: %f"%(epoch, tloss, score, vloss))
    
    def restore_n_hidden_layers(self):
        i=1
        tensors = []
        while True:
            try:       
                tensor = self._graph.get_tensor_by_name("DNN/hiden%d_out:0"%i)
                tensors.append(tensor)
                i+=1
            except:
                break
        self.n_hidden_layers = i-1
        
        

In [138]:
# np.random.seed(5)
# tf.random.set_random_seed(5)

transfer_classifier = DNN_Classifier_Transfer("Mnist-0to4-best1", 
                                              use_hidden_layers=0)
transfer_classifier.fit(train_x_5to9, train_y_5to9, val_x_5to9, val_y_5to9)


[<tf.Variable 'output/kernel:0' shape=(100, 5) dtype=float32_ref>, <tf.Variable 'output/bias:0' shape=(5,) dtype=float32_ref>, <tf.Variable 'hidden5/kernel:0' shape=(100, 100) dtype=float32_ref>, <tf.Variable 'hidden5/bias:0' shape=(100,) dtype=float32_ref>, <tf.Variable 'hidden4/kernel:0' shape=(100, 100) dtype=float32_ref>, <tf.Variable 'hidden4/bias:0' shape=(100,) dtype=float32_ref>, <tf.Variable 'hidden3/kernel:0' shape=(100, 100) dtype=float32_ref>, <tf.Variable 'hidden3/bias:0' shape=(100,) dtype=float32_ref>, <tf.Variable 'hidden2/kernel:0' shape=(100, 100) dtype=float32_ref>, <tf.Variable 'hidden2/bias:0' shape=(100,) dtype=float32_ref>, <tf.Variable 'hidden1/kernel:0' shape=(784, 100) dtype=float32_ref>, <tf.Variable 'hidden1/bias:0' shape=(100,) dtype=float32_ref>]
epoch: 0, train loss: 0.894499, score: 0.598485, loss: 1.125900
epoch: 1, train loss: 0.372319, score: 0.681818, loss: 2.012900
epoch: 2, train loss: 0.576928, score: 0.721212, loss: 0.885489
epoch: 3, train loss:

In [124]:
transfer_classifier = DNN_Classifier_Transfer("Mnist-0to4-best_batch_norm", 
                                              use_hidden_layers=4)
graph = transfer_classifier._initialize_session_and_graph()

var1= graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 
                                                scope="output")[1]
# tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="batch_normalization")
# graph.get_collection(scope="DNN/batch_normalization")

[<tf.Variable 'output/kernel:0' shape=(120, 5) dtype=float32_ref>, <tf.Variable 'output/bias:0' shape=(5,) dtype=float32_ref>, <tf.Variable 'hidden5/kernel:0' shape=(120, 120) dtype=float32_ref>, <tf.Variable 'hidden5/bias:0' shape=(120,) dtype=float32_ref>, <tf.Variable 'batch_normalization_4/gamma:0' shape=(120,) dtype=float32_ref>, <tf.Variable 'batch_normalization_4/beta:0' shape=(120,) dtype=float32_ref>]


<bound method RefVariable.value of <tf.Variable 'output/bias:0' shape=(5,) dtype=float32_ref>>

### Create DNN_Classifier_Frozen
- Same as DNN_Classifier_Transfer but it will pre calculate output of previous layer for all data

In [135]:
from imp import reload
import my_libs
reload(my_libs.dnn)
from my_libs.dnn import DNN_Classifier

class DNN_Classifier_Frozen(DNN_Classifier_Transfer):
        
    def _initialize_session_and_graph(self):
        DNN_Classifier_Transfer._initialize_session_and_graph(self)
        frozen_output_name = "DNN/hiden%d_out:0"%(self.use_hidden_layers)
        self._frozen_out = self._graph.get_tensor_by_name(frozen_output_name)
        ## replace input placeholder _x with output of hidden layer _frozen_out
        self._old_x = self._x
        self._x = self._frozen_out
    
    
    def fit(self, x, y, val_x, val_y):
        self._initialize_session_and_graph()
        with self._session.as_default() as sess:
            self._x_frozen_out = self._frozen_out.eval(session=sess,feed_dict={self._old_x:x})
            self._val_x_frozen_out = self._frozen_out.eval(feed_dict={self._old_x:val_x})
            
        return DNN_Classifier_Transfer.fit(self, self._x_frozen_out, y, self._val_x_frozen_out, val_y)
    

In [137]:
# np.random.seed(0)
# tf.random.set_random_seed(0)

frozen_classifier = DNN_Classifier_Frozen("Mnist-0to4-best_batch_norm", use_hidden_layers=3)
frozen_classifier.fit(train_x_5to9, train_y_5to9, val_x_5to9, val_y_5to9)

[<tf.Variable 'output/kernel:0' shape=(120, 5) dtype=float32_ref>, <tf.Variable 'output/bias:0' shape=(5,) dtype=float32_ref>, <tf.Variable 'hidden5/kernel:0' shape=(120, 120) dtype=float32_ref>, <tf.Variable 'hidden5/bias:0' shape=(120,) dtype=float32_ref>, <tf.Variable 'hidden4/kernel:0' shape=(120, 120) dtype=float32_ref>, <tf.Variable 'hidden4/bias:0' shape=(120,) dtype=float32_ref>, <tf.Variable 'batch_normalization_4/gamma:0' shape=(120,) dtype=float32_ref>, <tf.Variable 'batch_normalization_4/beta:0' shape=(120,) dtype=float32_ref>, <tf.Variable 'batch_normalization_3/gamma:0' shape=(120,) dtype=float32_ref>, <tf.Variable 'batch_normalization_3/beta:0' shape=(120,) dtype=float32_ref>]
[<tf.Variable 'output/kernel:0' shape=(120, 5) dtype=float32_ref>, <tf.Variable 'output/bias:0' shape=(5,) dtype=float32_ref>, <tf.Variable 'hidden5/kernel:0' shape=(120, 120) dtype=float32_ref>, <tf.Variable 'hidden5/bias:0' shape=(120,) dtype=float32_ref>, <tf.Variable 'hidden4/kernel:0' shape=(1