# Deep Learning for the MNIST Dataset

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST through the TensorFlow tutorial helper, using /tmp/data as its data
# directory. The returned object's .train, .validation and .test splits are used below.
mnist = input_data.read_data_sets('/tmp/data')

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


## Deep neural network: 5 hidden layers with 100 units each

In [10]:
# Variance-scaling initializer used as the default kernel initializer in this notebook.
he_init = tf.contrib.layers.variance_scaling_initializer()

def create_dnn(inputs, n_hidden_layers = 5, n_neurons = 100,
              activation = tf.nn.elu, inizializer = he_init, name = None):
    """Stack fully connected hidden layers on top of `inputs`.

    Args:
        inputs: tensor fed to the first hidden layer.
        n_hidden_layers: number of dense layers to stack.
        n_neurons: number of units in every hidden layer.
        activation: activation function applied by each dense layer.
        inizializer: kernel initializer for each dense layer. The (misspelled)
            parameter name is kept so existing keyword callers keep working.
        name: optional variable-scope name; the scope defaults to "dnn".

    Returns:
        The output tensor of the last hidden layer (or `inputs` unchanged when
        `n_hidden_layers` is 0).
    """
    with tf.variable_scope(name, "dnn"):
        for layer in range(n_hidden_layers):
            # Use the initializer that was passed in; previously the argument was
            # silently ignored in favour of the module-level he_init.
            inputs = tf.layers.dense(inputs, n_neurons, activation=activation,
                                     kernel_initializer=inizializer,
                                     name="hidden%d" % (layer + 1))
        return inputs

In [15]:
n_inputs = 28 * 28 # MNIST
n_outputs = 10  # size of the output (logits) layer

# Clear the default graph before (re)building the model in it.
tf.reset_default_graph()

# Input batch: any number of rows, n_inputs features each.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Integer class labels. Note: (None) is plain None, not a 1-tuple, so the
# placeholder is created with shape=None.
y = tf.placeholder(tf.int64, shape=(None), name="y")

# Hidden stack built with create_dnn's default arguments.
dnn_outputs = create_dnn(X)

# Dense output layer without an activation (raw logits), plus a softmax over them.
logits = tf.layers.dense(dnn_outputs, n_outputs, name = 'outputs')
Y_proba = tf.nn.softmax(logits, name = 'Y_proba')

In [16]:
learning_rate = 0.01

# Per-example cross-entropy computed from the raw logits and the integer labels,
# then averaged into a scalar loss.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

# Adam optimizer minimizing the mean cross-entropy.
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss, name="training_op")

# Top-1 correctness per example; accuracy is the mean of those booleans cast to float.
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# Ops used by the training cell to initialize the variables and to save/restore them.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [17]:
# Short aliases for the (images, labels) pair of each MNIST split.
X_train1, y_train1 = mnist.train.images, mnist.train.labels
X_valid1, y_valid1 = mnist.validation.images, mnist.validation.labels
X_test1, y_test1 = mnist.test.images, mnist.test.labels

In [18]:
n_epochs = 1000
batch_size = 20

# Early stopping: give up after this many consecutive epochs without a new best
# validation loss.
max_checks_without_progress = 20
checks_without_progress = 0
# np.inf is the canonical spelling; the np.infty alias was removed in NumPy 2.0.
best_loss = np.inf

# Single source of truth for the checkpoint location, shared by save and restore.
checkpoint_path = "./my_mnist_model_0_to_4.ckpt"

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # Reshuffle the training set every epoch and walk through it in mini-batches.
        rnd_idx = np.random.permutation(len(X_train1))
        for rnd_indices in np.array_split(rnd_idx, len(X_train1) // batch_size):
            X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Evaluate on the full validation split once per epoch.
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid1, y: y_valid1})
        if loss_val < best_loss:
            # New best model: checkpoint it so it can be restored after training.
            save_path = saver.save(sess, checkpoint_path)
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Score the best checkpoint (not the last epoch's weights) on the test split.
with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    acc_test = accuracy.eval(feed_dict={X: X_test1, y: y_test1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

0	Validation loss: 0.424886	Best loss: 0.424886	Accuracy: 89.74%
1	Validation loss: 0.269277	Best loss: 0.269277	Accuracy: 94.16%
2	Validation loss: 0.361843	Best loss: 0.269277	Accuracy: 92.16%
3	Validation loss: 0.339725	Best loss: 0.269277	Accuracy: 91.88%
4	Validation loss: 1.010029	Best loss: 0.269277	Accuracy: 61.90%
5	Validation loss: 0.576670	Best loss: 0.269277	Accuracy: 87.94%
6	Validation loss: 1.296640	Best loss: 0.269277	Accuracy: 55.16%
7	Validation loss: 0.271534	Best loss: 0.269277	Accuracy: 94.22%
8	Validation loss: 0.886465	Best loss: 0.269277	Accuracy: 86.60%
9	Validation loss: 1.224869	Best loss: 0.269277	Accuracy: 57.62%
10	Validation loss: 0.535940	Best loss: 0.269277	Accuracy: 86.40%
11	Validation loss: 2.435072	Best loss: 0.269277	Accuracy: 9.90%
12	Validation loss: 2.450409	Best loss: 0.269277	Accuracy: 9.76%
13	Validation loss: 2.447707	Best loss: 0.269277	Accuracy: 11.00%
14	Validation loss: 2.437099	Best loss: 0.269277	Accuracy: 9.90%
15	Validation loss: 2.4

In [19]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class DNNClassifier(BaseEstimator, ClassifierMixin):
    """Scikit-learn compatible classifier backed by a dense TensorFlow network.

    Each call to `fit` builds a fresh graph and session, so the estimator can be
    cloned and refitted by scikit-learn model-selection utilities. Optional batch
    normalization and dropout are enabled through the corresponding hyperparameters.
    """

    def __init__(self, n_hidden_layers=5, n_neurons=100, optimizer_class=tf.train.AdamOptimizer,
                 learning_rate=0.01, batch_size=20, activation=tf.nn.elu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        """Initialize the DNNClassifier by simply storing all the hyperparameters."""
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        # No session exists until fit() is called; predict_proba() checks this.
        self._session = None

    def _dnn(self, inputs):
        """Build the hidden layers, with support for batch normalization and dropout.

        For every layer the order is: optional dropout on the layer input, a linear
        dense layer, optional batch normalization, then the activation function.
        Returns the output tensor of the last hidden layer.
        """
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons,
                                     kernel_initializer=self.initializer,
                                     name="hidden%d" % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(inputs, momentum=self.batch_norm_momentum,
                                                       training=self._training)
            inputs = self.activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        """Build the model, loss, optimizer and evaluation ops in the default graph.

        Must be called inside `with graph.as_default()`. The ops needed later are
        stored on the instance as private attributes (`_X`, `_y`, `_loss`, ...).
        """
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
        y = tf.placeholder(tf.int32, shape=(None), name="y")

        # The training flag is only needed when batch norm or dropout is active;
        # it defaults to False so evaluation/prediction feeds can omit it.
        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='training')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        # Honour the configured initializer for the output layer too (it used to be
        # hard-wired to the module-level he_init, ignoring the hyperparameter).
        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=self.initializer,
                                 name="logits")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=logits)
        loss = tf.reduce_mean(xentropy, name="loss")

        optimizer = self.optimizer_class(learning_rate=self.learning_rate)
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # Make the important operations available easily through instance variables
        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        """Close the current session, if any, and mark the estimator as unfitted."""
        if self._session:
            self._session.close()
            # Drop the closed session so later calls raise NotFittedError instead
            # of failing inside TensorFlow on a closed session.
            self._session = None

    def _get_model_params(self):
        """Get all variable values (used for early stopping, faster than saving to disk)"""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        """Set all variables to the given values (for early stopping, faster than loading from disk)"""
        gvar_names = list(model_params.keys())
        # Reuse each variable's existing "<name>/Assign" op and feed the saved
        # value into that op's value input (its second input).
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def _labels_to_indices(self, y):
        """Translate original class labels into indices 0..n_classes-1.

        Raises KeyError if `y` contains a label that was not seen in the training set.
        """
        return np.array([self.class_to_index_[label] for label in y], dtype=np.int32)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        """Fit the model to the training set. If X_valid and y_valid are provided, use early stopping.

        Args:
            X: 2-D array of training inputs; X.shape[1] sets the input width.
            y: training labels; their sorted unique values become `classes_`.
            n_epochs: maximum number of passes over the training set.
            X_valid, y_valid: optional validation set. When both are given, training
                stops after more than 20 consecutive epochs without a new best
                validation loss, and the best parameters seen are restored.

        Returns:
            self
        """
        self.close_session()

        # infer n_inputs and n_outputs from the training set.
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)

        # Translate the labels vector to a vector of sorted class indices, containing
        # integers from 0 to n_outputs - 1.
        # For example, if y is equal to [8, 8, 9, 5, 7, 6, 6, 6], then the sorted class
        # labels (self.classes_) will be equal to [5, 6, 7, 8, 9], and the labels vector
        # will be translated to [3, 3, 4, 0, 2, 1, 1, 1]
        self.class_to_index_ = {label: index
                                for index, label in enumerate(self.classes_)}
        y = self._labels_to_indices(y)

        # The validation labels must go through the same translation as the training
        # labels, otherwise the validation loss/accuracy compare logits against raw
        # labels whenever the classes are not already 0..n_outputs-1.
        use_validation = X_valid is not None and y_valid is not None
        if use_validation:
            y_valid = self._labels_to_indices(y_valid)

        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            # extra ops for batch normalization
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # Run the batch-norm update ops in the same step as the training op so the
        # moving statistics come from the same forward pass (and the same dropout
        # mask) instead of paying for a second pass per batch.
        train_fetches = [self._training_op] + list(extra_update_ops)

        # needed in case of early stopping
        max_checks_without_progress = 20
        checks_without_progress = 0
        best_loss = np.inf
        best_params = None

        # At least one batch, so a training set smaller than batch_size does not
        # make np.array_split fail on zero sections.
        n_batches = max(1, len(X) // self.batch_size)

        # Now train the model!
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, n_batches):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(train_fetches, feed_dict=feed_dict)
                if use_validation:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy],
                                                 feed_dict={self._X: X_valid,
                                                            self._y: y_valid})
                    if loss_val < best_loss:
                        # Keep an in-memory snapshot of the best parameters so far.
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    # Without a validation set, report metrics on the last mini-batch.
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            # If we used early stopping then rollback to the best model found
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        """Return the softmax output for each row of X (columns follow `classes_` order).

        Raises NotFittedError if the estimator has no open session.
        """
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        """Return the most probable class label for each row of X.

        The result is an int32 column vector of shape (n_samples, 1) holding the
        original labels from `classes_`, not the internal class indices.
        """
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        """Save the fitted model's variables to `path` using the graph's Saver.

        Raises NotFittedError if the estimator has no open session.
        """
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        self._saver.save(self._session, path)

In [20]:
# Train a classifier with the default hyperparameters; passing the validation
# split enables early stopping inside fit().
dnn_clf = DNNClassifier(random_state=42)
dnn_clf.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

0	Validation loss: 0.676683	Best loss: 0.676683	Accuracy: 80.26%
1	Validation loss: 0.368517	Best loss: 0.368517	Accuracy: 92.32%
2	Validation loss: 0.490324	Best loss: 0.368517	Accuracy: 84.44%
3	Validation loss: 0.373531	Best loss: 0.368517	Accuracy: 91.94%
4	Validation loss: 0.686687	Best loss: 0.368517	Accuracy: 82.66%
5	Validation loss: 0.610952	Best loss: 0.368517	Accuracy: 86.42%
6	Validation loss: 0.484424	Best loss: 0.368517	Accuracy: 91.58%
7	Validation loss: 0.394269	Best loss: 0.368517	Accuracy: 92.84%
8	Validation loss: 0.354883	Best loss: 0.354883	Accuracy: 94.42%
9	Validation loss: 2.151319	Best loss: 0.354883	Accuracy: 68.28%
10	Validation loss: 0.348569	Best loss: 0.348569	Accuracy: 94.48%
11	Validation loss: 0.298487	Best loss: 0.298487	Accuracy: 95.26%
12	Validation loss: 0.923262	Best loss: 0.298487	Accuracy: 66.76%
13	Validation loss: 0.855145	Best loss: 0.298487	Accuracy: 69.52%
14	Validation loss: 0.961327	Best loss: 0.298487	Accuracy: 77.04%
15	Validation loss: 

DNNClassifier(activation=<function elu at 0x10ef23158>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0x12429c2f0>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [21]:
from sklearn.metrics import accuracy_score

# Accuracy of the default-hyperparameter classifier on the held-out test split.
y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.94989999999999997

In [22]:
from sklearn.model_selection import RandomizedSearchCV

def leaky_relu(alpha=0.01):
    """Return a leaky-ReLU activation function with negative-side slope `alpha`.

    The returned callable has the (z, name=None) signature that DNNClassifier
    uses when it calls its `activation`.
    """
    def parametrized_leaky_relu(z, name=None):
        """Element-wise maximum of alpha * z and z."""
        scaled = alpha * z
        return tf.maximum(scaled, z, name=name)

    return parametrized_leaky_relu

# Candidate values sampled by the randomized search for each hyperparameter.
param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
}

rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                random_state=42, verbose=2)
# Fit-time arguments are passed to fit() rather than through the constructor's
# `fit_params` argument, which scikit-learn deprecated and later removed.
# They are forwarded to DNNClassifier.fit for every candidate/fold.
rnd_search.fit(X_train1, y_train1, X_valid=X_valid1, y_valid=y_valid1, n_epochs=1000)



Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] n_neurons=10, learning_rate=0.05, batch_size=100, activation=<function elu at 0x10ef23158> 
0	Validation loss: 0.850562	Best loss: 0.850562	Accuracy: 71.36%
1	Validation loss: 0.819912	Best loss: 0.819912	Accuracy: 71.32%
2	Validation loss: 1.004201	Best loss: 0.819912	Accuracy: 70.36%
3	Validation loss: 0.671365	Best loss: 0.671365	Accuracy: 80.86%
4	Validation loss: 0.540172	Best loss: 0.540172	Accuracy: 85.68%
5	Validation loss: 0.576899	Best loss: 0.540172	Accuracy: 84.90%
6	Validation loss: 0.645637	Best loss: 0.540172	Accuracy: 83.50%
7	Validation loss: 0.645217	Best loss: 0.540172	Accuracy: 83.78%
8	Validation loss: 0.610129	Best loss: 0.540172	Accuracy: 84.56%
9	Validation loss: 0.516322	Best loss: 0.516322	Accuracy: 84.90%
10	Validation loss: 0.748248	Best loss: 0.516322	Accuracy: 77.38%
11	Validation loss: 0.732158	Best loss: 0.516322	Accuracy: 81.72%
12	Validation loss: 0.763431	Best loss: 0.516322	Accuracy: 

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   16.3s remaining:    0.0s


0	Validation loss: 0.919469	Best loss: 0.919469	Accuracy: 69.34%
1	Validation loss: 0.890178	Best loss: 0.890178	Accuracy: 70.62%
2	Validation loss: 0.798690	Best loss: 0.798690	Accuracy: 76.12%
3	Validation loss: 0.786786	Best loss: 0.786786	Accuracy: 78.18%
4	Validation loss: 0.760943	Best loss: 0.760943	Accuracy: 79.78%
5	Validation loss: 0.831693	Best loss: 0.760943	Accuracy: 78.28%
6	Validation loss: 1.081632	Best loss: 0.760943	Accuracy: 61.92%
7	Validation loss: 1.249531	Best loss: 0.760943	Accuracy: 56.54%
8	Validation loss: 1.408862	Best loss: 0.760943	Accuracy: 45.28%
9	Validation loss: 1.110846	Best loss: 0.760943	Accuracy: 62.16%
10	Validation loss: 1.130414	Best loss: 0.760943	Accuracy: 57.96%
11	Validation loss: 1.124216	Best loss: 0.760943	Accuracy: 60.00%
12	Validation loss: 1.134179	Best loss: 0.760943	Accuracy: 57.08%
13	Validation loss: 1.053920	Best loss: 0.760943	Accuracy: 60.68%
14	Validation loss: 1.164926	Best loss: 0.760943	Accuracy: 55.44%
15	Validation loss: 

KeyboardInterrupt: 

In [None]:
# Hyperparameters selected by the randomized search. This needs a completed search;
# the run captured above was interrupted.
rnd_search.best_params_

In [None]:
# Test-split accuracy of the predictions made through the fitted search object.
y_pred = rnd_search.predict(X_test1)
accuracy_score(y_test1, y_pred)

In [None]:
# Retrain a single classifier with explicitly chosen hyperparameters
# (leaky ReLU with alpha=0.1, batch size 500, 140 neurons per layer).
# Note: this rebinds dnn_clf, replacing the default-hyperparameter model above.
dnn_clf = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,
                        n_neurons=140, random_state=42)
dnn_clf.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

In [None]:
# Test-split accuracy of the retrained classifier.
y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

In [None]:
# Same kind of model with batch normalization enabled (momentum 0.95) and
# 90 neurons per layer.
dnn_clf_bn = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,
                           n_neurons=90, random_state=42,
                           batch_norm_momentum=0.95)
dnn_clf_bn.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

In [None]:
# Test-split accuracy of the batch-normalized classifier.
y_pred = dnn_clf_bn.predict(X_test1)
accuracy_score(y_test1, y_pred)

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Same search space as the earlier search, extended with the batch-normalization
# momentum. Note: this rebinds param_distribs.
param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
    "batch_norm_momentum": [0.9, 0.95, 0.98, 0.99, 0.999],
}

rnd_search_bn = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                   random_state=42, verbose=2)
# Fit-time arguments are passed to fit() rather than through the constructor's
# `fit_params` argument, which scikit-learn deprecated and later removed.
# They are forwarded to DNNClassifier.fit for every candidate/fold.
rnd_search_bn.fit(X_train1, y_train1, X_valid=X_valid1, y_valid=y_valid1, n_epochs=1000)

In [None]:
# Hyperparameters selected by the batch-normalization search.
rnd_search_bn.best_params_

In [None]:
# Test-split accuracy of the predictions made through the fitted batch-norm search object.
y_pred = rnd_search_bn.predict(X_test1)
accuracy_score(y_test1, y_pred)