# Exercise 8

## 8a: five hidden layers of 100 neurons each, He initialization, and the ELU activation function.

In [10]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib.layers import fully_connected
import numpy as np
tf.reset_default_graph()

# load data
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]


he_init = tf.variance_scaling_initializer()

def dnn(inputs, n_hidden_layers=5, n_neurons=100, name=None,
        activation=tf.nn.elu, initializer=he_init):
    with tf.variable_scope(name, "dnn"):
        for layer in range(n_hidden_layers):
            inputs = tf.layers.dense(inputs, n_neurons, activation=activation,
                                     kernel_initializer=initializer,
                                     name="hidden%d" % (layer + 1))
        return inputs

n_inputs = 28 * 28 # MNIST
n_outputs = 5


X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

dnn_outputs = dnn(X)

logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
Y_proba = tf.nn.softmax(logits, name="Y_proba")

## 8b: Adam and early stopping

In [23]:
learning_rate = 0.01

xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss, name="training_op")

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
saver = tf.train.Saver()

# digits 5-9

In [39]:
# 5-9
X_train1 = X_train[y_train >= 5]
y_train1 = y_train[y_train >= 5]-5
X_valid1 = X_valid[y_valid >= 5]
y_valid1 = y_valid[y_valid >= 5]-5
X_test1 = X_test[y_test >= 5]
y_test1 = y_test[y_test >= 5]-5

In [43]:
X_valid1.shape

(2442, 784)

In [26]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train1))
        for rnd_indices in np.array_split(rnd_idx, len(X_train1) // batch_size):
            X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid1, y: y_valid1})
        if loss_val < best_loss:
            save_path = saver.save(sess, "./my_mnist_model_5_to_9.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_5_to_9.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test1, y: y_test1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

0	Validation loss: 0.152784	Best loss: 0.152784	Accuracy: 96.11%
1	Validation loss: 0.350911	Best loss: 0.152784	Accuracy: 92.34%
2	Validation loss: 0.241513	Best loss: 0.152784	Accuracy: 93.94%
3	Validation loss: 0.177708	Best loss: 0.152784	Accuracy: 95.90%
4	Validation loss: 0.220742	Best loss: 0.152784	Accuracy: 93.28%
5	Validation loss: 0.302672	Best loss: 0.152784	Accuracy: 93.65%
6	Validation loss: 0.479976	Best loss: 0.152784	Accuracy: 73.18%
7	Validation loss: 0.358498	Best loss: 0.152784	Accuracy: 90.50%
8	Validation loss: 0.196288	Best loss: 0.152784	Accuracy: 96.15%
9	Validation loss: 0.301731	Best loss: 0.152784	Accuracy: 93.24%
10	Validation loss: 0.171434	Best loss: 0.152784	Accuracy: 96.72%
11	Validation loss: 0.171523	Best loss: 0.152784	Accuracy: 96.56%
12	Validation loss: 0.586506	Best loss: 0.152784	Accuracy: 76.58%
13	Validation loss: 102.997467	Best loss: 0.152784	Accuracy: 59.09%
14	Validation loss: 1.738413	Best loss: 0.152784	Accuracy: 47.17%
15	Validation loss

<span style='color:blue'>**Baseline model without gets 95.31%**</span>

## c: hyperparameters, cross-validation and precision

In [27]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class DNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_hidden_layers=5, n_neurons=100, optimizer_class=tf.train.AdamOptimizer,
                 learning_rate=0.01, batch_size=20, activation=tf.nn.elu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        """Initialize the DNNClassifier by simply storing all the hyperparameters."""
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        self._session = None

    def _dnn(self, inputs):
        """Build the hidden layers, with support for batch normalization and dropout."""
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons,
                                     kernel_initializer=self.initializer,
                                     name="hidden%d" % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(inputs, momentum=self.batch_norm_momentum,
                                                       training=self._training)
            inputs = self.activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        """Build the same model as earlier"""
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
        y = tf.placeholder(tf.int32, shape=(None), name="y")

        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='training')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=logits)
        loss = tf.reduce_mean(xentropy, name="loss")

        optimizer = self.optimizer_class(learning_rate=self.learning_rate)
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # Make the important operations available easily through instance variables
        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        if self._session:
            self._session.close()

    def _get_model_params(self):
        """Get all variable values (used for early stopping, faster than saving to disk)"""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        """Set all variables to the given values (for early stopping, faster than loading from disk)"""
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        """Fit the model to the training set. If X_valid and y_valid are provided, use early stopping."""
        self.close_session()

        # infer n_inputs and n_outputs from the training set.
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)
        
        # Translate the labels vector to a vector of sorted class indices, containing
        # integers from 0 to n_outputs - 1.
        # For example, if y is equal to [8, 8, 9, 5, 7, 6, 6, 6], then the sorted class
        # labels (self.classes_) will be equal to [5, 6, 7, 8, 9], and the labels vector
        # will be translated to [3, 3, 4, 0, 2, 1, 1, 1]
        self.class_to_index_ = {label: index
                                for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label]
                      for label in y], dtype=np.int32)
        
        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            # extra ops for batch normalization
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # needed in case of early stopping
        max_checks_without_progress = 20
        checks_without_progress = 0
        best_loss = np.infty
        best_params = None
        
        # Now train the model!
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, len(X) // self.batch_size):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(self._training_op, feed_dict=feed_dict)
                    if extra_update_ops:
                        sess.run(extra_update_ops, feed_dict=feed_dict)
                if X_valid is not None and y_valid is not None:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy],
                                                 feed_dict={self._X: X_valid,
                                                            self._y: y_valid})
                    if loss_val < best_loss:
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            # If we used early stopping then rollback to the best model found
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        self._saver.save(self._session, path)

In [28]:
dnn_clf = DNNClassifier(random_state=42)
dnn_clf.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)
from sklearn.metrics import accuracy_score

y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

0	Validation loss: 0.120634	Best loss: 0.120634	Accuracy: 96.76%
1	Validation loss: 0.189807	Best loss: 0.120634	Accuracy: 95.09%
2	Validation loss: 0.153287	Best loss: 0.120634	Accuracy: 96.23%
3	Validation loss: 0.167355	Best loss: 0.120634	Accuracy: 96.19%
4	Validation loss: 0.157093	Best loss: 0.120634	Accuracy: 96.11%
5	Validation loss: 0.141889	Best loss: 0.120634	Accuracy: 96.97%
6	Validation loss: 0.119795	Best loss: 0.119795	Accuracy: 97.50%
7	Validation loss: 0.160662	Best loss: 0.119795	Accuracy: 97.13%
8	Validation loss: 83.407814	Best loss: 0.119795	Accuracy: 95.66%
9	Validation loss: 0.434938	Best loss: 0.119795	Accuracy: 92.42%
10	Validation loss: 0.325412	Best loss: 0.119795	Accuracy: 95.33%
11	Validation loss: 0.279955	Best loss: 0.119795	Accuracy: 97.09%
12	Validation loss: 0.353423	Best loss: 0.119795	Accuracy: 95.25%
13	Validation loss: 0.284067	Best loss: 0.119795	Accuracy: 96.03%
14	Validation loss: 0.489670	Best loss: 0.119795	Accuracy: 75.88%
15	Validation loss:

0.974079407529315

<span style='color:blue'>**We can try to improve the above accuracy by searching for the best parameters.**</span>

In [29]:
from sklearn.model_selection import RandomizedSearchCV

def leaky_relu(alpha=0.01):
    def parametrized_leaky_relu(z, name=None):
        return tf.maximum(alpha * z, z, name=name)
    return parametrized_leaky_relu

param_distribs = {
    "n_neurons": [90, 100, 120],
    "batch_size": [50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.1)],
}

rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                fit_params={"X_valid": X_valid1, "y_valid": y_valid1, "n_epochs": 1000},
                                random_state=42, verbose=2)
rnd_search.fit(X_train1, y_train1)



Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] n_neurons=90, learning_rate=0.02, batch_size=50, activation=<function elu at 0x000001DBB876EE18> 
0	Validation loss: 0.226634	Best loss: 0.226634	Accuracy: 95.05%
1	Validation loss: 0.199168	Best loss: 0.199168	Accuracy: 93.98%
2	Validation loss: 0.396789	Best loss: 0.199168	Accuracy: 93.90%
3	Validation loss: 1.813763	Best loss: 0.199168	Accuracy: 17.77%
4	Validation loss: 1.645970	Best loss: 0.199168	Accuracy: 20.52%
5	Validation loss: 1.693021	Best loss: 0.199168	Accuracy: 20.27%
6	Validation loss: 1.710236	Best loss: 0.199168	Accuracy: 20.27%
7	Validation loss: 1.695486	Best loss: 0.199168	Accuracy: 20.52%
8	Validation loss: 1.647323	Best loss: 0.199168	Accuracy: 20.52%
9	Validation loss: 1.626712	Best loss: 0.199168	Accuracy: 22.52%
10	Validation loss: 1.656524	Best loss: 0.199168	Accuracy: 22.52%
11	Validation loss: 1.665243	Best loss: 0.199168	Accuracy: 17.77%
12	Validation loss: 1.656647	Best loss: 0.199168	Accu

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   45.6s remaining:    0.0s


0	Validation loss: 0.165186	Best loss: 0.165186	Accuracy: 95.13%
1	Validation loss: 0.174587	Best loss: 0.165186	Accuracy: 95.45%
2	Validation loss: 0.195295	Best loss: 0.165186	Accuracy: 93.94%
3	Validation loss: 2.347357	Best loss: 0.165186	Accuracy: 28.09%
4	Validation loss: 1.665614	Best loss: 0.165186	Accuracy: 18.92%
5	Validation loss: 1.627061	Best loss: 0.165186	Accuracy: 22.52%
6	Validation loss: 1.662882	Best loss: 0.165186	Accuracy: 20.27%
7	Validation loss: 1.617609	Best loss: 0.165186	Accuracy: 20.27%
8	Validation loss: 1.615494	Best loss: 0.165186	Accuracy: 22.52%
9	Validation loss: 1.637407	Best loss: 0.165186	Accuracy: 22.52%
10	Validation loss: 1.640272	Best loss: 0.165186	Accuracy: 20.27%
11	Validation loss: 1.646303	Best loss: 0.165186	Accuracy: 20.52%
12	Validation loss: 1.640249	Best loss: 0.165186	Accuracy: 17.77%
13	Validation loss: 1.822748	Best loss: 0.165186	Accuracy: 18.92%
14	Validation loss: 1.664664	Best loss: 0.165186	Accuracy: 22.52%
15	Validation loss: 

22	Validation loss: 0.083112	Best loss: 0.080894	Accuracy: 98.12%
23	Validation loss: 0.130043	Best loss: 0.080894	Accuracy: 97.83%
24	Validation loss: 0.090617	Best loss: 0.080894	Accuracy: 97.95%
25	Validation loss: 0.109532	Best loss: 0.080894	Accuracy: 97.62%
26	Validation loss: 0.128644	Best loss: 0.080894	Accuracy: 97.71%
27	Validation loss: 0.103468	Best loss: 0.080894	Accuracy: 97.17%
28	Validation loss: 0.208867	Best loss: 0.080894	Accuracy: 96.07%
29	Validation loss: 0.296316	Best loss: 0.080894	Accuracy: 95.17%
30	Validation loss: 0.217346	Best loss: 0.080894	Accuracy: 95.13%
31	Validation loss: 0.164624	Best loss: 0.080894	Accuracy: 96.93%
32	Validation loss: 0.115953	Best loss: 0.080894	Accuracy: 97.42%
Early stopping!
[CV]  n_neurons=90, learning_rate=0.01, batch_size=50, activation=<function relu at 0x000001DBB87851E0>, total=  57.5s
[CV] n_neurons=90, learning_rate=0.01, batch_size=50, activation=<function relu at 0x000001DBB87851E0> 
0	Validation loss: 0.135306	Best lo

6	Validation loss: 0.133276	Best loss: 0.120141	Accuracy: 96.31%
7	Validation loss: 0.109154	Best loss: 0.109154	Accuracy: 96.93%
8	Validation loss: 0.119809	Best loss: 0.109154	Accuracy: 96.48%
9	Validation loss: 0.140122	Best loss: 0.109154	Accuracy: 96.44%
10	Validation loss: 0.124634	Best loss: 0.109154	Accuracy: 96.89%
11	Validation loss: 0.134850	Best loss: 0.109154	Accuracy: 96.40%
12	Validation loss: 0.113006	Best loss: 0.109154	Accuracy: 96.93%
13	Validation loss: 0.161580	Best loss: 0.109154	Accuracy: 97.09%
14	Validation loss: 0.131128	Best loss: 0.109154	Accuracy: 96.76%
15	Validation loss: 0.117075	Best loss: 0.109154	Accuracy: 97.46%
16	Validation loss: 0.146278	Best loss: 0.109154	Accuracy: 96.56%
17	Validation loss: 0.137642	Best loss: 0.109154	Accuracy: 96.81%
18	Validation loss: 0.123444	Best loss: 0.109154	Accuracy: 97.46%
19	Validation loss: 0.151422	Best loss: 0.109154	Accuracy: 96.85%
20	Validation loss: 0.125321	Best loss: 0.109154	Accuracy: 97.26%
21	Validation 

17	Validation loss: 0.121101	Best loss: 0.081400	Accuracy: 97.54%
18	Validation loss: 0.113207	Best loss: 0.081400	Accuracy: 97.54%
19	Validation loss: 0.121442	Best loss: 0.081400	Accuracy: 97.50%
20	Validation loss: 0.096088	Best loss: 0.081400	Accuracy: 97.79%
21	Validation loss: 0.122996	Best loss: 0.081400	Accuracy: 97.54%
22	Validation loss: 0.120047	Best loss: 0.081400	Accuracy: 97.67%
23	Validation loss: 0.111827	Best loss: 0.081400	Accuracy: 97.87%
24	Validation loss: 0.093708	Best loss: 0.081400	Accuracy: 97.75%
25	Validation loss: 0.096818	Best loss: 0.081400	Accuracy: 97.87%
26	Validation loss: 0.097971	Best loss: 0.081400	Accuracy: 98.12%
27	Validation loss: 0.099962	Best loss: 0.081400	Accuracy: 98.16%
28	Validation loss: 0.122451	Best loss: 0.081400	Accuracy: 97.99%
29	Validation loss: 0.151017	Best loss: 0.081400	Accuracy: 97.99%
Early stopping!
[CV]  n_neurons=90, learning_rate=0.01, batch_size=500, activation=<function relu at 0x000001DBB87851E0>, total=  13.0s
[CV] n

17	Validation loss: 0.113469	Best loss: 0.086222	Accuracy: 97.38%
18	Validation loss: 0.106680	Best loss: 0.086222	Accuracy: 97.38%
19	Validation loss: 0.136331	Best loss: 0.086222	Accuracy: 97.13%
20	Validation loss: 0.251768	Best loss: 0.086222	Accuracy: 96.72%
21	Validation loss: 0.204762	Best loss: 0.086222	Accuracy: 96.89%
22	Validation loss: 0.111354	Best loss: 0.086222	Accuracy: 97.67%
23	Validation loss: 0.142084	Best loss: 0.086222	Accuracy: 97.62%
24	Validation loss: 0.157121	Best loss: 0.086222	Accuracy: 97.42%
25	Validation loss: 0.170754	Best loss: 0.086222	Accuracy: 96.36%
26	Validation loss: 0.144326	Best loss: 0.086222	Accuracy: 97.34%
27	Validation loss: 0.151742	Best loss: 0.086222	Accuracy: 97.26%
28	Validation loss: 0.137527	Best loss: 0.086222	Accuracy: 97.38%
29	Validation loss: 0.161820	Best loss: 0.086222	Accuracy: 97.42%
Early stopping!
[CV]  n_neurons=100, learning_rate=0.01, batch_size=50, activation=<function elu at 0x000001DBB876EE18>, total=  44.5s
[CV] n_

5	Validation loss: 0.107025	Best loss: 0.077190	Accuracy: 97.91%
6	Validation loss: 0.110078	Best loss: 0.077190	Accuracy: 97.67%
7	Validation loss: 0.162283	Best loss: 0.077190	Accuracy: 97.87%
8	Validation loss: 0.132679	Best loss: 0.077190	Accuracy: 97.26%
9	Validation loss: 0.124917	Best loss: 0.077190	Accuracy: 97.42%
10	Validation loss: 0.105515	Best loss: 0.077190	Accuracy: 98.20%
11	Validation loss: 0.102433	Best loss: 0.077190	Accuracy: 97.95%
12	Validation loss: 0.137512	Best loss: 0.077190	Accuracy: 98.36%
13	Validation loss: 0.125867	Best loss: 0.077190	Accuracy: 98.36%
14	Validation loss: 0.109442	Best loss: 0.077190	Accuracy: 97.91%
15	Validation loss: 0.175265	Best loss: 0.077190	Accuracy: 97.62%
16	Validation loss: 0.169557	Best loss: 0.077190	Accuracy: 98.03%
17	Validation loss: 0.139051	Best loss: 0.077190	Accuracy: 97.87%
18	Validation loss: 0.111145	Best loss: 0.077190	Accuracy: 97.83%
19	Validation loss: 0.160351	Best loss: 0.077190	Accuracy: 97.67%
20	Validation l

3	Validation loss: 0.205405	Best loss: 0.130986	Accuracy: 94.19%
4	Validation loss: 0.121474	Best loss: 0.121474	Accuracy: 96.76%
5	Validation loss: 0.169126	Best loss: 0.121474	Accuracy: 95.54%
6	Validation loss: 0.166001	Best loss: 0.121474	Accuracy: 95.86%
7	Validation loss: 0.149455	Best loss: 0.121474	Accuracy: 96.11%
8	Validation loss: 0.137856	Best loss: 0.121474	Accuracy: 96.93%
9	Validation loss: 0.118989	Best loss: 0.118989	Accuracy: 96.97%
10	Validation loss: 0.127583	Best loss: 0.118989	Accuracy: 96.76%
11	Validation loss: 0.169140	Best loss: 0.118989	Accuracy: 95.70%
12	Validation loss: 0.112960	Best loss: 0.112960	Accuracy: 97.09%
13	Validation loss: 1.371163	Best loss: 0.112960	Accuracy: 64.33%
14	Validation loss: 0.298617	Best loss: 0.112960	Accuracy: 92.51%
15	Validation loss: 0.245461	Best loss: 0.112960	Accuracy: 93.69%
16	Validation loss: 0.204105	Best loss: 0.112960	Accuracy: 94.80%
17	Validation loss: 0.276588	Best loss: 0.112960	Accuracy: 90.91%
18	Validation los

0	Validation loss: 0.158816	Best loss: 0.158816	Accuracy: 95.17%
1	Validation loss: 0.154211	Best loss: 0.154211	Accuracy: 95.70%
2	Validation loss: 0.151463	Best loss: 0.151463	Accuracy: 96.15%
3	Validation loss: 0.378364	Best loss: 0.151463	Accuracy: 92.47%
4	Validation loss: 0.232511	Best loss: 0.151463	Accuracy: 95.17%
5	Validation loss: 0.150908	Best loss: 0.150908	Accuracy: 96.27%
6	Validation loss: 0.167829	Best loss: 0.150908	Accuracy: 96.19%
7	Validation loss: 0.118572	Best loss: 0.118572	Accuracy: 97.01%
8	Validation loss: 0.113762	Best loss: 0.113762	Accuracy: 97.13%
9	Validation loss: 0.133392	Best loss: 0.113762	Accuracy: 96.27%
10	Validation loss: 0.137105	Best loss: 0.113762	Accuracy: 96.64%
11	Validation loss: 0.108201	Best loss: 0.108201	Accuracy: 97.01%
12	Validation loss: 0.123786	Best loss: 0.108201	Accuracy: 96.85%
13	Validation loss: 0.127593	Best loss: 0.108201	Accuracy: 96.27%
14	Validation loss: 0.137823	Best loss: 0.108201	Accuracy: 96.64%
15	Validation loss: 

16	Validation loss: 0.098137	Best loss: 0.086371	Accuracy: 97.79%
17	Validation loss: 0.110486	Best loss: 0.086371	Accuracy: 97.50%
18	Validation loss: 0.106624	Best loss: 0.086371	Accuracy: 97.75%
19	Validation loss: 0.115237	Best loss: 0.086371	Accuracy: 97.62%
20	Validation loss: 0.110602	Best loss: 0.086371	Accuracy: 97.71%
21	Validation loss: 0.117092	Best loss: 0.086371	Accuracy: 97.83%
22	Validation loss: 0.115032	Best loss: 0.086371	Accuracy: 97.58%
23	Validation loss: 0.103664	Best loss: 0.086371	Accuracy: 97.95%
24	Validation loss: 0.108991	Best loss: 0.086371	Accuracy: 97.99%
25	Validation loss: 0.115857	Best loss: 0.086371	Accuracy: 97.71%
26	Validation loss: 0.124864	Best loss: 0.086371	Accuracy: 97.91%
27	Validation loss: 0.127753	Best loss: 0.086371	Accuracy: 97.75%
28	Validation loss: 0.109537	Best loss: 0.086371	Accuracy: 97.79%
29	Validation loss: 0.138201	Best loss: 0.086371	Accuracy: 98.20%
30	Validation loss: 0.128184	Best loss: 0.086371	Accuracy: 97.50%
31	Validat

5	Validation loss: 1.685885	Best loss: 0.201429	Accuracy: 18.92%
6	Validation loss: 1.829875	Best loss: 0.201429	Accuracy: 18.92%
7	Validation loss: 1.653320	Best loss: 0.201429	Accuracy: 18.92%
8	Validation loss: 1.641654	Best loss: 0.201429	Accuracy: 22.52%
9	Validation loss: 2.109104	Best loss: 0.201429	Accuracy: 17.77%
10	Validation loss: 1.922982	Best loss: 0.201429	Accuracy: 22.52%
11	Validation loss: 1.728909	Best loss: 0.201429	Accuracy: 18.92%
12	Validation loss: 1.800233	Best loss: 0.201429	Accuracy: 22.52%
13	Validation loss: 1.625341	Best loss: 0.201429	Accuracy: 18.92%
14	Validation loss: 1.818354	Best loss: 0.201429	Accuracy: 18.92%
15	Validation loss: 1.851052	Best loss: 0.201429	Accuracy: 22.52%
16	Validation loss: 1.718351	Best loss: 0.201429	Accuracy: 18.92%
17	Validation loss: 1.728399	Best loss: 0.201429	Accuracy: 22.52%
18	Validation loss: 1.959564	Best loss: 0.201429	Accuracy: 22.52%
19	Validation loss: 1.808125	Best loss: 0.201429	Accuracy: 22.52%
20	Validation l

2	Validation loss: 0.415670	Best loss: 0.415670	Accuracy: 92.42%
3	Validation loss: 0.604379	Best loss: 0.415670	Accuracy: 88.53%
4	Validation loss: 1.667708	Best loss: 0.415670	Accuracy: 20.52%
5	Validation loss: 1.723541	Best loss: 0.415670	Accuracy: 20.27%
6	Validation loss: 1.654118	Best loss: 0.415670	Accuracy: 20.52%
7	Validation loss: 1.914592	Best loss: 0.415670	Accuracy: 20.27%
8	Validation loss: 1.639303	Best loss: 0.415670	Accuracy: 20.52%
9	Validation loss: 1.832917	Best loss: 0.415670	Accuracy: 18.92%
10	Validation loss: 2.051895	Best loss: 0.415670	Accuracy: 22.52%
11	Validation loss: 1.622982	Best loss: 0.415670	Accuracy: 18.92%
12	Validation loss: 1.864169	Best loss: 0.415670	Accuracy: 22.52%
13	Validation loss: 1.707094	Best loss: 0.415670	Accuracy: 18.92%
14	Validation loss: 1.680854	Best loss: 0.415670	Accuracy: 22.52%
15	Validation loss: 1.641315	Best loss: 0.415670	Accuracy: 22.52%
16	Validation loss: 1.862597	Best loss: 0.415670	Accuracy: 18.92%
17	Validation loss

21	Validation loss: 0.365682	Best loss: 0.141561	Accuracy: 94.80%
22	Validation loss: 0.289160	Best loss: 0.141561	Accuracy: 96.03%
Early stopping!
[CV]  n_neurons=120, learning_rate=0.02, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001DBC12DFE18>, total=  25.1s
[CV] n_neurons=120, learning_rate=0.02, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001DBC12DFE18> 
0	Validation loss: 0.167711	Best loss: 0.167711	Accuracy: 94.96%
1	Validation loss: 0.128051	Best loss: 0.128051	Accuracy: 96.19%
2	Validation loss: 0.111962	Best loss: 0.111962	Accuracy: 96.85%
3	Validation loss: 86.053055	Best loss: 0.111962	Accuracy: 30.30%
4	Validation loss: 13.840474	Best loss: 0.111962	Accuracy: 79.48%
5	Validation loss: 12.072459	Best loss: 0.111962	Accuracy: 79.73%
6	Validation loss: 7.410387	Best loss: 0.111962	Accuracy: 84.85%
7	Validation loss: 3.474402	Best loss: 0.111962	Accuracy: 88.04%
8	Validation loss: 2.149

3	Validation loss: 0.100764	Best loss: 0.100764	Accuracy: 96.93%
4	Validation loss: 0.084878	Best loss: 0.084878	Accuracy: 97.13%
5	Validation loss: 0.097392	Best loss: 0.084878	Accuracy: 97.26%
6	Validation loss: 0.088850	Best loss: 0.084878	Accuracy: 97.46%
7	Validation loss: 0.078294	Best loss: 0.078294	Accuracy: 97.75%
8	Validation loss: 0.104640	Best loss: 0.078294	Accuracy: 97.87%
9	Validation loss: 0.090695	Best loss: 0.078294	Accuracy: 98.03%
10	Validation loss: 0.091148	Best loss: 0.078294	Accuracy: 97.91%
11	Validation loss: 0.114163	Best loss: 0.078294	Accuracy: 97.30%
12	Validation loss: 0.084644	Best loss: 0.078294	Accuracy: 97.71%
13	Validation loss: 0.098565	Best loss: 0.078294	Accuracy: 97.71%
14	Validation loss: 0.078991	Best loss: 0.078294	Accuracy: 98.16%
15	Validation loss: 0.096756	Best loss: 0.078294	Accuracy: 98.03%
16	Validation loss: 0.126532	Best loss: 0.078294	Accuracy: 97.83%
17	Validation loss: 0.117825	Best loss: 0.078294	Accuracy: 97.91%
18	Validation los

10	Validation loss: 0.102865	Best loss: 0.074017	Accuracy: 97.79%
11	Validation loss: 0.098404	Best loss: 0.074017	Accuracy: 97.71%
12	Validation loss: 0.093788	Best loss: 0.074017	Accuracy: 97.50%
13	Validation loss: 0.085599	Best loss: 0.074017	Accuracy: 98.36%
14	Validation loss: 0.105307	Best loss: 0.074017	Accuracy: 97.99%
15	Validation loss: 0.111380	Best loss: 0.074017	Accuracy: 97.67%
16	Validation loss: 0.106102	Best loss: 0.074017	Accuracy: 98.08%
17	Validation loss: 0.113232	Best loss: 0.074017	Accuracy: 98.24%
18	Validation loss: 0.113851	Best loss: 0.074017	Accuracy: 97.79%
19	Validation loss: 0.103898	Best loss: 0.074017	Accuracy: 97.95%
20	Validation loss: 0.113696	Best loss: 0.074017	Accuracy: 97.99%
21	Validation loss: 0.126040	Best loss: 0.074017	Accuracy: 98.08%
22	Validation loss: 0.113638	Best loss: 0.074017	Accuracy: 97.83%
23	Validation loss: 0.115454	Best loss: 0.074017	Accuracy: 98.28%
24	Validation loss: 0.125804	Best loss: 0.074017	Accuracy: 98.03%
25	Validat

11	Validation loss: 0.143765	Best loss: 0.087566	Accuracy: 96.11%
12	Validation loss: 0.214297	Best loss: 0.087566	Accuracy: 94.96%
13	Validation loss: 0.165420	Best loss: 0.087566	Accuracy: 96.31%
14	Validation loss: 0.168653	Best loss: 0.087566	Accuracy: 95.90%
15	Validation loss: 0.146871	Best loss: 0.087566	Accuracy: 96.36%
16	Validation loss: 0.158357	Best loss: 0.087566	Accuracy: 96.40%
17	Validation loss: 0.169748	Best loss: 0.087566	Accuracy: 96.85%
18	Validation loss: 0.187397	Best loss: 0.087566	Accuracy: 96.64%
19	Validation loss: 0.129346	Best loss: 0.087566	Accuracy: 96.97%
20	Validation loss: 0.333776	Best loss: 0.087566	Accuracy: 94.39%
21	Validation loss: 0.173942	Best loss: 0.087566	Accuracy: 96.15%
22	Validation loss: 0.220229	Best loss: 0.087566	Accuracy: 96.72%
23	Validation loss: 65.756813	Best loss: 0.087566	Accuracy: 91.03%
24	Validation loss: 2.335477	Best loss: 0.087566	Accuracy: 96.68%
25	Validation loss: 0.769382	Best loss: 0.087566	Accuracy: 96.48%
26	Valida

24	Validation loss: 1.625813	Best loss: 0.080906	Accuracy: 22.52%
25	Validation loss: 1.735300	Best loss: 0.080906	Accuracy: 20.27%
26	Validation loss: 1.645727	Best loss: 0.080906	Accuracy: 22.52%
27	Validation loss: 1.654045	Best loss: 0.080906	Accuracy: 22.52%
28	Validation loss: 1.766256	Best loss: 0.080906	Accuracy: 20.52%
29	Validation loss: 1.679647	Best loss: 0.080906	Accuracy: 18.92%
30	Validation loss: 1.624645	Best loss: 0.080906	Accuracy: 18.92%
Early stopping!
[CV]  n_neurons=100, learning_rate=0.02, batch_size=100, activation=<function elu at 0x000001DBB876EE18>, total=  39.4s
[CV] n_neurons=100, learning_rate=0.02, batch_size=100, activation=<function elu at 0x000001DBB876EE18> 
0	Validation loss: 0.161024	Best loss: 0.161024	Accuracy: 95.66%
1	Validation loss: 0.129075	Best loss: 0.129075	Accuracy: 95.86%
2	Validation loss: 0.166432	Best loss: 0.129075	Accuracy: 95.78%
3	Validation loss: 0.197422	Best loss: 0.129075	Accuracy: 95.29%
4	Validation loss: 0.113637	Best loss

19	Validation loss: 0.086926	Best loss: 0.067731	Accuracy: 97.67%
20	Validation loss: 0.090660	Best loss: 0.067731	Accuracy: 98.03%
21	Validation loss: 0.086503	Best loss: 0.067731	Accuracy: 97.95%
22	Validation loss: 0.085310	Best loss: 0.067731	Accuracy: 98.12%
23	Validation loss: 0.130133	Best loss: 0.067731	Accuracy: 97.58%
24	Validation loss: 0.080495	Best loss: 0.067731	Accuracy: 98.08%
25	Validation loss: 0.101594	Best loss: 0.067731	Accuracy: 98.08%
26	Validation loss: 0.067588	Best loss: 0.067588	Accuracy: 98.40%
27	Validation loss: 0.098001	Best loss: 0.067588	Accuracy: 98.03%
28	Validation loss: 0.071784	Best loss: 0.067588	Accuracy: 98.16%
29	Validation loss: 0.246533	Best loss: 0.067588	Accuracy: 96.68%
30	Validation loss: 0.156397	Best loss: 0.067588	Accuracy: 96.64%
31	Validation loss: 0.075266	Best loss: 0.067588	Accuracy: 98.08%
32	Validation loss: 0.092533	Best loss: 0.067588	Accuracy: 98.28%
33	Validation loss: 0.123169	Best loss: 0.067588	Accuracy: 97.99%
34	Validat

1	Validation loss: 0.150217	Best loss: 0.150217	Accuracy: 96.19%
2	Validation loss: 0.097473	Best loss: 0.097473	Accuracy: 97.42%
3	Validation loss: 0.104131	Best loss: 0.097473	Accuracy: 97.38%
4	Validation loss: 0.102704	Best loss: 0.097473	Accuracy: 97.05%
5	Validation loss: 0.081941	Best loss: 0.081941	Accuracy: 97.46%
6	Validation loss: 0.249972	Best loss: 0.081941	Accuracy: 95.78%
7	Validation loss: 0.444130	Best loss: 0.081941	Accuracy: 95.58%
8	Validation loss: 0.232341	Best loss: 0.081941	Accuracy: 96.76%
9	Validation loss: 0.172308	Best loss: 0.081941	Accuracy: 97.01%
10	Validation loss: 0.147660	Best loss: 0.081941	Accuracy: 97.26%
11	Validation loss: 0.145383	Best loss: 0.081941	Accuracy: 97.26%
12	Validation loss: 0.138817	Best loss: 0.081941	Accuracy: 97.50%
13	Validation loss: 0.145628	Best loss: 0.081941	Accuracy: 97.42%
14	Validation loss: 0.150500	Best loss: 0.081941	Accuracy: 97.71%
15	Validation loss: 0.199799	Best loss: 0.081941	Accuracy: 97.09%
16	Validation loss:

28	Validation loss: 0.561762	Best loss: 0.153633	Accuracy: 70.76%
29	Validation loss: 0.685593	Best loss: 0.153633	Accuracy: 73.63%
30	Validation loss: 0.612563	Best loss: 0.153633	Accuracy: 74.00%
31	Validation loss: 0.527100	Best loss: 0.153633	Accuracy: 73.10%
Early stopping!
[CV]  n_neurons=120, learning_rate=0.02, batch_size=50, activation=<function relu at 0x000001DBB87851E0>, total=  34.0s
[CV] n_neurons=120, learning_rate=0.01, batch_size=500, activation=<function elu at 0x000001DBB876EE18> 
0	Validation loss: 0.182440	Best loss: 0.182440	Accuracy: 93.78%
1	Validation loss: 0.132538	Best loss: 0.132538	Accuracy: 95.25%
2	Validation loss: 0.117514	Best loss: 0.117514	Accuracy: 95.90%
3	Validation loss: 0.107658	Best loss: 0.107658	Accuracy: 96.27%
4	Validation loss: 0.112588	Best loss: 0.107658	Accuracy: 96.15%
5	Validation loss: 0.090789	Best loss: 0.090789	Accuracy: 97.13%
6	Validation loss: 0.087826	Best loss: 0.087826	Accuracy: 97.05%
7	Validation loss: 0.084948	Best loss: 0

4	Validation loss: 1.658839	Best loss: 0.198520	Accuracy: 17.77%
5	Validation loss: 1.686707	Best loss: 0.198520	Accuracy: 20.27%
6	Validation loss: 1.708120	Best loss: 0.198520	Accuracy: 20.27%
7	Validation loss: 1.765204	Best loss: 0.198520	Accuracy: 17.77%
8	Validation loss: 1.632323	Best loss: 0.198520	Accuracy: 17.77%
9	Validation loss: 1.727046	Best loss: 0.198520	Accuracy: 18.92%
10	Validation loss: 1.857647	Best loss: 0.198520	Accuracy: 17.77%
11	Validation loss: 1.635623	Best loss: 0.198520	Accuracy: 22.52%
12	Validation loss: 1.732827	Best loss: 0.198520	Accuracy: 22.52%
13	Validation loss: 1.757690	Best loss: 0.198520	Accuracy: 18.92%
14	Validation loss: 1.686242	Best loss: 0.198520	Accuracy: 22.52%
15	Validation loss: 1.627740	Best loss: 0.198520	Accuracy: 22.52%
16	Validation loss: 1.865395	Best loss: 0.198520	Accuracy: 17.77%
17	Validation loss: 1.779924	Best loss: 0.198520	Accuracy: 22.52%
18	Validation loss: 1.780273	Best loss: 0.198520	Accuracy: 22.52%
19	Validation lo

9	Validation loss: 6275.700195	Best loss: 12.102020	Accuracy: 91.85%
10	Validation loss: 15226.425781	Best loss: 12.102020	Accuracy: 92.67%
11	Validation loss: 5349.907227	Best loss: 12.102020	Accuracy: 90.99%
12	Validation loss: 3387.183105	Best loss: 12.102020	Accuracy: 93.33%
13	Validation loss: 2335.435547	Best loss: 12.102020	Accuracy: 94.72%
14	Validation loss: 2287.406494	Best loss: 12.102020	Accuracy: 94.59%
15	Validation loss: 1811.228149	Best loss: 12.102020	Accuracy: 94.84%
16	Validation loss: 3852.743652	Best loss: 12.102020	Accuracy: 94.39%
17	Validation loss: 27138.980469	Best loss: 12.102020	Accuracy: 88.33%
18	Validation loss: 2830.473389	Best loss: 12.102020	Accuracy: 94.35%
19	Validation loss: 2154.753174	Best loss: 12.102020	Accuracy: 93.57%
20	Validation loss: 1269.528564	Best loss: 12.102020	Accuracy: 94.51%
21	Validation loss: 1239.561401	Best loss: 12.102020	Accuracy: 92.18%
22	Validation loss: 862.865295	Best loss: 12.102020	Accuracy: 95.00%
23	Validation loss: 

2	Validation loss: 0.194399	Best loss: 0.194399	Accuracy: 94.35%
3	Validation loss: 0.248280	Best loss: 0.194399	Accuracy: 93.69%
4	Validation loss: 0.173775	Best loss: 0.173775	Accuracy: 94.84%
5	Validation loss: 0.172696	Best loss: 0.172696	Accuracy: 95.33%
6	Validation loss: 0.250396	Best loss: 0.172696	Accuracy: 94.59%
7	Validation loss: 0.253593	Best loss: 0.172696	Accuracy: 94.14%
8	Validation loss: 0.316901	Best loss: 0.172696	Accuracy: 93.08%
9	Validation loss: 0.188719	Best loss: 0.172696	Accuracy: 95.66%
10	Validation loss: 0.252247	Best loss: 0.172696	Accuracy: 94.80%
11	Validation loss: 0.546966	Best loss: 0.172696	Accuracy: 86.65%
12	Validation loss: 1.739594	Best loss: 0.172696	Accuracy: 22.52%
13	Validation loss: 1.681402	Best loss: 0.172696	Accuracy: 22.52%
14	Validation loss: 1.728568	Best loss: 0.172696	Accuracy: 22.52%
15	Validation loss: 1.630886	Best loss: 0.172696	Accuracy: 22.52%
16	Validation loss: 1.712477	Best loss: 0.172696	Accuracy: 22.52%
17	Validation loss

27	Validation loss: 2.607678	Best loss: 0.117488	Accuracy: 96.44%
28	Validation loss: 1.411996	Best loss: 0.117488	Accuracy: 96.68%
29	Validation loss: 0.976733	Best loss: 0.117488	Accuracy: 94.76%
Early stopping!
[CV]  n_neurons=90, learning_rate=0.01, batch_size=50, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001DBC12DFE18>, total=  47.2s
[CV] n_neurons=100, learning_rate=0.05, batch_size=100, activation=<function relu at 0x000001DBB87851E0> 
0	Validation loss: 0.274369	Best loss: 0.274369	Accuracy: 93.65%
1	Validation loss: 1.554422	Best loss: 0.274369	Accuracy: 48.40%
2	Validation loss: 1.001108	Best loss: 0.274369	Accuracy: 56.06%
3	Validation loss: 0.745379	Best loss: 0.274369	Accuracy: 68.18%
4	Validation loss: 0.542543	Best loss: 0.274369	Accuracy: 81.74%
5	Validation loss: 0.493467	Best loss: 0.274369	Accuracy: 84.48%
6	Validation loss: 0.538418	Best loss: 0.274369	Accuracy: 83.99%
7	Validation loss: 0.472881	Best loss: 0.274369	Accuracy: 85.67%
8	

0	Validation loss: 0.141589	Best loss: 0.141589	Accuracy: 95.41%
1	Validation loss: 0.147942	Best loss: 0.141589	Accuracy: 95.50%
2	Validation loss: 0.124722	Best loss: 0.124722	Accuracy: 96.64%
3	Validation loss: 0.131114	Best loss: 0.124722	Accuracy: 97.05%
4	Validation loss: 0.133196	Best loss: 0.124722	Accuracy: 96.64%
5	Validation loss: 4.915750	Best loss: 0.124722	Accuracy: 22.52%
6	Validation loss: 1.625279	Best loss: 0.124722	Accuracy: 18.92%
7	Validation loss: 1.624417	Best loss: 0.124722	Accuracy: 20.52%
8	Validation loss: 1.621520	Best loss: 0.124722	Accuracy: 20.52%
9	Validation loss: 1.656580	Best loss: 0.124722	Accuracy: 20.52%
10	Validation loss: 1.701190	Best loss: 0.124722	Accuracy: 22.52%
11	Validation loss: 1.615906	Best loss: 0.124722	Accuracy: 18.92%
12	Validation loss: 1.630531	Best loss: 0.124722	Accuracy: 22.52%
13	Validation loss: 1.635197	Best loss: 0.124722	Accuracy: 17.77%
14	Validation loss: 1.628777	Best loss: 0.124722	Accuracy: 22.52%
15	Validation loss: 

29	Validation loss: 0.288246	Best loss: 0.093719	Accuracy: 96.52%
Early stopping!
[CV]  n_neurons=120, learning_rate=0.01, batch_size=50, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001DBC12DFE18>, total=  32.6s
[CV] n_neurons=120, learning_rate=0.01, batch_size=50, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001DBC12DFE18> 
0	Validation loss: 0.142435	Best loss: 0.142435	Accuracy: 95.90%
1	Validation loss: 0.371506	Best loss: 0.142435	Accuracy: 93.53%
2	Validation loss: 0.119013	Best loss: 0.119013	Accuracy: 96.64%
3	Validation loss: 0.099675	Best loss: 0.099675	Accuracy: 97.30%
4	Validation loss: 0.117187	Best loss: 0.099675	Accuracy: 96.72%
5	Validation loss: 0.121402	Best loss: 0.099675	Accuracy: 96.48%
6	Validation loss: 0.126992	Best loss: 0.099675	Accuracy: 96.76%
7	Validation loss: 0.089891	Best loss: 0.089891	Accuracy: 97.62%
8	Validation loss: 0.118491	Best loss: 0.089891	Accuracy: 97.42%
9	Validation loss: 246.643295	

14	Validation loss: 0.145859	Best loss: 0.124580	Accuracy: 96.19%
15	Validation loss: 0.119204	Best loss: 0.119204	Accuracy: 96.64%
16	Validation loss: 0.120694	Best loss: 0.119204	Accuracy: 96.93%
17	Validation loss: 0.117918	Best loss: 0.117918	Accuracy: 96.85%
18	Validation loss: 0.126395	Best loss: 0.117918	Accuracy: 97.13%
19	Validation loss: 0.123199	Best loss: 0.117918	Accuracy: 96.81%
20	Validation loss: 0.130156	Best loss: 0.117918	Accuracy: 97.17%
21	Validation loss: 0.139519	Best loss: 0.117918	Accuracy: 97.34%
22	Validation loss: 0.153553	Best loss: 0.117918	Accuracy: 97.22%
23	Validation loss: 0.156344	Best loss: 0.117918	Accuracy: 97.01%
24	Validation loss: 0.140552	Best loss: 0.117918	Accuracy: 97.05%
25	Validation loss: 0.124101	Best loss: 0.117918	Accuracy: 97.26%
26	Validation loss: 0.135383	Best loss: 0.117918	Accuracy: 97.30%
27	Validation loss: 0.154684	Best loss: 0.117918	Accuracy: 96.89%
28	Validation loss: 0.129204	Best loss: 0.117918	Accuracy: 97.30%
29	Validat

10	Validation loss: 1.608552	Best loss: 0.574924	Accuracy: 22.52%
11	Validation loss: 1.607549	Best loss: 0.574924	Accuracy: 22.52%
12	Validation loss: 1.609339	Best loss: 0.574924	Accuracy: 22.52%
13	Validation loss: 1.608889	Best loss: 0.574924	Accuracy: 20.27%
14	Validation loss: 1.614460	Best loss: 0.574924	Accuracy: 18.92%
15	Validation loss: 1.609021	Best loss: 0.574924	Accuracy: 22.52%
16	Validation loss: 1.610153	Best loss: 0.574924	Accuracy: 20.27%
17	Validation loss: 1.612441	Best loss: 0.574924	Accuracy: 18.92%
18	Validation loss: 1.609266	Best loss: 0.574924	Accuracy: 22.52%
19	Validation loss: 1.610245	Best loss: 0.574924	Accuracy: 22.52%
20	Validation loss: 1.610831	Best loss: 0.574924	Accuracy: 18.92%
21	Validation loss: 1.609140	Best loss: 0.574924	Accuracy: 22.52%
Early stopping!
[CV]  n_neurons=100, learning_rate=0.05, batch_size=50, activation=<function relu at 0x000001DBB87851E0>, total=  27.7s
[CV] n_neurons=100, learning_rate=0.05, batch_size=50, activation=<funct

31	Validation loss: 0.110898	Best loss: 0.080345	Accuracy: 97.71%
32	Validation loss: 0.143157	Best loss: 0.080345	Accuracy: 97.50%
33	Validation loss: 0.119610	Best loss: 0.080345	Accuracy: 98.08%
34	Validation loss: 0.129116	Best loss: 0.080345	Accuracy: 97.95%
35	Validation loss: 0.121932	Best loss: 0.080345	Accuracy: 97.58%
36	Validation loss: 0.122979	Best loss: 0.080345	Accuracy: 97.91%
37	Validation loss: 0.128291	Best loss: 0.080345	Accuracy: 97.34%
Early stopping!
[CV]  n_neurons=120, learning_rate=0.02, batch_size=500, activation=<function elu at 0x000001DBB876EE18>, total=  17.8s
[CV] n_neurons=120, learning_rate=0.02, batch_size=500, activation=<function elu at 0x000001DBB876EE18> 
0	Validation loss: 0.631134	Best loss: 0.631134	Accuracy: 78.05%
1	Validation loss: 0.318498	Best loss: 0.318498	Accuracy: 90.29%
2	Validation loss: 0.203637	Best loss: 0.203637	Accuracy: 93.28%
3	Validation loss: 0.158373	Best loss: 0.158373	Accuracy: 94.68%
4	Validation loss: 0.172113	Best loss

63	Validation loss: 0.152982	Best loss: 0.128915	Accuracy: 96.81%
64	Validation loss: 0.168486	Best loss: 0.128915	Accuracy: 96.60%
Early stopping!
[CV]  n_neurons=120, learning_rate=0.05, batch_size=500, activation=<function elu at 0x000001DBB876EE18>, total=  26.4s
[CV] n_neurons=120, learning_rate=0.05, batch_size=500, activation=<function elu at 0x000001DBB876EE18> 
0	Validation loss: 1.084943	Best loss: 1.084943	Accuracy: 59.17%
1	Validation loss: 0.485631	Best loss: 0.485631	Accuracy: 83.46%
2	Validation loss: 0.384008	Best loss: 0.384008	Accuracy: 86.53%
3	Validation loss: 0.272306	Best loss: 0.272306	Accuracy: 91.56%
4	Validation loss: 0.236313	Best loss: 0.236313	Accuracy: 92.22%
5	Validation loss: 0.225056	Best loss: 0.225056	Accuracy: 92.59%
6	Validation loss: 0.217490	Best loss: 0.217490	Accuracy: 92.51%
7	Validation loss: 0.189471	Best loss: 0.189471	Accuracy: 94.35%
8	Validation loss: 0.172379	Best loss: 0.172379	Accuracy: 94.51%
9	Validation loss: 0.268214	Best loss: 0.1

26	Validation loss: 0.126124	Best loss: 0.069684	Accuracy: 97.79%
27	Validation loss: 0.100760	Best loss: 0.069684	Accuracy: 97.95%
Early stopping!
[CV]  n_neurons=100, learning_rate=0.01, batch_size=500, activation=<function relu at 0x000001DBB87851E0>, total=   9.0s
[CV] n_neurons=100, learning_rate=0.01, batch_size=500, activation=<function relu at 0x000001DBB87851E0> 
0	Validation loss: 0.146515	Best loss: 0.146515	Accuracy: 95.62%
1	Validation loss: 0.100189	Best loss: 0.100189	Accuracy: 96.89%
2	Validation loss: 0.083100	Best loss: 0.083100	Accuracy: 97.46%
3	Validation loss: 0.084105	Best loss: 0.083100	Accuracy: 97.30%
4	Validation loss: 0.084039	Best loss: 0.083100	Accuracy: 97.79%
5	Validation loss: 0.074605	Best loss: 0.074605	Accuracy: 97.62%
6	Validation loss: 0.110594	Best loss: 0.074605	Accuracy: 97.30%
7	Validation loss: 0.081055	Best loss: 0.074605	Accuracy: 97.91%
8	Validation loss: 0.118129	Best loss: 0.074605	Accuracy: 97.13%
9	Validation loss: 0.096547	Best loss: 0

1	Validation loss: 0.121687	Best loss: 0.121687	Accuracy: 95.99%
2	Validation loss: 0.149477	Best loss: 0.121687	Accuracy: 96.27%
3	Validation loss: 560.613892	Best loss: 0.121687	Accuracy: 56.06%
4	Validation loss: 3.904345	Best loss: 0.121687	Accuracy: 84.44%
5	Validation loss: 1.223306	Best loss: 0.121687	Accuracy: 93.00%
6	Validation loss: 0.766810	Best loss: 0.121687	Accuracy: 90.17%
7	Validation loss: 0.830023	Best loss: 0.121687	Accuracy: 93.69%
8	Validation loss: 0.653867	Best loss: 0.121687	Accuracy: 92.71%
9	Validation loss: 0.624002	Best loss: 0.121687	Accuracy: 94.51%
10	Validation loss: 0.420696	Best loss: 0.121687	Accuracy: 95.21%
11	Validation loss: 0.412986	Best loss: 0.121687	Accuracy: 95.82%
12	Validation loss: 0.469361	Best loss: 0.121687	Accuracy: 91.69%
13	Validation loss: 0.414964	Best loss: 0.121687	Accuracy: 94.80%
14	Validation loss: 0.381101	Best loss: 0.121687	Accuracy: 94.64%
15	Validation loss: 0.612990	Best loss: 0.121687	Accuracy: 95.21%
16	Validation los

38	Validation loss: 0.313972	Best loss: 0.300991	Accuracy: 90.62%
39	Validation loss: 0.297335	Best loss: 0.297335	Accuracy: 91.32%
40	Validation loss: 0.354784	Best loss: 0.297335	Accuracy: 88.17%
41	Validation loss: 0.296445	Best loss: 0.296445	Accuracy: 91.15%
42	Validation loss: 0.298139	Best loss: 0.296445	Accuracy: 91.56%
43	Validation loss: 0.295864	Best loss: 0.295864	Accuracy: 91.36%
44	Validation loss: 0.294360	Best loss: 0.294360	Accuracy: 91.32%
45	Validation loss: 0.306723	Best loss: 0.294360	Accuracy: 91.28%
46	Validation loss: 0.370533	Best loss: 0.294360	Accuracy: 89.48%
47	Validation loss: 0.315876	Best loss: 0.294360	Accuracy: 90.75%
48	Validation loss: 0.532795	Best loss: 0.294360	Accuracy: 86.73%
49	Validation loss: 0.401873	Best loss: 0.294360	Accuracy: 89.89%
50	Validation loss: 0.338616	Best loss: 0.294360	Accuracy: 90.29%
51	Validation loss: 0.297558	Best loss: 0.294360	Accuracy: 91.20%
52	Validation loss: 0.317544	Best loss: 0.294360	Accuracy: 91.07%
53	Validat

2	Validation loss: 0.204507	Best loss: 0.164585	Accuracy: 93.90%
3	Validation loss: 0.172823	Best loss: 0.164585	Accuracy: 95.05%
4	Validation loss: 0.154979	Best loss: 0.154979	Accuracy: 95.37%
5	Validation loss: 0.128772	Best loss: 0.128772	Accuracy: 96.15%
6	Validation loss: 0.130422	Best loss: 0.128772	Accuracy: 96.36%
7	Validation loss: 0.115719	Best loss: 0.115719	Accuracy: 97.05%
8	Validation loss: 0.184653	Best loss: 0.115719	Accuracy: 95.86%
9	Validation loss: 0.197505	Best loss: 0.115719	Accuracy: 96.36%
10	Validation loss: 0.155123	Best loss: 0.115719	Accuracy: 96.56%
11	Validation loss: 0.176340	Best loss: 0.115719	Accuracy: 96.48%
12	Validation loss: 0.123238	Best loss: 0.115719	Accuracy: 96.97%
13	Validation loss: 0.194117	Best loss: 0.115719	Accuracy: 96.64%
14	Validation loss: 0.174027	Best loss: 0.115719	Accuracy: 95.86%
15	Validation loss: 1.753743	Best loss: 0.115719	Accuracy: 17.77%
16	Validation loss: 1.722157	Best loss: 0.115719	Accuracy: 20.27%
17	Validation loss

16	Validation loss: 0.296758	Best loss: 0.122920	Accuracy: 92.79%
17	Validation loss: 0.253684	Best loss: 0.122920	Accuracy: 94.51%
18	Validation loss: 0.255857	Best loss: 0.122920	Accuracy: 95.09%
19	Validation loss: 0.293938	Best loss: 0.122920	Accuracy: 95.13%
20	Validation loss: 0.394768	Best loss: 0.122920	Accuracy: 93.20%
21	Validation loss: 0.285196	Best loss: 0.122920	Accuracy: 94.88%
22	Validation loss: 0.247650	Best loss: 0.122920	Accuracy: 95.82%
23	Validation loss: 0.276665	Best loss: 0.122920	Accuracy: 95.25%
24	Validation loss: 0.363793	Best loss: 0.122920	Accuracy: 94.72%
25	Validation loss: 0.241702	Best loss: 0.122920	Accuracy: 95.99%
26	Validation loss: 0.326328	Best loss: 0.122920	Accuracy: 95.70%
27	Validation loss: 0.348693	Best loss: 0.122920	Accuracy: 96.15%
Early stopping!
[CV]  n_neurons=100, learning_rate=0.02, batch_size=100, activation=<function relu at 0x000001DBB87851E0>, total=  32.5s
[CV] n_neurons=120, learning_rate=0.02, batch_size=500, activation=<fun

24	Validation loss: 0.112644	Best loss: 0.084303	Accuracy: 97.75%
25	Validation loss: 0.155181	Best loss: 0.084303	Accuracy: 97.75%
26	Validation loss: 0.152459	Best loss: 0.084303	Accuracy: 97.17%
27	Validation loss: 0.203362	Best loss: 0.084303	Accuracy: 97.13%
28	Validation loss: 0.142772	Best loss: 0.084303	Accuracy: 97.83%
Early stopping!
[CV]  n_neurons=120, learning_rate=0.02, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001DBC12DFE18>, total=  16.1s
[CV] n_neurons=90, learning_rate=0.02, batch_size=50, activation=<function relu at 0x000001DBB87851E0> 
0	Validation loss: 0.287475	Best loss: 0.287475	Accuracy: 93.37%
1	Validation loss: 0.148280	Best loss: 0.148280	Accuracy: 96.19%
2	Validation loss: 0.129016	Best loss: 0.129016	Accuracy: 96.85%
3	Validation loss: 0.203004	Best loss: 0.129016	Accuracy: 94.92%
4	Validation loss: 0.893085	Best loss: 0.129016	Accuracy: 59.01%
5	Validation loss: 0.797097	Best loss: 0.129016	Accuracy: 75.88%


2	Validation loss: 0.255785	Best loss: 0.255785	Accuracy: 93.37%
3	Validation loss: 0.211079	Best loss: 0.211079	Accuracy: 93.82%
4	Validation loss: 0.206373	Best loss: 0.206373	Accuracy: 93.49%
5	Validation loss: 0.211803	Best loss: 0.206373	Accuracy: 93.28%
6	Validation loss: 0.228465	Best loss: 0.206373	Accuracy: 92.55%
7	Validation loss: 0.244211	Best loss: 0.206373	Accuracy: 93.04%
8	Validation loss: 0.883915	Best loss: 0.206373	Accuracy: 58.35%
9	Validation loss: 0.975894	Best loss: 0.206373	Accuracy: 51.60%
10	Validation loss: 1.238681	Best loss: 0.206373	Accuracy: 38.21%
11	Validation loss: 1.205303	Best loss: 0.206373	Accuracy: 41.52%
12	Validation loss: 1.210113	Best loss: 0.206373	Accuracy: 41.24%
13	Validation loss: 1.271917	Best loss: 0.206373	Accuracy: 41.44%
14	Validation loss: 1.518864	Best loss: 0.206373	Accuracy: 24.86%
15	Validation loss: 1.523955	Best loss: 0.206373	Accuracy: 27.72%
16	Validation loss: 1.604493	Best loss: 0.206373	Accuracy: 20.56%
17	Validation loss

15	Validation loss: 0.098707	Best loss: 0.097606	Accuracy: 97.54%
16	Validation loss: 0.152225	Best loss: 0.097606	Accuracy: 97.34%
17	Validation loss: 0.126338	Best loss: 0.097606	Accuracy: 97.91%
18	Validation loss: 0.119037	Best loss: 0.097606	Accuracy: 97.50%
19	Validation loss: 0.092649	Best loss: 0.092649	Accuracy: 97.95%
20	Validation loss: 0.119401	Best loss: 0.092649	Accuracy: 97.99%
21	Validation loss: 0.132776	Best loss: 0.092649	Accuracy: 98.08%
22	Validation loss: 1.194294	Best loss: 0.092649	Accuracy: 87.76%
23	Validation loss: 0.141813	Best loss: 0.092649	Accuracy: 97.17%
24	Validation loss: 0.081060	Best loss: 0.081060	Accuracy: 97.75%
25	Validation loss: 0.104229	Best loss: 0.081060	Accuracy: 97.71%
26	Validation loss: 0.111839	Best loss: 0.081060	Accuracy: 97.50%
27	Validation loss: 0.120751	Best loss: 0.081060	Accuracy: 97.99%
28	Validation loss: 0.160290	Best loss: 0.081060	Accuracy: 98.03%
29	Validation loss: 0.135998	Best loss: 0.081060	Accuracy: 97.99%
30	Validat

[CV]  n_neurons=120, learning_rate=0.05, batch_size=50, activation=<function relu at 0x000001DBB87851E0>, total=  22.2s
[CV] n_neurons=120, learning_rate=0.05, batch_size=50, activation=<function relu at 0x000001DBB87851E0> 
0	Validation loss: 0.452576	Best loss: 0.452576	Accuracy: 77.52%
1	Validation loss: 0.582067	Best loss: 0.452576	Accuracy: 74.28%
2	Validation loss: 0.703555	Best loss: 0.452576	Accuracy: 70.19%
3	Validation loss: 0.626039	Best loss: 0.452576	Accuracy: 72.56%
4	Validation loss: 0.599997	Best loss: 0.452576	Accuracy: 75.14%
5	Validation loss: 0.589577	Best loss: 0.452576	Accuracy: 74.28%
6	Validation loss: 0.556047	Best loss: 0.452576	Accuracy: 74.77%
7	Validation loss: 0.542996	Best loss: 0.452576	Accuracy: 75.35%
8	Validation loss: 0.628869	Best loss: 0.452576	Accuracy: 73.14%
9	Validation loss: 0.590276	Best loss: 0.452576	Accuracy: 73.79%
10	Validation loss: 0.722005	Best loss: 0.452576	Accuracy: 69.94%
11	Validation loss: 0.616919	Best loss: 0.452576	Accuracy: 

0	Validation loss: 0.157266	Best loss: 0.157266	Accuracy: 95.66%
1	Validation loss: 0.096256	Best loss: 0.096256	Accuracy: 96.44%
2	Validation loss: 0.080164	Best loss: 0.080164	Accuracy: 97.34%
3	Validation loss: 0.131610	Best loss: 0.080164	Accuracy: 96.76%
4	Validation loss: 1.941490	Best loss: 0.080164	Accuracy: 94.47%
5	Validation loss: 0.796277	Best loss: 0.080164	Accuracy: 92.47%
6	Validation loss: 0.348669	Best loss: 0.080164	Accuracy: 95.50%
7	Validation loss: 0.206894	Best loss: 0.080164	Accuracy: 96.40%
8	Validation loss: 0.234375	Best loss: 0.080164	Accuracy: 95.78%
9	Validation loss: 0.210666	Best loss: 0.080164	Accuracy: 96.15%
10	Validation loss: 0.219614	Best loss: 0.080164	Accuracy: 96.52%
11	Validation loss: 0.328918	Best loss: 0.080164	Accuracy: 94.10%
12	Validation loss: 0.201057	Best loss: 0.080164	Accuracy: 96.72%
13	Validation loss: 0.188481	Best loss: 0.080164	Accuracy: 97.01%
14	Validation loss: 0.237111	Best loss: 0.080164	Accuracy: 96.81%
15	Validation loss: 

14	Validation loss: 0.123414	Best loss: 0.090592	Accuracy: 97.71%
15	Validation loss: 0.093999	Best loss: 0.090592	Accuracy: 97.62%
16	Validation loss: 0.197780	Best loss: 0.090592	Accuracy: 96.60%
17	Validation loss: 0.093276	Best loss: 0.090592	Accuracy: 97.99%
18	Validation loss: 0.106279	Best loss: 0.090592	Accuracy: 97.71%
19	Validation loss: 0.103347	Best loss: 0.090592	Accuracy: 97.67%
20	Validation loss: 0.093916	Best loss: 0.090592	Accuracy: 98.20%
21	Validation loss: 0.086489	Best loss: 0.086489	Accuracy: 98.20%
22	Validation loss: 0.102151	Best loss: 0.086489	Accuracy: 98.28%
23	Validation loss: 0.159231	Best loss: 0.086489	Accuracy: 97.79%
24	Validation loss: 0.098899	Best loss: 0.086489	Accuracy: 98.32%
25	Validation loss: 0.118231	Best loss: 0.086489	Accuracy: 98.03%
26	Validation loss: 0.110744	Best loss: 0.086489	Accuracy: 98.03%
27	Validation loss: 0.159253	Best loss: 0.086489	Accuracy: 96.60%
28	Validation loss: 0.138830	Best loss: 0.086489	Accuracy: 97.30%
29	Validat

21	Validation loss: 0.111143	Best loss: 0.071783	Accuracy: 97.91%
22	Validation loss: 0.099074	Best loss: 0.071783	Accuracy: 98.12%
23	Validation loss: 0.124269	Best loss: 0.071783	Accuracy: 97.87%
24	Validation loss: 0.134115	Best loss: 0.071783	Accuracy: 98.20%
25	Validation loss: 0.118934	Best loss: 0.071783	Accuracy: 98.32%
26	Validation loss: 0.132926	Best loss: 0.071783	Accuracy: 97.99%
27	Validation loss: 0.118444	Best loss: 0.071783	Accuracy: 97.83%
28	Validation loss: 0.175514	Best loss: 0.071783	Accuracy: 97.22%
Early stopping!
[CV]  n_neurons=90, learning_rate=0.01, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001DBC12DFE18>, total=  14.6s
[CV] n_neurons=90, learning_rate=0.01, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001DBC12DFE18> 
0	Validation loss: 0.137630	Best loss: 0.137630	Accuracy: 95.37%
1	Validation loss: 0.097791	Best loss: 0.097791	Accuracy: 96.81%
2	Validation loss: 0.08

0	Validation loss: 8.534271	Best loss: 8.534271	Accuracy: 70.52%
1	Validation loss: 0.658389	Best loss: 0.658389	Accuracy: 90.29%
2	Validation loss: 0.410687	Best loss: 0.410687	Accuracy: 94.76%
3	Validation loss: 0.204046	Best loss: 0.204046	Accuracy: 95.78%
4	Validation loss: 0.410064	Best loss: 0.204046	Accuracy: 95.13%
5	Validation loss: 0.254917	Best loss: 0.204046	Accuracy: 94.64%
6	Validation loss: 0.213640	Best loss: 0.204046	Accuracy: 95.95%
7	Validation loss: 0.210021	Best loss: 0.204046	Accuracy: 95.99%
8	Validation loss: 0.131732	Best loss: 0.131732	Accuracy: 96.56%
9	Validation loss: 0.325229	Best loss: 0.131732	Accuracy: 94.80%
10	Validation loss: 0.180961	Best loss: 0.131732	Accuracy: 95.05%
11	Validation loss: 0.132068	Best loss: 0.131732	Accuracy: 96.52%
12	Validation loss: 0.106536	Best loss: 0.106536	Accuracy: 97.01%
13	Validation loss: 0.193390	Best loss: 0.106536	Accuracy: 94.55%
14	Validation loss: 0.129314	Best loss: 0.106536	Accuracy: 96.68%
15	Validation loss: 

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 74.6min finished


0	Validation loss: 0.120294	Best loss: 0.120294	Accuracy: 96.11%
1	Validation loss: 0.086881	Best loss: 0.086881	Accuracy: 96.81%
2	Validation loss: 0.088388	Best loss: 0.086881	Accuracy: 97.26%
3	Validation loss: 0.078840	Best loss: 0.078840	Accuracy: 97.50%
4	Validation loss: 0.073457	Best loss: 0.073457	Accuracy: 97.17%
5	Validation loss: 0.064692	Best loss: 0.064692	Accuracy: 97.99%
6	Validation loss: 0.059079	Best loss: 0.059079	Accuracy: 98.24%
7	Validation loss: 0.067496	Best loss: 0.059079	Accuracy: 98.20%
8	Validation loss: 0.109874	Best loss: 0.059079	Accuracy: 97.22%
9	Validation loss: 0.078171	Best loss: 0.059079	Accuracy: 97.95%
10	Validation loss: 0.061475	Best loss: 0.059079	Accuracy: 98.12%
11	Validation loss: 0.066839	Best loss: 0.059079	Accuracy: 98.44%
12	Validation loss: 0.084210	Best loss: 0.059079	Accuracy: 98.16%
13	Validation loss: 0.096546	Best loss: 0.059079	Accuracy: 98.24%
14	Validation loss: 0.077524	Best loss: 0.059079	Accuracy: 98.28%
15	Validation loss: 

RandomizedSearchCV(cv=None, error_score='raise',
          estimator=DNNClassifier(activation=<function elu at 0x000001DBB876EE18>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0x000001DBC0DB25F8>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42),
          fit_params={'X_valid': array([[0., 0., ..., 0., 0.],
       [0., 0., ..., 0., 0.],
       ...,
       [0., 0., ..., 0., 0.],
       [0., 0., ..., 0., 0.]], dtype=float32), 'y_valid': array([0, 4, ..., 3, 2]), 'n_epochs': 1000},
          iid=True, n_iter=50, n_jobs=1,
          param_distributions={'n_neurons': [90, 100, 120], 'batch_size': [50, 100, 500], 'learning_rate': [0.01, 0.02, 0.05], 'activation': [<function relu at 0x000001DBB87851E0>, <function elu at 0x000001DBB876EE18>, <function leaky_relu.<locals>.parametri

In [30]:
rnd_search.best_params_

{'n_neurons': 100,
 'learning_rate': 0.01,
 'batch_size': 500,
 'activation': <function __main__.leaky_relu.<locals>.parametrized_leaky_relu(z, name=None)>}

In [31]:
y_pred = rnd_search.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.9827196050195433

In [32]:
rnd_search.best_estimator_.save("./my_best_mnist_model_5_to_9")

## d: Batch Normalization

Without Batch Normalization

In [34]:
dnn_clf = DNNClassifier(activation= tf.nn.relu, batch_size=500, learning_rate=0.01,
                        n_neurons=120, random_state=42)
dnn_clf.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)
y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

0	Validation loss: 0.095713	Best loss: 0.095713	Accuracy: 97.01%
1	Validation loss: 0.089349	Best loss: 0.089349	Accuracy: 97.26%
2	Validation loss: 0.080769	Best loss: 0.080769	Accuracy: 97.71%
3	Validation loss: 0.071718	Best loss: 0.071718	Accuracy: 97.75%
4	Validation loss: 0.077401	Best loss: 0.071718	Accuracy: 98.08%
5	Validation loss: 0.064507	Best loss: 0.064507	Accuracy: 98.28%
6	Validation loss: 0.063026	Best loss: 0.063026	Accuracy: 98.08%
7	Validation loss: 0.069661	Best loss: 0.063026	Accuracy: 98.20%
8	Validation loss: 0.088067	Best loss: 0.063026	Accuracy: 97.99%
9	Validation loss: 0.066131	Best loss: 0.063026	Accuracy: 98.28%
10	Validation loss: 0.069176	Best loss: 0.063026	Accuracy: 98.28%
11	Validation loss: 0.079528	Best loss: 0.063026	Accuracy: 98.40%
12	Validation loss: 0.091006	Best loss: 0.063026	Accuracy: 98.12%
13	Validation loss: 0.086730	Best loss: 0.063026	Accuracy: 98.73%
14	Validation loss: 0.086006	Best loss: 0.063026	Accuracy: 98.48%
15	Validation loss: 

0.9788109442501542

With Batch Normalization

In [35]:
dnn_clf_bn = DNNClassifier(activation= tf.nn.relu, batch_size=500, learning_rate=0.01,
                        n_neurons=120, random_state=42,
                           batch_norm_momentum=0.95)
dnn_clf_bn.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

0	Validation loss: 0.093395	Best loss: 0.093395	Accuracy: 96.93%
1	Validation loss: 0.087441	Best loss: 0.087441	Accuracy: 97.30%
2	Validation loss: 0.083300	Best loss: 0.083300	Accuracy: 97.54%
3	Validation loss: 0.084403	Best loss: 0.083300	Accuracy: 97.54%
4	Validation loss: 0.065189	Best loss: 0.065189	Accuracy: 98.20%
5	Validation loss: 0.064206	Best loss: 0.064206	Accuracy: 98.20%
6	Validation loss: 0.082979	Best loss: 0.064206	Accuracy: 98.12%
7	Validation loss: 0.058734	Best loss: 0.058734	Accuracy: 98.65%
8	Validation loss: 0.060116	Best loss: 0.058734	Accuracy: 98.40%
9	Validation loss: 0.068496	Best loss: 0.058734	Accuracy: 98.36%
10	Validation loss: 0.073361	Best loss: 0.058734	Accuracy: 98.16%
11	Validation loss: 0.072878	Best loss: 0.058734	Accuracy: 98.32%
12	Validation loss: 0.076926	Best loss: 0.058734	Accuracy: 98.44%
13	Validation loss: 0.072155	Best loss: 0.058734	Accuracy: 98.40%
14	Validation loss: 0.075707	Best loss: 0.058734	Accuracy: 98.16%
15	Validation loss: 

DNNClassifier(activation=<function relu at 0x000001DBB87851E0>,
       batch_norm_momentum=0.95, batch_size=500, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0x000001DBC0DB25F8>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=120,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

New Accuracy

In [36]:
y_pred = dnn_clf_bn.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.9812795721045052

<span style='color:blue'>**As we can see Batch normlaization slightly increases the accuracy but not by much. We can try and find the optimal paramters through a randomized search.**</span>

Hyperparameter search

In [38]:
from sklearn.model_selection import RandomizedSearchCV
# reduce parameters to minimize runtime
param_distribs = {
    "n_neurons": [70, 90, 100, 120, 140],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
    "batch_norm_momentum": [0.9, 0.95, 0.98, 0.99, 0.999],
}

rnd_search_bn = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                   fit_params={"X_valid": X_valid1, "y_valid": y_valid1, "n_epochs": 1000},
                                   random_state=42, verbose=2)
rnd_search_bn.fit(X_train1, y_train1)



Fitting 3 folds for each of 40 candidates, totalling 120 fits
[CV] n_neurons=100, learning_rate=0.1, batch_size=500, batch_norm_momentum=0.99, activation=<function elu at 0x000001DBB876EE18> 
0	Validation loss: 6.223390	Best loss: 6.223390	Accuracy: 85.38%
1	Validation loss: 1.008969	Best loss: 1.008969	Accuracy: 91.15%
2	Validation loss: 0.593358	Best loss: 0.593358	Accuracy: 90.75%
3	Validation loss: 0.218692	Best loss: 0.218692	Accuracy: 95.66%
4	Validation loss: 0.190663	Best loss: 0.190663	Accuracy: 95.50%
5	Validation loss: 0.142535	Best loss: 0.142535	Accuracy: 96.64%
6	Validation loss: 0.099302	Best loss: 0.099302	Accuracy: 97.22%
7	Validation loss: 0.106431	Best loss: 0.099302	Accuracy: 97.17%
8	Validation loss: 0.127342	Best loss: 0.099302	Accuracy: 96.85%
9	Validation loss: 0.108061	Best loss: 0.099302	Accuracy: 97.38%
10	Validation loss: 0.121818	Best loss: 0.099302	Accuracy: 97.50%
11	Validation loss: 0.092477	Best loss: 0.092477	Accuracy: 97.54%
12	Validation loss: 0.1069

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   53.0s remaining:    0.0s


0	Validation loss: 6.609016	Best loss: 6.609016	Accuracy: 83.17%
1	Validation loss: 1.411314	Best loss: 1.411314	Accuracy: 89.19%
2	Validation loss: 0.385388	Best loss: 0.385388	Accuracy: 94.14%
3	Validation loss: 0.192494	Best loss: 0.192494	Accuracy: 95.74%
4	Validation loss: 0.181447	Best loss: 0.181447	Accuracy: 95.50%
5	Validation loss: 0.248616	Best loss: 0.181447	Accuracy: 94.80%
6	Validation loss: 0.122197	Best loss: 0.122197	Accuracy: 96.81%
7	Validation loss: 0.136368	Best loss: 0.122197	Accuracy: 96.48%
8	Validation loss: 0.124136	Best loss: 0.122197	Accuracy: 96.56%
9	Validation loss: 0.137573	Best loss: 0.122197	Accuracy: 97.09%
10	Validation loss: 0.153580	Best loss: 0.122197	Accuracy: 96.60%
11	Validation loss: 0.144136	Best loss: 0.122197	Accuracy: 96.72%
12	Validation loss: 0.126216	Best loss: 0.122197	Accuracy: 97.13%
13	Validation loss: 0.125819	Best loss: 0.122197	Accuracy: 97.09%
14	Validation loss: 0.251742	Best loss: 0.122197	Accuracy: 95.37%
15	Validation loss: 

0	Validation loss: 0.215207	Best loss: 0.215207	Accuracy: 94.80%
1	Validation loss: 0.103947	Best loss: 0.103947	Accuracy: 96.76%
2	Validation loss: 0.088134	Best loss: 0.088134	Accuracy: 97.38%
3	Validation loss: 0.123802	Best loss: 0.088134	Accuracy: 96.31%
4	Validation loss: 0.076430	Best loss: 0.076430	Accuracy: 97.62%
5	Validation loss: 0.082163	Best loss: 0.076430	Accuracy: 97.50%
6	Validation loss: 0.066941	Best loss: 0.066941	Accuracy: 98.28%
7	Validation loss: 0.079818	Best loss: 0.066941	Accuracy: 97.95%
8	Validation loss: 0.075184	Best loss: 0.066941	Accuracy: 97.95%
9	Validation loss: 0.084328	Best loss: 0.066941	Accuracy: 97.87%
10	Validation loss: 0.060912	Best loss: 0.060912	Accuracy: 98.36%
11	Validation loss: 0.074874	Best loss: 0.060912	Accuracy: 97.91%
12	Validation loss: 0.048939	Best loss: 0.048939	Accuracy: 98.44%
13	Validation loss: 0.071268	Best loss: 0.048939	Accuracy: 98.20%
14	Validation loss: 0.082506	Best loss: 0.048939	Accuracy: 98.12%
15	Validation loss: 

KeyboardInterrupt: 

In [None]:
rnd_search_bn.best_params_

In [None]:
y_pred = rnd_search_bn.predict(X_test1)
accuracy_score(y_test1, y_pred)

## e: Dropout

In [None]:
y_pred = dnn_clf.predict(X_train1)
accuracy_score(y_train1, y_pred)

In [None]:
dnn_clf_dropout = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,
                                n_neurons=90, random_state=42,
                                dropout_rate=0.5)
dnn_clf_dropout.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

Accuracy

In [None]:
y_pred = dnn_clf_dropout.predict(X_test1)
accuracy_score(y_test1, y_pred)

Hyperparameter search

In [None]:
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
    "dropout_rate": [0.2, 0.3, 0.4, 0.5, 0.6],
}

rnd_search_dropout = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                        fit_params={"X_valid": X_valid1, "y_valid": y_valid1, "n_epochs": 1000},
                                        random_state=42, verbose=2)
rnd_search_dropout.fit(X_train1, y_train1)

In [None]:
rnd_search_dropout.best_params_

In [None]:
y_pred = rnd_search_dropout.predict(X_test1)
accuracy_score(y_test1, y_pred)

# Exercise 9

## a: pretrained hidden layers of the previous model, freezes them, and replaces the softmax output layer with a fresh new one.

In [None]:
tf.reset_default_graph()

restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_5_to_9.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
Y_proba = tf.get_default_graph().get_tensor_by_name("Y_proba:0")
logits = Y_proba.op.inputs[0]
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")

In [None]:
learning_rate = 0.01

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

In [None]:
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()

# b: 0-4

In [None]:
X_train2_full = mnist.train.images[mnist.train.labels < 5]
y_train2_full = mnist.train.labels[mnist.train.labels < 5] 
X_valid2_full = mnist.validation.images[mnist.validation.labels < 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels < 5] 
X_test2 = mnist.test.images[mnist.test.labels < 5]
y_test2 = mnist.test.labels[mnist.test.labels < 5] 

In [None]:
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

In [None]:
X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

 Retrain last layer after freezing all other layers

In [None]:
import time

n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_5_to_9")
    for var in output_layer_vars:
        var.initializer.run()

    t0 = time.time()
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_0_to_4_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_0_to_4_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

## c: caching the frozen layers, and train the model again

In [None]:
hidden5_out = tf.get_default_graph().get_tensor_by_name("hidden5_out:0")

In [None]:
import time

n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_5_to_9")
    for var in output_layer_vars:
        var.initializer.run()

    t0 = time.time()
    
    hidden5_train = hidden5_out.eval(feed_dict={X: X_train2, y: y_train2})
    hidden5_valid = hidden5_out.eval(feed_dict={X: X_valid2, y: y_valid2})
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            h5_batch, y_batch = hidden5_train[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={hidden5_out: h5_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={hidden5_out: hidden5_valid, y: y_valid2})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_0_to_4_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_0_to_4_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

## d: reusing just four hidden layers instead of five

In [None]:
tf.reset_default_graph()

n_outputs = 5

restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_5_to_9.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

hidden4_out = tf.get_default_graph().get_tensor_by_name("hidden4_out:0")
logits = tf.layers.dense(hidden4_out, n_outputs, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [None]:
learning_rate = 0.01

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

init = tf.global_variables_initializer()
four_frozen_saver = tf.train.Saver()


In [None]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_5_to_9")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = four_frozen_saver.save(sess, "./my_mnist_model_0_to_4_four_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./my_mnist_model_0_to_4_four_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

## e: unfreeze the top two hidden layers and continue training

In [None]:
learning_rate = 0.01

unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam3")
training_op = optimizer.minimize(loss, var_list=unfrozen_vars)

init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

In [None]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    four_frozen_saver.restore(sess, "./my_mnist_model_0_to_4_four_frozen")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = two_frozen_saver.save(sess, "./my_mnist_model_0_to_4_two_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./my_mnist_model_0_to_4_two_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

In [None]:
learning_rate = 0.01

optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam4")
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
no_frozen_saver = tf.train.Saver()

In [None]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    two_frozen_saver.restore(sess, "./my_mnist_model_0_to_4_two_frozen")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = no_frozen_saver.save(sess, "./my_mnist_model_0_to_4_no_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    no_frozen_saver.restore(sess, "./my_mnist_model_0_to_4_no_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

In [None]:
dnn_clf_0_to_4 = DNNClassifier(n_hidden_layers=4, random_state=42)
dnn_clf_0_to_4.fit(X_train2, y_train2, n_epochs=1000, X_valid=X_valid2, y_valid=y_valid2)

In [None]:
y_pred = dnn_clf_0_to_4.predict(X_test2)
accuracy_score(y_test2, y_pred)