In [1]:
# Common imports
import numpy as np
import os

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "nn_ex"

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

In [2]:
import tensorflow as tf

## Data: MNIST

In [3]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

## Simple Neural Network

In [4]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [5]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [10]:
reset_graph()


# Parameters/Variables/Placeholders

n_features = X_train.shape[1]
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

# Dropout parameters
training = tf.placeholder_with_default(False, shape=(), name='training')
dropout_rate = 0  # == 1 - keep_prob
X_drop = tf.layers.dropout(X, dropout_rate, training=training)


# NN Model

from functools import partial

he_init = tf.variance_scaling_initializer()
#scale = 0.01
#kernel_regularizer = tf.contrib.layers.l1_regularizer(scale)
my_dense_layer = partial(tf.layers.dense, activation=tf.nn.elu, kernel_initializer=he_init) 
                            # option: kernel_regularizer= kernel_regularizer

with tf.name_scope("dnn"):
    hidden1 = my_dense_layer(X_drop, n_hidden1, name="hidden1")
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=training)   
    hidden2 = my_dense_layer(hidden1_drop, n_hidden2, name="hidden2")
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)   
    logits = my_dense_layer(hidden1_drop, n_outputs, activation=None, name="outputs")
    

# Loss/Training/Evaluation

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy") 
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([base_loss] + reg_losses, name="loss")

with tf.name_scope("train"):
    learning_rate = 0.01
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)    

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [15]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 1000
batch_size = 20

# Early Stopping Parameters
max_checks_without_progress = 10
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = saver.save(sess, "./mnist_nn_model_01.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    saver.restore(sess,"./mnist_nn_model_01.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test*100))

0	Validation loss: 0.218353	Best loss: 0.218353	Accuracy: 94.72%
1	Validation loss: 0.279882	Best loss: 0.218353	Accuracy: 94.78%
2	Validation loss: 0.245630	Best loss: 0.218353	Accuracy: 95.72%
3	Validation loss: 0.262549	Best loss: 0.218353	Accuracy: 95.64%
4	Validation loss: 0.272777	Best loss: 0.218353	Accuracy: 96.16%
5	Validation loss: 0.316229	Best loss: 0.218353	Accuracy: 95.66%
6	Validation loss: 0.353235	Best loss: 0.218353	Accuracy: 96.38%
7	Validation loss: 0.328619	Best loss: 0.218353	Accuracy: 96.18%
8	Validation loss: 0.491521	Best loss: 0.218353	Accuracy: 95.38%
9	Validation loss: 0.421706	Best loss: 0.218353	Accuracy: 96.44%
10	Validation loss: 0.387184	Best loss: 0.218353	Accuracy: 96.64%
Early stopping!
INFO:tensorflow:Restoring parameters from ./mnist_nn_model_01.ckpt
Final test accuracy: 94.55%


### 0-4 MNIST data

In [24]:
id_train04 = (y_train < 5)
id_val04 = (y_valid < 5)
id_test04 = (y_test < 5)

X_train04 = X_train[id_train04]
y_train04 = y_train[id_train04]
X_val04 = X_valid[id_val04]
y_val04 = y_valid[id_val04]
X_test04 = X_test[id_test04]
y_test04 = y_test[id_test04]

In [25]:
print(X_train04.shape, y_train04.shape)
print(X_val04.shape, y_val04.shape)
print(X_test04.shape, y_test04.shape)

(28038, 784) (28038,)
(2558, 784) (2558,)
(5139, 784) (5139,)


## Deeper model
(MNIST 0-4 digits)

In [16]:
he_init = tf.variance_scaling_initializer()

def dnn(inputs, n_hidden_layers, n_neurons, name=None, 
        activation = tf.nn.elu, initializer= he_init):
    with tf.variable_scope(name,'dnn'):
        for layer in range(n_hidden_layers):
            inputs = tf.layers.dense(inputs,n_neurons, activation=activation, 
                                     kernel_initializer = initializer,
                                    name = "hidden%d" % (layer+1))
        return inputs

In [26]:
reset_graph()


# Parameters/Variables/Placeholders

n_features = X_train04.shape[1]
n_hidden = 100
n_outputs = 5

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")


# NN Model

with tf.name_scope("dnn"):
    dnn_outputs = dnn(X, n_hidden_layers=5, n_neurons =n_hidden) 
    logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="outputs")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")
    

# Loss/Training/Evaluation

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy") 
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([base_loss] + reg_losses, name="loss")

with tf.name_scope("train"):
    learning_rate = 0.01
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)    

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [27]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 1000
batch_size = 20

# Early Stopping Parameters
max_checks_without_progress = 10
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train04, y_train04, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_val04, y: y_val04})
        if loss_val < best_loss:
            save_path = saver.save(sess, "./mnist_nn_model_01.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    saver.restore(sess,"./mnist_nn_model_01.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test04, y: y_test04})
    print("Final test accuracy: {:.2f}%".format(acc_test*100))

0	Validation loss: 0.116407	Best loss: 0.116407	Accuracy: 97.58%
1	Validation loss: 0.180534	Best loss: 0.116407	Accuracy: 97.11%
2	Validation loss: 0.227535	Best loss: 0.116407	Accuracy: 93.86%
3	Validation loss: 0.107346	Best loss: 0.107346	Accuracy: 97.54%
4	Validation loss: 0.302668	Best loss: 0.107346	Accuracy: 95.35%
5	Validation loss: 1.631054	Best loss: 0.107346	Accuracy: 22.01%
6	Validation loss: 1.635262	Best loss: 0.107346	Accuracy: 18.73%
7	Validation loss: 1.671200	Best loss: 0.107346	Accuracy: 22.01%
8	Validation loss: 1.695277	Best loss: 0.107346	Accuracy: 19.27%
9	Validation loss: 1.744607	Best loss: 0.107346	Accuracy: 20.91%
10	Validation loss: 1.629857	Best loss: 0.107346	Accuracy: 22.01%
11	Validation loss: 1.810803	Best loss: 0.107346	Accuracy: 22.01%
12	Validation loss: 1.675703	Best loss: 0.107346	Accuracy: 18.73%
13	Validation loss: 1.633233	Best loss: 0.107346	Accuracy: 20.91%
Early stopping!
INFO:tensorflow:Restoring parameters from ./mnist_nn_model_01.ckpt
Fin

## DNN with hyperparameter tuning

We create a `DNNClassifier` class compatible with Scikit-Learn's `RandomizedSearchCV` class, to perform hyperparameter tuning.

In [33]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class DNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_hidden_layers=5, n_neurons=100, optimizer_class=tf.train.AdamOptimizer,
                 learning_rate=0.01, batch_size=20, activation=tf.nn.elu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        """Initialize the DNNClassifier by simply storing all the hyperparameters."""
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        self._session = None

    def _dnn(self, inputs):
        """Build the hidden layers, with support for batch normalization and dropout."""
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons,
                                     kernel_initializer=self.initializer,
                                     name="hidden%d" % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(inputs, momentum=self.batch_norm_momentum,
                                                       training=self._training)
            inputs = self.activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        """Build the same model as earlier"""
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
        y = tf.placeholder(tf.int32, shape=(None), name="y")

        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='training')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=logits)
        loss = tf.reduce_mean(xentropy, name="loss")

        optimizer = self.optimizer_class(learning_rate=self.learning_rate)
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # Make the important operations available easily through instance variables
        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        if self._session:
            self._session.close()

    def _get_model_params(self):
        """Get all variable values (used for early stopping, faster than saving to disk)"""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        """Set all variables to the given values (for early stopping, faster than loading from disk)"""
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        """Fit the model to the training set. If X_valid and y_valid are provided, use early stopping."""
        self.close_session()

        # infer n_inputs and n_outputs from the training set.
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)
        
        # Translate the labels vector to a vector of sorted class indices, containing
        # integers from 0 to n_outputs - 1.
        # For example, if y is equal to [8, 8, 9, 5, 7, 6, 6, 6], then the sorted class
        # labels (self.classes_) will be equal to [5, 6, 7, 8, 9], and the labels vector
        # will be translated to [3, 3, 4, 0, 2, 1, 1, 1]
        self.class_to_index_ = {label: index
                                for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label]
                      for label in y], dtype=np.int32)
        
        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            # extra ops for batch normalization
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # needed in case of early stopping
        max_checks_without_progress = 10
        checks_without_progress = 0
        best_loss = np.infty
        best_params = None
        
        # Now train the model!
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, len(X) // self.batch_size):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(self._training_op, feed_dict=feed_dict)
                    if extra_update_ops:
                        sess.run(extra_update_ops, feed_dict=feed_dict)
                if X_valid is not None and y_valid is not None:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy],
                                                 feed_dict={self._X: X_valid,
                                                            self._y: y_valid})
                    if loss_val < best_loss:
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            # If we used early stopping then rollback to the best model found
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        self._saver.save(self._session, path)

Testing the DNNClassifier

In [34]:
dnn_clf = DNNClassifier(random_state=42)
dnn_clf.fit(X_train04, y_train04, n_epochs=1000, X_valid=X_val04, y_valid=y_val04)

0	Validation loss: 0.116407	Best loss: 0.116407	Accuracy: 97.58%
1	Validation loss: 0.180534	Best loss: 0.116407	Accuracy: 97.11%
2	Validation loss: 0.227535	Best loss: 0.116407	Accuracy: 93.86%
3	Validation loss: 0.107346	Best loss: 0.107346	Accuracy: 97.54%
4	Validation loss: 0.302668	Best loss: 0.107346	Accuracy: 95.35%
5	Validation loss: 1.631054	Best loss: 0.107346	Accuracy: 22.01%
6	Validation loss: 1.635262	Best loss: 0.107346	Accuracy: 18.73%
7	Validation loss: 1.671200	Best loss: 0.107346	Accuracy: 22.01%
8	Validation loss: 1.695277	Best loss: 0.107346	Accuracy: 19.27%
9	Validation loss: 1.744607	Best loss: 0.107346	Accuracy: 20.91%
10	Validation loss: 1.629857	Best loss: 0.107346	Accuracy: 22.01%
11	Validation loss: 1.810803	Best loss: 0.107346	Accuracy: 22.01%
12	Validation loss: 1.675703	Best loss: 0.107346	Accuracy: 18.73%
13	Validation loss: 1.633233	Best loss: 0.107346	Accuracy: 20.91%
14	Validation loss: 1.652905	Best loss: 0.107346	Accuracy: 20.91%
Early stopping!


DNNClassifier(activation=<function elu at 0x125c65268>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0xb305edf28>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [35]:
from sklearn.metrics import accuracy_score

y_pred = dnn_clf.predict(X_test04)
accuracy_score(y_test04, y_pred)

0.9725627553998832

Now tuning hyperparameters

In [37]:
from sklearn.model_selection import RandomizedSearchCV

def leaky_relu(alpha=0.01):
    def parametrized_leaky_relu(z, name=None):
        return tf.maximum(alpha * z, z, name=name)
    return parametrized_leaky_relu

param_distribs = {
    "n_neurons": [50, 100, 150],
    #"batch_size": [10, 50, 100],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    "n_hidden_layers": [2, 4, 6],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
}

rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                cv=3, random_state=42, verbose=2)
rnd_search.fit(X_train04, y_train04, X_valid=X_val04, y_valid=y_val04, n_epochs=1000)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] n_neurons=50, n_hidden_layers=2, learning_rate=0.02, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7620> 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


0	Validation loss: 0.249434	Best loss: 0.249434	Accuracy: 95.04%
1	Validation loss: 0.123172	Best loss: 0.123172	Accuracy: 96.83%
2	Validation loss: 0.243961	Best loss: 0.123172	Accuracy: 96.87%
3	Validation loss: 0.302742	Best loss: 0.123172	Accuracy: 95.54%
4	Validation loss: 0.202096	Best loss: 0.123172	Accuracy: 97.26%
5	Validation loss: 0.289979	Best loss: 0.123172	Accuracy: 97.34%
6	Validation loss: 0.492861	Best loss: 0.123172	Accuracy: 96.95%
7	Validation loss: 0.518559	Best loss: 0.123172	Accuracy: 96.91%
8	Validation loss: 0.331733	Best loss: 0.123172	Accuracy: 97.69%
9	Validation loss: 0.553148	Best loss: 0.123172	Accuracy: 97.54%
10	Validation loss: 0.611440	Best loss: 0.123172	Accuracy: 96.99%
11	Validation loss: 0.572985	Best loss: 0.123172	Accuracy: 97.30%
12	Validation loss: 0.868629	Best loss: 0.123172	Accuracy: 97.26%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=2, learning_rate=0.02, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c76

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.7s remaining:    0.0s


0	Validation loss: 0.228452	Best loss: 0.228452	Accuracy: 95.93%
1	Validation loss: 0.136174	Best loss: 0.136174	Accuracy: 97.30%
2	Validation loss: 0.185548	Best loss: 0.136174	Accuracy: 96.83%
3	Validation loss: 0.320776	Best loss: 0.136174	Accuracy: 96.36%
4	Validation loss: 0.230437	Best loss: 0.136174	Accuracy: 96.95%
5	Validation loss: 0.360208	Best loss: 0.136174	Accuracy: 97.26%
6	Validation loss: 0.397652	Best loss: 0.136174	Accuracy: 96.21%
7	Validation loss: 0.370255	Best loss: 0.136174	Accuracy: 97.42%
8	Validation loss: 0.457577	Best loss: 0.136174	Accuracy: 98.12%
9	Validation loss: 0.414796	Best loss: 0.136174	Accuracy: 97.77%
10	Validation loss: 0.729730	Best loss: 0.136174	Accuracy: 96.64%
11	Validation loss: 0.519125	Best loss: 0.136174	Accuracy: 97.58%
12	Validation loss: 0.834429	Best loss: 0.136174	Accuracy: 97.73%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=2, learning_rate=0.02, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c76

4	Validation loss: 0.239799	Best loss: 0.093471	Accuracy: 97.11%
5	Validation loss: 0.380274	Best loss: 0.093471	Accuracy: 97.03%
6	Validation loss: 0.153387	Best loss: 0.093471	Accuracy: 97.22%
7	Validation loss: 0.191121	Best loss: 0.093471	Accuracy: 97.22%
8	Validation loss: 0.156504	Best loss: 0.093471	Accuracy: 98.32%
9	Validation loss: 0.331054	Best loss: 0.093471	Accuracy: 97.26%
10	Validation loss: 0.647462	Best loss: 0.093471	Accuracy: 97.11%
11	Validation loss: 0.301074	Best loss: 0.093471	Accuracy: 98.28%
12	Validation loss: 0.764735	Best loss: 0.093471	Accuracy: 97.07%
13	Validation loss: 0.309268	Best loss: 0.093471	Accuracy: 97.69%
14	Validation loss: 0.442050	Best loss: 0.093471	Accuracy: 96.91%
Early stopping!
[CV]  n_neurons=100, n_hidden_layers=2, learning_rate=0.02, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c79d8>, total=  16.8s
[CV] n_neurons=100, n_hidden_layers=6, learning_rate=0.05, activation=<function leaky_relu.<locals>.parametr

6	Validation loss: 0.171143	Best loss: 0.091835	Accuracy: 96.91%
7	Validation loss: 0.110233	Best loss: 0.091835	Accuracy: 97.54%
8	Validation loss: 0.192995	Best loss: 0.091835	Accuracy: 94.18%
9	Validation loss: 0.177854	Best loss: 0.091835	Accuracy: 96.72%
10	Validation loss: 0.130556	Best loss: 0.091835	Accuracy: 96.79%
11	Validation loss: 0.177134	Best loss: 0.091835	Accuracy: 95.70%
12	Validation loss: 0.102804	Best loss: 0.091835	Accuracy: 97.34%
13	Validation loss: 0.133042	Best loss: 0.091835	Accuracy: 96.87%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=4, learning_rate=0.02, activation=<function relu at 0x125c7e598>, total=  16.2s
[CV] n_neurons=50, n_hidden_layers=4, learning_rate=0.02, activation=<function relu at 0x125c7e598> 
0	Validation loss: 0.217125	Best loss: 0.217125	Accuracy: 95.47%
1	Validation loss: 0.207101	Best loss: 0.207101	Accuracy: 97.07%
2	Validation loss: 0.157320	Best loss: 0.157320	Accuracy: 96.52%
3	Validation loss: 0.093785	Best loss: 0.093785	

1	Validation loss: 4.380506	Best loss: 2.190116	Accuracy: 25.25%
2	Validation loss: 2.581820	Best loss: 2.190116	Accuracy: 30.84%
3	Validation loss: 3.704922	Best loss: 2.190116	Accuracy: 31.74%
4	Validation loss: 4.257008	Best loss: 2.190116	Accuracy: 31.90%
5	Validation loss: 2.159677	Best loss: 2.159677	Accuracy: 32.96%
6	Validation loss: 2.408008	Best loss: 2.159677	Accuracy: 19.27%
7	Validation loss: 1.700167	Best loss: 1.700167	Accuracy: 19.08%
8	Validation loss: 5.693505	Best loss: 1.700167	Accuracy: 18.73%
9	Validation loss: 2.105020	Best loss: 1.700167	Accuracy: 18.73%
10	Validation loss: 2.289117	Best loss: 1.700167	Accuracy: 20.91%
11	Validation loss: 2.280354	Best loss: 1.700167	Accuracy: 20.91%
12	Validation loss: 2.512654	Best loss: 1.700167	Accuracy: 20.91%
13	Validation loss: 2.248724	Best loss: 1.700167	Accuracy: 22.01%
14	Validation loss: 1.758352	Best loss: 1.700167	Accuracy: 20.91%
15	Validation loss: 3.171873	Best loss: 1.700167	Accuracy: 18.73%
16	Validation loss:

12	Validation loss: 0.416355	Best loss: 0.196758	Accuracy: 94.45%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=2, learning_rate=0.05, activation=<function relu at 0x125c7e598>, total=   9.4s
[CV] n_neurons=50, n_hidden_layers=2, learning_rate=0.05, activation=<function relu at 0x125c7e598> 
0	Validation loss: 0.263345	Best loss: 0.263345	Accuracy: 94.64%
1	Validation loss: 1.146418	Best loss: 0.263345	Accuracy: 82.10%
2	Validation loss: 0.212643	Best loss: 0.212643	Accuracy: 92.73%
3	Validation loss: 0.155928	Best loss: 0.155928	Accuracy: 95.54%
4	Validation loss: 0.387159	Best loss: 0.155928	Accuracy: 92.92%
5	Validation loss: 0.187934	Best loss: 0.155928	Accuracy: 94.41%
6	Validation loss: 0.214012	Best loss: 0.155928	Accuracy: 93.12%
7	Validation loss: 0.383921	Best loss: 0.155928	Accuracy: 91.24%
8	Validation loss: 0.595195	Best loss: 0.155928	Accuracy: 69.66%
9	Validation loss: 0.549807	Best loss: 0.155928	Accuracy: 72.83%
10	Validation loss: 0.565958	Best loss: 0.155928	Ac

1	Validation loss: 0.092136	Best loss: 0.092136	Accuracy: 97.73%
2	Validation loss: 0.107236	Best loss: 0.092136	Accuracy: 97.54%
3	Validation loss: 0.260911	Best loss: 0.092136	Accuracy: 95.74%
4	Validation loss: 0.114584	Best loss: 0.092136	Accuracy: 97.15%
5	Validation loss: 0.083375	Best loss: 0.083375	Accuracy: 98.24%
6	Validation loss: 0.101032	Best loss: 0.083375	Accuracy: 97.65%
7	Validation loss: 0.297169	Best loss: 0.083375	Accuracy: 91.13%
8	Validation loss: 0.158218	Best loss: 0.083375	Accuracy: 97.26%
9	Validation loss: 0.110842	Best loss: 0.083375	Accuracy: 97.54%
10	Validation loss: 0.113640	Best loss: 0.083375	Accuracy: 97.58%
11	Validation loss: 0.179759	Best loss: 0.083375	Accuracy: 95.70%
12	Validation loss: 0.171990	Best loss: 0.083375	Accuracy: 96.25%
13	Validation loss: 0.074769	Best loss: 0.074769	Accuracy: 98.32%
14	Validation loss: 0.089680	Best loss: 0.074769	Accuracy: 97.38%
15	Validation loss: 0.452734	Best loss: 0.074769	Accuracy: 93.94%
16	Validation loss:

0	Validation loss: 27.712013	Best loss: 27.712013	Accuracy: 93.75%
1	Validation loss: 5335956.500000	Best loss: 27.712013	Accuracy: 30.34%
2	Validation loss: 22412.179688	Best loss: 27.712013	Accuracy: 88.58%
3	Validation loss: 8955.676758	Best loss: 27.712013	Accuracy: 93.32%
4	Validation loss: 6851.161621	Best loss: 27.712013	Accuracy: 94.06%
5	Validation loss: 8857.863281	Best loss: 27.712013	Accuracy: 95.39%
6	Validation loss: 12449.430664	Best loss: 27.712013	Accuracy: 94.92%
7	Validation loss: 5160.800781	Best loss: 27.712013	Accuracy: 93.94%
8	Validation loss: 2495156.750000	Best loss: 27.712013	Accuracy: 67.08%
9	Validation loss: 173163.875000	Best loss: 27.712013	Accuracy: 87.88%
10	Validation loss: 37803.972656	Best loss: 27.712013	Accuracy: 95.07%
11	Validation loss: 18721.080078	Best loss: 27.712013	Accuracy: 97.07%
Early stopping!
[CV]  n_neurons=100, n_hidden_layers=6, learning_rate=0.05, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7620>, to

15	Validation loss: 2.464459	Best loss: 1.668957	Accuracy: 18.73%
16	Validation loss: 2.047794	Best loss: 1.668957	Accuracy: 19.27%
17	Validation loss: 2.922874	Best loss: 1.668957	Accuracy: 20.91%
18	Validation loss: 2.102265	Best loss: 1.668957	Accuracy: 22.01%
Early stopping!
[CV]  n_neurons=100, n_hidden_layers=2, learning_rate=0.1, activation=<function elu at 0x125c65268>, total=  18.1s
[CV] n_neurons=100, n_hidden_layers=2, learning_rate=0.1, activation=<function elu at 0x125c65268> 
0	Validation loss: 8.174923	Best loss: 8.174923	Accuracy: 40.62%
1	Validation loss: 6.146972	Best loss: 6.146972	Accuracy: 45.97%
2	Validation loss: 2.134912	Best loss: 2.134912	Accuracy: 30.14%
3	Validation loss: 2.383816	Best loss: 2.134912	Accuracy: 22.28%
4	Validation loss: 3.239392	Best loss: 2.134912	Accuracy: 19.08%
5	Validation loss: 2.239773	Best loss: 2.134912	Accuracy: 33.54%
6	Validation loss: 2.036376	Best loss: 2.036376	Accuracy: 27.95%
7	Validation loss: 2.704327	Best loss: 2.036376	Ac

5	Validation loss: 1.804752	Best loss: 1.621263	Accuracy: 20.91%
6	Validation loss: 1.909755	Best loss: 1.621263	Accuracy: 19.27%
7	Validation loss: 2.252261	Best loss: 1.621263	Accuracy: 22.01%
8	Validation loss: 1.937688	Best loss: 1.621263	Accuracy: 19.08%
9	Validation loss: 1.924858	Best loss: 1.621263	Accuracy: 22.01%
10	Validation loss: 1.844979	Best loss: 1.621263	Accuracy: 22.01%
11	Validation loss: 1.978264	Best loss: 1.621263	Accuracy: 19.27%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=6, learning_rate=0.1, activation=<function elu at 0x125c65268>, total=  14.8s
[CV] n_neurons=150, n_hidden_layers=6, learning_rate=0.02, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7620> 
0	Validation loss: 4.901204	Best loss: 4.901204	Accuracy: 67.20%
1	Validation loss: 0.152272	Best loss: 0.152272	Accuracy: 95.43%
2	Validation loss: 0.334681	Best loss: 0.152272	Accuracy: 94.37%
3	Validation loss: 2876.576660	Best loss: 0.152272	Accuracy: 56.06%
4	Validat

1	Validation loss: 0.067535	Best loss: 0.067535	Accuracy: 98.08%
2	Validation loss: 0.096995	Best loss: 0.067535	Accuracy: 97.65%
3	Validation loss: 0.069862	Best loss: 0.067535	Accuracy: 98.28%
4	Validation loss: 0.070557	Best loss: 0.067535	Accuracy: 98.40%
5	Validation loss: 0.070488	Best loss: 0.067535	Accuracy: 98.55%
6	Validation loss: 0.070821	Best loss: 0.067535	Accuracy: 98.28%
7	Validation loss: 0.094266	Best loss: 0.067535	Accuracy: 98.12%
8	Validation loss: 0.098590	Best loss: 0.067535	Accuracy: 98.24%
9	Validation loss: 0.077481	Best loss: 0.067535	Accuracy: 98.28%
10	Validation loss: 0.155069	Best loss: 0.067535	Accuracy: 98.12%
11	Validation loss: 0.084216	Best loss: 0.067535	Accuracy: 98.44%
12	Validation loss: 0.125920	Best loss: 0.067535	Accuracy: 98.20%
Early stopping!
[CV]  n_neurons=100, n_hidden_layers=2, learning_rate=0.01, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c79d8>, total=  19.0s
[CV] n_neurons=150, n_hidden_layers=2, learni

0	Validation loss: 0.287127	Best loss: 0.287127	Accuracy: 92.65%
1	Validation loss: 0.175098	Best loss: 0.175098	Accuracy: 97.65%
2	Validation loss: 0.196351	Best loss: 0.175098	Accuracy: 97.30%
3	Validation loss: 0.401580	Best loss: 0.175098	Accuracy: 96.76%
4	Validation loss: 0.385993	Best loss: 0.175098	Accuracy: 97.77%
5	Validation loss: 0.327463	Best loss: 0.175098	Accuracy: 98.01%
6	Validation loss: 0.661817	Best loss: 0.175098	Accuracy: 97.97%
7	Validation loss: 0.550150	Best loss: 0.175098	Accuracy: 97.38%
8	Validation loss: 1.570348	Best loss: 0.175098	Accuracy: 96.25%
9	Validation loss: 0.679434	Best loss: 0.175098	Accuracy: 97.89%
10	Validation loss: 0.765558	Best loss: 0.175098	Accuracy: 97.77%
11	Validation loss: 0.723242	Best loss: 0.175098	Accuracy: 98.16%
12	Validation loss: 0.574852	Best loss: 0.175098	Accuracy: 98.55%
Early stopping!
[CV]  n_neurons=150, n_hidden_layers=2, learning_rate=0.01, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7

5	Validation loss: 4.903808	Best loss: 0.643269	Accuracy: 57.70%
6	Validation loss: 1.562397	Best loss: 0.643269	Accuracy: 55.86%
7	Validation loss: 1.507043	Best loss: 0.643269	Accuracy: 55.28%
8	Validation loss: 1.360579	Best loss: 0.643269	Accuracy: 59.70%
9	Validation loss: 1.733593	Best loss: 0.643269	Accuracy: 53.52%
10	Validation loss: 1.310998	Best loss: 0.643269	Accuracy: 54.53%
11	Validation loss: 1.756903	Best loss: 0.643269	Accuracy: 44.45%
12	Validation loss: 1.021507	Best loss: 0.643269	Accuracy: 58.41%
13	Validation loss: 1.295424	Best loss: 0.643269	Accuracy: 51.25%
Early stopping!
[CV]  n_neurons=100, n_hidden_layers=2, learning_rate=0.05, activation=<function elu at 0x125c65268>, total=  15.4s
[CV] n_neurons=100, n_hidden_layers=2, learning_rate=0.05, activation=<function elu at 0x125c65268> 
0	Validation loss: 1.087453	Best loss: 1.087453	Accuracy: 55.04%
1	Validation loss: 1.154865	Best loss: 1.087453	Accuracy: 70.80%
2	Validation loss: 2.200966	Best loss: 1.087453	

13	Validation loss: 0.108267	Best loss: 0.087023	Accuracy: 98.12%
14	Validation loss: 0.103217	Best loss: 0.087023	Accuracy: 98.01%
15	Validation loss: 0.141140	Best loss: 0.087023	Accuracy: 96.95%
16	Validation loss: 0.075286	Best loss: 0.075286	Accuracy: 97.93%
17	Validation loss: 0.129409	Best loss: 0.075286	Accuracy: 97.65%
18	Validation loss: 0.116168	Best loss: 0.075286	Accuracy: 97.65%
19	Validation loss: 0.123179	Best loss: 0.075286	Accuracy: 98.32%
20	Validation loss: 0.111581	Best loss: 0.075286	Accuracy: 97.93%
21	Validation loss: 0.141800	Best loss: 0.075286	Accuracy: 97.77%
22	Validation loss: 0.142180	Best loss: 0.075286	Accuracy: 97.69%
23	Validation loss: 0.134567	Best loss: 0.075286	Accuracy: 97.50%
24	Validation loss: 0.146764	Best loss: 0.075286	Accuracy: 97.58%
25	Validation loss: 0.114230	Best loss: 0.075286	Accuracy: 97.69%
26	Validation loss: 0.126271	Best loss: 0.075286	Accuracy: 97.54%
27	Validation loss: 0.131998	Best loss: 0.075286	Accuracy: 98.28%
Early stop

[CV]  n_neurons=150, n_hidden_layers=6, learning_rate=0.05, activation=<function relu at 0x125c7e598>, total=  27.6s
[CV] n_neurons=150, n_hidden_layers=6, learning_rate=0.05, activation=<function relu at 0x125c7e598> 
0	Validation loss: 0.865589	Best loss: 0.865589	Accuracy: 56.41%
1	Validation loss: 1.619276	Best loss: 0.865589	Accuracy: 19.27%
2	Validation loss: 1.609748	Best loss: 0.865589	Accuracy: 22.01%
3	Validation loss: 1.615066	Best loss: 0.865589	Accuracy: 18.73%
4	Validation loss: 1.614738	Best loss: 0.865589	Accuracy: 22.01%
5	Validation loss: 1.616514	Best loss: 0.865589	Accuracy: 22.01%
6	Validation loss: 1.629260	Best loss: 0.865589	Accuracy: 19.08%
7	Validation loss: 1.612704	Best loss: 0.865589	Accuracy: 18.73%
8	Validation loss: 1.612846	Best loss: 0.865589	Accuracy: 19.27%
9	Validation loss: 1.611298	Best loss: 0.865589	Accuracy: 22.01%
10	Validation loss: 1.610328	Best loss: 0.865589	Accuracy: 19.27%
11	Validation loss: 1.611546	Best loss: 0.865589	Accuracy: 22.01%

4	Validation loss: 1.603168	Best loss: 0.816165	Accuracy: 23.49%
5	Validation loss: 1.609507	Best loss: 0.816165	Accuracy: 23.49%
6	Validation loss: 1.624682	Best loss: 0.816165	Accuracy: 20.76%
7	Validation loss: 1.638216	Best loss: 0.816165	Accuracy: 18.73%
8	Validation loss: 1.633062	Best loss: 0.816165	Accuracy: 20.76%
9	Validation loss: 1.598757	Best loss: 0.816165	Accuracy: 18.73%
10	Validation loss: 1.590511	Best loss: 0.816165	Accuracy: 23.49%
11	Validation loss: 1.599634	Best loss: 0.816165	Accuracy: 23.49%
Early stopping!
[CV]  n_neurons=100, n_hidden_layers=4, learning_rate=0.1, activation=<function relu at 0x125c7e598>, total=  15.4s
[CV] n_neurons=100, n_hidden_layers=4, learning_rate=0.1, activation=<function relu at 0x125c7e598> 
0	Validation loss: 1.637712	Best loss: 1.637712	Accuracy: 22.01%
1	Validation loss: 1.629014	Best loss: 1.629014	Accuracy: 19.27%
2	Validation loss: 1.618641	Best loss: 1.618641	Accuracy: 19.27%
3	Validation loss: 1.615541	Best loss: 1.615541	Ac

9	Validation loss: 0.142771	Best loss: 0.076477	Accuracy: 98.01%
10	Validation loss: 0.175796	Best loss: 0.076477	Accuracy: 98.28%
11	Validation loss: 0.135857	Best loss: 0.076477	Accuracy: 98.44%
12	Validation loss: 0.127126	Best loss: 0.076477	Accuracy: 98.32%
13	Validation loss: 0.189785	Best loss: 0.076477	Accuracy: 98.40%
14	Validation loss: 0.270304	Best loss: 0.076477	Accuracy: 98.08%
15	Validation loss: 0.141056	Best loss: 0.076477	Accuracy: 98.51%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=2, learning_rate=0.01, activation=<function elu at 0x125c65268>, total=  12.8s
[CV] n_neurons=50, n_hidden_layers=2, learning_rate=0.01, activation=<function elu at 0x125c65268> 
0	Validation loss: 0.074235	Best loss: 0.074235	Accuracy: 97.85%
1	Validation loss: 0.085433	Best loss: 0.074235	Accuracy: 98.08%
2	Validation loss: 0.105408	Best loss: 0.074235	Accuracy: 97.34%
3	Validation loss: 0.089623	Best loss: 0.074235	Accuracy: 98.28%
4	Validation loss: 0.090220	Best loss: 0.074235	

2	Validation loss: 0.126630	Best loss: 0.126630	Accuracy: 97.03%
3	Validation loss: 0.172662	Best loss: 0.126630	Accuracy: 96.64%
4	Validation loss: 0.129461	Best loss: 0.126630	Accuracy: 97.07%
5	Validation loss: 0.137002	Best loss: 0.126630	Accuracy: 96.52%
6	Validation loss: 0.118538	Best loss: 0.118538	Accuracy: 97.03%
7	Validation loss: 0.091000	Best loss: 0.091000	Accuracy: 97.07%
8	Validation loss: 0.137986	Best loss: 0.091000	Accuracy: 98.05%
9	Validation loss: 0.104134	Best loss: 0.091000	Accuracy: 97.65%
10	Validation loss: 0.202891	Best loss: 0.091000	Accuracy: 97.77%
11	Validation loss: 0.234934	Best loss: 0.091000	Accuracy: 97.50%
12	Validation loss: 0.247384	Best loss: 0.091000	Accuracy: 97.38%
13	Validation loss: 0.159267	Best loss: 0.091000	Accuracy: 97.19%
14	Validation loss: 0.248893	Best loss: 0.091000	Accuracy: 97.81%
15	Validation loss: 0.140309	Best loss: 0.091000	Accuracy: 97.11%
16	Validation loss: 0.137784	Best loss: 0.091000	Accuracy: 97.62%
17	Validation loss

14	Validation loss: 0.241308	Best loss: 0.091477	Accuracy: 98.28%
15	Validation loss: 0.131940	Best loss: 0.091477	Accuracy: 97.30%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=4, learning_rate=0.01, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7620>, total=  16.8s
[CV] n_neurons=50, n_hidden_layers=4, learning_rate=0.01, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7620> 
0	Validation loss: 0.090054	Best loss: 0.090054	Accuracy: 97.77%
1	Validation loss: 0.099206	Best loss: 0.090054	Accuracy: 97.97%
2	Validation loss: 0.077201	Best loss: 0.077201	Accuracy: 97.93%
3	Validation loss: 0.116850	Best loss: 0.077201	Accuracy: 96.68%
4	Validation loss: 0.111029	Best loss: 0.077201	Accuracy: 97.30%
5	Validation loss: 0.081431	Best loss: 0.077201	Accuracy: 98.08%
6	Validation loss: 0.169818	Best loss: 0.077201	Accuracy: 97.65%
7	Validation loss: 0.109413	Best loss: 0.077201	Accuracy: 97.58%
8	Validation loss: 0.092445	Best loss

0	Validation loss: 0.652247	Best loss: 0.652247	Accuracy: 94.64%
1	Validation loss: 0.259268	Best loss: 0.259268	Accuracy: 97.03%
2	Validation loss: 2.664658	Best loss: 0.259268	Accuracy: 95.78%
3	Validation loss: 0.658152	Best loss: 0.259268	Accuracy: 97.50%
4	Validation loss: 1.044078	Best loss: 0.259268	Accuracy: 96.29%
5	Validation loss: 0.663108	Best loss: 0.259268	Accuracy: 97.38%
6	Validation loss: 3.101695	Best loss: 0.259268	Accuracy: 93.94%
7	Validation loss: 1.633014	Best loss: 0.259268	Accuracy: 97.34%
8	Validation loss: 1.725119	Best loss: 0.259268	Accuracy: 97.97%
9	Validation loss: 1.612104	Best loss: 0.259268	Accuracy: 97.97%
10	Validation loss: 1.633060	Best loss: 0.259268	Accuracy: 97.62%
11	Validation loss: 2.014644	Best loss: 0.259268	Accuracy: 97.62%
12	Validation loss: 3.563205	Best loss: 0.259268	Accuracy: 97.62%
Early stopping!
[CV]  n_neurons=100, n_hidden_layers=2, learning_rate=0.02, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7

1	Validation loss: 1.989716	Best loss: 1.760455	Accuracy: 18.73%
2	Validation loss: 1.698209	Best loss: 1.698209	Accuracy: 19.27%
3	Validation loss: 2.037113	Best loss: 1.698209	Accuracy: 19.27%
4	Validation loss: 1.748402	Best loss: 1.698209	Accuracy: 19.08%
5	Validation loss: 1.727974	Best loss: 1.698209	Accuracy: 19.08%
6	Validation loss: 1.742245	Best loss: 1.698209	Accuracy: 22.01%
7	Validation loss: 1.936478	Best loss: 1.698209	Accuracy: 20.91%
8	Validation loss: 1.788774	Best loss: 1.698209	Accuracy: 18.73%
9	Validation loss: 1.757687	Best loss: 1.698209	Accuracy: 18.73%
10	Validation loss: 1.788234	Best loss: 1.698209	Accuracy: 22.01%
11	Validation loss: 1.700852	Best loss: 1.698209	Accuracy: 18.73%
12	Validation loss: 1.981314	Best loss: 1.698209	Accuracy: 20.91%
13	Validation loss: 1.818315	Best loss: 1.698209	Accuracy: 22.01%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=6, learning_rate=0.05, activation=<function elu at 0x125c65268>, total=  23.2s
[CV] n_neurons=50, n

3	Validation loss: 0.301411	Best loss: 0.165885	Accuracy: 91.59%
4	Validation loss: 0.138047	Best loss: 0.138047	Accuracy: 96.25%
5	Validation loss: 0.108308	Best loss: 0.108308	Accuracy: 96.91%
6	Validation loss: 0.246153	Best loss: 0.108308	Accuracy: 97.30%
7	Validation loss: 0.908382	Best loss: 0.108308	Accuracy: 70.95%
8	Validation loss: 0.223584	Best loss: 0.108308	Accuracy: 94.41%
9	Validation loss: 0.136208	Best loss: 0.108308	Accuracy: 96.76%
10	Validation loss: 0.142548	Best loss: 0.108308	Accuracy: 96.79%
11	Validation loss: 4.025273	Best loss: 0.108308	Accuracy: 87.80%
12	Validation loss: 0.279693	Best loss: 0.108308	Accuracy: 93.00%
13	Validation loss: 0.236199	Best loss: 0.108308	Accuracy: 96.40%
14	Validation loss: 0.278720	Best loss: 0.108308	Accuracy: 95.43%
15	Validation loss: 0.425620	Best loss: 0.108308	Accuracy: 96.56%
16	Validation loss: 0.602814	Best loss: 0.108308	Accuracy: 97.03%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=4, learning_rate=0.02, activati

7	Validation loss: 1.505320	Best loss: 0.111053	Accuracy: 97.54%
8	Validation loss: 0.491240	Best loss: 0.111053	Accuracy: 96.64%
9	Validation loss: 0.162897	Best loss: 0.111053	Accuracy: 97.42%
10	Validation loss: 0.160172	Best loss: 0.111053	Accuracy: 96.99%
11	Validation loss: 0.146806	Best loss: 0.111053	Accuracy: 98.01%
12	Validation loss: 0.169123	Best loss: 0.111053	Accuracy: 97.34%
13	Validation loss: 0.093574	Best loss: 0.093574	Accuracy: 98.28%
14	Validation loss: 0.120187	Best loss: 0.093574	Accuracy: 98.12%
15	Validation loss: 0.852941	Best loss: 0.093574	Accuracy: 96.48%
16	Validation loss: 0.132369	Best loss: 0.093574	Accuracy: 97.03%
17	Validation loss: 0.101769	Best loss: 0.093574	Accuracy: 97.93%
18	Validation loss: 0.169496	Best loss: 0.093574	Accuracy: 97.54%
19	Validation loss: 0.288150	Best loss: 0.093574	Accuracy: 93.82%
20	Validation loss: 0.382809	Best loss: 0.093574	Accuracy: 94.21%
21	Validation loss: 0.340673	Best loss: 0.093574	Accuracy: 93.35%
22	Validation

7	Validation loss: 1.612707	Best loss: 1.609777	Accuracy: 18.73%
8	Validation loss: 1.612845	Best loss: 1.609777	Accuracy: 19.27%
9	Validation loss: 1.611299	Best loss: 1.609777	Accuracy: 22.01%
10	Validation loss: 1.610328	Best loss: 1.609777	Accuracy: 19.27%
11	Validation loss: 1.611546	Best loss: 1.609777	Accuracy: 22.01%
12	Validation loss: 1.612742	Best loss: 1.609777	Accuracy: 19.27%
13	Validation loss: 1.613987	Best loss: 1.609777	Accuracy: 19.08%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=6, learning_rate=0.05, activation=<function relu at 0x125c7e598>, total=  15.9s
[CV] n_neurons=50, n_hidden_layers=6, learning_rate=0.05, activation=<function relu at 0x125c7e598> 
0	Validation loss: 1.208835	Best loss: 1.208835	Accuracy: 39.29%
1	Validation loss: 1.180306	Best loss: 1.180306	Accuracy: 35.81%
2	Validation loss: 1.616475	Best loss: 1.180306	Accuracy: 19.27%
3	Validation loss: 1.622266	Best loss: 1.180306	Accuracy: 22.01%
4	Validation loss: 1.608986	Best loss: 1.180306	

33	Validation loss: 0.779403	Best loss: 0.767368	Accuracy: 62.04%
34	Validation loss: 0.773933	Best loss: 0.767368	Accuracy: 59.30%
35	Validation loss: 0.800630	Best loss: 0.767368	Accuracy: 60.79%
Early stopping!
[CV]  n_neurons=150, n_hidden_layers=4, learning_rate=0.05, activation=<function relu at 0x125c7e598>, total= 1.2min
[CV] n_neurons=150, n_hidden_layers=4, learning_rate=0.05, activation=<function relu at 0x125c7e598> 
0	Validation loss: 1.668997	Best loss: 1.668997	Accuracy: 21.97%
1	Validation loss: 1.618384	Best loss: 1.618384	Accuracy: 19.27%
2	Validation loss: 1.613710	Best loss: 1.613710	Accuracy: 19.27%
3	Validation loss: 1.622370	Best loss: 1.613710	Accuracy: 22.01%
4	Validation loss: 1.608999	Best loss: 1.608999	Accuracy: 22.01%
5	Validation loss: 1.616001	Best loss: 1.608999	Accuracy: 22.01%
6	Validation loss: 1.615687	Best loss: 1.608999	Accuracy: 19.27%
7	Validation loss: 1.612685	Best loss: 1.608999	Accuracy: 22.01%
8	Validation loss: 1.613897	Best loss: 1.608999

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 55.1min finished


0	Validation loss: 0.112554	Best loss: 0.112554	Accuracy: 97.34%
1	Validation loss: 0.086099	Best loss: 0.086099	Accuracy: 97.69%
2	Validation loss: 0.130937	Best loss: 0.086099	Accuracy: 98.05%
3	Validation loss: 0.069808	Best loss: 0.069808	Accuracy: 98.20%
4	Validation loss: 0.060656	Best loss: 0.060656	Accuracy: 98.83%
5	Validation loss: 0.098828	Best loss: 0.060656	Accuracy: 97.89%
6	Validation loss: 0.099327	Best loss: 0.060656	Accuracy: 98.05%
7	Validation loss: 0.092333	Best loss: 0.060656	Accuracy: 98.44%
8	Validation loss: 0.092289	Best loss: 0.060656	Accuracy: 98.08%
9	Validation loss: 0.057853	Best loss: 0.057853	Accuracy: 98.51%
10	Validation loss: 0.386360	Best loss: 0.057853	Accuracy: 97.93%
11	Validation loss: 0.143013	Best loss: 0.057853	Accuracy: 98.48%
12	Validation loss: 0.117396	Best loss: 0.057853	Accuracy: 98.44%
13	Validation loss: 0.175498	Best loss: 0.057853	Accuracy: 98.05%
14	Validation loss: 0.089757	Best loss: 0.057853	Accuracy: 98.59%
15	Validation loss: 

RandomizedSearchCV(cv=3, error_score='raise-deprecating',
          estimator=DNNClassifier(activation=<function elu at 0x125c65268>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0xb305edf28>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42),
          fit_params=None, iid='warn', n_iter=50, n_jobs=None,
          param_distributions={'n_neurons': [50, 100, 150], 'learning_rate': [0.01, 0.02, 0.05, 0.1], 'activation': [<function relu at 0x125c7e598>, <function elu at 0x125c65268>, <function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c79d8>, <function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7620>], 'n_hidden_layers': [2, 4, 6]},
          pre_dispatch='2*n_jobs', random_state=42, refit=True,
          return_train_score='warn', scoring=None, verbose=2)

In [38]:
rnd_search.best_params_

{'activation': <function __main__.leaky_relu.<locals>.parametrized_leaky_relu>,
 'learning_rate': 0.01,
 'n_hidden_layers': 2,
 'n_neurons': 100}

In [40]:
y_pred = rnd_search.predict(X_test04)
accuracy_score(y_test04, y_pred)

0.9859894921190894

In [41]:
rnd_search.best_estimator_.save("./mnist04_best_dnn_model")

### Comparison with/without batch normalization

In [42]:
rnd_search.best_estimator_

DNNClassifier(activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c79d8>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0xb2c86a320>,
       learning_rate=0.01, n_hidden_layers=2, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [56]:
dnn_classifier = DNNClassifier(n_hidden_layers=2, n_neurons = 100, learning_rate = 0.01, 
                               activation = leaky_relu(alpha=0.01))
dnn_classifier.fit(X_train04, y_train04, n_epochs=1000, X_valid=X_val04, y_valid=y_val04)

0	Validation loss: 0.061794	Best loss: 0.061794	Accuracy: 98.32%
1	Validation loss: 0.073716	Best loss: 0.061794	Accuracy: 97.73%
2	Validation loss: 0.061774	Best loss: 0.061774	Accuracy: 98.36%
3	Validation loss: 0.072710	Best loss: 0.061774	Accuracy: 98.48%
4	Validation loss: 0.062690	Best loss: 0.061774	Accuracy: 98.44%
5	Validation loss: 0.093599	Best loss: 0.061774	Accuracy: 97.73%
6	Validation loss: 0.117929	Best loss: 0.061774	Accuracy: 98.44%
7	Validation loss: 0.068822	Best loss: 0.061774	Accuracy: 98.75%
8	Validation loss: 0.156111	Best loss: 0.061774	Accuracy: 97.54%
9	Validation loss: 0.053247	Best loss: 0.053247	Accuracy: 98.63%
10	Validation loss: 0.047688	Best loss: 0.047688	Accuracy: 98.91%
11	Validation loss: 0.087656	Best loss: 0.047688	Accuracy: 98.67%
12	Validation loss: 0.072743	Best loss: 0.047688	Accuracy: 98.59%
13	Validation loss: 0.085336	Best loss: 0.047688	Accuracy: 98.71%
14	Validation loss: 0.051302	Best loss: 0.047688	Accuracy: 98.75%
15	Validation loss: 

DNNClassifier(activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d56cc80>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0xb305edf28>,
       learning_rate=0.01, n_hidden_layers=2, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=None)

In [57]:
y_pred = dnn_classifier.predict(X_test04)
accuracy_score(y_test04, y_pred)

0.9846273594084453

In [58]:
dnn_BatchNorm_clf = DNNClassifier(n_hidden_layers=2, n_neurons = 100, learning_rate = 0.01, 
                               activation = leaky_relu(alpha=0.01), batch_norm_momentum=0.9)
dnn_BatchNorm_clf.fit(X_train04, y_train04, n_epochs=1000, X_valid=X_val04, y_valid=y_val04)

0	Validation loss: 0.049123	Best loss: 0.049123	Accuracy: 98.51%
1	Validation loss: 0.040708	Best loss: 0.040708	Accuracy: 98.59%
2	Validation loss: 0.040982	Best loss: 0.040708	Accuracy: 98.67%
3	Validation loss: 0.040708	Best loss: 0.040708	Accuracy: 98.71%
4	Validation loss: 0.031689	Best loss: 0.031689	Accuracy: 98.94%
5	Validation loss: 0.047257	Best loss: 0.031689	Accuracy: 98.87%
6	Validation loss: 0.029413	Best loss: 0.029413	Accuracy: 99.06%
7	Validation loss: 0.040007	Best loss: 0.029413	Accuracy: 98.98%
8	Validation loss: 0.029558	Best loss: 0.029413	Accuracy: 99.10%
9	Validation loss: 0.044492	Best loss: 0.029413	Accuracy: 98.98%
10	Validation loss: 0.034986	Best loss: 0.029413	Accuracy: 99.14%
11	Validation loss: 0.030472	Best loss: 0.029413	Accuracy: 99.10%
12	Validation loss: 0.028867	Best loss: 0.028867	Accuracy: 99.10%
13	Validation loss: 0.022478	Best loss: 0.022478	Accuracy: 99.37%
14	Validation loss: 0.026996	Best loss: 0.022478	Accuracy: 99.45%
15	Validation loss: 

DNNClassifier(activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d554598>,
       batch_norm_momentum=0.9, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0xb305edf28>,
       learning_rate=0.01, n_hidden_layers=2, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=None)

In [59]:
y_pred = dnn_BatchNorm_clf.predict(X_test04)
accuracy_score(y_test04, y_pred)

0.9931893364467795

RandomSearch with Batch Normalization

In [60]:
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {
    "n_neurons": [80, 90, 100, 110, 120],
    "batch_size": [20, 100, 500],
    "learning_rate": [0.003, 0.01, 0.03],
    "activation": [tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    "n_hidden_layers": [2, 3, 4, 5],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
    "batch_norm_momentum": [0.9, 0.95, 0.99],
}

rnd_search_bn = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50, cv=3,
                                   random_state=42, verbose=2)
rnd_search_bn.fit(X_train04, y_train04, X_valid=X_val04, y_valid=y_val04, n_epochs=1000)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] n_neurons=80, n_hidden_layers=5, learning_rate=0.003, batch_size=500, batch_norm_momentum=0.9, activation=<function elu at 0x125c65268> 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


0	Validation loss: 0.082314	Best loss: 0.082314	Accuracy: 97.30%
1	Validation loss: 0.071050	Best loss: 0.071050	Accuracy: 97.81%
2	Validation loss: 0.054695	Best loss: 0.054695	Accuracy: 98.24%
3	Validation loss: 0.048123	Best loss: 0.048123	Accuracy: 98.44%
4	Validation loss: 0.045360	Best loss: 0.045360	Accuracy: 98.48%
5	Validation loss: 0.061085	Best loss: 0.045360	Accuracy: 97.89%
6	Validation loss: 0.070212	Best loss: 0.045360	Accuracy: 97.85%
7	Validation loss: 0.051763	Best loss: 0.045360	Accuracy: 98.28%
8	Validation loss: 0.039991	Best loss: 0.039991	Accuracy: 98.91%
9	Validation loss: 0.044530	Best loss: 0.039991	Accuracy: 98.75%
10	Validation loss: 0.041383	Best loss: 0.039991	Accuracy: 98.67%
11	Validation loss: 0.048564	Best loss: 0.039991	Accuracy: 98.67%
12	Validation loss: 0.040174	Best loss: 0.039991	Accuracy: 98.79%
13	Validation loss: 0.042781	Best loss: 0.039991	Accuracy: 99.02%
14	Validation loss: 0.041190	Best loss: 0.039991	Accuracy: 98.87%
15	Validation loss: 

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   17.6s remaining:    0.0s


0	Validation loss: 0.078693	Best loss: 0.078693	Accuracy: 97.46%
1	Validation loss: 0.054710	Best loss: 0.054710	Accuracy: 98.32%
2	Validation loss: 0.054882	Best loss: 0.054710	Accuracy: 98.28%
3	Validation loss: 0.045572	Best loss: 0.045572	Accuracy: 98.83%
4	Validation loss: 0.044001	Best loss: 0.044001	Accuracy: 98.40%
5	Validation loss: 0.039862	Best loss: 0.039862	Accuracy: 98.63%
6	Validation loss: 0.042154	Best loss: 0.039862	Accuracy: 98.75%
7	Validation loss: 0.041349	Best loss: 0.039862	Accuracy: 98.83%
8	Validation loss: 0.046293	Best loss: 0.039862	Accuracy: 98.83%
9	Validation loss: 0.035974	Best loss: 0.035974	Accuracy: 98.79%
10	Validation loss: 0.037526	Best loss: 0.035974	Accuracy: 98.75%
11	Validation loss: 0.042792	Best loss: 0.035974	Accuracy: 98.87%
12	Validation loss: 0.040872	Best loss: 0.035974	Accuracy: 98.91%
13	Validation loss: 0.031125	Best loss: 0.031125	Accuracy: 98.91%
14	Validation loss: 0.041382	Best loss: 0.031125	Accuracy: 98.71%
15	Validation loss: 

27	Validation loss: 0.051007	Best loss: 0.039121	Accuracy: 99.02%
28	Validation loss: 0.049642	Best loss: 0.039121	Accuracy: 98.87%
29	Validation loss: 0.056047	Best loss: 0.039121	Accuracy: 98.98%
30	Validation loss: 0.057572	Best loss: 0.039121	Accuracy: 98.94%
31	Validation loss: 0.062178	Best loss: 0.039121	Accuracy: 98.79%
32	Validation loss: 0.126563	Best loss: 0.039121	Accuracy: 98.48%
33	Validation loss: 0.072048	Best loss: 0.039121	Accuracy: 98.91%
34	Validation loss: 0.077400	Best loss: 0.039121	Accuracy: 98.75%
35	Validation loss: 0.056872	Best loss: 0.039121	Accuracy: 99.06%
36	Validation loss: 0.047515	Best loss: 0.039121	Accuracy: 98.91%
37	Validation loss: 0.042923	Best loss: 0.039121	Accuracy: 99.30%
Early stopping!
[CV]  n_neurons=120, n_hidden_layers=5, learning_rate=0.03, batch_size=100, batch_norm_momentum=0.99, activation=<function elu at 0x125c65268>, total= 1.0min
[CV] n_neurons=120, n_hidden_layers=5, learning_rate=0.03, batch_size=100, batch_norm_momentum=0.99,

30	Validation loss: 0.030202	Best loss: 0.028845	Accuracy: 99.18%
Early stopping!
[CV]  n_neurons=100, n_hidden_layers=4, learning_rate=0.003, batch_size=500, batch_norm_momentum=0.99, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7730>, total=  28.0s
[CV] n_neurons=100, n_hidden_layers=3, learning_rate=0.01, batch_size=20, batch_norm_momentum=0.9, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0> 
0	Validation loss: 0.078365	Best loss: 0.078365	Accuracy: 97.62%
1	Validation loss: 0.055894	Best loss: 0.055894	Accuracy: 98.24%
2	Validation loss: 0.073542	Best loss: 0.055894	Accuracy: 98.05%
3	Validation loss: 0.063043	Best loss: 0.055894	Accuracy: 98.24%
4	Validation loss: 0.044864	Best loss: 0.044864	Accuracy: 98.67%
5	Validation loss: 0.044796	Best loss: 0.044796	Accuracy: 98.67%
6	Validation loss: 0.039730	Best loss: 0.039730	Accuracy: 98.75%
7	Validation loss: 0.037619	Best loss: 0.037619	Accuracy: 99.10%
8	Validation loss:

18	Validation loss: 0.048655	Best loss: 0.041902	Accuracy: 98.79%
19	Validation loss: 0.044488	Best loss: 0.041902	Accuracy: 98.91%
20	Validation loss: 0.073479	Best loss: 0.041902	Accuracy: 98.71%
21	Validation loss: 0.047249	Best loss: 0.041902	Accuracy: 98.83%
22	Validation loss: 0.055115	Best loss: 0.041902	Accuracy: 98.98%
23	Validation loss: 0.186011	Best loss: 0.041902	Accuracy: 97.15%
24	Validation loss: 0.061443	Best loss: 0.041902	Accuracy: 98.83%
25	Validation loss: 0.092973	Best loss: 0.041902	Accuracy: 98.28%
26	Validation loss: 0.048371	Best loss: 0.041902	Accuracy: 99.02%
27	Validation loss: 0.042544	Best loss: 0.041902	Accuracy: 99.10%
Early stopping!
[CV]  n_neurons=80, n_hidden_layers=4, learning_rate=0.01, batch_size=500, batch_norm_momentum=0.99, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7730>, total=  15.7s
[CV] n_neurons=80, n_hidden_layers=4, learning_rate=0.01, batch_size=500, batch_norm_momentum=0.99, activation=<function leaky_

11	Validation loss: 0.036930	Best loss: 0.036930	Accuracy: 99.02%
12	Validation loss: 0.049784	Best loss: 0.036930	Accuracy: 98.75%
13	Validation loss: 0.036862	Best loss: 0.036862	Accuracy: 99.10%
14	Validation loss: 0.045335	Best loss: 0.036862	Accuracy: 98.75%
15	Validation loss: 0.048900	Best loss: 0.036862	Accuracy: 98.94%
16	Validation loss: 0.050669	Best loss: 0.036862	Accuracy: 98.83%
17	Validation loss: 0.040372	Best loss: 0.036862	Accuracy: 99.10%
18	Validation loss: 0.060895	Best loss: 0.036862	Accuracy: 98.59%
19	Validation loss: 0.064184	Best loss: 0.036862	Accuracy: 98.48%
20	Validation loss: 0.054854	Best loss: 0.036862	Accuracy: 98.98%
21	Validation loss: 0.061945	Best loss: 0.036862	Accuracy: 98.91%
22	Validation loss: 0.062857	Best loss: 0.036862	Accuracy: 98.55%
23	Validation loss: 0.035851	Best loss: 0.035851	Accuracy: 98.98%
24	Validation loss: 0.043757	Best loss: 0.035851	Accuracy: 98.98%
25	Validation loss: 0.064171	Best loss: 0.035851	Accuracy: 99.02%
26	Validat

2	Validation loss: 0.053180	Best loss: 0.053180	Accuracy: 98.28%
3	Validation loss: 0.036035	Best loss: 0.036035	Accuracy: 98.79%
4	Validation loss: 0.036629	Best loss: 0.036035	Accuracy: 98.94%
5	Validation loss: 0.042967	Best loss: 0.036035	Accuracy: 98.83%
6	Validation loss: 0.050573	Best loss: 0.036035	Accuracy: 98.75%
7	Validation loss: 0.045273	Best loss: 0.036035	Accuracy: 98.94%
8	Validation loss: 0.051396	Best loss: 0.036035	Accuracy: 98.79%
9	Validation loss: 0.056698	Best loss: 0.036035	Accuracy: 98.71%
10	Validation loss: 0.044307	Best loss: 0.036035	Accuracy: 98.98%
11	Validation loss: 0.050576	Best loss: 0.036035	Accuracy: 98.71%
12	Validation loss: 0.052364	Best loss: 0.036035	Accuracy: 98.79%
13	Validation loss: 0.057068	Best loss: 0.036035	Accuracy: 98.67%
14	Validation loss: 0.047216	Best loss: 0.036035	Accuracy: 98.91%
Early stopping!
[CV]  n_neurons=90, n_hidden_layers=4, learning_rate=0.01, batch_size=100, batch_norm_momentum=0.95, activation=<function leaky_relu.<

0	Validation loss: 0.070646	Best loss: 0.070646	Accuracy: 97.89%
1	Validation loss: 0.063875	Best loss: 0.063875	Accuracy: 98.05%
2	Validation loss: 0.066650	Best loss: 0.063875	Accuracy: 97.89%
3	Validation loss: 0.045321	Best loss: 0.045321	Accuracy: 98.55%
4	Validation loss: 0.036026	Best loss: 0.036026	Accuracy: 98.79%
5	Validation loss: 0.042276	Best loss: 0.036026	Accuracy: 98.71%
6	Validation loss: 0.048797	Best loss: 0.036026	Accuracy: 98.67%
7	Validation loss: 0.039687	Best loss: 0.036026	Accuracy: 99.02%
8	Validation loss: 0.044084	Best loss: 0.036026	Accuracy: 98.87%
9	Validation loss: 0.045001	Best loss: 0.036026	Accuracy: 98.63%
10	Validation loss: 0.039334	Best loss: 0.036026	Accuracy: 98.94%
11	Validation loss: 0.039832	Best loss: 0.036026	Accuracy: 99.02%
12	Validation loss: 0.040641	Best loss: 0.036026	Accuracy: 98.98%
13	Validation loss: 0.073865	Best loss: 0.036026	Accuracy: 98.55%
14	Validation loss: 0.043820	Best loss: 0.036026	Accuracy: 98.79%
15	Validation loss: 

12	Validation loss: 0.041310	Best loss: 0.036459	Accuracy: 99.02%
13	Validation loss: 0.054682	Best loss: 0.036459	Accuracy: 98.79%
14	Validation loss: 0.039639	Best loss: 0.036459	Accuracy: 98.91%
15	Validation loss: 0.053051	Best loss: 0.036459	Accuracy: 98.94%
16	Validation loss: 0.040675	Best loss: 0.036459	Accuracy: 98.91%
17	Validation loss: 0.042795	Best loss: 0.036459	Accuracy: 98.91%
Early stopping!
[CV]  n_neurons=90, n_hidden_layers=4, learning_rate=0.01, batch_size=100, batch_norm_momentum=0.95, activation=<function elu at 0x125c65268>, total=  20.9s
[CV] n_neurons=80, n_hidden_layers=2, learning_rate=0.01, batch_size=100, batch_norm_momentum=0.9, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0> 
0	Validation loss: 0.060490	Best loss: 0.060490	Accuracy: 98.01%
1	Validation loss: 0.055942	Best loss: 0.055942	Accuracy: 98.12%
2	Validation loss: 0.042255	Best loss: 0.042255	Accuracy: 98.79%
3	Validation loss: 0.039633	Best loss: 0.039633	Accurac

2	Validation loss: 0.039588	Best loss: 0.039588	Accuracy: 98.59%
3	Validation loss: 0.034794	Best loss: 0.034794	Accuracy: 98.87%
4	Validation loss: 0.038998	Best loss: 0.034794	Accuracy: 98.71%
5	Validation loss: 0.030354	Best loss: 0.030354	Accuracy: 98.94%
6	Validation loss: 0.030271	Best loss: 0.030271	Accuracy: 99.10%
7	Validation loss: 0.033780	Best loss: 0.030271	Accuracy: 98.94%
8	Validation loss: 0.036073	Best loss: 0.030271	Accuracy: 98.94%
9	Validation loss: 0.034199	Best loss: 0.030271	Accuracy: 99.06%
10	Validation loss: 0.032898	Best loss: 0.030271	Accuracy: 99.18%
11	Validation loss: 0.033171	Best loss: 0.030271	Accuracy: 99.02%
12	Validation loss: 0.033450	Best loss: 0.030271	Accuracy: 99.06%
13	Validation loss: 0.033562	Best loss: 0.030271	Accuracy: 99.06%
14	Validation loss: 0.033471	Best loss: 0.030271	Accuracy: 99.06%
15	Validation loss: 0.034848	Best loss: 0.030271	Accuracy: 99.02%
16	Validation loss: 0.032522	Best loss: 0.030271	Accuracy: 99.02%
17	Validation loss

1	Validation loss: 0.077764	Best loss: 0.077764	Accuracy: 97.89%
2	Validation loss: 0.056320	Best loss: 0.056320	Accuracy: 98.63%
3	Validation loss: 0.074790	Best loss: 0.056320	Accuracy: 98.01%
4	Validation loss: 0.043743	Best loss: 0.043743	Accuracy: 98.79%
5	Validation loss: 0.039700	Best loss: 0.039700	Accuracy: 98.91%
6	Validation loss: 0.037239	Best loss: 0.037239	Accuracy: 98.75%
7	Validation loss: 0.034802	Best loss: 0.034802	Accuracy: 98.94%
8	Validation loss: 0.041730	Best loss: 0.034802	Accuracy: 99.06%
9	Validation loss: 0.038189	Best loss: 0.034802	Accuracy: 99.14%
10	Validation loss: 0.048274	Best loss: 0.034802	Accuracy: 98.87%
11	Validation loss: 0.034473	Best loss: 0.034473	Accuracy: 99.06%
12	Validation loss: 0.046466	Best loss: 0.034473	Accuracy: 98.83%
13	Validation loss: 0.038304	Best loss: 0.034473	Accuracy: 98.98%
14	Validation loss: 0.038895	Best loss: 0.034473	Accuracy: 99.26%
15	Validation loss: 0.050151	Best loss: 0.034473	Accuracy: 98.87%
16	Validation loss:

2	Validation loss: 0.048639	Best loss: 0.048639	Accuracy: 98.36%
3	Validation loss: 0.058412	Best loss: 0.048639	Accuracy: 98.28%
4	Validation loss: 0.037438	Best loss: 0.037438	Accuracy: 98.83%
5	Validation loss: 0.052172	Best loss: 0.037438	Accuracy: 98.63%
6	Validation loss: 0.036348	Best loss: 0.036348	Accuracy: 98.79%
7	Validation loss: 0.037469	Best loss: 0.036348	Accuracy: 98.94%
8	Validation loss: 0.039386	Best loss: 0.036348	Accuracy: 98.91%
9	Validation loss: 0.039138	Best loss: 0.036348	Accuracy: 98.91%
10	Validation loss: 0.045111	Best loss: 0.036348	Accuracy: 98.79%
11	Validation loss: 0.029238	Best loss: 0.029238	Accuracy: 99.26%
12	Validation loss: 0.032708	Best loss: 0.029238	Accuracy: 99.06%
13	Validation loss: 0.034601	Best loss: 0.029238	Accuracy: 99.18%
14	Validation loss: 0.036875	Best loss: 0.029238	Accuracy: 99.06%
15	Validation loss: 0.031708	Best loss: 0.029238	Accuracy: 99.26%
16	Validation loss: 0.058297	Best loss: 0.029238	Accuracy: 98.75%
17	Validation loss

47	Validation loss: 0.033833	Best loss: 0.029975	Accuracy: 99.14%
48	Validation loss: 0.042684	Best loss: 0.029975	Accuracy: 99.02%
49	Validation loss: 0.038690	Best loss: 0.029975	Accuracy: 99.14%
50	Validation loss: 0.036706	Best loss: 0.029975	Accuracy: 99.10%
Early stopping!
[CV]  n_neurons=120, n_hidden_layers=5, learning_rate=0.01, batch_size=20, batch_norm_momentum=0.95, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0>, total= 3.8min
[CV] n_neurons=120, n_hidden_layers=5, learning_rate=0.01, batch_size=20, batch_norm_momentum=0.95, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0> 
0	Validation loss: 0.082914	Best loss: 0.082914	Accuracy: 97.85%
1	Validation loss: 0.063535	Best loss: 0.063535	Accuracy: 97.89%
2	Validation loss: 0.059998	Best loss: 0.059998	Accuracy: 97.97%
3	Validation loss: 0.055281	Best loss: 0.055281	Accuracy: 98.59%
4	Validation loss: 0.044707	Best loss: 0.044707	Accuracy: 98.48%
5	Validation los

6	Validation loss: 0.136406	Best loss: 0.047718	Accuracy: 96.64%
7	Validation loss: 0.039558	Best loss: 0.039558	Accuracy: 98.87%
8	Validation loss: 0.052795	Best loss: 0.039558	Accuracy: 98.55%
9	Validation loss: 0.043408	Best loss: 0.039558	Accuracy: 98.75%
10	Validation loss: 0.066298	Best loss: 0.039558	Accuracy: 98.05%
11	Validation loss: 0.044623	Best loss: 0.039558	Accuracy: 98.71%
12	Validation loss: 0.048691	Best loss: 0.039558	Accuracy: 98.98%
13	Validation loss: 0.039562	Best loss: 0.039558	Accuracy: 98.94%
14	Validation loss: 0.061315	Best loss: 0.039558	Accuracy: 98.67%
15	Validation loss: 0.039117	Best loss: 0.039117	Accuracy: 98.98%
16	Validation loss: 0.076495	Best loss: 0.039117	Accuracy: 98.71%
17	Validation loss: 0.049246	Best loss: 0.039117	Accuracy: 98.91%
18	Validation loss: 0.036711	Best loss: 0.036711	Accuracy: 98.98%
19	Validation loss: 0.043441	Best loss: 0.036711	Accuracy: 99.14%
20	Validation loss: 0.090536	Best loss: 0.036711	Accuracy: 98.01%
21	Validation 

15	Validation loss: 0.052416	Best loss: 0.031023	Accuracy: 98.98%
Early stopping!
[CV]  n_neurons=110, n_hidden_layers=5, learning_rate=0.01, batch_size=100, batch_norm_momentum=0.95, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0>, total=  28.9s
[CV] n_neurons=110, n_hidden_layers=5, learning_rate=0.01, batch_size=100, batch_norm_momentum=0.95, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0> 
0	Validation loss: 0.079256	Best loss: 0.079256	Accuracy: 97.73%
1	Validation loss: 0.060502	Best loss: 0.060502	Accuracy: 98.28%
2	Validation loss: 0.047731	Best loss: 0.047731	Accuracy: 98.20%
3	Validation loss: 0.061005	Best loss: 0.047731	Accuracy: 98.08%
4	Validation loss: 0.048224	Best loss: 0.047731	Accuracy: 98.63%
5	Validation loss: 0.055931	Best loss: 0.047731	Accuracy: 98.40%
6	Validation loss: 0.036896	Best loss: 0.036896	Accuracy: 98.91%
7	Validation loss: 0.051967	Best loss: 0.036896	Accuracy: 98.94%
8	Validation loss

22	Validation loss: 0.070301	Best loss: 0.041160	Accuracy: 98.55%
Early stopping!
[CV]  n_neurons=120, n_hidden_layers=3, learning_rate=0.03, batch_size=100, batch_norm_momentum=0.95, activation=<function elu at 0x125c65268>, total=  24.3s
[CV] n_neurons=120, n_hidden_layers=3, learning_rate=0.03, batch_size=100, batch_norm_momentum=0.95, activation=<function elu at 0x125c65268> 
0	Validation loss: 0.100434	Best loss: 0.100434	Accuracy: 97.22%
1	Validation loss: 0.063758	Best loss: 0.063758	Accuracy: 97.89%
2	Validation loss: 0.060429	Best loss: 0.060429	Accuracy: 98.08%
3	Validation loss: 0.049297	Best loss: 0.049297	Accuracy: 98.51%
4	Validation loss: 0.064105	Best loss: 0.049297	Accuracy: 98.36%
5	Validation loss: 0.044971	Best loss: 0.044971	Accuracy: 98.63%
6	Validation loss: 0.051911	Best loss: 0.044971	Accuracy: 98.48%
7	Validation loss: 0.096074	Best loss: 0.044971	Accuracy: 97.89%
8	Validation loss: 0.045858	Best loss: 0.044971	Accuracy: 98.71%
9	Validation loss: 0.048140	Best

19	Validation loss: 0.040347	Best loss: 0.040347	Accuracy: 98.98%
20	Validation loss: 0.043122	Best loss: 0.040347	Accuracy: 98.91%
21	Validation loss: 0.043185	Best loss: 0.040347	Accuracy: 98.91%
22	Validation loss: 0.041772	Best loss: 0.040347	Accuracy: 98.94%
23	Validation loss: 0.041925	Best loss: 0.040347	Accuracy: 98.98%
24	Validation loss: 0.043636	Best loss: 0.040347	Accuracy: 98.98%
25	Validation loss: 0.042523	Best loss: 0.040347	Accuracy: 98.98%
26	Validation loss: 0.043346	Best loss: 0.040347	Accuracy: 98.98%
27	Validation loss: 0.044409	Best loss: 0.040347	Accuracy: 98.98%
28	Validation loss: 0.043134	Best loss: 0.040347	Accuracy: 98.98%
29	Validation loss: 0.044635	Best loss: 0.040347	Accuracy: 99.02%
30	Validation loss: 0.043393	Best loss: 0.040347	Accuracy: 99.02%
Early stopping!
[CV]  n_neurons=110, n_hidden_layers=2, learning_rate=0.003, batch_size=500, batch_norm_momentum=0.95, activation=<function elu at 0x125c65268>, total=  14.2s
[CV] n_neurons=120, n_hidden_laye

18	Validation loss: 0.045259	Best loss: 0.036936	Accuracy: 98.87%
19	Validation loss: 0.044240	Best loss: 0.036936	Accuracy: 98.98%
20	Validation loss: 0.047102	Best loss: 0.036936	Accuracy: 98.98%
21	Validation loss: 0.037078	Best loss: 0.036936	Accuracy: 99.02%
22	Validation loss: 0.040338	Best loss: 0.036936	Accuracy: 98.91%
23	Validation loss: 0.048699	Best loss: 0.036936	Accuracy: 98.83%
24	Validation loss: 0.052641	Best loss: 0.036936	Accuracy: 98.48%
Early stopping!
[CV]  n_neurons=80, n_hidden_layers=2, learning_rate=0.003, batch_size=100, batch_norm_momentum=0.95, activation=<function elu at 0x125c65268>, total=  30.5s
[CV] n_neurons=80, n_hidden_layers=2, learning_rate=0.003, batch_size=100, batch_norm_momentum=0.95, activation=<function elu at 0x125c65268> 
0	Validation loss: 0.081119	Best loss: 0.081119	Accuracy: 97.73%
1	Validation loss: 0.077528	Best loss: 0.077528	Accuracy: 97.50%
2	Validation loss: 0.057437	Best loss: 0.057437	Accuracy: 98.16%
3	Validation loss: 0.04429

2	Validation loss: 0.058208	Best loss: 0.058208	Accuracy: 98.20%
3	Validation loss: 0.038785	Best loss: 0.038785	Accuracy: 98.59%
4	Validation loss: 0.037872	Best loss: 0.037872	Accuracy: 98.87%
5	Validation loss: 0.035601	Best loss: 0.035601	Accuracy: 98.71%
6	Validation loss: 0.033471	Best loss: 0.033471	Accuracy: 98.98%
7	Validation loss: 0.040291	Best loss: 0.033471	Accuracy: 98.59%
8	Validation loss: 0.032451	Best loss: 0.032451	Accuracy: 98.75%
9	Validation loss: 0.034004	Best loss: 0.032451	Accuracy: 98.91%
10	Validation loss: 0.032044	Best loss: 0.032044	Accuracy: 98.91%
11	Validation loss: 0.032428	Best loss: 0.032044	Accuracy: 98.94%
12	Validation loss: 0.032565	Best loss: 0.032044	Accuracy: 98.98%
13	Validation loss: 0.030083	Best loss: 0.030083	Accuracy: 99.14%
14	Validation loss: 0.030689	Best loss: 0.030083	Accuracy: 99.02%
15	Validation loss: 0.032418	Best loss: 0.030083	Accuracy: 99.06%
16	Validation loss: 0.032239	Best loss: 0.030083	Accuracy: 99.06%
17	Validation loss

19	Validation loss: 0.040068	Best loss: 0.033067	Accuracy: 98.98%
20	Validation loss: 0.039672	Best loss: 0.033067	Accuracy: 98.87%
21	Validation loss: 0.032938	Best loss: 0.032938	Accuracy: 99.18%
22	Validation loss: 0.055710	Best loss: 0.032938	Accuracy: 98.48%
23	Validation loss: 0.049513	Best loss: 0.032938	Accuracy: 98.91%
24	Validation loss: 0.028300	Best loss: 0.028300	Accuracy: 99.18%
25	Validation loss: 0.040999	Best loss: 0.028300	Accuracy: 99.18%
26	Validation loss: 0.034339	Best loss: 0.028300	Accuracy: 99.18%
27	Validation loss: 0.041839	Best loss: 0.028300	Accuracy: 99.14%
28	Validation loss: 0.043178	Best loss: 0.028300	Accuracy: 99.06%
29	Validation loss: 0.049534	Best loss: 0.028300	Accuracy: 99.06%
30	Validation loss: 0.037452	Best loss: 0.028300	Accuracy: 99.06%
31	Validation loss: 0.035419	Best loss: 0.028300	Accuracy: 99.22%
32	Validation loss: 0.048046	Best loss: 0.028300	Accuracy: 98.91%
33	Validation loss: 0.047010	Best loss: 0.028300	Accuracy: 98.91%
34	Validat

0	Validation loss: 0.143345	Best loss: 0.143345	Accuracy: 96.17%
1	Validation loss: 0.141373	Best loss: 0.141373	Accuracy: 96.68%
2	Validation loss: 0.093315	Best loss: 0.093315	Accuracy: 97.58%
3	Validation loss: 0.059139	Best loss: 0.059139	Accuracy: 98.12%
4	Validation loss: 0.057299	Best loss: 0.057299	Accuracy: 98.24%
5	Validation loss: 0.053550	Best loss: 0.053550	Accuracy: 98.48%
6	Validation loss: 0.056019	Best loss: 0.053550	Accuracy: 98.05%
7	Validation loss: 0.048512	Best loss: 0.048512	Accuracy: 98.36%
8	Validation loss: 0.045462	Best loss: 0.045462	Accuracy: 98.55%
9	Validation loss: 0.048579	Best loss: 0.045462	Accuracy: 98.59%
10	Validation loss: 0.041216	Best loss: 0.041216	Accuracy: 98.91%
11	Validation loss: 0.046828	Best loss: 0.041216	Accuracy: 98.71%
12	Validation loss: 0.044372	Best loss: 0.041216	Accuracy: 98.63%
13	Validation loss: 0.044362	Best loss: 0.041216	Accuracy: 98.75%
14	Validation loss: 0.047430	Best loss: 0.041216	Accuracy: 98.71%
15	Validation loss: 

[CV]  n_neurons=80, n_hidden_layers=4, learning_rate=0.003, batch_size=100, batch_norm_momentum=0.9, activation=<function elu at 0x125c65268>, total=  18.6s
[CV] n_neurons=80, n_hidden_layers=4, learning_rate=0.003, batch_size=100, batch_norm_momentum=0.9, activation=<function elu at 0x125c65268> 
0	Validation loss: 0.086210	Best loss: 0.086210	Accuracy: 96.72%
1	Validation loss: 0.054511	Best loss: 0.054511	Accuracy: 98.01%
2	Validation loss: 0.054936	Best loss: 0.054511	Accuracy: 98.40%
3	Validation loss: 0.064003	Best loss: 0.054511	Accuracy: 98.12%
4	Validation loss: 0.052582	Best loss: 0.052582	Accuracy: 98.40%
5	Validation loss: 0.031940	Best loss: 0.031940	Accuracy: 98.91%
6	Validation loss: 0.041562	Best loss: 0.031940	Accuracy: 98.91%
7	Validation loss: 0.035638	Best loss: 0.031940	Accuracy: 98.91%
8	Validation loss: 0.053695	Best loss: 0.031940	Accuracy: 98.75%
9	Validation loss: 0.046251	Best loss: 0.031940	Accuracy: 98.75%
10	Validation loss: 0.065640	Best loss: 0.031940	Ac

2	Validation loss: 0.059177	Best loss: 0.059177	Accuracy: 98.63%
3	Validation loss: 0.044797	Best loss: 0.044797	Accuracy: 98.71%
4	Validation loss: 0.035915	Best loss: 0.035915	Accuracy: 98.71%
5	Validation loss: 0.036052	Best loss: 0.035915	Accuracy: 98.83%
6	Validation loss: 0.034999	Best loss: 0.034999	Accuracy: 98.71%
7	Validation loss: 0.037333	Best loss: 0.034999	Accuracy: 98.63%
8	Validation loss: 0.038198	Best loss: 0.034999	Accuracy: 99.02%
9	Validation loss: 0.034337	Best loss: 0.034337	Accuracy: 98.79%
10	Validation loss: 0.044411	Best loss: 0.034337	Accuracy: 98.79%
11	Validation loss: 0.073629	Best loss: 0.034337	Accuracy: 98.51%
12	Validation loss: 0.051741	Best loss: 0.034337	Accuracy: 98.59%
13	Validation loss: 0.053013	Best loss: 0.034337	Accuracy: 98.63%
14	Validation loss: 0.040601	Best loss: 0.034337	Accuracy: 98.94%
15	Validation loss: 0.029907	Best loss: 0.029907	Accuracy: 99.18%
16	Validation loss: 0.027601	Best loss: 0.027601	Accuracy: 99.14%
17	Validation loss

12	Validation loss: 0.045580	Best loss: 0.031172	Accuracy: 98.98%
13	Validation loss: 0.046844	Best loss: 0.031172	Accuracy: 98.91%
14	Validation loss: 0.037059	Best loss: 0.031172	Accuracy: 99.34%
15	Validation loss: 0.040383	Best loss: 0.031172	Accuracy: 98.98%
16	Validation loss: 0.033786	Best loss: 0.031172	Accuracy: 99.06%
17	Validation loss: 0.031932	Best loss: 0.031172	Accuracy: 99.18%
18	Validation loss: 0.034394	Best loss: 0.031172	Accuracy: 99.26%
19	Validation loss: 0.047728	Best loss: 0.031172	Accuracy: 98.94%
20	Validation loss: 0.047535	Best loss: 0.031172	Accuracy: 98.98%
21	Validation loss: 0.040736	Best loss: 0.031172	Accuracy: 99.02%
Early stopping!
[CV]  n_neurons=110, n_hidden_layers=5, learning_rate=0.003, batch_size=100, batch_norm_momentum=0.95, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0>, total=  34.2s
[CV] n_neurons=120, n_hidden_layers=3, learning_rate=0.003, batch_size=500, batch_norm_momentum=0.95, activation=<function el

56	Validation loss: 0.033181	Best loss: 0.031572	Accuracy: 99.02%
Early stopping!
[CV]  n_neurons=120, n_hidden_layers=3, learning_rate=0.003, batch_size=500, batch_norm_momentum=0.95, activation=<function elu at 0x125c65268>, total=  30.7s
[CV] n_neurons=90, n_hidden_layers=2, learning_rate=0.01, batch_size=20, batch_norm_momentum=0.95, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0> 
0	Validation loss: 0.072326	Best loss: 0.072326	Accuracy: 97.42%
1	Validation loss: 0.050787	Best loss: 0.050787	Accuracy: 98.40%
2	Validation loss: 0.055510	Best loss: 0.050787	Accuracy: 98.28%
3	Validation loss: 0.052136	Best loss: 0.050787	Accuracy: 98.48%
4	Validation loss: 0.041189	Best loss: 0.041189	Accuracy: 98.91%
5	Validation loss: 0.059810	Best loss: 0.041189	Accuracy: 98.32%
6	Validation loss: 0.038890	Best loss: 0.038890	Accuracy: 98.94%
7	Validation loss: 0.044048	Best loss: 0.038890	Accuracy: 98.94%
8	Validation loss: 0.042262	Best loss: 0.038890	Accuracy: 

35	Validation loss: 0.047162	Best loss: 0.034064	Accuracy: 98.98%
Early stopping!
[CV]  n_neurons=90, n_hidden_layers=4, learning_rate=0.003, batch_size=20, batch_norm_momentum=0.9, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0>, total= 2.1min
[CV] n_neurons=90, n_hidden_layers=4, learning_rate=0.003, batch_size=20, batch_norm_momentum=0.9, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0> 
0	Validation loss: 0.062975	Best loss: 0.062975	Accuracy: 97.69%
1	Validation loss: 0.040109	Best loss: 0.040109	Accuracy: 98.98%
2	Validation loss: 0.038756	Best loss: 0.038756	Accuracy: 98.67%
3	Validation loss: 0.051087	Best loss: 0.038756	Accuracy: 98.20%
4	Validation loss: 0.044525	Best loss: 0.038756	Accuracy: 98.63%
5	Validation loss: 0.038473	Best loss: 0.038473	Accuracy: 99.02%
6	Validation loss: 0.048169	Best loss: 0.038473	Accuracy: 98.75%
7	Validation loss: 0.040799	Best loss: 0.038473	Accuracy: 98.98%
8	Validation loss: 0.

9	Validation loss: 0.051667	Best loss: 0.034134	Accuracy: 98.48%
10	Validation loss: 0.044843	Best loss: 0.034134	Accuracy: 98.83%
11	Validation loss: 0.052647	Best loss: 0.034134	Accuracy: 98.91%
12	Validation loss: 0.052877	Best loss: 0.034134	Accuracy: 98.51%
13	Validation loss: 0.058045	Best loss: 0.034134	Accuracy: 98.71%
14	Validation loss: 0.047253	Best loss: 0.034134	Accuracy: 98.67%
15	Validation loss: 0.048453	Best loss: 0.034134	Accuracy: 98.48%
16	Validation loss: 0.042528	Best loss: 0.034134	Accuracy: 98.51%
17	Validation loss: 0.053994	Best loss: 0.034134	Accuracy: 98.55%
18	Validation loss: 0.041247	Best loss: 0.034134	Accuracy: 98.94%
Early stopping!
[CV]  n_neurons=80, n_hidden_layers=4, learning_rate=0.01, batch_size=20, batch_norm_momentum=0.9, activation=<function elu at 0x125c65268>, total=  54.2s
[CV] n_neurons=120, n_hidden_layers=2, learning_rate=0.003, batch_size=500, batch_norm_momentum=0.9, activation=<function elu at 0x125c65268> 
0	Validation loss: 0.106441

10	Validation loss: 0.049257	Best loss: 0.040936	Accuracy: 98.63%
11	Validation loss: 0.053464	Best loss: 0.040936	Accuracy: 98.71%
12	Validation loss: 0.058453	Best loss: 0.040936	Accuracy: 98.55%
13	Validation loss: 0.069714	Best loss: 0.040936	Accuracy: 98.51%
14	Validation loss: 0.101748	Best loss: 0.040936	Accuracy: 97.93%
15	Validation loss: 0.088538	Best loss: 0.040936	Accuracy: 98.05%
16	Validation loss: 0.082872	Best loss: 0.040936	Accuracy: 98.20%
17	Validation loss: 0.055057	Best loss: 0.040936	Accuracy: 98.51%
18	Validation loss: 0.057082	Best loss: 0.040936	Accuracy: 98.59%
19	Validation loss: 0.048720	Best loss: 0.040936	Accuracy: 98.83%
20	Validation loss: 0.061335	Best loss: 0.040936	Accuracy: 98.59%
Early stopping!
[CV]  n_neurons=90, n_hidden_layers=3, learning_rate=0.01, batch_size=500, batch_norm_momentum=0.95, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7730>, total=  10.9s
[CV] n_neurons=90, n_hidden_layers=3, learning_rate=0.01, bat

0	Validation loss: 0.121053	Best loss: 0.121053	Accuracy: 96.95%
1	Validation loss: 0.138152	Best loss: 0.121053	Accuracy: 96.56%
2	Validation loss: 0.072413	Best loss: 0.072413	Accuracy: 98.16%
3	Validation loss: 0.063864	Best loss: 0.063864	Accuracy: 98.40%
4	Validation loss: 0.057695	Best loss: 0.057695	Accuracy: 98.51%
5	Validation loss: 0.055672	Best loss: 0.055672	Accuracy: 98.59%
6	Validation loss: 0.055455	Best loss: 0.055455	Accuracy: 98.48%
7	Validation loss: 0.057868	Best loss: 0.055455	Accuracy: 98.59%
8	Validation loss: 0.058638	Best loss: 0.055455	Accuracy: 98.59%
9	Validation loss: 0.052310	Best loss: 0.052310	Accuracy: 98.55%
10	Validation loss: 0.074229	Best loss: 0.052310	Accuracy: 98.55%
11	Validation loss: 0.094839	Best loss: 0.052310	Accuracy: 97.89%
12	Validation loss: 0.045027	Best loss: 0.045027	Accuracy: 98.87%
13	Validation loss: 0.056950	Best loss: 0.045027	Accuracy: 98.51%
14	Validation loss: 0.057330	Best loss: 0.045027	Accuracy: 98.67%
15	Validation loss: 

9	Validation loss: 0.053832	Best loss: 0.038760	Accuracy: 98.63%
10	Validation loss: 0.054894	Best loss: 0.038760	Accuracy: 98.63%
11	Validation loss: 0.053596	Best loss: 0.038760	Accuracy: 98.48%
12	Validation loss: 0.061668	Best loss: 0.038760	Accuracy: 98.75%
13	Validation loss: 0.050524	Best loss: 0.038760	Accuracy: 98.79%
14	Validation loss: 0.058236	Best loss: 0.038760	Accuracy: 98.59%
15	Validation loss: 0.079778	Best loss: 0.038760	Accuracy: 98.05%
Early stopping!
[CV]  n_neurons=110, n_hidden_layers=5, learning_rate=0.003, batch_size=100, batch_norm_momentum=0.99, activation=<function elu at 0x125c65268>, total=  25.0s
[CV] n_neurons=110, n_hidden_layers=5, learning_rate=0.003, batch_size=100, batch_norm_momentum=0.99, activation=<function elu at 0x125c65268> 
0	Validation loss: 0.107197	Best loss: 0.107197	Accuracy: 96.68%
1	Validation loss: 0.053498	Best loss: 0.053498	Accuracy: 98.28%
2	Validation loss: 0.048684	Best loss: 0.048684	Accuracy: 98.55%
3	Validation loss: 0.0954

10	Validation loss: 0.049974	Best loss: 0.036750	Accuracy: 98.55%
11	Validation loss: 0.050982	Best loss: 0.036750	Accuracy: 98.63%
12	Validation loss: 0.040031	Best loss: 0.036750	Accuracy: 99.02%
13	Validation loss: 0.051155	Best loss: 0.036750	Accuracy: 98.71%
14	Validation loss: 0.059475	Best loss: 0.036750	Accuracy: 98.36%
15	Validation loss: 0.045111	Best loss: 0.036750	Accuracy: 98.83%
16	Validation loss: 0.056460	Best loss: 0.036750	Accuracy: 98.67%
Early stopping!
[CV]  n_neurons=80, n_hidden_layers=4, learning_rate=0.01, batch_size=100, batch_norm_momentum=0.99, activation=<function elu at 0x125c65268>, total=  25.3s
[CV] n_neurons=80, n_hidden_layers=4, learning_rate=0.03, batch_size=500, batch_norm_momentum=0.9, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0> 
0	Validation loss: 0.076435	Best loss: 0.076435	Accuracy: 97.89%
1	Validation loss: 0.060698	Best loss: 0.060698	Accuracy: 97.85%
2	Validation loss: 0.076046	Best loss: 0.060698	Accura

9	Validation loss: 0.106884	Best loss: 0.048762	Accuracy: 97.22%
10	Validation loss: 0.069944	Best loss: 0.048762	Accuracy: 98.24%
11	Validation loss: 0.055331	Best loss: 0.048762	Accuracy: 98.75%
12	Validation loss: 0.053855	Best loss: 0.048762	Accuracy: 98.59%
13	Validation loss: 0.044267	Best loss: 0.044267	Accuracy: 98.91%
14	Validation loss: 0.042666	Best loss: 0.042666	Accuracy: 98.87%
15	Validation loss: 0.043932	Best loss: 0.042666	Accuracy: 99.02%
16	Validation loss: 0.051784	Best loss: 0.042666	Accuracy: 98.79%
17	Validation loss: 0.098582	Best loss: 0.042666	Accuracy: 98.32%
18	Validation loss: 0.064625	Best loss: 0.042666	Accuracy: 98.51%
19	Validation loss: 0.061078	Best loss: 0.042666	Accuracy: 98.79%
20	Validation loss: 0.042386	Best loss: 0.042386	Accuracy: 99.14%
21	Validation loss: 0.053101	Best loss: 0.042386	Accuracy: 99.06%
22	Validation loss: 0.066776	Best loss: 0.042386	Accuracy: 98.67%
23	Validation loss: 0.056161	Best loss: 0.042386	Accuracy: 98.94%
24	Validati

16	Validation loss: 0.045083	Best loss: 0.045030	Accuracy: 98.67%
17	Validation loss: 0.046641	Best loss: 0.045030	Accuracy: 98.40%
18	Validation loss: 0.048500	Best loss: 0.045030	Accuracy: 98.44%
19	Validation loss: 0.047368	Best loss: 0.045030	Accuracy: 98.48%
20	Validation loss: 0.044416	Best loss: 0.044416	Accuracy: 98.51%
21	Validation loss: 0.046184	Best loss: 0.044416	Accuracy: 98.44%
22	Validation loss: 0.046579	Best loss: 0.044416	Accuracy: 98.51%
23	Validation loss: 0.047055	Best loss: 0.044416	Accuracy: 98.59%
24	Validation loss: 0.060248	Best loss: 0.044416	Accuracy: 98.48%
25	Validation loss: 0.070128	Best loss: 0.044416	Accuracy: 98.28%
26	Validation loss: 0.086052	Best loss: 0.044416	Accuracy: 97.85%
27	Validation loss: 0.114800	Best loss: 0.044416	Accuracy: 97.19%
28	Validation loss: 0.068736	Best loss: 0.044416	Accuracy: 97.97%
29	Validation loss: 0.049908	Best loss: 0.044416	Accuracy: 98.71%
30	Validation loss: 0.037151	Best loss: 0.037151	Accuracy: 98.91%
31	Validat

2	Validation loss: 0.062450	Best loss: 0.062450	Accuracy: 98.55%
3	Validation loss: 0.047294	Best loss: 0.047294	Accuracy: 98.94%
4	Validation loss: 0.038173	Best loss: 0.038173	Accuracy: 98.91%
5	Validation loss: 0.036076	Best loss: 0.036076	Accuracy: 98.94%
6	Validation loss: 0.033978	Best loss: 0.033978	Accuracy: 98.94%
7	Validation loss: 0.036485	Best loss: 0.033978	Accuracy: 98.91%
8	Validation loss: 0.032557	Best loss: 0.032557	Accuracy: 98.98%
9	Validation loss: 0.032058	Best loss: 0.032058	Accuracy: 98.98%
10	Validation loss: 0.030721	Best loss: 0.030721	Accuracy: 99.02%
11	Validation loss: 0.031670	Best loss: 0.030721	Accuracy: 98.98%
12	Validation loss: 0.031494	Best loss: 0.030721	Accuracy: 99.06%
13	Validation loss: 0.031987	Best loss: 0.030721	Accuracy: 99.02%
14	Validation loss: 0.032253	Best loss: 0.030721	Accuracy: 99.06%
15	Validation loss: 0.031930	Best loss: 0.030721	Accuracy: 99.10%
16	Validation loss: 0.031825	Best loss: 0.030721	Accuracy: 98.98%
17	Validation loss

1	Validation loss: 0.055790	Best loss: 0.055790	Accuracy: 98.28%
2	Validation loss: 0.045159	Best loss: 0.045159	Accuracy: 98.55%
3	Validation loss: 0.039252	Best loss: 0.039252	Accuracy: 98.83%
4	Validation loss: 0.063669	Best loss: 0.039252	Accuracy: 98.24%
5	Validation loss: 0.055980	Best loss: 0.039252	Accuracy: 98.40%
6	Validation loss: 0.043836	Best loss: 0.039252	Accuracy: 98.94%
7	Validation loss: 0.050197	Best loss: 0.039252	Accuracy: 98.83%
8	Validation loss: 0.040159	Best loss: 0.039252	Accuracy: 98.75%
9	Validation loss: 0.050722	Best loss: 0.039252	Accuracy: 98.75%
10	Validation loss: 0.053338	Best loss: 0.039252	Accuracy: 99.02%
11	Validation loss: 0.056385	Best loss: 0.039252	Accuracy: 98.63%
12	Validation loss: 0.058328	Best loss: 0.039252	Accuracy: 98.55%
13	Validation loss: 0.041297	Best loss: 0.039252	Accuracy: 99.06%
14	Validation loss: 0.053294	Best loss: 0.039252	Accuracy: 98.79%
Early stopping!
[CV]  n_neurons=110, n_hidden_layers=5, learning_rate=0.03, batch_siz

0	Validation loss: 0.065286	Best loss: 0.065286	Accuracy: 97.97%
1	Validation loss: 0.053653	Best loss: 0.053653	Accuracy: 98.20%
2	Validation loss: 0.057911	Best loss: 0.053653	Accuracy: 97.85%
3	Validation loss: 0.045305	Best loss: 0.045305	Accuracy: 98.55%
4	Validation loss: 0.046986	Best loss: 0.045305	Accuracy: 98.55%
5	Validation loss: 0.035887	Best loss: 0.035887	Accuracy: 98.83%
6	Validation loss: 0.036079	Best loss: 0.035887	Accuracy: 98.98%
7	Validation loss: 0.048601	Best loss: 0.035887	Accuracy: 98.79%
8	Validation loss: 0.037421	Best loss: 0.035887	Accuracy: 98.83%
9	Validation loss: 0.036969	Best loss: 0.035887	Accuracy: 98.83%
10	Validation loss: 0.035614	Best loss: 0.035614	Accuracy: 99.02%
11	Validation loss: 0.098005	Best loss: 0.035614	Accuracy: 97.85%
12	Validation loss: 0.055398	Best loss: 0.035614	Accuracy: 98.40%
13	Validation loss: 0.063848	Best loss: 0.035614	Accuracy: 98.51%
14	Validation loss: 0.033603	Best loss: 0.033603	Accuracy: 98.87%
15	Validation loss: 

8	Validation loss: 0.029412	Best loss: 0.029412	Accuracy: 99.18%
9	Validation loss: 0.040365	Best loss: 0.029412	Accuracy: 98.79%
10	Validation loss: 0.043421	Best loss: 0.029412	Accuracy: 98.79%
11	Validation loss: 0.043950	Best loss: 0.029412	Accuracy: 98.79%
12	Validation loss: 0.049045	Best loss: 0.029412	Accuracy: 98.59%
13	Validation loss: 0.057851	Best loss: 0.029412	Accuracy: 98.79%
14	Validation loss: 0.047235	Best loss: 0.029412	Accuracy: 98.67%
15	Validation loss: 0.038209	Best loss: 0.029412	Accuracy: 98.91%
16	Validation loss: 0.033678	Best loss: 0.029412	Accuracy: 99.02%
17	Validation loss: 0.045405	Best loss: 0.029412	Accuracy: 98.71%
18	Validation loss: 0.047749	Best loss: 0.029412	Accuracy: 98.94%
19	Validation loss: 0.048466	Best loss: 0.029412	Accuracy: 98.94%
Early stopping!
[CV]  n_neurons=110, n_hidden_layers=2, learning_rate=0.01, batch_size=20, batch_norm_momentum=0.9, activation=<function elu at 0x125c65268>, total=  51.4s
[CV] n_neurons=120, n_hidden_layers=4,

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 90.9min finished


0	Validation loss: 0.140264	Best loss: 0.140264	Accuracy: 97.89%
1	Validation loss: 0.061940	Best loss: 0.061940	Accuracy: 98.83%
2	Validation loss: 0.033785	Best loss: 0.033785	Accuracy: 99.06%
3	Validation loss: 0.035777	Best loss: 0.033785	Accuracy: 98.79%
4	Validation loss: 0.030728	Best loss: 0.030728	Accuracy: 99.22%
5	Validation loss: 0.035688	Best loss: 0.030728	Accuracy: 99.02%
6	Validation loss: 0.023166	Best loss: 0.023166	Accuracy: 99.22%
7	Validation loss: 0.027214	Best loss: 0.023166	Accuracy: 99.02%
8	Validation loss: 0.031719	Best loss: 0.023166	Accuracy: 99.06%
9	Validation loss: 0.028626	Best loss: 0.023166	Accuracy: 99.26%
10	Validation loss: 0.025031	Best loss: 0.023166	Accuracy: 99.22%
11	Validation loss: 0.034673	Best loss: 0.023166	Accuracy: 99.02%
12	Validation loss: 0.042308	Best loss: 0.023166	Accuracy: 99.02%
13	Validation loss: 0.030191	Best loss: 0.023166	Accuracy: 99.22%
14	Validation loss: 0.047577	Best loss: 0.023166	Accuracy: 98.75%
15	Validation loss: 

RandomizedSearchCV(cv=3, error_score='raise-deprecating',
          estimator=DNNClassifier(activation=<function elu at 0x125c65268>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0xb305edf28>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42),
          fit_params=None, iid='warn', n_iter=50, n_jobs=None,
          param_distributions={'n_neurons': [80, 90, 100, 110, 120], 'batch_size': [20, 100, 500], 'learning_rate': [0.003, 0.01, 0.03], 'activation': [<function elu at 0x125c65268>, <function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2c7642f0>, <function leaky_relu.<locals>.parametrized_leaky_relu at 0xb2d8c7730>], 'n_hidden_layers': [2, 3, 4, 5], 'batch_norm_momentum': [0.9, 0.95, 0.99]},
          pre_dispatch='2*n_jobs', random_state=42, refit=True,
          ret

In [61]:
rnd_search_bn.best_params_

{'activation': <function __main__.leaky_relu.<locals>.parametrized_leaky_relu>,
 'batch_norm_momentum': 0.99,
 'batch_size': 500,
 'learning_rate': 0.003,
 'n_hidden_layers': 3,
 'n_neurons': 100}

In [62]:
y_pred = rnd_search_bn.predict(X_test04)
accuracy_score(y_test04, y_pred)

0.9935785172212492