# Scikit-learn interface and Bayesian optimization

It is often necessary to tune the hyperparameters of a deep neural network, such as the number of layers or the number of neurons per layer, in order to achieve the best performance. Wrapping the network in the scikit-learn estimator interface makes it easy to try different parameter values. The `BayesSearchCV` class from `scikit-optimize` then searches for good parameters using a small number of evaluations.

In [19]:
import pickle as pc
import numpy as np

from sklearn.base import BaseEstimator, ClassifierMixin

from keras.models import Model
from keras.layers import Dense, Input, Dropout
from keras.layers.advanced_activations import LeakyReLU, ELU
from keras.optimizers import Adam
from keras.datasets import cifar10, mnist

from misc import make_keras_picklable

# necessary magic for pickling to work
make_keras_picklable()

# example implementation of sklearn estimator with keras
class DNNClassifier(BaseEstimator, ClassifierMixin):
    """Single-hidden-layer Keras network exposed as a scikit-learn classifier.

    The network is: input -> Dense(n_neurons) -> LeakyReLU or ELU ->
    Dense(10, softmax). It is built and trained from scratch on every
    call to ``fit``.

    Parameters
    ----------
    n_neurons : int
        Number of units in the hidden Dense layer.
    epochs : int
        Number of training epochs passed to the Keras model's ``fit``.
    batch_size : int
        Mini-batch size passed to the Keras model's ``fit``.
    learning_rate : float
        Learning rate passed to the Adam optimizer.
    beta_1, beta_2 : float
        ``beta_1`` / ``beta_2`` arguments passed to the Adam optimizer.
    neuron_type : str
        ``'relu'`` selects a ``LeakyReLU`` activation (note: leaky, not a
        plain ReLU); any other value selects ``ELU``.
    """

    def __init__(self, n_neurons=256, epochs=1, batch_size=256,
                 learning_rate=1e-3, beta_1=0.9, beta_2=0.999,
                 neuron_type='relu'):
        # Hyperparameters are stored unmodified under the same names as the
        # constructor arguments; the network itself is only built in fit().
        self.n_neurons = n_neurons
        self.epochs = epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.neuron_type = neuron_type

    def fit(self, X, y):
        """Build, compile and train the network on ``(X, y)``.

        Parameters
        ----------
        X : array-like
            Training samples; ``X[0].shape`` is used as the input shape.
        y : array-like
            Training targets, fed to a ``sparse_categorical_crossentropy``
            loss (presumably integer class labels in ``0..9`` -- confirm
            against the data being used).

        Returns
        -------
        self : DNNClassifier
            The fitted estimator, so that calls can be chained.
        """
        # define network architecture
        x = Input(shape=X[0].shape)
        h = x
        h = Dense(self.n_neurons)(h)

        if self.neuron_type == 'relu':
            h = LeakyReLU()(h)
        else:
            h = ELU()(h)

        # NOTE: the output layer is hard-coded to 10 units.
        h = Dense(10, activation='softmax')(h)

        self.model = Model(inputs=x, outputs=h)

        # compile computational graph of NN
        self.model.compile(loss='sparse_categorical_crossentropy',
                           optimizer=Adam(
                               lr=self.learning_rate,
                               beta_1=self.beta_1,
                               beta_2=self.beta_2,
                           ),
                           metrics=['accuracy'])

        # train NN
        self.model.fit(X, y,
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       verbose=1)

        # scikit-learn estimators return themselves from fit().
        return self

    def predict(self, X):
        """Return, for each sample in ``X``, the index of the largest output.

        The argmax is taken over the last axis of the Keras model's
        prediction, i.e. over the 10 softmax outputs.
        """
        return np.argmax(self.model.predict(X), axis=-1)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from skopt import BayesSearchCV

# Load the first (images, labels) pair returned by the MNIST loader.
X, y = mnist.load_data()[0]

# reshape images to a vector
X = np.reshape(X, (len(X), -1))

# split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75)

# select indices for training and validation folds
I = range(len(X_train))
I_train, I_val = train_test_split(I, train_size=0.75)

# simple model pipeline declaration
pipe = Pipeline([
    ('scale', StandardScaler()),
    ('model', DNNClassifier(epochs=5))
])

# Bayesian Optimization class 
# Can be run on cluster of machines with Dask
model = BayesSearchCV(
    estimator=pipe,
    search_spaces={ # parameter search space
        'model__n_neurons': (32, 512),
        'model__learning_rate': (1e-3, 1e-1, 'log-uniform'),
        'model__neuron_type': ['relu', 'elu'],
    },
    error_score=0.0,
    cv=[[I_train, I_val]],  # a single, explicit train/validation split
    n_iter=3,
    verbose=100000,
    refit=False
)

# Run the search for best hyperparameters
model.fit(X_train, y_train)

# Fit the model with best parameters
# NOTE(review): _fit_best_model is a private method of BayesSearchCV and may
# not exist in other scikit-optimize versions -- confirm before upgrading.
model.refit=True
model._fit_best_model(X_train, y_train)

# Use a context manager so the file is flushed and closed before it is read
# back later.
with open('model.bin', 'wb') as model_file:
    pc.dump(model, model_file)



Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] model__neuron_type=elu, model__n_neurons=60, model__learning_rate=0.0444350660289 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV]  model__neuron_type=elu, model__n_neurons=60, model__learning_rate=0.0444350660289, score=0.8906666666666667, total=   4.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.9s finished




Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [18]:
# Reload the search object saved to 'model.bin'.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('model.bin', 'rb') as model_file:
    model = pc.load(model_file)

print("Model's best parameters:")
print(model.best_params_)

print("Model's test score:")
print(model.score(X_test, y_test))

# Compare predictions with the true labels for the first three test samples.
print("Example estimations:")
print(model.predict(X_test[:3]))
print(y_test[:3])

Model's best parameters:
{'model__neuron_type': 'relu', 'model__learning_rate': 0.0099561404859129319, 'model__n_neurons': 256}
Model's test score:




0.938466666667
Example estimations:
[7 6 3]
[7 6 3]
