In [None]:
import os, sys, random
import numpy as np, pandas as pd, tensorflow as tf
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import make_scorer, accuracy_score
from sklearn import datasets

import keras.backend as K
import keras.backend.tensorflow_backend as KTF
from keras.models import Sequential, Input, Model, save_model, clone_model
from keras.layers import Dense, Activation, Conv2D, MaxPool2D, Flatten, Dropout
from keras import optimizers
from keras.wrappers.scikit_learn import KerasClassifier

from cvopt.model_selection import SimpleoptCV
from cvopt.search_setting import search_category, search_numeric

# Fix every random seed used in this notebook so runs are as repeatable as possible.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here presumably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED'] = '0'
random.seed(0)
np.random.seed(0)
tf.set_random_seed(0)

# Restrict TensorFlow to one thread per op pool and register that session with Keras.
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

In [None]:
from bokeh.io import output_notebook
output_notebook() # Must be run once if the search visualization should be shown in the notebook

# 1. Sample code for cifar10 (image data, classification)
cvopt supports estimators that follow the scikit-learn interface.   
So Keras models can be used with cvopt by wrapping them with `keras.wrappers.scikit_learn`.   
In this case, a __saver__ and a __scorer__ for Keras must be defined.   

## Note
### parallel execution
cvopt's parallel backend is `multiprocessing`, and using `multiprocessing` with Keras can cause problems depending on the execution environment.    
If a problem occurs, try `n_jobs` = `1`.

### feature selection
Feature selection changes the input shape. However, a basic Keras model has a fixed input shape, so it cannot be used with feature selection as is.   
If you need feature selection, the model's input shape must be made variable.

### Resource exhausted
A Resource exhausted error sometimes occurs in GPU environments. One countermeasure is to clear the session at a suitable point.
For an implementation example using cvopt with keras.wrappers.scikit_learn, see section 1.5 (Run all backend search) below.

In [None]:
from keras.datasets import cifar10
from keras.utils import to_categorical

# Load CIFAR-10, cast the images to float32 and divide by 255,
# then one-hot encode the labels with to_categorical.
(Xtrain, ytrain), (Xtest, ytest) = cifar10.load_data()
Xtrain, Xtest = Xtrain.astype("float32") / 255, Xtest.astype("float32") / 255
ytrain, ytest = to_categorical(ytrain), to_categorical(ytest)

n_classes = 10


def mk_nw(activation, lr, out_dim):
    """Build and compile the CNN searched over in the CIFAR-10 example.

    Parameters
    ----------
    activation
        Activation used by both convolution layers and the hidden dense layer.
    lr
        Learning rate handed to the SGD optimizer.
    out_dim
        Number of units in the hidden dense layer.

    Returns
    -------
    A compiled ``Sequential`` model ending in a softmax layer with
    ``n_classes`` units. The input shape is taken from the global ``Xtrain``.
    """
    layers = [
        Conv2D(20, kernel_size=5, strides=1, activation=activation, input_shape=Xtrain.shape[1:]),
        MaxPool2D(2, strides=2),
        Conv2D(50, kernel_size=5, strides=1, activation=activation),
        MaxPool2D(2, strides=2),
        Flatten(),
        Dense(out_dim, activation=activation),
        Dense(n_classes, activation="softmax"),
    ]
    network = Sequential(layers)

    sgd = optimizers.SGD(lr=lr)
    network.compile(loss="categorical_crossentropy", optimizer=sgd)
    return network


# scikit-learn style wrapper; the keyword values are the defaults the search overrides.
estimator = KerasClassifier(
    mk_nw,
    activation="linear",
    lr=0.01,
    out_dim=256,
    epochs=16,
    verbose=0,
)

## 1.1 Scorer
`KerasClassifier.fit` needs ytrue as one-hot with shape (n_samples, n_classes), while `KerasClassifier.predict` returns labels with shape (n_samples, ).   
However, a basic score function (`score(ytrue, ypred)`) needs ytrue and ypred to use the same representation (both one-hot or both labels).   
Therefore, for `KerasClassifier`, a scorer that takes ytrue (one-hot) and ypred (labels) must be defined.    
Use `sklearn.metrics.make_scorer` to define it.

In [None]:
from sklearn.metrics import make_scorer, accuracy_score

def acc(ytrue, ypred):
    """Accuracy for one-hot ``ytrue`` against label-encoded ``ypred``.

    ``ytrue`` is reduced to class indices with an argmax over axis 1 before
    being compared to ``ypred`` with ``accuracy_score``.
    """
    true_labels = np.argmax(ytrue, axis=1)
    return accuracy_score(true_labels, ypred)


# Wrap the metric as a scikit-learn scorer; larger values are better.
scorer = make_scorer(acc, greater_is_better=True)

## 1.2 Saver
When saving an estimator, cvopt uses `sklearn.externals.joblib.dump` by default.   
However, saving a Keras model with joblib is not recommended.
So a saver for Keras needs to be defined.

In [None]:
from keras.models import save_model

def saver(estimator, path):
    """Save the Keras model held in ``estimator.model`` to ``path``.

    Delegates to ``keras.models.save_model``; nothing is returned.
    """
    keras_model = estimator.model
    save_model(keras_model, path)

## 1.3 Run search

In [None]:
# Search space: one categorical choice and two numeric ranges.
param_distributions = dict(
    activation=search_category(["linear", "relu"]),
    lr=search_numeric(0.0001, 0.1, "float"),
    out_dim=search_numeric(124, 512, "int"),
)

# Stratified 3-fold split with a fixed seed.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)

opt = SimpleoptCV(
    estimator,
    param_distributions,
    scoring=scorer,   # scorer that accepts one-hot ytrue and label ypred
    cv=cv,
    max_iter=8,
    n_jobs=1,
    verbose=2,
    logdir="./cifar10",
    model_id="search_usage",
    save_estimator=2,
    saver=saver,      # Keras-specific saver used instead of the joblib default
    backend="hyperopt",
)
opt.fit(Xtrain, ytrain, validation_data=(Xtest, ytest))

## 1.4 Log usage

In [None]:
from cvopt.utils import extract_params
# Pull the logged parameters (target_index=0) of the "search_usage" run from ./cifar10.
estimator_params, feature_params, feature_select_flag = extract_params(
    logdir="./cifar10",
    model_id="search_usage",
    target_index=0,
    feature_groups=None,
)

# Apply the extracted estimator parameters and show the resulting configuration.
estimator.set_params(**estimator_params)
print(estimator.get_params())

In [None]:
from cvopt.utils import mk_metafeature
from keras.models import load_model

# Build meta features from the estimators saved during the "search_usage" run.
# Keras models are read back with load_model.
Xtrain_meta, Xtest_meta = mk_metafeature(
    Xtrain,
    ytrain,
    logdir="./cifar10",
    model_id="search_usage",
    target_index=0,
    cv=cv,
    validation_data=(Xtest, ytest),
    feature_groups=None,
    estimator_method="predict",
    loader=load_model,
)

# Compare the shapes of the raw features and the generated meta features.
print("Train features shape:", Xtrain.shape)
print("Train meta features shape:", Xtrain_meta.shape)
print("Test features shape:", Xtest.shape)
print("Test meta features shape:", Xtest_meta.shape)

## 1.5 Run all backend search
As a countermeasure against Resource exhausted errors, clear the session before each fit. In the example below, clearing the session also clears the model (graph), so saving the model through a saver does not work well. If you need to save the model, consider using a Keras callback.

In [None]:
class OrgKerasClassifier(KerasClassifier):
    """``KerasClassifier`` that starts every ``fit`` from a fresh session.

    Clearing the session before each fit is the countermeasure this notebook
    uses against Resource exhausted errors during long searches.
    """

    def fit(self, *args, **kwargs):
        """Clear the Keras session, install a new one, then fit.

        All positional and keyword arguments are forwarded unchanged to
        ``KerasClassifier.fit``.

        Returns
        -------
        Whatever ``KerasClassifier.fit`` returns, so that callers relying on
        the return value of ``fit`` keep working with this subclass.
        """
        # Drop the current graph/session, then give Keras a brand-new session.
        KTF.clear_session()
        session = tf.Session("")
        KTF.set_session(session)
        # Propagate the parent's result instead of silently returning None.
        return super().fit(*args, **kwargs)

# `n_classes` and `mk_nw` are reused from the CIFAR-10 cell at the start of section 1:
# the network is identical, so it is not defined a second time here.
# Only the wrapper changes, so that the session is cleared before each fit.
estimator = OrgKerasClassifier(
    mk_nw,
    activation="linear",
    lr=0.01,
    out_dim=256,
    epochs=16,
    verbose=0,
)

In [None]:
# Same search space and CV splitter as the single-backend search.
param_distributions = dict(
    activation=search_category(["linear", "relu"]),
    lr=search_numeric(0.0001, 0.1, "float"),
    out_dim=search_numeric(124, 512, "int"),
)
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)

# Run one search per backend; each backend logs under its own model_id.
# No saver is passed and save_estimator is 0 for these runs.
for bk in ("bayesopt", "randomopt", "hyperopt", "gaopt"):
    opt = SimpleoptCV(
        estimator,
        param_distributions,
        scoring=scorer,
        cv=cv,
        max_iter=16,
        n_jobs=1,
        verbose=1,
        logdir="./cifar10_all",
        model_id=bk,
        save_estimator=0,
        backend=bk,
    )
    opt.fit(Xtrain, ytrain, validation_data=(Xtest, ytest))

# 2. Sample code for the Breast Cancer Wisconsin dataset (matrix data, classification)

In [None]:
# Load the breast cancer dataset, hold out 30% as test data (fixed seed),
# then one-hot encode the labels with to_categorical.
dataset = datasets.load_breast_cancer()
Xtrain, Xtest, ytrain, ytest = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.3,
    random_state=0,
)
ytrain, ytest = to_categorical(ytrain), to_categorical(ytest)

n_classes = 2
def mk_nw(activation, lr, out_dim):
    """Build and compile the dense network for the breast cancer example.

    Parameters
    ----------
    activation
        Activation of the hidden dense layer.
    lr
        Learning rate handed to the SGD optimizer.
    out_dim
        Number of units in the hidden dense layer.

    Returns
    -------
    A compiled functional ``Model`` ending in a softmax layer with
    ``n_classes`` units. The input shape is taken from the global ``Xtrain``.
    """
    inputs = Input(shape=Xtrain.shape[1:])
    # Pass `activation` through: without it the hidden layer ignores the
    # parameter and searching over "activation" has no effect on the model.
    x = Dense(out_dim, activation=activation)(inputs)
    x = Dense(n_classes, activation="softmax")(x)
    model = Model(inputs=inputs, outputs=x)
    model.compile(loss="categorical_crossentropy", optimizer=optimizers.SGD(lr=lr))
    return model
estimator = KerasClassifier(mk_nw, activation="linear", lr=0.01, out_dim=8, epochs=16, verbose=0)

# Search space: one categorical choice and two numeric ranges.
param_distributions = dict(
    activation=search_category(["linear", "relu"]),
    lr=search_numeric(0.0001, 0.1, "float"),
    out_dim=search_numeric(124, 512, "int"),
)

# Stratified 3-fold split with a fixed seed.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)

opt = SimpleoptCV(
    estimator,
    param_distributions,
    # Accuracy between argmax of one-hot ytrue and label-encoded ypred.
    scoring=make_scorer(
        lambda ytrue, ypred: accuracy_score(np.argmax(ytrue, axis=1), ypred),
        greater_is_better=True,
    ),
    cv=cv,
    max_iter=8,
    n_jobs=1,
    verbose=2,
    logdir="./bcw",
    model_id="search_usage",
    save_estimator=2,
    # Save the wrapped Keras model with save_model.
    saver=lambda model, path: save_model(model.model, path),
    backend="hyperopt",
)

opt.fit(Xtrain, ytrain, validation_data=(Xtest, ytest))

In [None]:
from cvopt.utils import extract_params
# Pull the logged parameters (target_index=0) of the "search_usage" run from ./bcw.
estimator_params, feature_params, feature_select_flag = extract_params(
    logdir="./bcw",
    model_id="search_usage",
    target_index=0,
    feature_groups=None,
)

# Apply the extracted estimator parameters and show the resulting configuration.
estimator.set_params(**estimator_params)
print(estimator.get_params())

In [None]:
from cvopt.utils import mk_metafeature
from keras.models import load_model

# Build meta features from the estimators saved during the "search_usage" run.
# Keras models are read back with load_model.
Xtrain_meta, Xtest_meta = mk_metafeature(
    Xtrain,
    ytrain,
    logdir="./bcw",
    model_id="search_usage",
    target_index=0,
    cv=cv,
    validation_data=(Xtest, ytest),
    feature_groups=None,
    estimator_method="predict",
    loader=load_model,
)

# Compare the shapes of the raw features and the generated meta features.
print("Train features shape:", Xtrain.shape)
print("Train meta features shape:", Xtrain_meta.shape)
print("Test features shape:", Xtest.shape)
print("Test meta features shape:", Xtest_meta.shape)