# Keras Models with Scikit-Learn for General Machine Learning

1. Evaluate Models with Cross-Validation
2. Grid Search Deep Learning Model Parameters

### 1. Evaluate Models with Cross-Validation

In [2]:
# MLP for Pima Indians Diabetes Dataset with 10-fold cross validation via sklearn

from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Function to create model, required for KerasClassifier
def create_model():
    # Create model
    model = Sequential()
    model.add(Dense(12, input_shape=(8, ), activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [5]:
# Fix random seed for reproducibility
seed = 7
np.random.seed(seed)
# Load Pima Indians Diabetes dataset
dataset = np.loadtxt('pima-indians-diabetes.csv', delimiter=",")
# Split dataset into inputs (X) and output (y) variables
X = dataset[:,0:8]
y=dataset[:,8]
# Create model
model = KerasClassifier(build_fn=create_model, epochs=150, batch_size=10, verbose=0)
# Evaluate using 10-fold cross validation
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, y, cv=cv)

0.7173957621326041


In [ ]:
print(results.mean())

### 2. Grid Search Deep Learning Model Parameters

- Optimizers for searching di↵erent weight values.
- Initializers for preparing the network weights using di↵erent schemes.
- Number of epochs for training the model for di↵erent number of exposures to the training
dataset.
- Batches for varying the number of samples before weight updates.


The options are specified into a dictionary and passed to the configuration of the GridSearchCV
scikit-learn class.

In [6]:
# MLP for Pima Indians Diabetes Dataset with GridSearch via sklearn
from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [7]:
# Function to create model, required for KerasClassifier
def create_model(optimizer='rmsprop', init='glorot_uniform'):
    # Create model
    model = Sequential()
    model.add(Dense(12, input_shape=(8, ), activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [8]:
# Fix random seed for reproducibility
seed = 7
np.random.seed(seed)
# Load pima indians dataset
dataset = np.loadtxt('pima-indians-diabetes.csv', delimiter=",")
# Split dataset into features (X) and labels (y) sets
X = dataset[:,0:8]
y = dataset[:,8]

In [11]:
# Create model
model = KerasClassifier(build_fn=create_model, verbose=0)
# Grid search epochs, batch size and optimizer
optimizers = ['rmsprop', 'adam']
epochs = [50, 100, 150]
batches = [5, 10, 20]
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches)
grid = GridSearchCV(estimator=model,param_grid=param_grid)
grid_result = grid.fit(X, y)

In [13]:
# Summarize results
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, std, param in zip(means, stds, params):
    print(f"{mean} ({std}) with: {param}")

Best: 0.7370172311348782 using {'batch_size': 10, 'epochs': 150, 'optimizer': 'rmsprop'}
0.6822595704948646 (0.05200029772310815) with: {'batch_size': 5, 'epochs': 50, 'optimizer': 'rmsprop'}
0.7162125456243104 (0.033996784401070824) with: {'batch_size': 5, 'epochs': 50, 'optimizer': 'adam'}
0.7240811476105593 (0.04490834026550143) with: {'batch_size': 5, 'epochs': 100, 'optimizer': 'rmsprop'}
0.7253034547152194 (0.0272651137374203) with: {'batch_size': 5, 'epochs': 100, 'optimizer': 'adam'}
0.705704099821747 (0.047242673372194406) with: {'batch_size': 5, 'epochs': 150, 'optimizer': 'rmsprop'}
0.7070791953144895 (0.029189358599682986) with: {'batch_size': 5, 'epochs': 150, 'optimizer': 'adam'}
0.6602410661234192 (0.062461228515694134) with: {'batch_size': 10, 'epochs': 50, 'optimizer': 'rmsprop'}
0.6861981156098802 (0.049999505552491504) with: {'batch_size': 10, 'epochs': 50, 'optimizer': 'adam'}
0.7019438078261608 (0.04225194532285681) with: {'batch_size': 10, 'epochs': 100, 'optimize

In [14]:
# Best: 0.7370172311348782 using {'batch_size': 10, 'epochs': 150, 'optimizer': 'rmsprop'}