In [1]:
# 1
# Load the breast cancer dataset that ships with scikit-learn
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
# store data examples in X and their corresponding labels in y
X = data['data']
y = data['target']
# preprocessing
# The targets returned by load_breast_cancer are already integer coded; per the
# scikit-learn documentation the class order is ['malignant', 'benign'], i.e.
# malignant -> 0 and benign -> 1 (not the other way round). LabelEncoder
# therefore leaves the values as they are; the step is kept so `labelencoder`
# stays defined for any later cell that refers to it.
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
y = labelencoder.fit_transform(y)
# scale input data to have mean 0 and variance 1
# NOTE(review): the scaler is fitted on every sample before cross validation,
# so held-out folds contribute to the scaling statistics -- consider fitting
# it inside each fold (e.g. with a Pipeline) if unbiased CV scores matter.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [2]:
# 2
# Create the function that returns the keras model
from keras.models import Sequential
from keras.layers import Dense
from keras.regularizers import l2
def build_model(lambda_parameter):
    """Build and compile the L2-regularised feed-forward classifier.

    The network takes 30 input features, passes them through three tanh
    hidden layers of 16, 8 and 4 units (each with an L2 kernel penalty of
    strength ``lambda_parameter``) and ends in a single sigmoid unit that
    carries no penalty.

    Args:
        lambda_parameter: factor handed to ``l2`` for every hidden layer.

    Returns:
        The ``Sequential`` model, compiled with binary cross-entropy loss,
        the rmsprop optimizer and the accuracy metric.
    """
    model = Sequential()
    for position, units in enumerate((16, 8, 4)):
        # only the first layer declares the size of the input
        layer_kwargs = {'input_dim': 30} if position == 0 else {}
        model.add(Dense(units, activation='tanh',
                        kernel_regularizer=l2(lambda_parameter), **layer_kwargs))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

Using TensorFlow backend.


In [3]:
# 2
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV
# Seed NumPy's global random number generator so that everything drawing from
# it repeats from one run to the next.
# NOTE(review): only NumPy is seeded here; the TensorFlow backend may keep
# random state of its own -- confirm before relying on identical scores.
import numpy
seed = 1
numpy.random.seed(seed)
# create the Keras wrapper with scikit learn
model = KerasClassifier(build_fn=build_model, verbose=0)
# define all the possible values for each hyperparameter
lambda_parameter = [0.001, 0.01, 0.1, 1]
epochs = [100, 150, 200]
batch_size = [10]
# create the dictionary containing all possible values of hyperparameters
param_grid = dict(lambda_parameter=lambda_parameter, epochs=epochs, batch_size=batch_size)
# perform 5-fold cross validation for 6 randomly selected combinations (out of
# the 12 possible ones) and store the results; random_state pins which
# combinations are drawn instead of leaving that to whatever state the global
# random number generator happens to be in when fit() is called
grid_seach = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=6, cv=5,
                                random_state=seed)
results = grid_seach.fit(X, y)



In [4]:
# 2
# report the best mean cross validation score and the settings that reached it
print("Best cross validation score=", results.best_score_)
print("Parameters for Best cross validation score=", results.best_params_)
# then one line per evaluated combination: mean score, spread and settings
accuracy_means = results.cv_results_['mean_test_score']
accuracy_stds = results.cv_results_['std_test_score']
parameters = results.cv_results_['params']
for mean_score, std_score, candidate in zip(accuracy_means, accuracy_stds, parameters):
    print("Accuracy %f (std %f) for params %r" % (mean_score, std_score, candidate))

Best cross validation score= 0.9771528954246133
Parameters for Best cross validation score= {'batch_size': 10, 'lambda_parameter': 0.1, 'epochs': 200}
Accuracy 0.975395 (std 0.010213) for params {'batch_size': 10, 'lambda_parameter': 0.1, 'epochs': 100}
Accuracy 0.627417 (std 0.132766) for params {'batch_size': 10, 'lambda_parameter': 1, 'epochs': 100}
Accuracy 0.970123 (std 0.014229) for params {'batch_size': 10, 'lambda_parameter': 0.001, 'epochs': 150}
Accuracy 0.977153 (std 0.013110) for params {'batch_size': 10, 'lambda_parameter': 0.1, 'epochs': 200}
Accuracy 0.970123 (std 0.008896) for params {'batch_size': 10, 'lambda_parameter': 0.01, 'epochs': 100}
Accuracy 0.964851 (std 0.018372) for params {'batch_size': 10, 'lambda_parameter': 0.1, 'epochs': 150}


In [5]:
# 3
from sklearn.model_selection import GridSearchCV
# reseed NumPy's global random number generator before this search
numpy.random.seed(seed)
# candidate values for each hyperparameter
lambda_parameter = [0.05, 0.1, 0.15]
epochs = [200, 230]
batch_size = [10]
# gather the candidates in the mapping handed to the search
param_grid = {
    'lambda_parameter': lambda_parameter,
    'epochs': epochs,
    'batch_size': batch_size,
}
# wrap the Keras model builder in a scikit-learn style classifier
model = KerasClassifier(build_fn=build_model, verbose=0)
# exhaustive search: 5-fold cross validation for each of the 6 combinations
grid_seach = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
results = grid_seach.fit(X, y)

In [6]:
# 3
# headline: best mean cross validation score and the settings behind it
print("Best cross validation score=", results.best_score_)
print("Parameters for Best cross validation score=", results.best_params_)
# detail: mean score, spread and settings for every point of the grid
accuracy_means = results.cv_results_['mean_test_score']
accuracy_stds = results.cv_results_['std_test_score']
parameters = results.cv_results_['params']
for mean_score, std_score, candidate in zip(accuracy_means, accuracy_stds, parameters):
    print("Accuracy %f (std %f) for params %r" % (mean_score, std_score, candidate))

Best cross validation score= 0.9789103640403815
Parameters for Best cross validation score= {'batch_size': 10, 'epochs': 230, 'lambda_parameter': 0.05}
Accuracy 0.971880 (std 0.016988) for params {'batch_size': 10, 'epochs': 200, 'lambda_parameter': 0.05}
Accuracy 0.975395 (std 0.014014) for params {'batch_size': 10, 'epochs': 200, 'lambda_parameter': 0.1}
Accuracy 0.975395 (std 0.011655) for params {'batch_size': 10, 'epochs': 200, 'lambda_parameter': 0.15}
Accuracy 0.978910 (std 0.006978) for params {'batch_size': 10, 'epochs': 230, 'lambda_parameter': 0.05}
Accuracy 0.973638 (std 0.011075) for params {'batch_size': 10, 'epochs': 230, 'lambda_parameter': 0.1}
Accuracy 0.966608 (std 0.021758) for params {'batch_size': 10, 'epochs': 230, 'lambda_parameter': 0.15}


In [9]:
# 4
# Create the function that returns the keras model
from keras.layers import Dropout
def build_model(rate=0.0, lambda_parameter=None):
    """Build and compile the feed-forward classifier with dropout.

    The network takes 30 input features and stacks three tanh hidden layers
    of 16, 8 and 4 units, each followed by a ``Dropout(rate)`` layer, before
    a single sigmoid output unit.

    This definition replaces the earlier ``build_model(lambda_parameter)`` in
    the notebook namespace. ``lambda_parameter`` is therefore still accepted,
    so the L2 search cells keep working when they are re-run after this cell
    instead of failing on an argument the function no longer knows.

    Args:
        rate: dropout rate applied after every hidden layer; a rate of 0
            drops nothing.
        lambda_parameter: optional L2 factor for the hidden layers' kernels.
            ``None`` (the default) builds the layers without any kernel
            regularizer, exactly as the dropout-only model did.

    Returns:
        The ``Sequential`` model, compiled with binary cross-entropy loss,
        the rmsprop optimizer and the accuracy metric.
    """
    def kernel_penalty():
        # a fresh regularizer per layer, or none when no factor was requested
        return l2(lambda_parameter) if lambda_parameter is not None else None

    model = Sequential()
    model.add(Dense(16, input_dim=30, activation='tanh', kernel_regularizer=kernel_penalty()))
    model.add(Dropout(rate))
    model.add(Dense(8, activation='tanh', kernel_regularizer=kernel_penalty()))
    model.add(Dropout(rate))
    model.add(Dense(4, activation='tanh', kernel_regularizer=kernel_penalty()))
    model.add(Dropout(rate))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

In [10]:
# 4
# reseed NumPy's global random number generator before this search
# NOTE(review): only NumPy is seeded here; the TensorFlow backend may keep
# random state of its own -- confirm before relying on identical scores.
numpy.random.seed(seed)
# create the Keras wrapper with scikit learn
model = KerasClassifier(build_fn=build_model, verbose=0)
# define all the possible values for each hyperparameter
rate = [0, 0.2, 0.4]
epochs = [150, 200]
batch_size = [10]
# create the dictionary containing all possible values of hyperparameters
param_grid = dict(rate=rate, epochs=epochs, batch_size=batch_size)
# search the grid exhaustively: 5-fold cross validation for each of the
# 6 possible combinations (nothing is sampled at random here), store the results
grid_seach = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
results = grid_seach.fit(X, y)

In [11]:
# 4
# show the best mean cross validation score together with its settings
print("Best cross validation score=", results.best_score_)
print("Parameters for Best cross validation score=", results.best_params_)
# followed by mean score, spread and settings for each grid point
accuracy_means = results.cv_results_['mean_test_score']
accuracy_stds = results.cv_results_['std_test_score']
parameters = results.cv_results_['params']
for mean_score, std_score, candidate in zip(accuracy_means, accuracy_stds, parameters):
    print("Accuracy %f (std %f) for params %r" % (mean_score, std_score, candidate))

Best cross validation score= 0.9771528943770799
Parameters for Best cross validation score= {'rate': 0.4, 'batch_size': 10, 'epochs': 150}
Accuracy 0.973638 (std 0.019218) for params {'rate': 0, 'batch_size': 10, 'epochs': 150}
Accuracy 0.968366 (std 0.011887) for params {'rate': 0.2, 'batch_size': 10, 'epochs': 150}
Accuracy 0.977153 (std 0.011894) for params {'rate': 0.4, 'batch_size': 10, 'epochs': 150}
Accuracy 0.968366 (std 0.013156) for params {'rate': 0, 'batch_size': 10, 'epochs': 200}
Accuracy 0.971880 (std 0.008648) for params {'rate': 0.2, 'batch_size': 10, 'epochs': 200}
Accuracy 0.977153 (std 0.014238) for params {'rate': 0.4, 'batch_size': 10, 'epochs': 200}


In [12]:
# 5
# reseed NumPy's global random number generator before this search
# NOTE(review): only NumPy is seeded here; the TensorFlow backend may keep
# random state of its own -- confirm before relying on identical scores.
numpy.random.seed(seed)
# create the Keras wrapper with scikit learn
model = KerasClassifier(build_fn=build_model, verbose=0)
# define all the possible values for each hyperparameter
rate = [0.3, 0.4, 0.5]
epochs = [180]
batch_size = [10]
# create the dictionary containing all possible values of hyperparameters
param_grid = dict(rate=rate, epochs=epochs, batch_size=batch_size)
# search the grid exhaustively: 5-fold cross validation for each of the
# 3 possible combinations (nothing is sampled at random here), store the results
grid_seach = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
results = grid_seach.fit(X, y)

In [13]:
# 5
# best mean cross validation score and the settings that produced it
print("Best cross validation score=", results.best_score_)
print("Parameters for Best cross validation score=", results.best_params_)
# per-candidate breakdown: mean score, spread and settings
accuracy_means = results.cv_results_['mean_test_score']
accuracy_stds = results.cv_results_['std_test_score']
parameters = results.cv_results_['params']
for mean_score, std_score, candidate in zip(accuracy_means, accuracy_stds, parameters):
    print("Accuracy %f (std %f) for params %r" % (mean_score, std_score, candidate))

Best cross validation score= 0.9753954247137784
Parameters for Best cross validation score= {'rate': 0.4, 'batch_size': 10, 'epochs': 180}
Accuracy 0.968366 (std 0.014247) for params {'rate': 0.3, 'batch_size': 10, 'epochs': 180}
Accuracy 0.975395 (std 0.016997) for params {'rate': 0.4, 'batch_size': 10, 'epochs': 180}
Accuracy 0.973638 (std 0.014668) for params {'rate': 0.5, 'batch_size': 10, 'epochs': 180}
