In [1]:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers.core import Dense
from keras.callbacks import EarlyStopping
import pandas as pd
pd.set_option('Display.max_rows',None)
from IPython.display import display, clear_output


def xor_test(activation_function='sigmoid', learning_rate=0.1, verbose=0, optimizer='adam', epochs=1000):
    # the four different states of the XOR gate
    training_data = np.array([[0,0],[0,1],[1,0],[1,1]], "float32")

    # the four expected results in the same order
    target_data = np.array([[0],[1],[1],[0]], "float32")

    model = Sequential()
    model.add(Dense(16, input_dim=2, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    
    opt = keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['binary_accuracy'])

    history = model.fit(training_data, target_data, epochs=epochs, verbose=verbose)
    #print(model.predict(training_data))
    return history.history


# Choosing the number of hidden neurons
* The number of hidden neurons should be between the size of the input layer and the size of the output layer.
* The number of hidden neurons should be 2/3 the size of the input layer, plus the size of the output layer.
* The number of hidden neurons should be less than twice the size of the input layer.

https://www.heatonresearch.com/2017/06/01/hidden-layers.html

# Choosing the number of hidden layers
* In artificial neural networks, hidden layers are required if and only if the data must be separated non-linearly.

https://towardsdatascience.com/beginners-ask-how-many-hidden-layers-neurons-to-use-in-artificial-neural-networks-51466afa0d3e

In [None]:
activation_functions =  [
    'deserialize', 
    'elu', 
    'exponential', 
    'get', 
    'hard_sigmoid', 
    'linear', 
    'relu', 
    'selu', 
    'serialize', 
    'sigmoid', 
    'softmax', 
    'softplus', 
    'softsign', 
    'swish', 
    'tanh'
]

optimizers = [
    'SGD', 
    'RMSprop', 
    'Adam', 
    'Adadelta', 
    'Adagrad', 
    'Adamax', 
    'Nadam', 
    'Ftrl', 
    'rmsprop'
]

losses = [
    'BinaryCrossentropy',
    'CategoricalCrossentropy',
    'SparseCategoricalCrossentropy',
    'Poisson',
    'binary_crossentropy',
    'categorical_crossentropy',
    'sparse_categorical_crossentropy',
    'poisson',
    'KLDivergence',
    'kl_divergence'
]

learning_rates = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]

In [48]:
import numpy as np
import os
from keras.datasets import mnist
from keras.layers import *
from keras.models import *
from time import time
nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(X_train.shape[0], 784)
X_test = X_test.reshape(X_test.shape[0], 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

from keras.utils import np_utils
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

In [241]:
# Use scikit-learn to grid search the learning rate and momentum
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
#from keras.optimizers import SGD

# Function to create model, required for KerasClassifier
def create_model(learn_rate=0.01, momentum=0, optimizer='rmsprop', activation='relu', loss='binary_crossentropy'):
    # Create model
    model = Sequential()
    model.add(Dense(name = "hidden_layer", units=512, input_shape = (784,), activation=activation))
    model.add(Dense(name = "output_layer", units=10, activation='sigmoid'))
    # Compile model
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    #model.summary()
    return model

model = KerasClassifier(build_fn=create_model)

# grid search parameters
learn_rate = [0.3]#[0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.9] #[0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
optimizers = ['adam','rmsprop']
epochs = np.array([10])
batches = np.array([5])
activation = ['relu']#,'tanh','softmax']
loss = ['binary_crossentropy']#,'categorical_crossentropy']
param_grid = dict(
    learn_rate=learn_rate,
    momentum=momentum,
    optimizer=optimizers,
    nb_epoch=epochs,
    batch_size=batches,
    activation=activation,
    loss=loss
)

grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
grid_result = grid.fit(X_train, Y_train)



In [227]:
para = pd.DataFrame.from_dict(grid_result.cv_results_['params'])
mean = pd.DataFrame(grid_result.cv_results_['mean_test_score'],columns=['mean_test_score'])
stds = pd.DataFrame(grid_result.cv_results_['std_test_score'],columns=['std_test_score'])
df = para.join(mean.join(stds)).sort_values('mean_test_score', ascending=False)
df.reset_index().drop(columns=['index'])

Unnamed: 0,batch_size,learn_rate,momentum,nb_epoch,optimizer,mean_test_score,std_test_score
0,20,0.2,0.9,10,adam,0.9601,0.00089
1,20,0.2,0.8,10,adam,0.959817,0.003588
2,20,0.2,0.2,10,adam,0.9598,0.001875
3,20,0.2,0.4,10,adam,0.959783,0.000674
4,20,0.2,0.6,10,adam,0.959733,0.001945
5,20,0.2,0.0,10,adam,0.9594,0.000552


In [230]:
para = pd.DataFrame.from_dict(grid_result.cv_results_['params'])
mean = pd.DataFrame(grid_result.cv_results_['mean_test_score'],columns=['mean_test_score'])
stds = pd.DataFrame(grid_result.cv_results_['std_test_score'],columns=['std_test_score'])
df = para.join(mean.join(stds)).sort_values('mean_test_score', ascending=False)
df.reset_index().drop(columns=['index'])

Unnamed: 0,batch_size,learn_rate,momentum,nb_epoch,optimizer,mean_test_score,std_test_score
0,20,0.2,0.9,10,adam,0.961517,0.001185
1,20,0.2,0.9,10,rmsprop,0.9597,0.002394


In [232]:
para = pd.DataFrame.from_dict(grid_result.cv_results_['params'])
mean = pd.DataFrame(grid_result.cv_results_['mean_test_score'],columns=['mean_test_score'])
stds = pd.DataFrame(grid_result.cv_results_['std_test_score'],columns=['std_test_score'])
df = para.join(mean.join(stds)).sort_values('mean_test_score', ascending=False)
df.reset_index().drop(columns=['index'])

Unnamed: 0,batch_size,learn_rate,momentum,nb_epoch,optimizer,mean_test_score,std_test_score
0,50,0.2,0.9,10,rmsprop,0.9534,0.000464
1,50,0.2,0.9,10,adam,0.95075,0.001368


In [235]:
para = pd.DataFrame.from_dict(grid_result.cv_results_['params'])
mean = pd.DataFrame(grid_result.cv_results_['mean_test_score'],columns=['mean_test_score'])
stds = pd.DataFrame(grid_result.cv_results_['std_test_score'],columns=['std_test_score'])
df = para.join(mean.join(stds)).sort_values('mean_test_score', ascending=False)
df.reset_index().drop(columns=['index'])

Unnamed: 0,batch_size,learn_rate,momentum,nb_epoch,optimizer,mean_test_score,std_test_score
0,10,0.2,0.9,10,adam,0.962483,0.003356
1,10,0.2,0.9,10,rmsprop,0.959133,0.001111


In [240]:
para = pd.DataFrame.from_dict(grid_result.cv_results_['params'])
mean = pd.DataFrame(grid_result.cv_results_['mean_test_score'],columns=['mean_test_score'])
stds = pd.DataFrame(grid_result.cv_results_['std_test_score'],columns=['std_test_score'])
df = para.join(mean.join(stds)).sort_values('mean_test_score', ascending=False)
df.reset_index().drop(columns=['index'])

Unnamed: 0,activation,batch_size,learn_rate,momentum,nb_epoch,optimizer,mean_test_score,std_test_score
0,relu,5,0.3,0.9,10,adam,0.967483,0.00128
1,relu,5,0.2,0.9,10,adam,0.96425,0.00131
2,relu,5,0.01,0.9,10,adam,0.964017,0.002
3,relu,5,0.001,0.9,10,adam,0.963917,0.001504
4,relu,5,0.1,0.9,10,adam,0.9613,0.002128


In [242]:
para = pd.DataFrame.from_dict(grid_result.cv_results_['params'])
mean = pd.DataFrame(grid_result.cv_results_['mean_test_score'],columns=['mean_test_score'])
stds = pd.DataFrame(grid_result.cv_results_['std_test_score'],columns=['std_test_score'])
df = para.join(mean.join(stds)).sort_values('mean_test_score', ascending=False)
df.reset_index().drop(columns=['index'])

Unnamed: 0,activation,batch_size,learn_rate,loss,momentum,nb_epoch,optimizer,mean_test_score,std_test_score
0,relu,5,0.3,categorical_crossentropy,0.9,10,adam,0.95885,0.002334
1,relu,5,0.3,categorical_crossentropy,0.9,10,rmsprop,0.95405,0.002369
