In [1]:
# Using Keras to compare activation functions on a small fully-connected NN
# (four hidden layers plus a 10-way softmax output) against the MNIST dataset

from keras.datasets import mnist
from keras.models import Sequential
from keras.optimizers import SGD
from keras.layers.core import Dense, Dropout, Activation
from keras.utils import np_utils
import keras.backend as K
import numpy as np
import matplotlib.pyplot as plt

# Notebook-wide configuration.
plt.rcParams['figure.figsize'] = (7,7) # Make the figures a bit bigger
# Number of target classes; used below as the width of the one-hot label vectors.
nb_classes = 10

# Seed NumPy's global random generator.
# NOTE(review): whether this alone makes Keras training reproducible depends on the backend -- confirm.
np.random.seed(42)
# IPython magic (not Python syntax): selects the interactive "notebook" matplotlib backend.
%matplotlib notebook

Using TensorFlow backend.


In [2]:
# the data, shuffled and split between train and test sets
# load_data() is unpacked as two (images, labels) pairs: the training pair first, then the test pair.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Report the raw array shapes before any reshaping is applied.
print("X_train original shape", X_train.shape)
print("y_train original shape", y_train.shape)

def show(X_train, y_train):
    """Display up to the first nine samples in a 3x3 grid of grayscale images.

    Each panel is titled with the label found at the same index.

    Args:
        X_train: indexable collection of 2-D images accepted by ``plt.imshow``.
        y_train: indexable collection of labels, aligned with ``X_train``.

    When fewer than nine samples (or labels) are supplied, only the available
    ones are drawn instead of raising ``IndexError``.
    """
    # Never index past the end of either sequence.
    n_shown = min(9, len(X_train), len(y_train))
    for i in range(n_shown):
        plt.subplot(3, 3, i + 1)
        plt.imshow(X_train[i], cmap='gray', interpolation='none')
        plt.title("Class {}".format(y_train[i]))
    plt.show()

def swish(x):
    """Swish activation: the input scaled by its own sigmoid, x * sigmoid(x)."""
    gate = K.sigmoid(x)
    return x * gate

def e_swish_1(x):
    """Custom activation 1: x * (2 - sigmoid(x)), clipped from below at zero.

    Because 2 - sigmoid(x) is always positive, the product is negative exactly
    when x is, so the clip yields a ReLU-like zero for negative inputs. For
    positive inputs the value equals 2*x - swish(x), i.e. swish reflected
    about the identity line.
    """
    mirrored = x * (2 - K.sigmoid(x))
    return K.maximum(0.0, mirrored)

def e_swish_2(x):
    """Custom activation 2: element-wise max of swish(x) and x * (2 - sigmoid(x)).

    For negative inputs the swish term is the larger of the two; for positive
    inputs the x * (2 - sigmoid(x)) term (equal to 2*x - swish(x)) wins.
    """
    sig = K.sigmoid(x)
    swish_branch = x * sig
    mirrored_branch = x * (2 - sig)
    return K.maximum(swish_branch, mirrored_branch)

def e_swish_5(x):
    """Element-wise max of swish(-x) and swish(x).

    Taking the max over x and -x makes the result symmetric in the sign of x.
    """
    swish_of_negated = -x * K.sigmoid(-x)
    swish_of_input = x * K.sigmoid(x)
    return K.maximum(swish_of_negated, swish_of_input)

# Preview the first training digits while they are still 2-D images.
show(X_train, y_train)

# Flatten every image into one feature vector. The sample count is read from
# the array itself and -1 lets NumPy infer the feature width (784 for 28x28
# images), rather than hard-coding 60000 / 10000 / 784.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)
# Cast to float32 first so the in-place division below is a float division,
# then rescale the pixel values by 1/255.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print("Training matrix shape", X_train.shape)
print("Testing matrix shape", X_test.shape)

# One-hot encode the integer labels into nb_classes-wide vectors, as required
# by the categorical cross-entropy loss the models are compiled with.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

X_train original shape (60000, 28, 28)
y_train original shape (60000,)


<IPython.core.display.Javascript object>

Training matrix shape (60000, 784)
Testing matrix shape (10000, 784)


In [4]:
# Hidden-layer activations to compare: the custom callables defined above
# plus Keras built-ins referenced by their string name.
activations = [e_swish_1, e_swish_2, e_swish_5, swish, "relu", "elu"]
# Display labels: strings are used as they are, callables contribute __name__.
names = [fn if isinstance(fn, str) else fn.__name__ for fn in activations]
print(names)

['e_swish_1', 'e_swish_2', 'e_swish_5', 'swish', 'relu', 'sigmoid', 'tanh']


In [5]:
def create_models():
    """Build and compile one dense classifier per entry of ``activations``.

    Every network shares the same layout -- 784 inputs, hidden Dense layers of
    200, 100, 60 and 30 units (each followed by 20% dropout) and a 10-way
    softmax output -- so the hidden-layer activation is the only difference
    between the models.

    Returns:
        tuple: ``(models, acc)`` where ``models`` lists the compiled models in
        the order of ``activations`` and ``acc`` is a placeholder accuracy
        history holding ``[0]`` for each model.
    """
    hidden_units = (200, 100, 60, 30)
    models = []
    for act in activations:
        model = Sequential()
        # Only the first layer needs the input dimension spelled out.
        model.add(Dense(hidden_units[0], input_dim=784, activation=act))
        model.add(Dropout(0.2))
        for units in hidden_units[1:]:
            model.add(Dense(units, activation=act))
            model.add(Dropout(0.2))
        model.add(Dense(10, activation='softmax'))
        # Each model gets its own optimizer instance: a single SGD object
        # shared by all models would couple their optimizer state.
        model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.1), metrics=['accuracy'])
        models.append(model)

    # Placeholder accuracy history: one [0] per model.
    acc = [[0] for _ in models]

    return models, acc

In [6]:
def test_against(models, epochs, acc):
    """Train every model in turn and collect its per-epoch validation accuracy.

    Training uses the notebook-level ``X_train`` / ``Y_train`` arrays with a
    batch size of 64 and validates on ``X_test`` / ``Y_test`` after each epoch.

    Args:
        models: compiled Keras models; each one is trained in place.
        epochs: number of epochs passed to ``fit`` for every model.
        acc: unused; kept in the signature so existing calls keep working.

    Returns:
        tuple: ``(models, accs)`` -- the same ``models`` list, and one list of
        per-epoch validation accuracies per model, in the same order.
    """
    accs = []
    for model in models:
        his = model.fit(X_train, Y_train, epochs=epochs, batch_size=64,  verbose=1, validation_data=(X_test, Y_test))
        history = his.history
        # Older Keras reports the metric as "val_acc"; newer releases call it
        # "val_accuracy". Accept either so the notebook survives an upgrade.
        key = "val_acc" if "val_acc" in history else "val_accuracy"
        accs.append(history[key])
    return models, accs

def plot_results(accs):
    """Plot the evolution of the accuracies, one curve per model.

    Args:
        accs: iterable of per-epoch accuracy sequences, ordered like the
            notebook-level ``names`` list that supplies the legend labels.
    """
    plt.figure()
    for curve in accs:
        plt.plot(curve)
    # Label the axes so the figure can be read on its own.
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.grid()
    plt.legend(names, loc='upper left')
    plt.show()

In [7]:
# Build one compiled model per activation; `acc` starts out as the placeholder history.
kernels, acc = create_models()

In [8]:
# Train every model for 25 epochs; `acc` is rebound to the collected validation accuracies.
kernels, acc = test_against(kernels, 25, acc)
# Disabled experiment (`kernels_` is not defined in the visible cells):
# kernels_, acc = test_against(kernels_, 20, acc)

Train on 60000 samples, validate on 10000 samples
Epoch 1/1
Train on 60000 samples, validate on 10000 samples
Epoch 1/1
Train on 60000 samples, validate on 10000 samples
Epoch 1/1
Train on 60000 samples, validate on 10000 samples
Epoch 1/1
Train on 60000 samples, validate on 10000 samples
Epoch 1/1
Train on 60000 samples, validate on 10000 samples
Epoch 1/1
Train on 60000 samples, validate on 10000 samples
Epoch 1/1


In [9]:
# Draw one accuracy curve per trained model, labelled through `names`.
plot_results(acc)

<IPython.core.display.Javascript object>