In [2]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.datasets import mnist
from keras.utils import np_utils

Using TensorFlow backend.
  return f(*args, **kwds)


In [3]:
# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# reshape to be [samples][width][height][channels]
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')
# normalize inputs from 0-255 to 0-1
X_train = X_train / 255.
X_test = X_test / 255.
print(X_train.shape)

(60000, 28, 28, 1)


In [4]:
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]

In [None]:
#We put the model.fit inside the loop because we want to obtain an array of the errors with the same loss function
#for calculate the mean and the standard deviation of our loss function.

In [4]:
#We first try with the categorical crossentropy loss function because it's one of the typical error function that NN use

In [5]:
model = Sequential()
model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
scores_vector = []
for iteration in range(0,9):
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=200, verbose=0)
    scores = model.evaluate(X_test, y_test, verbose=0)
    scores_vector.append((100-scores[1]*100))
    print("Baseline Error: %.2f%%" % (100-scores[1]*100))

Baseline Error: 0.97%
Baseline Error: 0.98%
Baseline Error: 1.01%
Baseline Error: 0.93%
Baseline Error: 0.97%
Baseline Error: 0.98%
Baseline Error: 1.01%
Baseline Error: 1.00%
Baseline Error: 1.09%


In [8]:
print("Mean value upon 10 iterations: " + str(np.mean(scores_vector)))
print("Standard Deviation upon 10 iterations: " + str(np.std(scores_vector)))

Mean value upon 10 iterations: 0.9933333333333346
Standard Deviation upon 10 iterations: 0.04136557881997198


In [7]:
#Now we try with the MSE loss function because it's one of the typical error function that NN use

In [9]:
model = Sequential()
model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
scores_vector = []
for iteration in range(0,9):
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=200, verbose=0)
    scores = model.evaluate(X_test, y_test, verbose=0)
    scores_vector.append((100-scores[1]*100))
    print("Baseline Error: %.2f%%" % (100-scores[1]*100))

Baseline Error: 0.94%
Baseline Error: 1.02%
Baseline Error: 0.98%
Baseline Error: 0.90%
Baseline Error: 0.79%
Baseline Error: 1.05%
Baseline Error: 0.97%
Baseline Error: 0.97%
Baseline Error: 0.90%


In [10]:
print("Mean value upon 10 iterations: " + str(np.mean(scores_vector)))
print("Standard Deviation upon 10 iterations: " + str(np.std(scores_vector)))

Mean value upon 10 iterations: 0.9466666666666678
Standard Deviation upon 10 iterations: 0.07241853660799837


In [1]:
#Now we try with the squared hinge function because it is a function commonly used in classifiers, 
#especially in SVMs and when there are only two classes. 
#However, we have used it because it is also a good measure when you want to compare a class against another
#or a class against all.

In [11]:
model = Sequential()
model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='squared_hinge', optimizer='adam', metrics=['accuracy'])
scores_vector = []
for iteration in range(0,9):
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=200, verbose=0)
    scores = model.evaluate(X_test, y_test, verbose=0)
    scores_vector.append((100-scores[1]*100))
    print("Baseline Error: %.2f%%" % (100-scores[1]*100))

Baseline Error: 0.95%
Baseline Error: 1.03%
Baseline Error: 1.01%
Baseline Error: 1.05%
Baseline Error: 1.06%
Baseline Error: 0.98%
Baseline Error: 1.02%
Baseline Error: 0.98%
Baseline Error: 1.01%


In [12]:
print("Mean value upon 10 iterations: " + str(np.mean(scores_vector)))
print("Standard Deviation upon 10 iterations: " + str(np.std(scores_vector)))

Mean value upon 10 iterations: 1.0100000000000005
Standard Deviation upon 10 iterations: 0.033333333333334755


In [None]:
#Now we try with the squared Kullback–Leibler divergence function because it's a good measure for the deviation
#of the error.

In [5]:
model = Sequential()
model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='kullback_leibler_divergence', optimizer='adam', metrics=['accuracy'])
scores_vector = []
for iteration in range(0,9):
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=200, verbose=0)
    scores = model.evaluate(X_test, y_test, verbose=0)
    scores_vector.append((100-scores[1]*100))
    print("Baseline Error: %.2f%%" % (100-scores[1]*100))

Baseline Error: 0.91%
Baseline Error: 0.93%
Baseline Error: 0.97%
Baseline Error: 0.90%
Baseline Error: 0.92%
Baseline Error: 0.92%
Baseline Error: 1.03%
Baseline Error: 0.95%
Baseline Error: 1.07%


In [6]:
print("Mean value upon 10 iterations: " + str(np.mean(scores_vector)))
print("Standard Deviation upon 10 iterations: " + str(np.std(scores_vector)))

Mean value upon 10 iterations: 0.9555555555555549
Standard Deviation upon 10 iterations: 0.05499719409228882


In [6]:
#We observe both results, we conclude the Mean Squared Error is a better loss function in this case as it gives a lower mean.