In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.utils import to_categorical

#Datasets
from tensorflow.keras.datasets import mnist


#Data

# Hyper-parameters shared by the data preparation and the model definition.
nb_dim_in = 784   # length of one flattened input image
nb_classes = 10   # number of target classes (width of the one-hot labels)
layer_size = 512  # units in each hidden Dense layer
dropout = 0.3     # rate passed to each Dropout layer

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a vector of nb_dim_in values. Passing -1 lets
# NumPy infer the number of samples instead of hard-coding 60000 / 10000,
# so the code keeps working if a subset of the data is used.
x_train = x_train.reshape(-1, nb_dim_in)
x_test = x_test.reshape(-1, nb_dim_in)

# Cast to float32 and divide by 255 to rescale the pixel intensities
# (MNIST pixels are 8-bit values according to the Keras dataset docs).
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# One-hot encode the integer class labels into nb_classes columns.
y_train = to_categorical(y_train, nb_classes)
y_test = to_categorical(y_test, nb_classes)

# Model: two hidden stages (Dense -> ReLU -> Dropout) followed by a
# Dense + softmax output over nb_classes classes.
model = Sequential([
    # Hidden stage 1; the first layer also declares the input width.
    Dense(layer_size, input_shape=(nb_dim_in, )),
    Activation('relu'),
    Dropout(dropout),
    # Hidden stage 2.
    Dense(layer_size),
    Activation('relu'),
    Dropout(dropout),
    # Output stage: one unit per class, normalised by softmax.
    Dense(nb_classes),
    Activation('softmax'),
])

# Cross-entropy computed on the softmax probabilities produced by the model
# (from_logits=False), without label smoothing.
loss = CategoricalCrossentropy(
    name='categorical_crossentropy',
    label_smoothing=0,
    from_logits=False,
)

# Plain SGD driven by an exponentially decaying learning rate that starts
# at 0.01, with decay_rate=0.9 applied over decay_steps=10000 optimizer steps.
lr_schedule = ExponentialDecay(
    decay_rate=0.9,
    decay_steps=10000,
    initial_learning_rate=1e-2,
)
optimizer = SGD(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

# NOTE(review): the test set is used as validation data here, so any
# hyper-parameter choice based on these numbers is tuned on the test set.
# Consider holding out part of the training data instead.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=15,
    verbose=1,
)

# evaluate() returns the loss followed by the compiled metric (accuracy).
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test score: 0.08590888977050781
Test accuracy: 0.9724000096321106


### Accuracy after 15 epochs
|Dropout / Neurons per layer | 128 | 256 | 512 |
|:-|:-:|:-:|:-:|
|**.2**|.967|.971|**.972**|
|**.3**|.965|.970|**.972**|
|**.4**|.965|.969|.971|

The best parameters found so far are layer_size=512 with dropout=0.2 or dropout=0.3; both reach an accuracy of 0.972.