In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from keras import optimizers


Load and prepare the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset. Convert the samples from integers to floating-point numbers:

In [0]:
# MNIST ships with Keras; load_data() hands back the train and test splits,
# each as an (images, labels) pair.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Dividing by 255.0 converts the integer samples to floating-point values.
x_train = x_train / 255.0
x_test = x_test / 255.0

## Model with 1 hidden layer

In [0]:
# Baseline network: flatten each 28x28 input, one sigmoid hidden layer with
# 512 units, then a 10-way softmax output layer.
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(512, activation='sigmoid'),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

# `lr=` and `decay=` are deprecated SGD arguments that newer Keras optimizers
# reject. InverseTimeDecay with decay_steps=1 yields the same schedule the old
# `decay` argument produced: 0.01 / (1 + 1e-6 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
sgd = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)

# The sparse variant of categorical cross-entropy takes integer class labels
# instead of one-hot vectors.
model.compile(optimizer=sgd,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

Train and evaluate the model:

In [26]:
# Train for five epochs on the training split.
model.fit(x=x_train, y=y_train, epochs=5)

# Final expression of the cell, so the returned [loss, accuracy] pair for the
# test split is displayed as the cell output.
model.evaluate(x=x_test, y=y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.23908916844427586, 0.932]

## Model with 3 hidden layers

In [0]:
# Same architecture as the single-hidden-layer model, but with three identical
# sigmoid hidden layers (512 units each) between the flatten and softmax layers.
hidden_layers = [tf.keras.layers.Dense(512, activation='sigmoid') for _ in range(3)]
model = tf.keras.models.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28))]
    + hidden_layers
    + [tf.keras.layers.Dense(10, activation=tf.nn.softmax)]
)

# `lr=` and `decay=` are deprecated SGD arguments that newer Keras optimizers
# reject. InverseTimeDecay with decay_steps=1 yields the same schedule the old
# `decay` argument produced: 0.01 / (1 + 1e-6 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
sgd = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)

# The sparse variant of categorical cross-entropy takes integer class labels
# instead of one-hot vectors.
model.compile(optimizer=sgd,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [28]:
# Train for five epochs on the training split.
model.fit(x=x_train, y=y_train, epochs=5)

# Final expression of the cell, so the returned [loss, accuracy] pair for the
# test split is displayed as the cell output.
model.evaluate(x=x_test, y=y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.25361136218309405, 0.9261]

## Model with 9 hidden layers

In [0]:
# Nine identical sigmoid hidden layers (512 units each) stacked between the
# flatten layer and the 10-way softmax output; no normalization in between.
# NOTE(review): the recorded run of this model ended at ~11% test accuracy with
# a loss of ~2.30 — presumably the stacked sigmoids stall training; confirm.
hidden_layers = [tf.keras.layers.Dense(512, activation='sigmoid') for _ in range(9)]
model = tf.keras.models.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28))]
    + hidden_layers
    + [tf.keras.layers.Dense(10, activation=tf.nn.softmax)]
)

# `lr=` and `decay=` are deprecated SGD arguments that newer Keras optimizers
# reject. InverseTimeDecay with decay_steps=1 yields the same schedule the old
# `decay` argument produced: 0.01 / (1 + 1e-6 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
sgd = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)

# The sparse variant of categorical cross-entropy takes integer class labels
# instead of one-hot vectors.
model.compile(optimizer=sgd,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [30]:
# Train for five epochs on the training split.
model.fit(x=x_train, y=y_train, epochs=5)

# Final expression of the cell, so the returned [loss, accuracy] pair for the
# test split is displayed as the cell output.
model.evaluate(x=x_test, y=y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[2.3020533473968507, 0.1135]

In [0]:
# Nine-hidden-layer network with batch normalization: every sigmoid Dense layer
# (512 units) is followed by a BatchNormalization layer applied to its output.
# The keyword arguments previously spelled out on each BatchNormalization call
# (axis=-1, epsilon=0.001, center/scale=True, zeros/ones initializers, no
# regularizers or constraints) are all the Keras defaults, so the layer is
# constructed without arguments here.
hidden_layers = []
for _ in range(9):
    hidden_layers.append(tf.keras.layers.Dense(512, activation='sigmoid'))
    hidden_layers.append(tf.keras.layers.BatchNormalization())

model = tf.keras.models.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28))]
    + hidden_layers
    + [tf.keras.layers.Dense(10, activation=tf.nn.softmax)]
)

# `lr=` and `decay=` are deprecated SGD arguments that newer Keras optimizers
# reject. InverseTimeDecay with decay_steps=1 yields the same schedule the old
# `decay` argument produced: 0.01 / (1 + 1e-6 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
sgd = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)

# The sparse variant of categorical cross-entropy takes integer class labels
# instead of one-hot vectors.
model.compile(optimizer=sgd,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [32]:
# Train for five epochs on the training split.
model.fit(x=x_train, y=y_train, epochs=5)

# Final expression of the cell, so the returned [loss, accuracy] pair for the
# test split is displayed as the cell output.
model.evaluate(x=x_test, y=y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.0919776431108825, 0.9738]

Hier sieht man deutlich, dass sich das Ergebnis durch Batch Normalization (BN) zwischen allen Layern stark verbessert hat: **Die Test-Genauigkeit steigt von gut 11 % auf 97 %.**