In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

Read Data

In [3]:
import gzip
import sys
import pickle
f = gzip.open('mnist.pkl.gz', 'rb')
if sys.version_info < (3,):
    data = pickle.load(f)
else:
    data = pickle.load(f, encoding='bytes')
f.close()
(x_train, y_train), (x_test, y_test) = data

x_train.shape

(60000, 28, 28)

Reshape data:  28*28 matrix --> a vector

In [4]:
x_train = x_train.reshape(-1, 28*28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28*28).astype('float32') / 255.0

Sequential API (very convenient, not very flexible)

In [35]:
model = keras.Sequential(
  [
    keras.Input(shape=(28*28,)),
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu', name='my_layer'),
    layers.Dense(10)
  ]
)

model.compile(
  # from_logits=True means the output is not passed through an activation function like softmax (raw logits).
  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
  optimizer=keras.optimizers.Adam(learning_rate=0.001),
  metrics=['accuracy']
)

print(model.summary())

None


The 2 following blocks are used for debugging

In [36]:
# model = keras.Model(inputs=model.inputs,
#                     outputs=[model.layers[-2].output])
model = keras.Model(inputs=model.inputs,
                    outputs=[model.get_layer('my_layer').output])
feature = model.predict(x_train)
print(feature.shape)

[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step
(60000, 256)


In [37]:
model = keras.Model(inputs=model.inputs,
                    outputs=[layer.output for layer in model.layers])
features = model.predict(x_train)
for feature in features:
  print(feature.shape)

[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step
(60000, 784)
(60000, 512)
(60000, 256)


In [46]:
model = keras.Sequential()
model.add(keras.Input(shape=(784,)))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))

model.compile(
  # from_logits=True means the output is not passed through an activation function like softmax (raw logits).
  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
  optimizer=keras.optimizers.Adam(learning_rate=0.001),
  metrics=['accuracy']
)

model.summary()

In [47]:
model.fit(x_train, y_train, batch_size=32, epochs=10, verbose=2)
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/10
1875/1875 - 20s - 11ms/step - accuracy: 0.9377 - loss: 0.2080
Epoch 2/10
1875/1875 - 21s - 11ms/step - accuracy: 0.9737 - loss: 0.0907
Epoch 3/10
1875/1875 - 18s - 10ms/step - accuracy: 0.9788 - loss: 0.0696
Epoch 4/10
1875/1875 - 18s - 10ms/step - accuracy: 0.9842 - loss: 0.0531
Epoch 5/10
1875/1875 - 19s - 10ms/step - accuracy: 0.9867 - loss: 0.0435
Epoch 6/10
1875/1875 - 19s - 10ms/step - accuracy: 0.9887 - loss: 0.0371
Epoch 7/10
1875/1875 - 18s - 10ms/step - accuracy: 0.9906 - loss: 0.0311
Epoch 8/10
1875/1875 - 18s - 10ms/step - accuracy: 0.9920 - loss: 0.0271
Epoch 9/10
1875/1875 - 18s - 10ms/step - accuracy: 0.9930 - loss: 0.0249
Epoch 10/10
1875/1875 - 18s - 10ms/step - accuracy: 0.9936 - loss: 0.0233
313/313 - 1s - 4ms/step - accuracy: 0.9824 - loss: 0.0746


[0.07459374517202377, 0.9824000000953674]

I tryed to increase the accuracy of test set to be more thatn 98.2%. I increased the depth of neural network to 5 layers and also I increased the training time

Functional API (A bit more flexible)

In [28]:
inputs = keras.Input(shape=(784,))
x = layers.Dense(512, activation='relu')(inputs)
x = layers.Dense(256, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
model = keras.Model(inputs=inputs, outputs=outputs)


model.compile(
  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
  optimizer=keras.optimizers.Adam(learning_rate=0.001),
  metrics=['accuracy']
)

print(model.summary())


None


In [29]:
model.fit(x_train, y_train,batch_size=32, epochs=5, verbose=2)
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 18s - 10ms/step - accuracy: 0.9430 - loss: 0.1865
Epoch 2/5
1875/1875 - 16s - 9ms/step - accuracy: 0.9746 - loss: 0.0809
Epoch 3/5
1875/1875 - 16s - 9ms/step - accuracy: 0.9832 - loss: 0.0538
Epoch 4/5
1875/1875 - 16s - 9ms/step - accuracy: 0.9867 - loss: 0.0421
Epoch 5/5
1875/1875 - 16s - 9ms/step - accuracy: 0.9890 - loss: 0.0332
313/313 - 1s - 4ms/step - accuracy: 0.9801 - loss: 0.0796


[0.07961393147706985, 0.9800999760627747]

In [38]:
my_NN = keras.Sequential([
  keras.Input(shape=(784,)),
  layers.Dense(512, activation='relu'),
  layers.Dense(256, activation='relu'),
  layers.Dense(10, activation='softmax')
])

my_NN.compile(
  loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False),
  optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9),
  metrics=['accuracy']
)

In [39]:
my_NN.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
my_NN.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 14s - 8ms/step - accuracy: 0.9252 - loss: 0.2539
Epoch 2/5
1875/1875 - 15s - 8ms/step - accuracy: 0.9706 - loss: 0.0984
Epoch 3/5
1875/1875 - 18s - 10ms/step - accuracy: 0.9805 - loss: 0.0648
Epoch 4/5
1875/1875 - 13s - 7ms/step - accuracy: 0.9858 - loss: 0.0466
Epoch 5/5
1875/1875 - 14s - 7ms/step - accuracy: 0.9896 - loss: 0.0339
313/313 - 1s - 4ms/step - accuracy: 0.9794 - loss: 0.0654


[0.06537753343582153, 0.9793999791145325]