In [28]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf
import keras
from keras import layers
from keras.datasets import mnist

In [29]:
#### load data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)
# print(x_train[1, 1:4])
print(y_train.shape)
# -1:  to keep whatever value in the first dimension
# / 255.0: normalise between 0 and 1
x_train = x_train.reshape(-1, 28 * 28).astype("float32") / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype("float32") / 255.0
print(x_train.shape)
print(x_test.shape)


(60000, 28, 28)
(60000,)
(60000, 784)
(10000, 784)


In [31]:
#### sequential api - convenient but not flexible, one input and one output
## all layers at once
model = keras.Sequential(
    [
        keras.Input(
            shape=(28 * 28,)
        ),  # adding model input allows to run model summary, which gives more info
        layers.Dense(512, activation="relu"),
        layers.Dense(256, activation="relu"),
        layers.Dense(10),  # 1 layer for each digit
    ]
)

print(model.summary())

None


In [None]:
## adding layers one by one
model = keras.Sequential()
model.add(keras.Input(shape=(28 * 28,)))
model.add(layers.Dense(512, activation="relu"))

print(model.summary())  # common way of debugging

model.add(layers.Dense(256, activation="relu"))
model.add(layers.Dense(10))

model.compile(
    # SparseCategorical ... <-- no hot-encoding
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)

model.evaluate(x_test, y_test, batch_size=32, verbose=2)

None
Epoch 1/5
1875/1875 - 7s - 4ms/step - accuracy: 0.9439 - loss: 0.1862
Epoch 2/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9758 - loss: 0.0784
Epoch 3/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9822 - loss: 0.0563
Epoch 4/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9870 - loss: 0.0414
Epoch 5/5
1875/1875 - 6s - 3ms/step - accuracy: 0.9894 - loss: 0.0327
313/313 - 0s - 1ms/step - accuracy: 0.9815 - loss: 0.0707


[0.07071422040462494, 0.9815000295639038]

In [38]:
## get features of particular layer
model = keras.Sequential()
model.add(keras.Input(shape=(28 * 28,)))
model.add(layers.Dense(512, activation="relu", name="first_layer"))
model.add(layers.Dense(256, activation="relu", name="second_layer"))
model.add(layers.Dense(10))


model1 = keras.Model(inputs=model.inputs, outputs=[model.layers[-2].output])
feature1 = model1.predict(x_train)
print(feature1.shape)

model2 = keras.Model(
    inputs=model.inputs, outputs=[model.get_layer("second_layer").output]
)
feature2 = model2.predict(x_train)
print(feature2.shape)

models = keras.Model(
    inputs=model.inputs, outputs=[layer.output for layer in model.layers]
)
features = models.predict(x_train)
for feat in features:
    print(feat.shape)

[1m 159/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 956us/step

Expected: ['keras_tensor_49']
Received: inputs=Tensor(shape=(32, 784))


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 873us/step
(60000, 256)
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 884us/step
(60000, 256)
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
(60000, 512)
(60000, 256)
(60000, 10)


In [36]:
#### Functional api - a bit more flexible
inputs = keras.Input(shape=(28 * 28,))
x = layers.Dense(512, activation="relu", name="first_layer")(inputs)
x = layers.Dense(256, activation="relu", name="second_layer")(x)
outputs = layers.Dense(10, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

print(model.summary())

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)

model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9442 - loss: 0.1860
Epoch 2/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9758 - loss: 0.0787
Epoch 3/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9829 - loss: 0.0541
Epoch 4/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9874 - loss: 0.0400
Epoch 5/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9882 - loss: 0.0346
313/313 - 0s - 1ms/step - accuracy: 0.9784 - loss: 0.0777


[0.07772306352853775, 0.9783999919891357]

In [50]:
## try to achieve 98.2%+ on the test data
## 98.36%, batch size to 50, epochs to 30
model = keras.Sequential()
model.add(keras.Input(shape=(28 * 28,)))
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dense(256, activation="relu"))
model.add(layers.Dense(10))

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=50, epochs=30, verbose=2)

model.evaluate(x_test, y_test, batch_size=50, verbose=2)

Epoch 1/30
1200/1200 - 4s - 4ms/step - accuracy: 0.9434 - loss: 0.1908
Epoch 2/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9767 - loss: 0.0755
Epoch 3/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9840 - loss: 0.0510
Epoch 4/30
1200/1200 - 3s - 3ms/step - accuracy: 0.9878 - loss: 0.0387
Epoch 5/30
1200/1200 - 3s - 3ms/step - accuracy: 0.9898 - loss: 0.0306
Epoch 6/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9915 - loss: 0.0258
Epoch 7/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9927 - loss: 0.0231
Epoch 8/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9943 - loss: 0.0172
Epoch 9/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9938 - loss: 0.0188
Epoch 10/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9956 - loss: 0.0134
Epoch 11/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9955 - loss: 0.0143
Epoch 12/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9953 - loss: 0.0145
Epoch 13/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9961 - loss: 0.0114
Epoch 14/30
1200/1200 - 4s - 3ms/step - accuracy: 0.9965 - loss: 0.0123
E

[0.11630605161190033, 0.9836000204086304]

In [52]:
## different optimizers than adam eg) gradient descent with momentum, adagrad and rmsprop
model = keras.Sequential()
model.add(keras.Input(shape=(28 * 28,)))
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dense(256, activation="relu"))
model.add(layers.Dense(10))

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adagrad(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)

model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 5s - 3ms/step - accuracy: 0.7729 - loss: 1.0189
Epoch 2/5
1875/1875 - 5s - 3ms/step - accuracy: 0.8874 - loss: 0.4375
Epoch 3/5
1875/1875 - 5s - 2ms/step - accuracy: 0.9031 - loss: 0.3566
Epoch 4/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9122 - loss: 0.3187
Epoch 5/5
1875/1875 - 5s - 2ms/step - accuracy: 0.9187 - loss: 0.2945
313/313 - 0s - 1ms/step - accuracy: 0.9261 - loss: 0.2706


[0.27060666680336, 0.9261000156402588]

In [None]:
## check impact of normalization
## lower performance
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28 * 28).astype("float32")
x_test = x_test.reshape(-1, 28 * 28).astype("float32")

model = keras.Sequential()
model.add(keras.Input(shape=(28 * 28,)))
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dense(256, activation="relu"))
model.add(layers.Dense(10))

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)

model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 6s - 3ms/step - accuracy: 0.9027 - loss: 1.4637
Epoch 2/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9457 - loss: 0.2011
Epoch 3/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9558 - loss: 0.1619
Epoch 4/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9618 - loss: 0.1448
Epoch 5/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9662 - loss: 0.1274
313/313 - 0s - 1ms/step - accuracy: 0.9601 - loss: 0.1529


[0.15287744998931885, 0.960099995136261]