# Intro

In [16]:
import numpy as np
import keras

In [2]:
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

x_train = x_train.astype('float32')/255
x_test = x_test.astype('float32')/255

x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
print("x_train shape:", x_train.shape)
print('y_train shape:', y_train.shape)

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

x_train shape: (60000, 28, 28, 1)
y_train shape: (60000,)
60000 train samples
10000 test samples


In [4]:
num_classes = 10
input_shape = (28,28,1)

model = keras.Sequential(
    [
        keras.layers.Input(shape=input_shape),
        keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
        keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2,2)),
        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation="softmax"),
    ]
)

In [5]:
model.summary()

In [6]:
model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name='acc'),
    ],
)

In [8]:
batch_size = 128
epochs = 10

callbacks = [
    keras.callbacks.ModelCheckpoint(filepath='model_at_epoch_{epoch}.keras'),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=2),
]

model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.15,
    callbacks=callbacks,
)

Epoch 1/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 275ms/step - acc: 0.9333 - loss: 0.2200 - val_acc: 0.9764 - val_loss: 0.0828
Epoch 2/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 314ms/step - acc: 0.9527 - loss: 0.1602 - val_acc: 0.9820 - val_loss: 0.0606
Epoch 3/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 336ms/step - acc: 0.9633 - loss: 0.1254 - val_acc: 0.9856 - val_loss: 0.0523
Epoch 4/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 322ms/step - acc: 0.9692 - loss: 0.1028 - val_acc: 0.9870 - val_loss: 0.0453
Epoch 5/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 309ms/step - acc: 0.9731 - loss: 0.0920 - val_acc: 0.9890 - val_loss: 0.0405
Epoch 6/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 284ms/step - acc: 0.9765 - loss: 0.0796 - val_acc: 0.9883 - val_loss: 0.0431
Epoch 7/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x26603bc5e50>

In [10]:
score = model.evaluate(x_test, y_test, verbose=0)
score

[0.02511001192033291, 0.9922000169754028]

In [11]:
model.save('final_model.keras')

In [12]:
model = keras.saving.load_model('final_model.keras')

In [13]:
predictions = model.predict(x_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step


## custom components

In [25]:
class MyDense(keras.layers.Layer):
    def __init__(self, units, activation=None, name=None):
        super().__init__(name=name)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, input_shape):
        input_dim = input_shape[-1]
        self.w = self.add_weight(
            shape=(input_dim, self.units),
            initializer=keras.initializers.GlorotNormal(),
            name="kernel",
            trainable=True,
        )

        self.b = self.add_weight(
            shape=(self.units,),
            initializer=keras.initializers.Zeros(),
            name="bias",
            trainable=True,
        )

    def call(self, inputs):
        x = keras.ops.matmul(inputs, self.w) + self.b
        return self.activation(x)

In [26]:
class MyDropout(keras.layers.Layer):
    def __init__(self, rate, name=None):
        super().__init__(name=name)
        self.rate = rate
        self.seed_generator = keras.random.SeedGenerator(1337)

    def call(self, inputs):
        return keras.random.dropout(inputs, self.rate, seed=self.seed_generator)

In [27]:
class MyModel(keras.Model):
    def __init__(self, num_classes):
        super().__init__()
        self.conv_base = keras.Sequential(
            [
                keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
                keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
                keras.layers.MaxPooling2D(pool_size=(2, 2)),
                keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
                keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
                keras.layers.GlobalAveragePooling2D(),
            ]
        )
        self.dp = MyDropout(0.5)
        self.dense = MyDense(num_classes, activation='softmax')

    def call(self, x):
        x = self.conv_base(x)
        x = self.dp(x)
        return self.dense(x)

In [28]:
model = MyModel(num_classes=10)
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="acc"),
    ],
)

model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=1,  # For speed
    validation_split=0.15,
)

[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 276ms/step - acc: 0.7449 - loss: 0.7480 - val_acc: 0.9256 - val_loss: 0.2374


<keras.src.callbacks.history.History at 0x2663c3f8c30>

In [29]:
import torch
train_torch_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(x_train), torch.from_numpy(y_train)
)
val_torch_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(x_test), torch.from_numpy(y_test)
)

train_dataloader = torch.utils.data.DataLoader(
    train_torch_dataset, batch_size=batch_size, shuffle=True
)
val_dataloader = torch.utils.data.DataLoader(
    val_torch_dataset, batch_size=batch_size, shuffle=False
)

model = MyModel(num_classes=10)
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name='acc')
    ],
)
model.fit(train_dataloader, epochs=1, validation_data=val_dataloader)

[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 272ms/step - acc: 0.7743 - loss: 0.6703 - val_acc: 0.9253 - val_loss: 0.2405


<keras.src.callbacks.history.History at 0x2663c1e3bf0>

In [31]:
import tensorflow as tf

train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

model = MyModel(num_classes=10)
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="acc"),
    ],
)
model.fit(train_dataset, epochs=1, validation_data=test_dataset)

[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 344ms/step - acc: 0.7458 - loss: 0.7501 - val_acc: 0.9110 - val_loss: 0.2822


<keras.src.callbacks.history.History at 0x2663a3cbbd0>