In [8]:
from keras import datasets

(X_train, y_train), (X_test, y_test) = datasets.fashion_mnist.load_data()
assert X_train.shape == (60000, 28, 28)
assert X_test.shape == (10000, 28, 28)
assert y_train.shape == (60000,)
assert y_test.shape == (10000,)

X_train = X_train.astype("float32") / 255
X_test = X_test.astype("float32") / 255

X_val = X_train[50000:]
y_val = y_train[50000:]

X_train = X_train[:50000]
y_train = y_train[:50000]

assert X_train.shape == (50000, 28, 28)
assert y_train.shape == (50000,)
assert X_val.shape == (10000, 28, 28)
assert y_val.shape == (10000,)

In [46]:
import numpy as np

def split_datasets(X, y):
    """
    Split dataset into:
    A: samples excluding classes 6 and 7
    B: samples of only classes 6 and 7, converted to binary labels
    """
    maskB = (y == 6) | (y == 7)
    maskA = ~maskB

    XA, yA = X[maskA], y[maskA]
    XB, yB = X[maskB], y[maskB]

    # Update labels for A: remove classes 6 and 7
    yA = yA.copy()
    yA[yA > 7] -= 2

    # Update labels for B: convert to binary (0 for class 7, 1 for class 6)
    yB = (yB == 6).astype(np.float32)
    return (XA, yA), (XB, yB)

(X_train_A, y_train_A), (X_train_B, y_train_B) = split_datasets(X_train, y_train)
(X_val_A, y_val_A), (X_val_B, y_val_B) = split_datasets(X_val, y_val)

In [58]:
import joblib
from keras import models
from keras import layers
from keras import utils
from keras import optimizers

new_training = False
if new_training:
    modelA = models.Sequential()
    modelA.add(layers.Flatten(input_shape=(28, 28)))

    # add 5 hidden layers
    for n in [300, 100, 50, 50, 50]:
        modelA.add(layers.Dense(n, activation="elu", kernel_initializer="he_normal"))

    modelA.add(layers.Dense(8, activation="softmax"))
    # modelA.summary()
    # utils.plot_model(modelA, show_shapes=True)

    modelA.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizers.SGD(learning_rate=1e-3),
        metrics=["accuracy"],
    )

    epoch = 20
    H = modelA.fit(X_train_A, y_train_A, epochs=epoch, validation_data=(X_val_A, y_val_A))
    history = H.history

    joblib.dump(history, "histories/transfer_learning")
    modelA.save("models/transfer_learning.keras")
else:
    history = joblib.load("histories/transfer_learning")
    modelA = models.load_model("models/transfer_learning.keras")
    # print(modelA.summary())
    # modelA.compile(
    #     loss="sparse_categorical_crossentropy",
    #     optimizer=optimizers.SGD(learning_rate=1e-3),
    #     metrics=["accuracy"],
    # )
    epoch = 20
    H = modelA.fit(X_train_A, y_train_A, epochs=epoch, validation_data=(X_val_A, y_val_A))
    history = H.history
    
    print(history["accuracy"])

Epoch 1/20
[1m1248/1248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9252 - loss: 0.2225 - val_accuracy: 0.9194 - val_loss: 0.2441
Epoch 2/20
[1m1248/1248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9277 - loss: 0.2161 - val_accuracy: 0.9172 - val_loss: 0.2426
Epoch 3/20
[1m1248/1248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9262 - loss: 0.2156 - val_accuracy: 0.9198 - val_loss: 0.2389
Epoch 4/20
[1m1248/1248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9252 - loss: 0.2203 - val_accuracy: 0.9188 - val_loss: 0.2400
Epoch 5/20
[1m1248/1248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9276 - loss: 0.2127 - val_accuracy: 0.9183 - val_loss: 0.2394
Epoch 6/20
[1m1248/1248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9272 - loss: 0.2160 - val_accuracy: 0.9186 - val_loss: 0.2386
Epoch 7/20
[1m1

In [61]:
import joblib
from keras import models

modelA = models.load_model("models/transfer_learning.keras")

modelB = models.Sequential(modelA.layers[:-1])
modelB.add(layers.Dense(1, activation="sigmoid"))

for layer in modelB.layers[:2]:
    layer.trainable = False

modelB.compile(
    loss="binary_crossentropy",
    optimizer=optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

H = modelB.fit(X_train_B, y_train_B, epochs=epoch, validation_data=(X_val_B, y_val_B))
history = H.history

joblib.dump(history, "histories/transfer_learning_binary")
modelB.save("models/transfer_learning_binary.keras")

Epoch 1/20
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9973 - loss: 0.0602 - val_accuracy: 0.9990 - val_loss: 0.0187
Epoch 2/20
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 999us/step - accuracy: 0.9989 - loss: 0.0169 - val_accuracy: 0.9990 - val_loss: 0.0115
Epoch 3/20
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 941us/step - accuracy: 0.9994 - loss: 0.0103 - val_accuracy: 0.9990 - val_loss: 0.0087
Epoch 4/20
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 841us/step - accuracy: 0.9993 - loss: 0.0088 - val_accuracy: 0.9995 - val_loss: 0.0072
Epoch 5/20
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 820us/step - accuracy: 0.9989 - loss: 0.0087 - val_accuracy: 0.9995 - val_loss: 0.0063
Epoch 6/20
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 829us/step - accuracy: 0.9993 - loss: 0.0061 - val_accuracy: 0.9995 - val_loss: 0.0056
Epoch 7/20
[1m315