In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [7]:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

In [8]:
X_train_full = X_train_full[:30000]
y_train_full = y_train_full[:30000]
X_test = X_test[:5000]
y_test = y_test[:5000]

In [9]:
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0

In [10]:
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]

In [11]:
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [21]:
def split_dataset(X, y):
    y_5_or_6 = (y == 5) | (y == 6) # sandals or shirt
    y_A = y[~y_5_or_6]
    y_A[y_A > 6] -= 2 # classes 7,8,9 moved to 5,6,7
    y_B = (y[y_5_or_6] == 6).astype(np.float32) 
    # binary classification: is it a shirt?
    return((X[~y_5_or_6], y_A), (X[y_5_or_6], y_B))

In [41]:
(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)

In [42]:
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)

In [43]:
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)

In [44]:
tf.random.set_seed(42)
np.random.seed(42)

In [60]:
model_A = keras.models.Sequential()
model_A.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_A.add(keras.layers.Dense(n_hidden, activation="relu"))
model_A.add(keras.layers.Dense(8, activation="softmax"))
    

In [62]:
model_A.compile(loss="sparse_categorical_crossentropy",
               optimizer=keras.optimizers.SGD(lr=1e-3),
               metrics=["accuracy"])

In [63]:
history = model_A.fit(X_train_A, y_train_A, epochs=5,
                     validation_data=(X_valid_A, y_valid_A))

Train on 19875 samples, validate on 4014 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [64]:
model_A.save("my_model_A.h5")

In [67]:
model_B = keras.models.Sequential()
model_B.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_B.add(keras.layers.Dense(n_hidden, activation="relu"))
model_B.add(keras.layers.Dense(1, activation="softmax"))

In [68]:
model_B.compile(loss="binary_crossentropy",
               optimizer=keras.optimizers.SGD(lr=1e-3),
               metrics=["accuracy"])

In [69]:
history = model_B.fit(X_train_B, y_train_B, epochs=5,
                     validation_data=(X_valid_B, y_valid_B))

Train on 5125 samples, validate on 986 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [72]:
model_B.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 300)               235500    
_________________________________________________________________
dense_12 (Dense)             (None, 100)               30100     
_________________________________________________________________
dense_13 (Dense)             (None, 50)                5050      
_________________________________________________________________
dense_14 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_15 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_16 (Dense)             (None, 1)                

In [73]:
model_A_clone = keras.models.clone_model(model_A)

In [74]:
model_A_clone.set_weights(model_A.get_weights())

In [75]:
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])
#set all the layer except the last layer to be non-trainable

In [76]:
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

In [77]:
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False

In [78]:
model_B_on_A.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 300)               235500    
_________________________________________________________________
dense_6 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_7 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_8 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_9 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_17 (Dense)             (None, 1)                

In [79]:
model_B_on_A.compile(loss="binary_crossentropy",
                    optimizer=keras.optimizers.SGD(lr=1e-3),
                    metrics=["accuracy"])

In [80]:
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=5,
                          validation_data=(X_valid_B, y_valid_B))

Train on 5125 samples, validate on 986 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [83]:
model_B.evaluate(X_test_B, y_test_B)



[7.64827382502906, 0.49844882]

In [84]:
model_B_on_A.evaluate(X_test_B, y_test_B)



[0.1715417182402379, 0.983454]