In [1]:
import numpy as np
# import pandas as pd
import tensorflow as tf
from tensorflow import keras

##### loading data set into train and validation

In [2]:
(X_train_full, y_train_full),(X_test,y_test) = keras.datasets.fashion_mnist.load_data()

##### splitting data set into train and test

In [3]:
X_train_full =X_train_full[:30000]
y_train_full =y_train_full[:30000]

In [4]:
X_test = X_test[:5000]
y_test = y_test[:5000]

##### Scale the train and test  by dividing by 255

In [5]:
X_train_full = X_train_full/255.0
X_test = X_test/255.0

#### dividing the train set in other to get a validation set

In [6]:
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

#### function for splitting the dataset

In [7]:
def split_dataset(X, y):
    y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts
    y_A = y[~y_5_or_6]
    y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7
    y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?
    return ((X[~y_5_or_6], y_A), (X[y_5_or_6], y_B))

In [8]:
(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)

#### Setting seed 

In [9]:
tf.random.set_seed(50)
np.random.seed(50)

##### building function 

In [10]:
model_A = keras.Sequential()
model_A.add(keras.layers.Flatten(input_shape=[28,28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_A.add(keras.layers.Dense(n_hidden, activation="selu"))
model_A.add(keras.layers.Dense(8, activation="softmax"))

##### compiling the method

In [11]:
model_A.compile(loss= "sparse_categorical_crossentropy",
    optimizer= keras.optimizers.SGD(lr=1e-3),
    metrics=["accuracy"])

##### Fitting model A

In [12]:
history = model_A.fit(X_train_A, y_train_A, epochs=15, validation_data=(X_valid_A, y_valid_A))

Train on 19875 samples, validate on 4014 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [13]:
model_A.save("my_model_A.h5")

##### Building model B

In [14]:
model_B = keras.Sequential()
model_B.add(keras.layers.Flatten(input_shape=[28,28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_B.add(keras.layers.Dense(n_hidden, activation='relu'))
model_B.add(keras.layers.Dense(1, activation='softmax'))

##### compiling model

In [15]:
model_B.compile(loss="binary_crossentropy",
    optimizer= keras.optimizers.Adam(lr=1e-3),
    metrics=["accuracy"])

In [16]:
history = model_B.fit(X_train_B, y_train_B, epochs=15,
            validation_data=(X_valid_B, y_valid_B))

Train on 5125 samples, validate on 986 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [17]:
model_A.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_3 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_4 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 4

In [18]:
model_B.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 300)               235500    
_________________________________________________________________
dense_7 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_8 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_9 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_10 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_11 (Dense)             (None, 1)                

##### Cloning model A

In [19]:
model_A_clone = keras.models.clone_model(model_A)

In [20]:
model_A_clone.set_weights(model_A.get_weights())

###### creating a new model_B_on_A based on model_A 

In [21]:
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])

In [22]:
model_B_on_A.add(keras.layers.Dense(1, activation='sigmoid'))

In [23]:
for layers in model_B_on_A.layers[:-1]:
    layers.trainable=False

In [24]:
model_B_on_A.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_3 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_4 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_12 (Dense)             (None, 1)                

In [25]:
model_B_on_A.compile(loss="binary_crossentropy",
     optimizer=keras.optimizers.SGD(lr=1e-3),
     metrics=["accuracy"])

In [26]:
model_B_on_A.fit(X_train_B, y_train_B, epochs=15, validation_data=(X_valid_B, y_valid_B))

Train on 5125 samples, validate on 986 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7fcc381aafd0>

In [27]:
model_B_on_A.save("real_model")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: real_model/assets


#### evaluating the model with transfer learning model

In [28]:
model_B.evaluate(X_test_B, y_test_B)



[7.64827382502906, 0.49844882]

In [29]:
model_B_on_A.evaluate(X_test_B, y_test_B)



[0.03175078355153443, 0.9958635]

#### we can conclude that the train transfer train model performs more better than the train model given that it has being able to learn accross time that even with small that set it will perform way better than the train model