<a href="https://colab.research.google.com/github/VinayakDubey07/Deep-Learning-Concepts/blob/main/dl_lab5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np


In [2]:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [3]:
len(X_train_full)

60000

In [4]:
len(X_test)

10000

In [5]:
#train the data
X_train_full = X_train_full[:30000]
y_train_full = y_train_full[:30000]

In [6]:
X_test = X_test[:5000]
y_test = y_test[:5000]

In [7]:
#scaling the datasets
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0

In [8]:
#divinding the datasets into training and validation set
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [9]:
def split_dataset(X, y):
  y_5_or_6 = (y==5) | (y==6)
  y_A = y[~y_5_or_6]
  y_A[y_A > 6] -= 2
  y_B = (y[y_5_or_6] == 6).astype(np.float32)
  return ((X[~y_5_or_6], y_A),
          (X[y_5_or_6], y_B))

In [10]:
(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)

In [11]:
X_train_B = X_train_B[:200]
y_train_B = y_train_B[:200]

In [12]:
len(X_train_B)

200

In [13]:
X_train_A.shape

(19875, 28, 28)

In [14]:
X_train_B.shape

(200, 28, 28)

In [15]:
y_train_A[:30]

array([4, 0, 5, 7, 7, 7, 4, 4, 3, 4, 0, 1, 6, 3, 4, 3, 2, 6, 5, 3, 4, 5,
       1, 3, 4, 2, 0, 6, 7, 1], dtype=uint8)

In [16]:
model_A = keras.models.Sequential()
model_A.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
  model_A.add(keras.layers.Dense(n_hidden, activation="selu"))
model_A.add(keras.layers.Dense(8, activation="softmax"))

In [17]:
#compiling the model
model_A.compile(loss="sparse_categorical_crossentropy",
                optimizer=keras.optimizers.SGD(lr=1e-3),
                metrics=["accuracy"])



In [18]:
#training the model
history = model_A.fit(X_train_A, y_train_A, epochs=5, validation_data=(X_valid_A, y_valid_A))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
model_A.save("my_model_A.h5") # saving the model we created

In [20]:
# defining the model
model_B = keras.models.Sequential()
model_B.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
  model_B.add(keras.layers.Dense(n_hidden, activation="selu"))
model_B.add(keras.layers.Dense(1, activation="sigmoid"))

In [21]:
# compiling the model with binary crossentropy
# that can accept either logits (i.e values from last linear node, z)
# or probabilities from the last Sigmoid node
model_B.compile(loss="binary_crossentropy",

optimizer=keras.optimizers.SGD(lr=1e-3),
metrics=["accuracy"])



In [22]:
# training the model
history = model_B.fit(X_train_B, y_train_B, epochs=5,
validation_data=(X_valid_B, y_valid_B))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
model_A.summary() # generating model summary

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 dense_2 (Dense)             (None, 50)                5050      
                                                                 
 dense_3 (Dense)             (None, 50)                2550      
                                                                 
 dense_4 (Dense)             (None, 50)                2550      
                                                                 
 dense_5 (Dense)             (None, 8)                 4

In [24]:
model_B.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_6 (Dense)             (None, 300)               235500    
                                                                 
 dense_7 (Dense)             (None, 100)               30100     
                                                                 
 dense_8 (Dense)             (None, 50)                5050      
                                                                 
 dense_9 (Dense)             (None, 50)                2550      
                                                                 
 dense_10 (Dense)            (None, 50)                2550      
                                                                 
 dense_11 (Dense)            (None, 1)                

In [25]:
model_A = keras.models.load_model("my_model_A.h5") # loading our saved model
model_B_on_A = keras.models.Sequential(model_A.layers[:-1]) # creating new model based on existing layer
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid")) # adding new layer to new model

In [26]:
# model_A and model_B_on_A now share some layers. When you train
# model_B_on_A, it will also affect model_A. To avoid that, you need to clone
# model_A before you reuse its layers. To do this, you clone model Aâ€™s
# architecture with clone_model(), then copy its weights
# (since clone_model() does not clone the weights)
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

In [27]:
# freezing reused layers
for layer in model_B_on_A.layers[:3]:
  layer.trainable = False

In [28]:
# compiling the model
model_B_on_A.compile(loss="binary_crossentropy",
optimizer=keras.optimizers.SGD(lr=1e-3),
metrics=["accuracy"])



In [29]:
# training the model
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,
validation_data=(X_valid_B, y_valid_B))

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [30]:
# unfreezing reused layers
for layer in model_B_on_A.layers[:-1]:  
  layer.trainable = True

In [31]:
# compiling after reducing learning rate to avoid damaging the reused weights
model_B_on_A.compile(loss="binary_crossentropy",

optimizer=keras.optimizers.SGD(lr=1e-3),
metrics=["accuracy"])



In [32]:
# training the model
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=5,
validation_data=(X_valid_B, y_valid_B))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [33]:
model_B.evaluate(X_test_B, y_test_B) # evaluating the model A



[0.055875085294246674, 0.9834539890289307]

In [34]:
model_B_on_A.evaluate(X_test_B, y_test_B) # evaluating the model B



[0.02260442264378071, 0.997931718826294]