<a href="https://colab.research.google.com/github/dawoodshahzad07/Transfer_Learning/blob/main/Transfer_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

```
Weight Initializers:
Zero: Not recommended (every neuron in a layer learns the same thing; symmetry is never broken).
Random: Basic, breaks symmetry.
Xavier/Glorot: Good for tanh, sigmoid.
He: Best for ReLU variants.
LeCun: Recommended for SELU; also usable with tanh, sigmoid.
```
---
```
Activation Functions:
Sigmoid: Binary classification, vanishing gradient.
Tanh: Zero-centered, vanishing gradient.
ReLU: General use, efficient, mitigates vanishing gradient (but units can "die" and output only zeros).
Leaky ReLU: Mitigates dying ReLU problem.
ELU: Useful for deeper networks.
Softmax: Output layer for multi-class classification.
```

In [None]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use("fivethirtyeight")
%load_ext tensorboard

In [None]:
# Fetch MNIST (downloaded on first use) as (images, labels) pairs for train and test.
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Divide by 255.0 so the pixel intensities become floats scaled down by the 8-bit maximum.
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# Hold out the first 5000 training samples for validation; the remainder is the training set.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Seed NumPy and TensorFlow before any layer is created.
np.random.seed(42)
tf.random.set_seed(42)

# Fully connected classifier: flattened 28x28 input -> 300 -> 100 -> 10.
# Hidden layers use He-normal initialisation followed by LeakyReLU; the output is softmax.
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(10, activation="softmax"),
]

model = tf.keras.models.Sequential(layers=LAYERS)

In [None]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
              metrics=["accuracy"])

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266610 (1.02 MB)
Trainable params: 266610 

In [None]:
history = model.fit(X_train, y_train, epochs=10,
                    validation_data=(X_valid, y_valid), verbose=2)

Epoch 1/10
1719/1719 - 7s - loss: 1.5144 - accuracy: 0.6016 - val_loss: 0.9318 - val_accuracy: 0.8024 - 7s/epoch - 4ms/step
Epoch 2/10
1719/1719 - 4s - loss: 0.7459 - accuracy: 0.8239 - val_loss: 0.5862 - val_accuracy: 0.8552 - 4s/epoch - 2ms/step
Epoch 3/10
1719/1719 - 6s - loss: 0.5443 - accuracy: 0.8599 - val_loss: 0.4696 - val_accuracy: 0.8784 - 6s/epoch - 4ms/step
Epoch 4/10
1719/1719 - 7s - loss: 0.4619 - accuracy: 0.8758 - val_loss: 0.4124 - val_accuracy: 0.8906 - 7s/epoch - 4ms/step
Epoch 5/10
1719/1719 - 6s - loss: 0.4168 - accuracy: 0.8854 - val_loss: 0.3779 - val_accuracy: 0.8986 - 6s/epoch - 4ms/step
Epoch 6/10
1719/1719 - 6s - loss: 0.3877 - accuracy: 0.8915 - val_loss: 0.3547 - val_accuracy: 0.9036 - 6s/epoch - 4ms/step
Epoch 7/10
1719/1719 - 4s - loss: 0.3669 - accuracy: 0.8964 - val_loss: 0.3375 - val_accuracy: 0.9060 - 4s/epoch - 3ms/step
Epoch 8/10
1719/1719 - 5s - loss: 0.3509 - accuracy: 0.9004 - val_loss: 0.3238 - val_accuracy: 0.9112 - 5s/epoch - 3ms/step
Epoch 9/

In [None]:
model.save("full_mnist_model.h5")

  saving_api.save_model(


In [None]:
load_full_mnist_model = tf.keras.models.load_model("full_mnist_model.h5")

In [None]:
load_full_mnist_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266610 (1.02 MB)
Trainable params: 266610 

# Transfer Learning

In [None]:
for layer in load_full_mnist_model.layers:
  print(f"{layer.name} : {layer.trainable}")

flatten : True
dense : True
leaky_re_lu : True
dense_1 : True
leaky_re_lu_1 : True
dense_2 : True


In [None]:
for layer in load_full_mnist_model.layers[:-1]:
  print(f"{layer.name} : {layer.trainable}")

flatten : True
dense : True
leaky_re_lu : True
dense_1 : True
leaky_re_lu_1 : True


In [None]:
# Freeze every layer except the last one (the 10-unit softmax output) so the
# pretrained weights are not updated when these layers are trained again.
for layer in load_full_mnist_model.layers[:-1]:
  layer.trainable = False
  # Echo the flag right after setting it to confirm the layer is now frozen.
  print(f"{layer.name} : {layer.trainable}")

flatten : False
dense : False
leaky_re_lu : False
dense_1 : False
leaky_re_lu_1 : False


In [None]:
for layer in load_full_mnist_model.layers:
  print(f"{layer.name} : {layer.trainable}")

flatten : False
dense : False
leaky_re_lu : False
dense_1 : False
leaky_re_lu_1 : False
dense_2 : True


In [None]:
# Take every pretrained layer except the original 10-class output layer.
# These are the same layer objects held by load_full_mnist_model, not copies.
lower_pretrained_layers = load_full_mnist_model.layers[:-1]

# Stack a fresh 2-unit softmax output on top of the reused layers for the binary task.
new_model = tf.keras.models.Sequential(
    [*lower_pretrained_layers, tf.keras.layers.Dense(2, activation="softmax")]
)

In [None]:
new_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_3 (Dense)             (None, 2)                 202       
                                                                 
Total params: 265802 (1.01 MB)
Trainable params: 202 (

In [None]:
np.unique(y_train)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [None]:
 def update_even_odd_labels(labels):
   """Map digit labels to binary even/odd targets.

   Args:
     labels: iterable of array-likes of integer labels
       (e.g. ``[y_train, y_test, y_valid]``).

   Returns:
     A new list with one integer array per input entry: 1 where the label is
     even, 0 where it is odd. Neither ``labels`` nor its arrays are modified.
   """
   # Build a fresh list instead of overwriting labels[idx], so the caller's list stays intact.
   return [np.where(np.asarray(label) % 2 == 0, 1, 0) for label in labels]


In [None]:
y_train_bin, y_test_bin, y_valid_bin = update_even_odd_labels([y_train, y_test, y_valid])

In [None]:
np.unique(y_train_bin)

array([0, 1])

In [None]:
new_model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
                  metrics=["accuracy"])

In [None]:
history = new_model.fit(X_train, y_train_bin, epochs= 10, validation_data= (X_valid, y_valid_bin), verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
new_model.evaluate(X_test, y_test_bin)



[0.28360113501548767, 0.8860999941825867]

In [None]:
X_new = X_test[:3]

y_test[:3], y_test_bin[:3]

(array([7, 2, 1], dtype=uint8), array([0, 1, 0]))

In [None]:
np.argmax(new_model.predict(X_new), axis=1)



array([0, 1, 0])

<h1> Task </h1> <br>
Relabel the digits for a new binary task: label = 1 if digit &gt; 5 <br>
label = 0 if digit &lt;= 5

In [None]:
new_model.save("task.h5")

  saving_api.save_model(


In [None]:
load_task_model = tf.keras.models.load_model("task.h5")

In [None]:
load_task_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_3 (Dense)             (None, 2)                 202       
                                                                 
Total params: 265802 (1.01 MB)
Trainable params: 202 (

In [None]:
for layer in load_task_model.layers:
  print(f"{layer.name} : {layer.trainable}")

flatten : False
dense : False
leaky_re_lu : False
dense_1 : False
leaky_re_lu_1 : False
dense_3 : True


In [None]:
np.unique(y_train)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [None]:
def update_low_high_labels(labels):
  """Exploratory helper: print each entry of ``labels`` with its position.

  Returns ``labels`` itself, unchanged. A later cell redefines this name with
  the version that actually relabels the arrays.
  """
  idx = 0
  for label in labels:
    print(f"{idx} : {label}")
    idx += 1
  return labels
update_low_high_labels([y_train, y_test, y_valid])

0 : [7 3 4 ... 5 6 8]
1 : [7 2 1 ... 4 5 6]
2 : [5 0 4 ... 2 1 2]


[array([7, 3, 4, ..., 5, 6, 8], dtype=uint8),
 array([7, 2, 1, ..., 4, 5, 6], dtype=uint8),
 array([5, 0, 4, ..., 2, 1, 2], dtype=uint8)]

In [None]:
def update_low_high_labels(labels, threshold=5):
  """Map digit labels to binary low/high targets.

  Args:
    labels: iterable of array-likes of integer labels
      (e.g. ``[y_train, y_test, y_valid]``).
    threshold: cutoff; labels strictly greater than it become 1. Defaults to 5.

  Returns:
    A new list with one integer array per input entry: 1 where the label is
    greater than ``threshold``, 0 otherwise. Neither ``labels`` nor its arrays
    are modified.
  """
  # Build a fresh list instead of overwriting labels[idx], so the caller's list stays intact.
  return [np.where(np.asarray(label) > threshold, 1, 0) for label in labels]

In [None]:
update_low_high_labels([y_train, y_test, y_valid])

[array([1, 0, 0, ..., 0, 1, 1]),
 array([1, 0, 0, ..., 0, 0, 1]),
 array([0, 0, 0, ..., 0, 0, 0])]

In [None]:
# NOTE: this rebinds y_train_bin / y_test_bin / y_valid_bin, which earlier held the
# even/odd labels. From here on they hold the low/high (> 5) labels, so re-running
# the earlier even/odd fit/evaluate cells after this one would use the wrong targets.
y_train_bin, y_test_bin, y_valid_bin = update_low_high_labels([y_train, y_test, y_valid])

In [None]:
np.unique(y_train_bin)

array([0, 1])

In [None]:
# Compile the reloaded model for the low/high task: integer class labels with a
# softmax output, plain SGD at a learning rate of 1e-3, accuracy as the metric.
load_task_model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [None]:
history_task = load_task_model.fit(X_train, y_train_bin, epochs=10, validation_data=(X_valid, y_valid_bin),verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
load_task_model.evaluate(X_test, y_test_bin)



[0.3357034921646118, 0.8629999756813049]

In [None]:
y_test[:3], y_test_bin[:3]

(array([7, 2, 1], dtype=uint8), array([1, 0, 0]))

In [None]:
X_new = X_test[:3]

In [None]:
np.argmax(load_task_model.predict(X_new), axis=1)



array([1, 0, 0])