References:

https://keras.io/guides/transfer_learning/






**Transfer learning** consists of taking features learned on one problem, and leveraging them on a new, similar problem. For instance, features from a model that has learned to identify racoons may be useful to kick-start a model meant to identify tanukis.

**Fine-tuning** is an optional last step of transfer learning: it consists of unfreezing the entire model obtained through transfer learning (or part of it), and re-training it on the new data with a very low learning rate.

In [None]:
import numpy as np
from tensorflow import keras
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential

In [None]:
# Freezing layers: understanding the `trainable` attribute
layer = Dense(units=5)
# `build` expects an input *shape*, not a bare integer. Pass the full shape
# (batch dimension left as None, 10 input features) so the kernel is created
# with shape (10, 5) and the bias with shape (5,).
layer.build((None, 10))

# Report the weight bookkeeping first with the layer trainable, then frozen.
# The total number of weights stays the same; freezing only moves them from
# `trainable_weights` to `non_trainable_weights`. The layer is left frozen
# (trainable = False) when the loop finishes.
for trainable_flag in (True, False):
    layer.trainable = trainable_flag
    print("weights:", len(layer.weights))
    print("trainable_weights:", len(layer.trainable_weights))
    print("non_trainable_weights:", len(layer.non_trainable_weights))

weights: 2
trainable_weights: 2
non_trainable_weights: 0
weights: 2
trainable_weights: 0
non_trainable_weights: 2


In [None]:
# Display the layer's current weight values as a list of NumPy arrays:
# the (10, 5) kernel followed by the length-5 bias.
layer.get_weights()

[array([[ 0.34438372, -0.10960078, -0.0829013 ,  0.43132752,  0.39637607],
        [-0.28892165,  0.15634978,  0.1124264 , -0.11049843,  0.30737466],
        [ 0.3237577 , -0.18090189,  0.23819059, -0.5063975 ,  0.4299615 ],
        [ 0.26807278, -0.415672  ,  0.45800513, -0.0847497 , -0.43254787],
        [ 0.439372  , -0.33280703, -0.28588083, -0.2069771 ,  0.36514604],
        [ 0.16605234, -0.4311357 ,  0.2676103 ,  0.10231072,  0.56260115],
        [-0.20577139,  0.35584807, -0.493136  , -0.382597  , -0.31366208],
        [-0.03944945,  0.1340285 ,  0.61397845, -0.09809119,  0.5924638 ],
        [ 0.20215517,  0.4350571 ,  0.08144724, -0.6234227 , -0.10029185],
        [ 0.54995435,  0.20072103,  0.05097967, -0.3460531 ,  0.33594483]],
       dtype=float32), array([0., 0., 0., 0., 0.], dtype=float32)]

In [None]:
# Display the layer's configuration dictionary. Its 'trainable' entry reflects
# the flag set on the layer earlier (False at this point).
layer.get_config()

{'activation': 'linear',
 'activity_regularizer': None,
 'bias_constraint': None,
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'bias_regularizer': None,
 'dtype': 'float32',
 'kernel_constraint': None,
 'kernel_initializer': {'class_name': 'GlorotUniform',
  'config': {'seed': None}},
 'kernel_regularizer': None,
 'name': 'dense',
 'trainable': False,
 'units': 5,
 'use_bias': True}

In [None]:
# With the layer frozen, the list of trainable weights is empty.
layer.trainable_weights

[]

In [None]:
# With the layer frozen, both variables (kernel and bias) are listed here instead.
layer.non_trainable_weights

[<tf.Variable 'kernel:0' shape=(10, 5) dtype=float32, numpy=
 array([[ 0.34438372, -0.10960078, -0.0829013 ,  0.43132752,  0.39637607],
        [-0.28892165,  0.15634978,  0.1124264 , -0.11049843,  0.30737466],
        [ 0.3237577 , -0.18090189,  0.23819059, -0.5063975 ,  0.4299615 ],
        [ 0.26807278, -0.415672  ,  0.45800513, -0.0847497 , -0.43254787],
        [ 0.439372  , -0.33280703, -0.28588083, -0.2069771 ,  0.36514604],
        [ 0.16605234, -0.4311357 ,  0.2676103 ,  0.10231072,  0.56260115],
        [-0.20577139,  0.35584807, -0.493136  , -0.382597  , -0.31366208],
        [-0.03944945,  0.1340285 ,  0.61397845, -0.09809119,  0.5924638 ],
        [ 0.20215517,  0.4350571 ,  0.08144724, -0.6234227 , -0.10029185],
        [ 0.54995435,  0.20072103,  0.05097967, -0.3460531 ,  0.33594483]],
       dtype=float32)>,
 <tf.Variable 'bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>]

In [None]:
# Make a model with 2 layers so one can be frozen while the other trains
layer1 = Dense(3, activation="relu")
layer2 = Dense(3, activation="sigmoid")
model = Sequential([Input(shape=(3,)), layer1, layer2])

# Freeze the first layer
layer1.trainable = False

# Keep a copy of the weights of layer1, layer2 for later reference
initial_layer1_weights_values = layer1.get_weights()
initial_layer2_weights_values = layer2.get_weights()

# Train the model for a single epoch on a tiny random batch; the data is only
# there to produce a gradient step, not to learn anything meaningful.
model.compile(optimizer="adam", loss="mse")
model.fit(np.random.random((2, 3)), np.random.random((2, 3)), epochs=1)
model.summary()

# Capture the weights again after training
final_layer1_weights_values = layer1.get_weights()
final_layer2_weights_values = layer2.get_weights()

# Check that the weights of layer1 (kernel, then bias) have not changed during
# training. layer2 is deliberately not asserted on: it is trainable, and its
# before/after values are compared by eye in the following cells.
np.testing.assert_allclose(
    initial_layer1_weights_values[0], final_layer1_weights_values[0]
)
np.testing.assert_allclose(
    initial_layer1_weights_values[1], final_layer1_weights_values[1]
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 3)                 12        
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 12        
Total params: 24
Trainable params: 12
Non-trainable params: 12
_________________________________________________________________


In [None]:
# Frozen layer: the weights before and after training should be identical.
initial_layer1_weights_values, "----------------", final_layer1_weights_values

([array([[-0.4503696 ,  0.35640144,  0.48098707],
         [ 0.6176903 , -0.6311712 ,  0.44780326],
         [ 0.16698074, -0.38884568,  0.88005114]], dtype=float32),
  array([0., 0., 0.], dtype=float32)],
 '----------------',
 [array([[-0.4503696 ,  0.35640144,  0.48098707],
         [ 0.6176903 , -0.6311712 ,  0.44780326],
         [ 0.16698074, -0.38884568,  0.88005114]], dtype=float32),
  array([0., 0., 0.], dtype=float32)])

In [None]:
# Trainable layer: the weights after training are expected to differ from the
# initial ones (compare the two halves of the output).
initial_layer2_weights_values, "----------------", final_layer2_weights_values

([array([[-0.65224934,  0.9101398 , -0.8492899 ],
         [-0.1779542 ,  0.64869547, -0.7673514 ],
         [-0.80746555,  0.30677772,  0.69177675]], dtype=float32),
  array([0., 0., 0.], dtype=float32)],
 '----------------',
 [array([[-0.65224934,  0.9101398 , -0.8492899 ],
         [-0.17695488,  0.6476961 , -0.76835054],
         [-0.8064656 ,  0.3057778 ,  0.6907768 ]], dtype=float32),
  array([ 0.00099994, -0.00099994, -0.00099992], dtype=float32)])

In [None]:
# Small two-layer network that will be nested inside another model below.
# Layers are appended one at a time, in the same order as before.
inner_model = Sequential()
inner_model.add(Input(shape=(3,)))
inner_model.add(Dense(3, activation="relu"))
inner_model.add(Dense(3, activation="relu"))
inner_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 3)                 12        
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 12        
Total params: 24
Trainable params: 24
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Outer model: wraps `inner_model` as a single layer and adds a sigmoid head.
model = Sequential(
    layers=[
        Input(shape=(3,)),
        inner_model,
        Dense(3, activation="sigmoid"),
    ]
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_1 (Sequential)    (None, 3)                 24        
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 12        
Total params: 36
Trainable params: 36
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Freeze the nested model and look at it from both sides: `inner_model` reports
# all 24 of its params as non-trainable, while the outer `model` keeps only the
# 12 params of its own final Dense layer trainable.
inner_model.trainable = False
inner_model.summary()
print("\n\n")  # blank lines to separate the two summaries
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 3)                 12        
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 12        
Total params: 24
Trainable params: 0
Non-trainable params: 24
_________________________________________________________________



Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_1 (Sequential)    (None, 3)                 24        
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 12        
Total params: 36
Trainable params: 12
Non-trainable params: 24
_________________________________________________________________


In [None]:
# Unfreeze the nested model again: both summaries go back to reporting every
# parameter as trainable (24 for `inner_model`, 36 for the outer `model`).
inner_model.trainable = True
inner_model.summary()
print("\n\n")  # blank lines to separate the two summaries
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 3)                 12        
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 12        
Total params: 24
Trainable params: 24
Non-trainable params: 0
_________________________________________________________________



Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_1 (Sequential)    (None, 3)                 24        
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 12        
Total params: 36
Trainable params: 36
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Setting `trainable` on the outer model propagates to everything it contains:
# the summary of `inner_model` below also reports 0 trainable params.
model.trainable = False  # Freeze the outer model
model.summary()
print("\n\n")  # blank lines to separate the two summaries
inner_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_1 (Sequential)    (None, 3)                 24        
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 12        
Total params: 36
Trainable params: 0
Non-trainable params: 36
_________________________________________________________________



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 3)                 12        
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 12        
Total params: 24
Trainable params: 0
Non-trainable params: 24
_________________________________________________________________


In [None]:
# Re-enable only the first Dense layer of `inner_model` while `inner_model` and
# the outer `model` themselves are still frozen. Both summaries below still
# report 0 trainable params: a frozen container does not count the weights of
# a child layer that was individually re-enabled.
inner_model.layers[0].trainable = True
inner_model.summary()
print("\n\n")  # blank lines to separate the two summaries
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 3)                 12        
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 12        
Total params: 24
Trainable params: 0
Non-trainable params: 24
_________________________________________________________________



Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_1 (Sequential)    (None, 3)                 24        
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 12        
Total params: 36
Trainable params: 0
Non-trainable params: 36
_________________________________________________________________


In [None]:
# Re-enable the first layer of the outer model. Judging by the summaries below,
# `model.layers[0]` is `inner_model`: its summary flips back to 24 trainable
# params, whereas the outer `model` (whose own flag is still False) keeps
# reporting 0 trainable params.
model.layers[0].trainable = True
model.summary()
print("\n\n")  # blank lines to separate the two summaries
inner_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_1 (Sequential)    (None, 3)                 24        
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 12        
Total params: 36
Trainable params: 0
Non-trainable params: 36
_________________________________________________________________



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 3)                 12        
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 12        
Total params: 24
Trainable params: 24
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Xception convolutional base used as the starting point for transfer learning.
base_model = Xception(
    include_top=False,  # Leave out the ImageNet classifier head.
    weights="imagenet",  # Start from the ImageNet pre-trained weights.
    input_shape=(150, 150, 3),
)

base_model.summary()

Model: "xception"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 150, 150, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 74, 74, 32)   864         input_4[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 74, 74, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 74, 74, 32)   0           block1_conv1_bn[0][0]            
___________________________________________________________________________________________

In [None]:
# Transfer Learning

# Then, freeze the base model.
base_model.trainable = False

# Create the input of the new model. The shape matches the `input_shape`
# that `base_model` was instantiated with.
inputs = keras.Input(shape=(150, 150, 3))

# We make sure that the base_model is running in inference mode here,
# by passing `training=False`. This matters for the later fine-tuning step:
# the base model contains BatchNormalization layers, and calling it in
# inference mode keeps them from updating their statistics once the base
# model is unfrozen.
x = base_model(inputs, training=False)

# Convert features of shape `base_model.output_shape[1:]` to vectors
x = keras.layers.GlobalAveragePooling2D()(x)

# A Dense classifier with a single unit (binary classification).
# No activation: the loss below is configured with `from_logits=True`.
outputs = keras.layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

# Train the model on new data
model.compile(optimizer=keras.optimizers.Adam(),
              loss=keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[keras.metrics.BinaryAccuracy()])

# NOTE(review): `new_dataset` is not defined anywhere in the visible notebook;
# it must be provided (e.g. a dataset yielding (image, label) batches of
# 150x150x3 images) before this cell can run.
# The literal `...` placeholders for `callbacks` and `validation_data` were
# removed because `fit` cannot accept Ellipsis for them; pass real values,
# e.g. `callbacks=[...]` and `validation_data=<held-out dataset>`, when available.
model.fit(new_dataset, epochs=20)

In [None]:
# Fine Tuning

# Unfreeze the base model
base_model.trainable = True

# It's important to recompile your model after you make any changes
# to the `trainable` attribute of any inner layer, so that your changes
# are taken into account.
model.compile(optimizer=keras.optimizers.Adam(1e-5),  # Very low learning rate
              loss=keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[keras.metrics.BinaryAccuracy()])

# Train end-to-end. Be careful to stop before you overfit!
# NOTE(review): `new_dataset` is not defined anywhere in the visible notebook
# and must be provided before this cell can run.
# The literal `...` placeholders for `callbacks` and `validation_data` were
# removed because `fit` cannot accept Ellipsis for them; pass real values
# (e.g. an early-stopping callback and a held-out validation dataset) when
# available.
model.fit(new_dataset, epochs=10)