<a href="https://colab.research.google.com/github/aminfazy/IITP_BSE_2022/blob/main/transer_learning_cnn_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Transfer Learning Example

## Transfer learning is a basic appoach of model reuse and retraining
### A model trained on one dataset for a different domain is refined by modifying some of the last layers and training with new dataset
    * This saves a lot of training time as we only need to modify some of the layers and retrain only those layers
    * Also sometimes we don't have a very big dataset which we can use for training a model so we take pretrained model and retrain it by making only some of the layers trainable
    * This is one of the basic techniques for domain adaptation

### This is a basic example from keras examples directory
(Available @ https://github.com/keras-team/keras/blob/master/examples/mnist_transfer_cnn.py) 

    * - Train a simple convnet on the MNIST dataset the first 5 digits [0..4].
    * - Freeze convolutional layers and fine-tune dense layers for the classification of digits [5..9].
   



In [7]:
from __future__ import print_function

import datetime
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.utils import np_utils 

In [2]:
now = datetime.datetime.now

batch_size = 128  # no.of elements to be used for one iteration
num_classes = 5   # no. of classes for training
epochs = 1        # how many times the whole dataset should be iterated

# input image dimensions
img_rows, img_cols = 28, 28
# number of convolutional filters to use
filters = 32
# size of pooling area for max pooling
pool_size = 2
# convolution kernel size 
kernel_size = 3  # here kernel_size means a 3x3 filter

if K.image_data_format() == 'channels_first':  # channels mean no. of color channels of the image
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)    # tensorflow uses channels_last config by default

### Define the funtion which will run the training with input model and training data 
    This function basically does some preprocessing on training data and then runs compile and fit functions of keras.models.Sequential 

In [8]:
def train_model(model, train, test, num_classes):
    x_train = train[0].reshape((train[0].shape[0],) + input_shape)
    x_test = test[0].reshape((test[0].shape[0],) + input_shape)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = np_utils.to_categorical(train[1], num_classes)
    y_test = np_utils.to_categorical(test[1], num_classes)

    # compile the model
    # you can chnage the parameters in this compile function
    # custom funtions for loss and opitizer can be used: ref to keras documentation for more
    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    t = now()
    
    # Train the model
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))
    print('Training time: %s' % (now() - t))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

In [40]:
# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# create two datasets one with digits below 5 and one with 5 and above
x_train_lt5 = x_train[y_train < 5]
y_train_lt5 = y_train[y_train < 5]
x_test_lt5 = x_test[y_test < 5]
y_test_lt5 = y_test[y_test < 5]

x_train_gte5 = x_train[y_train >= 5]
y_train_gte5 = y_train[y_train >= 5] - 5
x_test_gte5 = x_test[y_test >= 5]
y_test_gte5 = y_test[y_test >= 5] - 5

In [5]:
# define two groups of layers: feature (convolutions) and classification (dense)
feature_layers = [
    Conv2D(filters, kernel_size,
           padding='valid',
           input_shape=input_shape),
    Activation('relu'),
    Conv2D(filters, kernel_size),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(0.25),
    Flatten(),
]

classification_layers = [
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax')
]

In [9]:
# create complete model
model = Sequential(feature_layers + classification_layers)

# train model for 5-digit classification [0..4]
train_model(model,
            (x_train_lt5, y_train_lt5),
            (x_test_lt5, y_test_lt5), num_classes)

x_train shape: (30596, 28, 28, 1)
30596 train samples
5139 test samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:04:22.779622
Test score: 1.3344688415527344
Test accuracy: 0.8449114561080933


In [10]:
# Save the model in hdf5 format

model.save("model_1.h5")

In [38]:
# Load the pre-trained model as a new model object

from keras.saving.hdf5_format import load_model_from_hdf5
from keras.models import load_model
model_2 = load_model_from_hdf5("model_1.h5")

In [39]:
# Check the model whether it matches the previous model or not
# Let's check the summary for comparison
model_2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 activation (Activation)     (None, 26, 26, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 32)        9248      
                                                                 
 activation_1 (Activation)   (None, 24, 24, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 12, 12, 32)        0         
                                                      

In [32]:
# layer_output = model.get_layer('vgg16').get_layer('block3_conv1').output
# We need access to the layers for various manipulations 

layer_output = model_2.layers

for idx in range(len(model_2.layers)):
  print(model_2.get_layer(index = idx).name)



conv2d
activation
conv2d_1
activation_1
max_pooling2d
dropout
flatten
dense
activation_2
dropout_1
dense_1
activation_3


In [33]:
# How do we check the weights of the model

model_2.get_layer('conv2d').get_weights()

[array([[[[-0.03345221, -0.06924943,  0.01241054, -0.0279286 ,
            0.13545208, -0.02964871, -0.04199512,  0.08152123,
            0.07823996, -0.089711  , -0.0186399 , -0.00643799,
            0.04886724,  0.02160867,  0.09711806,  0.0177983 ,
           -0.04666913,  0.00059156, -0.08043575, -0.10659873,
           -0.05022452, -0.02692206,  0.03042031, -0.04926324,
            0.0049313 , -0.09893714,  0.02643462,  0.09612635,
            0.11015517,  0.10334422,  0.12001359,  0.0196844 ]],
 
         [[ 0.12482091,  0.06414187,  0.05248322,  0.00567564,
            0.08036725,  0.12194674,  0.03858657, -0.0886545 ,
            0.10085871, -0.10280392, -0.03848182, -0.00766634,
            0.13827725,  0.09112118,  0.01961266,  0.04886862,
            0.07698962,  0.01866923, -0.08646477, -0.09038078,
           -0.0548382 , -0.09671479,  0.05233694, -0.1185357 ,
            0.1152828 , -0.02767426, -0.0422859 , -0.00964746,
            0.06996392,  0.13275747,  0.09779562, -

In [34]:
# Check whether a layer in the model is traiable or not

model_2.get_layer('conv2d').trainable

True

In [41]:
# Modify the layer attribute to make is trainable or non-trainable

model_2.get_layer('conv2d').traninable = False

model_2.get_layer('conv2d').traninable

True

### Model trained in the above block can be used for classifying digits 5 to 9 by fine tuning it
    For fine tuning we will freeze all the convolutional and maxpooling layers (feature layers)
    This can be done by making those layers non-trainable
    only the top(last) two layers (dense layers) are left trainable

In [42]:
# transfer: train dense layers for new classification task [5..9]
train_model(model_2,
            (x_train_gte5, y_train_gte5),
            (x_test_gte5, y_test_gte5), num_classes)

x_train shape: (29404, 28, 28, 1)
29404 train samples
4861 test samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:04:22.588144
Test score: 1.3748985528945923
Test accuracy: 0.6733182668685913
