# Transfer Learning
## Feature Extraction
In feature extraction, we start with a pretrained model, keep its existing weights fixed, and update only the weights of the final layer, from which the predictions are derived.

## Finetuning a Pretrained Network
In finetuning, we start with a pretrained model and update all of the model’s parameters for the new task, in essence retraining the whole model.


Refs: [1](https://ruder.io/transfer-learning/), [2](https://cs231n.github.io/transfer-learning/)

In [6]:
import torch
import torchvision.models as models

# Load two ImageNet-pretrained VGG19 variants from torchvision.

# Variant without batch normalisation
model_vgg19 = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)

# Variant with batch normalisation
model_vgg19_bn = models.vgg19_bn(weights=models.VGG19_BN_Weights.IMAGENET1K_V1)

# Freeze the `features` sub-module (the convolutional stack) so none of its
# parameters receive gradients; the classifier parameters are left untouched.
model_vgg19_bn.features.requires_grad_(False)

# Horizontal rule printed between the sections of the report below.
separator = "------------------------------------------------------------------------"

print(separator)
print("Model architecture:")
print(model_vgg19_bn)
print(separator)

# First layer of the feature extractor.
print(model_vgg19_bn.features[0])
print(separator)

# The classifier head, i.e. the part that stays trainable.
# (To dump its raw weights, iterate over model_vgg19_bn.classifier.parameters().)
print("model_vgg19_bn.classifier", model_vgg19_bn.classifier)









------------------------------------------------------------------------
Model architecture:
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool

In [10]:
import torchvision.models as models
import torchvision
import torchviz

# Load an ImageNet-pretrained ResNet-18. The `weights=` enum API replaces the
# deprecated `pretrained=True` flag and matches how the VGG19 models are
# loaded earlier in this file.
resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze every parameter. The attribute must be spelled `requires_grad`:
# assigning to a misspelled name only creates an unused attribute on the
# tensor and leaves the parameter trainable.
for param in resnet18.parameters():
    param.requires_grad = False

# Inspect the final fully connected layer.
# NOTE: `fc.in_features` / `fc.out_features` are the input width and the
# number of outputs (classes) of that last linear layer -- they are not the
# image size the network accepts.
print("resnet18 input size: ", resnet18.fc.in_features)
print("resnet18 output size: ",resnet18.fc.out_features)

# resnet18 has an average-pooling layer at the end, so the spatial input size
# does not matter much, provided the feature map reaching that layer is not
# smaller than what the pooling needs.
#
# The dummy input is created with requires_grad=True on purpose: with all
# parameters frozen, the output would otherwise carry no autograd graph and
# torchviz would have nothing to draw. Frozen parameters do not show up as
# leaf nodes in the rendered graph.
dummy_input = torch.randn(size=[1, 3, 128, 128], requires_grad=True)

resnet18_graph = torchviz.make_dot(
    resnet18(dummy_input), dict(resnet18.named_parameters())
)
resnet18_graph.format = 'svg'
# save() writes the graph source to this path; render() then produces the
# image file (its path is the value displayed as the cell result).
resnet18_graph.save('images/resnet18_graph')
resnet18_graph.render()

resnet18 input size:  512
resnet18 output size:  1000


'images/resnet18_graph.svg'