## Transfer Learning: Feature Extraction

"*In practice, very few people train an entire Convolutional Network from scratch (with random initialization), because it is relatively rare to have a dataset of sufficient size. Instead, it is common to pretrain a ConvNet on a very large dataset (e.g. ImageNet, which contains 1.2 million images with 1000 categories), and then use the ConvNet either as an initialization or a fixed feature extractor for the task of interest.*" From PyTorch.

The goal here is to train a classifier from a pre-trained CNN on another data set. The original CNN classifier has 365 classes. The new data set has 132 class labels, so in feature extraction we freeze the convolutional base, and retrain the classifier portion of the CNN. Only this last fully connected layer is trained.

In [1]:
# Silence every warning category for the rest of the session.
import warnings
warnings.filterwarnings("ignore")

# Switch from the notebooks folder to the app folder so the relative paths
# used below ('data', 'NextPick/...') resolve.
import os
print(os.getcwd())  # e.g. C:\Users\chung\Documents\04-Insight\nextpick\notebooks
os.chdir('../NextPick-app/')
print(os.getcwd())  # e.g. C:\Users\chung\Documents\04-Insight\nextpick\NextPick-app
import time
import copy

from NextPick.NextPick.image_search import *
from NextPick.NextPick.ImageDataset import ImageDataset

import pickle
import numpy as np


import torch
import torch.nn as nn
from torch.autograd import Variable as V
import torchvision
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from PIL import Image

from barbar import Bar

C:\Users\chung\Documents\04-Insight\nextpick\notebooks
C:\Users\chung\Documents\04-Insight\nextpick\NextPick-app


`model` is the convolutional base of the pretrained CNN. Notice its `fc` layer is empty. We are going to use this and add on a new layer to train our new classifier. \
`model_full` is the original full pre-trained CNN. 

In [2]:
# load_pkl_paths / ImageDataset / load_pretrained_model are project helpers
# (presumably from the NextPick imports above); their internals are not
# visible in this notebook.
pkl_list = load_pkl_paths('data')
input_dataset = ImageDataset('data')
bs = 100  # batch size used by image_loader
image_loader = torch.utils.data.DataLoader(input_dataset, batch_size=bs)
# Per the accompanying text: `model` is the pretrained convolutional base
# (empty `fc`), `model_full` is the complete pretrained network.
model, model_full = load_pretrained_model()

In [3]:
# Preprocessing applied to every image before it reaches the network:
# resize to 256x256, keep the central 224x224 crop, convert to a float tensor,
# then normalise each channel with the mean/std conventionally used for
# ImageNet-style pretrained torchvision models.
transform = trn.Compose(
    [
        trn.Resize((256, 256)),
        trn.CenterCrop(224),
        trn.ToTensor(),
        trn.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [4]:
from torchvision.datasets import ImageFolder

# Dataset over the image files under 'data'; indexing it yields a
# (transformed image tensor, integer class index) pair, as used below.
raw_dataset = ImageFolder(root='data', transform=transform)

In [5]:
# Load the lookup that maps a class index to its label name (it is indexed
# with an ImageFolder class index a few cells below).
# The `with` block closes the file on exit, so no explicit close() is needed.
# NOTE: pickle can execute arbitrary code while loading; only unpickle files
# that ship with this project.
with open('NextPick/class_labels.pkl', 'rb') as f:
    class_labels = pickle.load(f)

In [6]:
# Spot-check one sample: the tensor shape after `transform` and the label
# name looked up from its class index.
image, idx = raw_dataset[456]
print(image.shape, class_labels[idx])

torch.Size([3, 224, 224]) apartment building


In [7]:
# Notebook display of the base network's module tree.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [8]:
# Notebook display of the full pretrained network's module tree.
model_full

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [9]:
# File listing produced by the project's ImageDataset (previewed below).
pd_files = input_dataset.get_file_df()

In [10]:
# Load the previously pickled file listing so it can be compared with the
# freshly built `pd_files`.
# The `with` block closes the file on exit, so no explicit close() is needed.
# NOTE: pickle can execute arbitrary code while loading; only unpickle files
# that ship with this project.
with open('NextPick/pd_files.pkl', 'rb') as f:
    pd_files1 = pickle.load(f)

In [11]:
# Preview the freshly built file listing.
pd_files.head(15)

Unnamed: 0,label,name,sub_paths,path
0,abbey,49640399046.jpg,/abbey\49640399046.jpg,data\abbey\49640399046.jpg
1,abbey,49644999867.jpg,/abbey\49644999867.jpg,data\abbey\49644999867.jpg
2,abbey,49646975087.jpg,/abbey\49646975087.jpg,data\abbey\49646975087.jpg
3,abbey,49651855633.jpg,/abbey\49651855633.jpg,data\abbey\49651855633.jpg
4,abbey,49654046898.jpg,/abbey\49654046898.jpg,data\abbey\49654046898.jpg
5,abbey,49654589586.jpg,/abbey\49654589586.jpg,data\abbey\49654589586.jpg
6,abbey,49657909762.jpg,/abbey\49657909762.jpg,data\abbey\49657909762.jpg
7,abbey,49661571612.jpg,/abbey\49661571612.jpg,data\abbey\49661571612.jpg
8,abbey,49664815043.jpg,/abbey\49664815043.jpg,data\abbey\49664815043.jpg
9,abbey,49664815063.jpg,/abbey\49664815063.jpg,data\abbey\49664815063.jpg


In [12]:
# Preview the pickled file listing for comparison.
pd_files1.head(15)

Unnamed: 0,path,label,name,sub_paths
0,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49640399046.jpg,/abbey\49640399046.jpg
1,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49644999867.jpg,/abbey\49644999867.jpg
2,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49646975087.jpg,/abbey\49646975087.jpg
3,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49651855633.jpg,/abbey\49651855633.jpg
4,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49654046898.jpg,/abbey\49654046898.jpg
5,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49654589586.jpg,/abbey\49654589586.jpg
6,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49657909762.jpg,/abbey\49657909762.jpg
7,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49661571612.jpg,/abbey\49661571612.jpg
8,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49664815043.jpg,/abbey\49664815043.jpg
9,C:/Users/chung/Documents/04-Insight/insight/Ne...,abbey,49664815063.jpg,/abbey\49664815063.jpg


In [13]:
# Remove the `path` column from the freshly built listing before comparing it
# with the pickled one.
pd_files = pd_files.drop(columns=['path'])

In [14]:
# NOTE(review): the preview of pd_files1 still shows a `path` column, while
# `path` was just dropped from pd_files, so the two frames do not share the
# same columns here. Presumably `path` should be dropped from pd_files1 as
# well before comparing - confirm.
pd_files.equals(pd_files1)

False

Now we freeze the convolutional base by setting `requires_grad = False` on each of its parameters, and train the new classifier.

In [15]:
# Freeze the pretrained base: turn off gradient tracking on every parameter so
# that backpropagation leaves these weights untouched.
for param in model.parameters():
    param.requires_grad_(False)

Construct a new linear layer for the new classifier. It keeps the same number of input features as the original `fc` layer, but its number of outputs is the number of class labels in the new data set.

In [16]:
# Reuse the input width of the original classifier layer for the new head.
num_features = model_full.fc.in_features
# 132 is the number of class labels in the new data set (per the notebook
# text). The new layer is created after the freeze above, so its parameters
# are trainable by default.
# NOTE(review): the count could presumably be derived from the dataset instead
# of being hard-coded - confirm.
model.fc = nn.Linear(num_features, 132)

Defining the device for training.

In [17]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [18]:
# Move the network (including the new fc layer) onto the chosen device.
model = model.to(device)

In [19]:
# Multi-class classification loss; it is fed the raw network outputs and the
# integer class indices in the training loop.
criterion = nn.CrossEntropyLoss()

In [20]:
# Only the parameters of the new fc layer are handed to the optimizer; the
# rest of the network was frozen above.
optimizer_conv = optim.Adam(model.fc.parameters(), lr=0.0001)
# Multiply the learning rate by 0.1 after every 7 scheduler steps (the training
# loop steps the scheduler once per epoch).
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

We use the torchvision `ImageFolder` dataset class together with the PyTorch `DataLoader`.

In [21]:
# Training batches of 128 samples, reshuffled every epoch and loaded by
# 3 worker processes.
trainloader = torch.utils.data.DataLoader(raw_dataset, batch_size=128,
                                          shuffle=True, num_workers=3)

In [22]:
# Attaches a parameter-free Identity module under the name `fc_backup`.
# NOTE(review): nothing else in this notebook references `fc_backup`; it looks
# like a leftover - confirm before removing.
model.fc_backup = nn.Identity()

In [23]:
# Number of training samples; the training loop divides the summed loss and
# hit count by this to get per-sample averages.
ds_length = len(raw_dataset)

We use the [Barbar](https://github.com/yusugomori/barbar) package to show a progress bar during PyTorch training. The top-k (here top-5) accuracy computation follows this [discussion](https://discuss.pytorch.org/t/imagenet-example-accuracy-calculation/7840/3).

In [25]:
# Train the new fc head for a fixed number of epochs, reporting the mean loss
# and the top-k accuracy over the training set after every epoch.
since = time.time()
num_epochs = 9
k = 5  # a sample counts as correct when its label is among the k highest outputs

# NOTE(review): train() also switches the frozen base's BatchNorm layers to
# training mode, so their running statistics keep being updated even though
# their weights are frozen - confirm this is intended for feature extraction.
model.train()

for epoch in range(num_epochs):  # loop over the dataset multiple times

    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    running_loss = 0.0
    # Kept as a plain Python int so the epoch accuracy can be formatted
    # without a device-to-host tensor conversion (works on CPU and CUDA alike).
    running_corrects = 0

    for inputs, labels in Bar(trainloader):

        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer_conv.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_conv.step()

        # statistics (no gradient tracking needed here)
        with torch.no_grad():
            # preds has shape [batch_size, k]
            _, preds = torch.topk(outputs, k, largest=True, sorted=True)
            # Broadcast each label across its k predictions. Summing the
            # boolean hits directly avoids flattening a transposed tensor
            # with view(), which fails on non-contiguous memory.
            correct = preds.eq(labels.unsqueeze(1))
        running_loss += loss.item() * inputs.size(0)
        running_corrects += correct.sum().item()

    exp_lr_scheduler.step()
    epoch_loss = running_loss / ds_length
    epoch_acc = running_corrects / ds_length

    print('Loss: %.4f Acc: %.4f' % (epoch_loss, epoch_acc))

    # Printed once per epoch, so this reports the cumulative elapsed time.
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

print('Finished Training')

Epoch 0/8
----------
Loss: 4.2810 Acc: 0.3998
Training complete in 25m 6s
Epoch 1/8
----------
Loss: 4.0584 Acc: 0.4901
Training complete in 49m 51s
Epoch 2/8
----------
Loss: 3.8630 Acc: 0.5420
Training complete in 74m 39s
Epoch 3/8
----------
Loss: 3.6917 Acc: 0.5723
Training complete in 99m 29s
Epoch 4/8
----------
Loss: 3.5386 Acc: 0.5939
Training complete in 124m 18s
Epoch 5/8
----------
Loss: 3.4116 Acc: 0.6120
Training complete in 149m 0s
Epoch 6/8
----------
Loss: 3.3359 Acc: 0.6235
Training complete in 173m 43s
Epoch 7/8
----------
Loss: 3.3242 Acc: 0.6247
Training complete in 198m 30s
Epoch 8/8
----------
Loss: 3.3122 Acc: 0.6267
Training complete in 223m 24s
Finished Training


Save the trained model

In [26]:
# Persist only the weights (the state_dict), not the module object itself.
torch.save(model.state_dict(), "transfer_model.pth")

Load the model

In [27]:
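# Only the state_dict was saved above, so the model must be rebuilt first
# (same base network plus the new 132-output `fc` layer) before loading:
# model.load_state_dict(torch.load("transfer_model.pth", map_location=device))
# model.eval()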