# Transfer Learning

Libraries

In [39]:
from sklearn.model_selection import StratifiedShuffleSplit
from torchvision.datasets import ImageFolder
from torch.utils.data import Subset, DataLoader
from collections import Counter
from torchvision import transforms
import numpy as np
from torchvision import models, datasets, transforms
import torch.nn as nn
import torch.optim as optim
import torch
import time

1- Load the dataset and normalize the data

In [5]:
ls

[0m[01;34mCOVID-19_Radiography_Dataset[0m/  tp4.ipynb  TP4.pdf


In [7]:
# Channel statistics of ImageNet. The backbone used later is loaded with
# ImageNet-pretrained weights, which expect inputs standardised this way.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: fixed 224x224 size, conversion to a
# float tensor in [0, 1], then per-channel standardisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# One class per sub-folder of the dataset root.
# NOTE(review): ImageFolder walks each class folder recursively. The splits
# below add up to 42,330 samples; if the class folders hold both `images/` and
# `masks/` sub-folders, the masks are being loaded as samples too - confirm,
# and filter them out with `is_valid_file` if so.
dataset = ImageFolder(
    root='./COVID-19_Radiography_Dataset/',
    transform=transform
)

# Class index of every sample, read from the dataset's own label list.
# Iterating `dataset` instead would decode and resize every image on disk
# just to discard the pixels.
targets = np.array(dataset.targets)

2- split data 70% for training and 30% for validation

In [8]:
# One stratified shuffle: 70% of the samples for training, 30% for validation,
# with class proportions preserved and a fixed seed for reproducibility.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=42)

# Only the labels drive the stratification, so a zero array stands in for the
# features. With n_splits=1 the splitter yields exactly one (train, val) pair.
[(train_idx, val_idx)] = split.split(np.zeros(len(targets)), targets)

In [9]:
# Training subset: a view onto `dataset` restricted to the training indices.
train_dataset = Subset(dataset, indices=train_idx)

# Mini-batches of 32, reshuffled at every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

In [10]:
# Sanity check: number of samples in the training subset.
len(train_dataset)

29631

In [11]:
# Validation subset: a view onto `dataset` restricted to the held-out indices.
val_dataset = Subset(dataset, indices=val_idx)

# Mini-batches of 32 in a fixed order (no shuffling needed for evaluation).
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [12]:
# Sanity check: number of samples in the validation subset.
len(val_dataset)

12699

3- Load the architecture (pre-trained model): EfficientNet-B0

In [41]:
# EfficientNet-B0 initialised with the ImageNet-1k (V1) pre-trained weights,
# selected through the weights enum rather than its string alias.
model = models.efficientnet_b0(
    weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1
)

In [42]:
# Alternative (not used): keep the stock head and only resize its final layer to 4 outputs.
# model.classifier[1] = nn.Linear(model.classifier[1].in_features, 4)

In [43]:
# Freeze the whole pre-trained network: no parameter receives gradients
# until it is explicitly re-enabled.
for weight in model.parameters():
    weight.requires_grad_(False)

5- add fully connected layers

In [44]:
# Inspect the architecture before the classifier head is replaced.
print(model)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [45]:
# Width of the feature vector that feeds the head. It is read from the first
# Linear layer of the current head rather than from a fixed index, so this
# cell stays re-runnable: once the head below is installed, index 1 is a ReLU,
# which has no `in_features`.
head_in_features = next(
    layer.in_features
    for layer in model.classifier
    if isinstance(layer, nn.Linear)
)

# New classification head: two hidden fully connected layers with ReLU and
# dropout, followed by a 4-way output layer. Freshly created layers are
# trainable by default.
model.classifier = nn.Sequential(
    nn.Linear(head_in_features, 1024),
    nn.ReLU(),
    nn.Dropout(0.5),

    nn.Linear(1024, 512),
    nn.ReLU(),
    nn.Dropout(0.5),

    nn.Linear(512, 4)
)

In [46]:
# Print the model again to check that the new classifier head is in place.
print(model)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

6- count params

In [53]:
# Every parameter is either frozen or trainable, so a single pass over all of
# them gives the same total as summing the two groups separately.
total_params = sum(tensor.numel() for tensor in model.parameters())
total_params

5846144

8- Train using different transfer learning strategies

In [54]:
# Fine-tuning strategy: re-enable gradients for the last two feature stages
# and for the classifier head; everything else stays frozen.
for trainable_part in (model.features[-1], model.features[-2], model.classifier):
    trainable_part.requires_grad_(True)

In [55]:
# Tally frozen and trainable parameter counts in a single pass.
freeze_params = 0
not_freeze_params = 0
for tensor in model.parameters():
    if tensor.requires_grad:
        not_freeze_params += tensor.numel()
    else:
        freeze_params += tensor.numel()

print("freezed params: ", freeze_params)
print("not freezed params: ", not_freeze_params)
print('total params: ', freeze_params + not_freeze_params)

freezed params:  2878156
not freezed params:  2967988
total params:  5846144


In [56]:
# Training budget and objective.
num_epochs = 20
criterion = nn.CrossEntropyLoss()

# Adam only receives the parameters that are currently trainable; frozen
# ones are left out of the optimizer entirely.
optimizer = optim.Adam(
    (weight for weight in model.parameters() if weight.requires_grad),
    lr=0.001,
)

In [None]:
# Train for `num_epochs` epochs, reporting training loss/accuracy, epoch
# duration and validation accuracy after every epoch.
start_time = time.time()

# Run on whatever device the model currently lives on, so the loop works
# unchanged whether or not the model has been moved to a GPU.
device = next(model.parameters()).device

for epoch in range(num_epochs):
    epoch_start = time.time()
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    print(f"epoch {epoch+1}/{num_epochs}")

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # `criterion` returns the mean over the batch; weighting it by the
        # batch size makes the epoch loss a true per-sample average even when
        # the last batch is smaller than the others.
        running_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

    # Timed before validation so the figure reflects training cost only.
    epoch_time = time.time() - epoch_start

    train_loss = running_loss / total
    train_accuracy = 100 * correct / total

    print(f"loss: {train_loss:.4f}, accuracy: {train_accuracy:.2f}%, time: {epoch_time:.2f} sec")

    # Validation step: eval mode, no gradient tracking, and separate counters
    # so the training statistics above are not overwritten.
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_accuracy = 100 * val_correct / val_total
    print(f"val accuracy: {val_accuracy:.2f}%")

total_time = time.time() - start_time
# Round once, then split, so the seconds field can never be printed as "60".
minutes, seconds = divmod(round(total_time), 60)
print(f"training time {minutes}m {seconds}s")

epoch 1/20


9- evaluate model performance