In [None]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torchsummary import summary
import matplotlib.pyplot as plt
import time
import os
import copy

# Turn on cuDNN's benchmark mode so it can auto-select convolution algorithms.
cudnn.benchmark = True
# Put matplotlib into interactive mode.
plt.ion()

In [None]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
# Models and batches are moved to this device by the cells below.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# To force CPU execution, use this line instead:
# device = torch.device("cpu")

In [None]:
# Per-channel mean / std used to normalize inputs (the ImageNet statistics that
# torchvision's pretrained models are documented to expect).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
IMAGE_SIZE = 224
BATCH_SIZE = 32
NUM_WORKERS = 4


def _build_transform(*augmentations):
    """Return the pipeline: resize -> optional augmentations -> tensor -> normalize."""
    # NOTE(review): Resize with a single int only fixes the shorter image side, so
    # this assumes the source images share one aspect ratio (otherwise batches
    # cannot be stacked) — confirm against the dataset.
    return transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        *augmentations,
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])


# Data augmentation and normalization for training;
# just normalization for validation and test.
data_transforms = {
    'train': _build_transform(
        transforms.RandomPerspective(distortion_scale=0.1, p=0.5),
        transforms.RandomHorizontalFlip(p=0.5),
    ),
    'val': _build_transform(),
    'test': _build_transform(),
}

# Root folder holding the 'train' and 'val' sub-folders. With '' the joins below
# resolve to 'train' / 'val' relative to the current working directory.
data_dir = ''

train_datasets = datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train'])
val_datasets = datasets.ImageFolder(os.path.join(data_dir, 'val'), data_transforms['val'])
# test_datasets = datasets.ImageFolder(os.path.join(data_dir, 'test'), data_transforms['test'])

# Only the training loader is shuffled. The validation metrics are sums over the
# whole split, so a fixed order gives the same numbers and a reproducible pass.
train_dataloader = torch.utils.data.DataLoader(
    train_datasets, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_dataloader = torch.utils.data.DataLoader(
    val_datasets, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
# test_dataloader = torch.utils.data.DataLoader(
#     test_datasets, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Class names in the order ImageFolder assigned label indices.
class_names = train_datasets.classes

In [None]:
def imshow(inp, title=None):
    """Show a normalized channel-first image tensor with matplotlib.

    The tensor is converted to a NumPy array, moved to channel-last layout, the
    mean/std normalization is reversed and the values are clipped to [0, 1]
    before plotting.

    Args:
        inp: tensor with the 3 colour channels on its first axis; must support
            ``.numpy()``.
        title: optional value handed to ``plt.title``; skipped when ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # (C, H, W) -> (H, W, C), the axis order plt.imshow is given here.
    image = np.transpose(inp.numpy(), (1, 2, 0))
    # Reverse the normalization, then clamp to the displayable range.
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # brief pause so that the plot gets refreshed


# Pull one batch from the training loader to eyeball the augmented inputs.
inputs, classes = next(iter(train_dataloader))

# Tile the images of that batch into a single grid image.
out = torchvision.utils.make_grid(inputs)

# Show the grid, titled with the class name of every sample in batch order.
imshow(out, title=[class_names[x] for x in classes])

In [None]:
# Bare last expression: the notebook renders the training dataset's repr as output.
train_datasets

In [None]:
# Bare last expression: the notebook renders the validation dataset's repr as output.
val_datasets

In [None]:
def evaluation(model, dataloader):
    """Compute accuracy and mean loss of ``model`` over every sample in ``dataloader``.

    The model is switched to eval mode and stays in that mode on return. The
    function reads the notebook-level globals ``device`` (where batches are moved)
    and ``criterion`` (the loss). It does not touch any optimizer.

    Args:
        model: network called as ``model(inputs)``; predictions are the argmax of
            its output along dimension 1.
        dataloader: iterable of ``(inputs, labels)`` batches that exposes a sized
            ``.dataset`` attribute.

    Returns:
        ``(epoch_acc, epoch_loss)`` where ``epoch_acc`` is a 0-dim double tensor
        (correct predictions / dataset size) and ``epoch_loss`` is a Python float
        (sum of per-batch loss times batch size, divided by dataset size).

    Raises:
        ValueError: if ``dataloader.dataset`` is empty.
    """
    dataset_size = len(dataloader.dataset)
    if dataset_size == 0:
        # Fail with a clear message instead of a ZeroDivisionError / AttributeError.
        raise ValueError('evaluation() requires a non-empty dataset')

    model.eval()
    running_loss = 0.0
    # Tensor accumulator so the result keeps a `.double()` / `.item()` interface
    # even when the loader yields no batch.
    running_corrects = torch.zeros((), dtype=torch.long, device=device)

    # Evaluation needs no gradients, so the whole pass runs under no_grad.
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)

    epoch_loss = running_loss / dataset_size
    epoch_acc = running_corrects.double() / dataset_size
    return epoch_acc, epoch_loss

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs):
    """Train ``model`` and restore the weights of its best validation epoch.

    Every epoch runs one pass over the notebook-level ``train_dataloader``, steps
    ``scheduler`` once, then scores the model on the notebook-level
    ``val_dataloader`` through ``evaluation``. Batches are moved to the global
    ``device``. Progress is printed per epoch.

    NOTE(review): ``criterion`` here drives the training loss only; the validation
    loss comes from ``evaluation``, which reads the global ``criterion`` — they are
    the same object in this notebook, confirm if that changes.

    Args:
        model: network to optimize (modified in place).
        criterion: callable ``criterion(outputs, labels)`` returning the batch loss.
        optimizer: optimizer whose ``zero_grad`` / ``step`` are called per batch.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of epochs to run.

    Returns:
        ``(model, train_loss, train_acc, val_loss, val_acc)``: the model loaded
        with the weights of the epoch that had the highest validation accuracy,
        followed by four lists holding one float per epoch.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_acc = 0.0
    train_dataset_size = len(train_dataloader.dataset)
    train_loss = []
    val_loss = []
    train_acc = []
    val_acc = []

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        running_loss = 0.0
        running_corrects = 0  # plain int: stays valid even if the loader is empty

        model.train()
        # Iterate over data during the training stage.
        for inputs, labels in train_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward (gradients forced on, even if the caller disabled them)
            with torch.set_grad_enabled(True):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # backward + optimize phase
                loss.backward()
                optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        scheduler.step()

        train_epoch_loss = running_loss / train_dataset_size
        train_epoch_acc = running_corrects / train_dataset_size
        train_loss.append(train_epoch_loss)
        train_acc.append(train_epoch_acc)
        print(f'Train loss: {train_epoch_loss:.4f} Training_acc: {train_epoch_acc:.4f}')

        val_since = time.time()
        val_epoch_acc, val_epoch_loss = evaluation(model, val_dataloader)
        val_loss.append(val_epoch_loss)
        val_acc.append(val_epoch_acc.item())
        val_time = time.time() - val_since
        print(f'Val complete in {val_time:.4f} seconds.')
        print(f'Val loss: {val_epoch_loss:.4f} Val acc: {val_epoch_acc:.4f}')

        # Snapshot the weights whenever validation accuracy improves.
        if val_epoch_acc > best_val_acc:
            best_val_acc = val_epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print(f'Training and validation complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.4f}s')
    print(f'Best Val Acc: {best_val_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, train_loss, train_acc, val_loss, val_acc

In [None]:
def loss_plot(train_loss, val_loss, index, epoch_num, lr):
    """Draw training and validation loss per epoch with pyplot and show the figure.

    Args:
        train_loss: per-epoch training losses (``epoch_num`` values).
        val_loss: per-epoch validation losses (``epoch_num`` values).
        index: value inserted in the title as the number of locked layer(s).
        epoch_num: number of epochs; the x axis is ``0 .. epoch_num - 1``.
        lr: learning rate inserted in the title.
    """
    epochs = np.arange(epoch_num)

    # (series, line format, legend label) for each curve, drawn in this order.
    for series, line_format, legend_label in (
        (train_loss, 'r-+', 'Train loss'),
        (val_loss, 'b-o', 'Val loss'),
    ):
        plt.plot(epochs, series, line_format, label=legend_label)

    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.title(f'Train and val loss with {index!s} layer(s) locked and learning rate set to {lr!s}')
    plt.legend()
    plt.show()
    
def acc_plot(train_acc, val_acc, index, epoch_num, lr):
    """Draw training and validation accuracy per epoch with pyplot and show the figure.

    Args:
        train_acc: per-epoch training accuracies (``epoch_num`` values).
        val_acc: per-epoch validation accuracies (``epoch_num`` values).
        index: value inserted in the title as the number of locked layer(s).
        epoch_num: number of epochs; the x axis is ``0 .. epoch_num - 1``.
        lr: learning rate inserted in the title.
    """
    x = np.arange(epoch_num)
    plt.plot(x, train_acc, 'r-+', label='Train acc')
    plt.plot(x, val_acc, 'b-o', label='Val acc')
    # The y axis shows accuracy (it was previously mislabelled 'Loss').
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    title = 'Train and val acc with ' + str(index) + ' layer(s) locked and learning rate set to ' + str(lr)
    plt.title(title)
    plt.legend()
    plt.show()

In [None]:
# Histories of all experiments: each training cell below appends the per-epoch
# list returned by train_model, so position k holds the k-th experiment that ran.
train_loss_list = []
val_loss_list = []
train_acc_list = []
val_acc_list = []
# Number of epochs passed to train_model for every experiment.
epoch_num = 30
# Loss passed to train_model for training; evaluation() reads it as a global.
criterion = nn.CrossEntropyLoss()
# Base learning rate; the optimizer cells give the classifier 10x this value.
learning_rate = 3e-5

In [None]:
# Baseline model: pretrained VGG16, nothing frozen in this cell.
vgg = models.vgg16(pretrained=True)
# Replace classifier layers 3 and 6 so the head maps 4096 -> 1000 -> 2 outputs.
vgg.classifier[3], vgg.classifier[6] = nn.Linear(4096, 1000), nn.Linear(1000, 2)
# Xavier-initialize the weights of both new layers (their biases are not touched).
for head_idx in (3, 6):
    nn.init.xavier_uniform_(vgg.classifier[head_idx].weight)
vgg = vgg.to(device)

In [None]:
# Positions inside `<model>.features` of the layers whose weight and bias the
# freezing cells below switch off, grouped per convolutional block.
# NOTE(review): the indices assume the layer layout of torchvision's plain
# (non-batch-norm) vgg16, where the skipped positions hold ReLU / max-pool
# layers — confirm if the architecture changes.
block1 = [0, 2]
block2 = [5, 7]
block3 = [10, 12, 14]
block4 = [17, 19, 21]
block5 = [24, 26, 28]

In [None]:
# Fine-tune the baseline model (no frozen block).
# Two parameter groups: the feature extractor uses the base learning rate, the
# classifier runs at 10x that rate.
optimizer = optim.Adam(
    [
        {'params': vgg.features.parameters()},
        {'params': vgg.classifier.parameters(), 'lr': learning_rate * 10},
    ],
    lr=learning_rate,
    weight_decay=0.001,
)

# Decay LR by a factor of 0.1 every 10 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

vgg, train_loss, train_acc, val_loss, val_acc = train_model(
    vgg, criterion, optimizer, exp_lr_scheduler, epoch_num
)

# Store this run's curves for the comparison plots at the end of the notebook.
for history, curve in (
    (train_loss_list, train_loss),
    (train_acc_list, train_acc),
    (val_loss_list, val_loss),
    (val_acc_list, val_acc),
):
    history.append(curve)

In [None]:
# Model with the 1st convolutional block frozen.
vgg1 = models.vgg16(pretrained=True)
# Replace classifier layers 3 and 6 so the head maps 4096 -> 1000 -> 2 outputs.
vgg1.classifier[3], vgg1.classifier[6] = nn.Linear(4096, 1000), nn.Linear(1000, 2)
for head_idx in (3, 6):
    nn.init.xavier_uniform_(vgg1.classifier[head_idx].weight)
vgg1 = vgg1.to(device)

# Freeze weight and bias of every layer listed in block1.
for layer_idx in block1:
    frozen_layer = vgg1.features[layer_idx]
    frozen_layer.weight.requires_grad = False
    frozen_layer.bias.requires_grad = False

In [None]:
# Optimizer for vgg1: feature extractor at the base learning rate, classifier at 10x.
optimizer = optim.Adam(
    [
        {'params': vgg1.features.parameters()},
        {'params': vgg1.classifier.parameters(), 'lr': learning_rate * 10},
    ],
    lr=learning_rate,
    weight_decay=0.001,
)

# Decay LR by a factor of 0.1 every 10 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

vgg1, train_loss, train_acc, val_loss, val_acc = train_model(
    vgg1, criterion, optimizer, exp_lr_scheduler, epoch_num
)

# Store this run's curves for the comparison plots at the end of the notebook.
for history, curve in (
    (train_loss_list, train_loss),
    (train_acc_list, train_acc),
    (val_loss_list, val_loss),
    (val_acc_list, val_acc),
):
    history.append(curve)

In [None]:
# Model with the first 2 convolutional blocks frozen.
vgg2 = models.vgg16(pretrained=True)
# Replace classifier layers 3 and 6 so the head maps 4096 -> 1000 -> 2 outputs.
vgg2.classifier[3], vgg2.classifier[6] = nn.Linear(4096, 1000), nn.Linear(1000, 2)
for head_idx in (3, 6):
    nn.init.xavier_uniform_(vgg2.classifier[head_idx].weight)
vgg2 = vgg2.to(device)

# Freeze weight and bias of every layer listed in block1 and block2.
for layer_idx in block1 + block2:
    frozen_layer = vgg2.features[layer_idx]
    frozen_layer.weight.requires_grad = False
    frozen_layer.bias.requires_grad = False

In [None]:
# Optimizer for vgg2: feature extractor at the base learning rate, classifier at 10x.
optimizer = optim.Adam(
    [
        {'params': vgg2.features.parameters()},
        {'params': vgg2.classifier.parameters(), 'lr': learning_rate * 10},
    ],
    lr=learning_rate,
    weight_decay=0.001,
)

# Decay LR by a factor of 0.1 every 10 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

vgg2, train_loss, train_acc, val_loss, val_acc = train_model(
    vgg2, criterion, optimizer, exp_lr_scheduler, epoch_num
)

# Store this run's curves for the comparison plots at the end of the notebook.
for history, curve in (
    (train_loss_list, train_loss),
    (train_acc_list, train_acc),
    (val_loss_list, val_loss),
    (val_acc_list, val_acc),
):
    history.append(curve)

In [None]:
# Model with the first 3 convolutional blocks frozen.
vgg3 = models.vgg16(pretrained=True)
# Replace classifier layers 3 and 6 so the head maps 4096 -> 1000 -> 2 outputs.
vgg3.classifier[3], vgg3.classifier[6] = nn.Linear(4096, 1000), nn.Linear(1000, 2)
for head_idx in (3, 6):
    nn.init.xavier_uniform_(vgg3.classifier[head_idx].weight)
vgg3 = vgg3.to(device)

# Freeze weight and bias of every layer listed in block1 through block3.
for layer_idx in block1 + block2 + block3:
    frozen_layer = vgg3.features[layer_idx]
    frozen_layer.weight.requires_grad = False
    frozen_layer.bias.requires_grad = False

In [None]:
# Optimizer for vgg3: feature extractor at the base learning rate, classifier at 10x.
optimizer = optim.Adam(
    [
        {'params': vgg3.features.parameters()},
        {'params': vgg3.classifier.parameters(), 'lr': learning_rate * 10},
    ],
    lr=learning_rate,
    weight_decay=0.001,
)

# Decay LR by a factor of 0.1 every 10 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

vgg3, train_loss, train_acc, val_loss, val_acc = train_model(
    vgg3, criterion, optimizer, exp_lr_scheduler, epoch_num
)

# Store this run's curves for the comparison plots at the end of the notebook.
for history, curve in (
    (train_loss_list, train_loss),
    (train_acc_list, train_acc),
    (val_loss_list, val_loss),
    (val_acc_list, val_acc),
):
    history.append(curve)

In [None]:
# Model with the first 4 convolutional blocks frozen.
vgg4 = models.vgg16(pretrained=True)
# Replace classifier layers 3 and 6 so the head maps 4096 -> 1000 -> 2 outputs.
vgg4.classifier[3], vgg4.classifier[6] = nn.Linear(4096, 1000), nn.Linear(1000, 2)
for head_idx in (3, 6):
    nn.init.xavier_uniform_(vgg4.classifier[head_idx].weight)
vgg4 = vgg4.to(device)

# Freeze weight and bias of every layer listed in block1 through block4.
for layer_idx in block1 + block2 + block3 + block4:
    frozen_layer = vgg4.features[layer_idx]
    frozen_layer.weight.requires_grad = False
    frozen_layer.bias.requires_grad = False

In [None]:
# Optimizer for vgg4: feature extractor at the base learning rate, classifier at 10x.
optimizer = optim.Adam(
    [
        {'params': vgg4.features.parameters()},
        {'params': vgg4.classifier.parameters(), 'lr': learning_rate * 10},
    ],
    lr=learning_rate,
    weight_decay=0.001,
)

# Decay LR by a factor of 0.1 every 10 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

vgg4, train_loss, train_acc, val_loss, val_acc = train_model(
    vgg4, criterion, optimizer, exp_lr_scheduler, epoch_num
)

# Store this run's curves for the comparison plots at the end of the notebook.
for history, curve in (
    (train_loss_list, train_loss),
    (train_acc_list, train_acc),
    (val_loss_list, val_loss),
    (val_acc_list, val_acc),
):
    history.append(curve)

In [None]:
# Model with all 5 convolutional blocks frozen (only the classifier stays trainable).
vgg5 = models.vgg16(pretrained=True)
# Replace classifier layers 3 and 6 so the head maps 4096 -> 1000 -> 2 outputs.
vgg5.classifier[3], vgg5.classifier[6] = nn.Linear(4096, 1000), nn.Linear(1000, 2)
for head_idx in (3, 6):
    nn.init.xavier_uniform_(vgg5.classifier[head_idx].weight)
vgg5 = vgg5.to(device)

# Freeze weight and bias of every layer listed in block1 through block5.
for layer_idx in block1 + block2 + block3 + block4 + block5:
    frozen_layer = vgg5.features[layer_idx]
    frozen_layer.weight.requires_grad = False
    frozen_layer.bias.requires_grad = False

In [None]:
# Optimizer for vgg5: feature extractor at the base learning rate, classifier at 10x.
optimizer = optim.Adam(
    [
        {'params': vgg5.features.parameters()},
        {'params': vgg5.classifier.parameters(), 'lr': learning_rate * 10},
    ],
    lr=learning_rate,
    weight_decay=0.001,
)

# Decay LR by a factor of 0.1 every 10 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

vgg5, train_loss, train_acc, val_loss, val_acc = train_model(
    vgg5, criterion, optimizer, exp_lr_scheduler, epoch_num
)

# Store this run's curves for the comparison plots at the end of the notebook.
for history, curve in (
    (train_loss_list, train_loss),
    (train_acc_list, train_acc),
    (val_loss_list, val_loss),
    (val_acc_list, val_acc),
):
    history.append(curve)

In [None]:
# Directory the trained models are written to. With '' the os.path.join calls in
# the next cell yield bare file names, i.e. paths relative to the working directory.
save_dir = ''

In [None]:
# Write every trained network to '<name>.pth' under save_dir. The module object
# itself is handed to torch.save (not just its state_dict).
for file_stem, trained_model in (
    ('vgg', vgg),
    ('vgg1', vgg1),
    ('vgg2', vgg2),
    ('vgg3', vgg3),
    ('vgg4', vgg4),
    ('vgg5', vgg5),
):
    torch.save(trained_model, os.path.join(save_dir, file_stem + '.pth'))

In [None]:
# One loss figure per experiment. The position in the history lists is what
# loss_plot prints in the title as the number of locked layer(s).
for run_idx in range(6):
    run_train_loss = train_loss_list[run_idx]
    run_val_loss = val_loss_list[run_idx]
    loss_plot(run_train_loss, run_val_loss, run_idx, epoch_num, learning_rate)

In [None]:
# One accuracy figure per experiment. The position in the history lists is what
# acc_plot prints in the title as the number of locked layer(s).
for run_idx in range(6):
    run_train_acc = train_acc_list[run_idx]
    run_val_acc = val_acc_list[run_idx]
    acc_plot(run_train_acc, run_val_acc, run_idx, epoch_num, learning_rate)