# Model training via transfer learning

In [3]:
!git clone https://github.com/jgbeni/BirdPics.git
!mv BirdPics/utils .
!rm -rf BirdPics

fatal: destination path 'BirdPics' already exists and is not an empty directory.


In [4]:
import utils.data_preprocessing as dp
import numpy as np
import h5py
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import os
from tqdm import tqdm
from google.colab import drive

drive.mount('/content/drive',force_remount=True)
device = "cuda" if torch.cuda.is_available() else "cpu"

Mounted at /content/drive


In [5]:
dir = '/content/drive/MyDrive/BirdPics'
os.makedirs(dir+'/models',exist_ok=True)

In [6]:
f = h5py.File(dir+'/data/bird_data.hdf5', "r")

In [7]:
X_train,Y_train = f['train']['X'],np.copy(f['train']['Y'])
X_val,Y_val = f['val']['X'],np.copy(f['val']['Y'])

In [8]:
Y_train = dp.prepare_labels(Y_train)
Y_val = dp.prepare_labels(Y_val)

In [9]:
train_dataset = dp.HDF5Dataset(X_train,Y_train,train=True)
val_dataset = dp.HDF5Dataset(X_val,Y_val)

In [17]:
batch_size = 64

train_loader = DataLoader(train_dataset, num_workers=2, batch_size=batch_size, pin_memory=True,
                                                shuffle=True)
val_loader = DataLoader(val_dataset, num_workers=2, batch_size=batch_size, pin_memory=True,
                                                shuffle=True)

## Choosing the model

In [18]:
# Load pre-trained VGG16 model
model = models.vgg16(weights='DEFAULT')
model_path = 'models/vgg16_retrained.pth'
loss_acc_path = 'models/vgg16_retrained.npz'
# Print the model architecture
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [19]:
model.classifier[-1] = nn.Linear(in_features=4096, out_features=3)
model = model.to(device)

## Training

In [20]:
# Define the loss function
criterion = nn.CrossEntropyLoss().to(device)

# Define the optimizer
optimizer = optim.AdamW(model.parameters(), lr=0.00005 ,weight_decay=1e-3)

#LR decay
decayRate = 0.96
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=decayRate)

In [21]:
n_total_steps = len(train_loader)
num_epochs = 20

max_val = 0.
PATH = os.path.join(dir,model_path)

train_loss,val_loss = np.zeros(num_epochs,dtype=np.float32),np.zeros(num_epochs,dtype=np.float32)
train_acc,val_acc = np.zeros(num_epochs,dtype=np.float32),np.zeros(num_epochs,dtype=np.float32)

for epoch in range(num_epochs):
    train_correct,train_samples = 0,0
    val_correct,val_samples = 0,0
    for i, (images, labels) in enumerate(tqdm(train_loader)):
        images = images.to(device)
        labels = labels.type(torch.LongTensor)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        train_loss[epoch] += loss.item()/len(train_loader)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs, 1)
        train_samples += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    train_acc[epoch] = 100.0 * train_correct / train_samples
    print('train loss %.4f - train acc. %.2f' %(train_loss[epoch],train_acc[epoch]))


    for val_images, val_labels in val_loader:
        val_images = val_images.to(device)
        val_labels = val_labels.type(torch.LongTensor)
        val_labels = val_labels.to(device)
        outputs = model(val_images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs, 1)
        val_samples += val_labels.size(0)
        val_correct += (predicted == val_labels).sum().item()

        val_loss[epoch] += criterion(outputs, val_labels).item()/len(val_loader)
    val_acc[epoch] = 100.0 * val_correct / val_samples
    if val_acc[epoch] > max_val:
        max_val = val_acc[epoch]
        torch.save(model.state_dict(), PATH)
    print('val loss %.4f - val acc. %.2f' %(val_loss[epoch],val_acc[epoch]))

    lr_scheduler.step()

print('Finished Training')

100%|██████████| 312/312 [04:26<00:00,  1.17it/s]

train loss 1.2603 - train acc. 46.30





val loss 0.9744 - val acc. 61.62


100%|██████████| 312/312 [04:43<00:00,  1.10it/s]

train loss 0.9377 - train acc. 52.58





val loss 0.8637 - val acc. 67.01


100%|██████████| 312/312 [04:43<00:00,  1.10it/s]

train loss 0.8732 - train acc. 56.12





val loss 0.7700 - val acc. 74.46


100%|██████████| 312/312 [04:43<00:00,  1.10it/s]

train loss 0.8066 - train acc. 59.61





val loss 0.7350 - val acc. 76.67


100%|██████████| 312/312 [04:43<00:00,  1.10it/s]

train loss 0.7697 - train acc. 61.63





val loss 0.7193 - val acc. 78.38


100%|██████████| 312/312 [04:43<00:00,  1.10it/s]

train loss 0.7309 - train acc. 63.77





val loss 0.6518 - val acc. 78.82


100%|██████████| 312/312 [04:31<00:00,  1.15it/s]

train loss 0.6987 - train acc. 65.06





val loss 0.6676 - val acc. 79.95


100%|██████████| 312/312 [04:31<00:00,  1.15it/s]

train loss 0.6839 - train acc. 66.21





val loss 0.5823 - val acc. 83.43


100%|██████████| 312/312 [04:31<00:00,  1.15it/s]

train loss 0.6642 - train acc. 66.64





val loss 0.5422 - val acc. 84.80


100%|██████████| 312/312 [04:31<00:00,  1.15it/s]

train loss 0.6501 - train acc. 67.21





val loss 0.5248 - val acc. 85.83


100%|██████████| 312/312 [04:32<00:00,  1.14it/s]

train loss 0.6340 - train acc. 68.06





val loss 0.5176 - val acc. 84.36


100%|██████████| 312/312 [04:32<00:00,  1.15it/s]

train loss 0.6163 - train acc. 68.95





val loss 0.4924 - val acc. 87.16


100%|██████████| 312/312 [04:32<00:00,  1.15it/s]

train loss 0.6113 - train acc. 69.06





val loss 0.4986 - val acc. 84.51


100%|██████████| 312/312 [04:31<00:00,  1.15it/s]

train loss 0.6091 - train acc. 69.38





val loss 0.4803 - val acc. 87.30


100%|██████████| 312/312 [04:32<00:00,  1.15it/s]

train loss 0.5963 - train acc. 69.73





val loss 0.4663 - val acc. 86.57


100%|██████████| 312/312 [04:32<00:00,  1.15it/s]

train loss 0.5876 - train acc. 70.07





val loss 0.4302 - val acc. 87.45


100%|██████████| 312/312 [04:32<00:00,  1.15it/s]

train loss 0.5831 - train acc. 70.31





val loss 0.4324 - val acc. 86.08


100%|██████████| 312/312 [04:31<00:00,  1.15it/s]

train loss 0.5766 - train acc. 70.51





val loss 0.4005 - val acc. 87.60


100%|██████████| 312/312 [04:32<00:00,  1.15it/s]

train loss 0.5637 - train acc. 71.33





val loss 0.4129 - val acc. 87.50


100%|██████████| 312/312 [04:31<00:00,  1.15it/s]

train loss 0.5690 - train acc. 70.80





val loss 0.4374 - val acc. 88.77
Finished Training


In [22]:
np.savez(os.path.join(dir,loss_acc_path),train_loss=train_loss,val_loss=val_loss,train_acc=train_acc,val_acc=val_acc)