In [3]:
!git clone https://github.com/jgbeni/BirdPics.git
!mv BirdPics/utils .
!rm -r BirdPics
!ls -l

Cloning into 'BirdPics'...
remote: Enumerating objects: 66, done.[K
remote: Counting objects: 100% (66/66), done.[K
remote: Compressing objects: 100% (54/54), done.[K
remote: Total 66 (delta 22), reused 42 (delta 8), pack-reused 0 (from 0)[K
Receiving objects: 100% (66/66), 6.79 MiB | 14.74 MiB/s, done.
Resolving deltas: 100% (22/22), done.
mv: cannot move 'BirdPics/utils' to './utils': Directory not empty
total 8
drwxr-xr-x 1 root root 4096 Nov  4 14:36 sample_data
drwxr-xr-x 3 root root 4096 Nov  6 13:57 utils


In [4]:
import utils.data_preprocessing as dp
import numpy as np
import h5py
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import os
from tqdm import tqdm
from google.colab import drive

drive.mount('/content/drive',force_remount=True)
device = "cuda" if torch.cuda.is_available() else "cpu"

Mounted at /content/drive


In [5]:
dir = '/content/drive/MyDrive/BirdPics'
os.makedirs(dir+'/models',exist_ok=True)

In [6]:
f = h5py.File(dir+'/data/bird_data.hdf5', "r")

In [7]:
X_train,Y_train = f['train']['X'],np.copy(f['train']['Y'])
X_val,Y_val = f['val']['X'],np.copy(f['val']['Y'])

In [8]:
Y_train = dp.prepare_labels(Y_train)
Y_val = dp.prepare_labels(Y_val)

In [9]:
train_dataset = dp.HDF5Dataset(X_train,Y_train,train=True)
val_dataset = dp.HDF5Dataset(X_val,Y_val)

In [None]:
batch_size = 64

train_loader = DataLoader(train_dataset, num_workers=8, batch_size=batch_size, pin_memory=True,
                                                shuffle=True)
val_loader = DataLoader(val_dataset, num_workers=8, batch_size=batch_size, pin_memory=True,
                                                shuffle=True)

## Choosing the model

In [14]:
# Load pre-trained ResNet50 model
model = models.vgg19(weights='DEFAULT')
model_path = 'models/vgg19_retrained.pth'
loss_acc_path = 'models/vgg19_retrained.npz'
# Print the model architecture
print(model)

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth


100%|██████████| 548M/548M [00:07<00:00, 79.2MB/s]


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [15]:
model.classifier[-1] = nn.Linear(in_features=4096, out_features=3)
model = model.to(device)

## Training

In [None]:
# Define the loss function
criterion = nn.CrossEntropyLoss().to(device)

# Define the optimizer
optimizer = optim.AdamW(model.parameters(), lr=0.00005 ,weight_decay=1e-3)

#LR decay
decayRate = 0.96
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=decayRate)

In [None]:
n_total_steps = len(train_loader)
num_epochs = 20

max_val = 0.
PATH = os.path.join(dir,model_path)

train_loss,val_loss = np.zeros(num_epochs,dtype=np.float32),np.zeros(num_epochs,dtype=np.float32)
train_acc,val_acc = np.zeros(num_epochs,dtype=np.float32),np.zeros(num_epochs,dtype=np.float32)

for epoch in range(num_epochs):
    train_correct,train_samples = 0,0
    val_correct,val_samples = 0,0
    for i, (images, labels) in enumerate(tqdm(train_loader)):
        # origin shape: [32, 3, 224, 224] = 32, 3, 1024
        # input_layer: 3 input channels, 6 output channels, 5 kernel size
        images = images.to(device)
        labels = labels.type(torch.LongTensor)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        train_loss[epoch] += loss.item()/len(train_loader)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs, 1)
        train_samples += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    train_acc[epoch] = 100.0 * train_correct / train_samples
    print('train loss %.4f - train acc. %.2f' %(train_loss[epoch],train_acc[epoch]))


    for val_images, val_labels in val_loader:
        val_images = val_images.to(device)
        val_labels = val_labels.type(torch.LongTensor)
        val_labels = val_labels.to(device)
        outputs = model(val_images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs, 1)
        val_samples += val_labels.size(0)
        val_correct += (predicted == val_labels).sum().item()

        val_loss[epoch] += criterion(outputs, val_labels).item()/len(val_loader)
    val_acc[epoch] = 100.0 * val_correct / val_samples
    if val_acc[epoch] > max_val:
        max_val = val_acc[epoch]
        torch.save(model.state_dict(), PATH)
    print('val loss %.4f - val acc. %.2f' %(val_loss[epoch],val_acc[epoch]))

    lr_scheduler.step()

print('Finished Training')

100%|██████████| 312/312 [02:03<00:00,  2.53it/s]

train loss 2.221 - train acc. 37.10





val loss 1.051 - val acc. 50.29


100%|██████████| 312/312 [02:03<00:00,  2.53it/s]

train loss 1.051 - train acc. 42.75





val loss 1.039 - val acc. 51.67


100%|██████████| 312/312 [02:09<00:00,  2.42it/s]

train loss 1.040 - train acc. 43.22





val loss 0.993 - val acc. 55.15


 51%|█████     | 159/312 [01:06<00:47,  3.19it/s]

In [None]:
np.savez(os.path.join(dir,loss_acc_path),train_loss=train_loss,val_loss=val_loss,train_acc=train_acc,val_acc=val_acc)