In [21]:
import torch
from torchvision import transforms, datasets, models
from pathlib import Path

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

#%pip install torchsummary 
from torchsummary import summary

import torch.nn as nn

In [2]:
# Detect available CUDA devices.
# `gpu_count` and `multi_gpu` are always defined (0 / False on CPU-only
# machines) so later cells can read them without a NameError.
train_on_gpu = torch.cuda.is_available()
print(f'Train on gpu: {train_on_gpu}')

gpu_count = torch.cuda.device_count() if train_on_gpu else 0
multi_gpu = gpu_count > 1

if train_on_gpu:
    print(f'{gpu_count} gpus detected.')

Train on gpu: False


In [3]:
# Root directory of the dataset; each split is read from the sub-folder
# BASE_PATH / <split name>.
BASE_PATH = Path('/project/volume/data/out/NIMH-CHEFS')

# Split names, used both as sub-folder names and as dictionary keys.
TRAIN, VAL, TEST = 'train', 'val', 'test'

In [23]:
# Preprocessing pipelines, one per dataset split.

# Per-channel mean / std used for normalization (vgg16 / imagenet standard).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]


def _eval_transform():
    """Build the deterministic pipeline shared by the 'val' and 'test' splits.

    Returns:
        A Compose that resizes to 256, center-crops to 224, converts to a
        tensor and normalizes with the ImageNet statistics.
    """
    return transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])


data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(degrees=30),
        # ColorJitter() with its default arguments (all 0) leaves the image
        # unchanged, so explicit strengths are required for it to augment.
        # These mild values are a starting point; tune as needed.
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(size=224, scale=(0.08, 1.0)),
        transforms.ToTensor(),  # automatically scales the values from [0,255] to [0,1]
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]),
    'val': _eval_transform(),
    'test': _eval_transform(),
}


# Build one ImageFolder dataset per split, reading from BASE_PATH/<split>
# and applying that split's transform pipeline.
image_datasets = {
    split: datasets.ImageFolder(BASE_PATH / split, transform=data_transforms[split])
    for split in (TRAIN, VAL, TEST)
}


# Create dataloaders to avoid loading all of the data into memory at once.
# Only the training split is shuffled; validation and test are iterated in
# dataset order so evaluation runs see the samples in the same order each time.
dataloaders = {
    x: torch.utils.data.DataLoader(
        image_datasets[x],
        batch_size=64,
        shuffle=(x == TRAIN),
        num_workers=4
    )
    for x in [TRAIN, VAL, TEST]
}

# Number of images in each split, keyed by split name.
dataset_sizes = {name: len(split_data) for name, split_data in image_datasets.items()}

for x in dataset_sizes:
    print(f"[INFO] Number of images in {x} set ...{dataset_sizes[x]}")

# Class labels are taken from the training split.
class_names = image_datasets[TRAIN].classes
n_classes = len(class_names)
print(n_classes)
print("[INFO] Classes: ", class_names)

[INFO] Number of images in train set ...320
[INFO] Number of images in val set ...106
[INFO] Number of images in test set ...107
5
[INFO] Classes:  ['Afraid', 'Angry', 'Happy', 'Neutral', 'Sad']


In [5]:
# Pull a single batch from the training loader to sanity-check tensor shapes.
trainiter = iter(dataloaders['train'])
features, labels = next(trainiter)
# features: batch_size, color_channels, height, width
# labels:   batch_size
print(features.shape, labels.shape, sep='\n')

torch.Size([64, 3, 224, 224])
torch.Size([64])


In [52]:
# Load VGG16 initialised with the IMAGENET1K_V1 pretrained weights.
vgg16 = models.vgg16(weights="IMAGENET1K_V1")

In [55]:
# Freeze the `features` sub-module: its parameters will not receive gradients,
# so only the classifier parameters remain trainable.
for param in vgg16.features.parameters():
    param.requires_grad_(False)

In [56]:
# Display the stock classifier head; its last Linear layer is replaced
# further down with one sized to n_classes.
vgg16.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [57]:
# Replace the last classifier layer with a Linear layer that outputs
# n_classes values, keeping every other classifier layer as is.
# The swap is done in place on the existing Sequential, so no temporary list
# named `features` is created (that name already holds the image batch
# fetched from the training dataloader and must not be overwritten).
n_inputs = vgg16.classifier[-1].in_features
vgg16.classifier[-1] = nn.Linear(in_features=n_inputs, out_features=n_classes)

vgg16.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=5, bias=True)
)

In [61]:
# Count all parameters of the model.
total_params = sum(map(torch.Tensor.numel, vgg16.parameters()))
print(f'{total_params:,} total parameters.')

# Count only the parameters that still require gradients.
total_trainable_params = sum(
    map(torch.Tensor.numel, filter(lambda p: p.requires_grad, vgg16.parameters()))
)
print(f'{total_trainable_params:,} training parameters.')

134,281,029 total parameters.
119,566,341 training parameters.


In [64]:
# Move the model to the GPU when CUDA was detected; otherwise leave it as is.
if train_on_gpu:
    vgg16 = vgg16.to(device='cuda')

In [65]:
# Print a layer-by-layer summary for 3x224x224 inputs with a batch size of 64.
# NOTE(review): device is hard-coded to 'cuda' although the model is only moved
# to the GPU when train_on_gpu is True — confirm this is intended on CPU-only machines.
summary(vgg16, (3, 224, 224), batch_size=64, device='cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [64, 64, 224, 224]           1,792
              ReLU-2         [64, 64, 224, 224]               0
            Conv2d-3         [64, 64, 224, 224]          36,928
              ReLU-4         [64, 64, 224, 224]               0
         MaxPool2d-5         [64, 64, 112, 112]               0
            Conv2d-6        [64, 128, 112, 112]          73,856
              ReLU-7        [64, 128, 112, 112]               0
            Conv2d-8        [64, 128, 112, 112]         147,584
              ReLU-9        [64, 128, 112, 112]               0
        MaxPool2d-10          [64, 128, 56, 56]               0
           Conv2d-11          [64, 256, 56, 56]         295,168
             ReLU-12          [64, 256, 56, 56]               0
           Conv2d-13          [64, 256, 56, 56]         590,080
             ReLU-14          [64, 256,