In [1]:

# TODO: Enter the foldername in your Drive where you have saved the unzipped
# assignment folder, e.g. 'cs231n/assignments/assignment1/'
FOLDERNAME = 'home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier/'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier')

# This downloads the CIFAR-10 dataset to your Drive
# if it doesn't already exist.
%cd /$FOLDERNAME/flowers/
!bash get_datasets.sh
%cd /$FOLDERNAME

/home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier/flowers
bash: get_datasets.sh: No such file or directory
/home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier


In [2]:
import torch
from torchvision import datasets, transforms
from torch.autograd import Variable

import os
path = '/home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier'
print(os.listdir(f'{path}/flowers'))


# Any results you write to the current directory are saved as output.

['rose', 'daisy', 'dandelion', 'flowers', 'sunflower', 'tulip']


**Image augmentation and normalization** 

- Transforms can be chained together using Compose
- In image augmentation we randomly flip images, so that our model can detect wrongly oriented images too
- All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least 224. 
- Normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225]
- We first Resize the image to 256 then crop it to 224, so that it doesnt cut important features

In [3]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
                                transforms.Resize(256),
                                transforms.RandomResizedCrop(224),
                                transforms.RandomHorizontalFlip(),
                                transforms.ToTensor(),
                                transforms.Normalize(mean, std)])

test_transform = transforms.Compose([
                                transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize(mean, std)])

In [4]:
path = '/home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier'
#print(os.listdir(f'{path}/flowers'))
data_dir = f'{path}/flowers'


A call to ImageFolder(Path, Transform) applies our transformations to all the images in the specified directory.
We will create a dictorionary called img_dataset for train and test folder**

In [5]:
img_datasets ={}

In [6]:
# That's how easily you can for images folders in Pytorch for further operations
img_datasets['train']= datasets.ImageFolder(data_dir , train_transform)
img_datasets['test']= datasets.ImageFolder(data_dir, test_transform)


Classes Present

In [7]:
# these gets extracted from the folder name
train_class_names = img_datasets['train'].classes
print("train", train_class_names)

test_class_names = img_datasets['test'].classes
print("test",test_class_names)

train ['daisy', 'dandelion', 'flowers', 'rose', 'sunflower', 'tulip']
test ['daisy', 'dandelion', 'flowers', 'rose', 'sunflower', 'tulip']


In [8]:
# these gets extracted from the folder name - class label mapping
train_class_idx = img_datasets['train'].class_to_idx
print("train",train_class_idx)

test_class_idx = img_datasets['test'].class_to_idx
print("test",test_class_idx)

train {'daisy': 0, 'dandelion': 1, 'flowers': 2, 'rose': 3, 'sunflower': 4, 'tulip': 5}
test {'daisy': 0, 'dandelion': 1, 'flowers': 2, 'rose': 3, 'sunflower': 4, 'tulip': 5}


Creating Train & Test DataLoaders

In [9]:
train_loader = torch.utils.data.DataLoader(img_datasets['train'],
                                                   batch_size=10,
                                                   shuffle=True,
                                                   num_workers=4)

test_loader = torch.utils.data.DataLoader(img_datasets['test'],
                                                   batch_size=10,
                                                   shuffle=True,
                                                   num_workers=4)

Let's examing a Batch of training Data

In [10]:
train_images , labels = next(iter(train_loader))
print("train", train_images.shape)

test_images , labels = next(iter(test_loader))
print("test", test_images.shape)

train torch.Size([10, 3, 224, 224])
test torch.Size([10, 3, 224, 224])


- 10 - number of images in a single batch
- 3 - number channels 
- 224 - width & height of the image

In [11]:
# lets look at the labels
labels

tensor([3, 1, 0, 0, 2, 2, 2, 3, 3, 2])

All of the pretrained models are present inside torchvision , in this tutorial we will use vgg16 pretrained layer.
PS: In Kaggle to download the pretrained model , you need to set Internet to On in settings.

In [12]:
import torchvision.models as models

model = models.vgg16(pretrained=True)

**Freezing model's layers:**

We will freeze all the layers in the network except the final layer.
requires_grad == False will freeze the parameters so that the gradients are not computed in backward() i.e. weights of these layers won't be trained

In [13]:
for param in model.parameters():
    param.required_grad = False

In [14]:
# Now let's check the model archietecture
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

If you remember we have five classes i.e. five class image classification , in the above print out if you look closely the (classifier)
section - this is doing something else. We need to change the classifier to make it a 5 class classifier.

we need to feed the no of input features to the linear layer (classifier[0]) to our newly created linear layer and output would be 5.

In [15]:
num_of_inputs = model.classifier[0].in_features
num_of_inputs

25088

In [16]:
# restructaring the classifier
import torch.nn as nn
model.classifier = nn.Sequential(
                      nn.Linear(num_of_inputs, 5),
                        nn.LogSoftmax(dim=1))

In [17]:
# Now let's check the model archietecture again to see the changes 
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

Hope you can see the changes in the classifier layer

In [18]:
# check if CUDA is available
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')
    CUDA_LAUNCH_BLOCKING=1
# move tensors to GPU if CUDA is available
if train_on_gpu:
    model.cuda()

CUDA is available!  Training on GPU ...


In [19]:
# loss function and optimizer
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.classifier.parameters(), lr=0.001)

In [34]:
# number of epochs to train the model
n_epochs = 10


for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    train_accuracy = 0
    
    ###################
    # train the model #
    ###################
    model.train() # prep model for training
    for data, target in train_loader:
        print(data)
        if train_on_gpu:
            CUDA_LAUNCH_BLOCKING=1
            data, target = Variable(data.cuda()), Variable(target.cuda())
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item()*data.size(0)
        #calculate accuracy
        ps = torch.exp(output)
        top_p, top_class = ps.topk(1, dim=1)
        equals = top_class == target.view(*top_class.shape)
        train_accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
    
# calculate average loss over an epoch
    train_loss = train_loss/len(train_loader.dataset)

    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
            epoch+1, 
            train_loss
            ))
    print(f"Train accuracy: {train_accuracy/len(train_loader):.3f}")


tensor([[[[ 2.1462,  1.9920,  1.8893,  ...,  1.0502,  1.3584,  1.4954],
          [ 2.1804,  1.9920,  1.8550,  ...,  1.3242,  1.4954,  1.6153],
          [ 2.1633,  2.0092,  1.8379,  ...,  1.1529,  1.3755,  1.5297],
          ...,
          [-2.0837, -2.0837, -1.9809,  ...,  1.5982,  1.7180,  1.7694],
          [-2.0494, -2.0323, -1.8610,  ...,  1.6324,  1.7009,  1.8037],
          [-1.9980, -1.9467, -1.7412,  ...,  1.9064,  1.9064,  2.0434]],

         [[-0.2675, -0.6352, -1.0028,  ...,  1.0980,  0.9755,  0.9055],
          [-0.3200, -0.5651, -0.9328,  ...,  0.6954,  0.7129,  0.7829],
          [-0.4601, -0.4951, -0.8452,  ...,  0.2752,  0.5903,  0.7829],
          ...,
          [-1.1253, -1.1078, -1.0903,  ..., -1.5980, -1.4755, -1.3880],
          [-1.0028, -1.0903, -1.0903,  ..., -1.1954, -0.9853, -0.9153],
          [-0.9853, -1.1078, -1.1429,  ..., -0.4076, -0.2325, -0.1625]],

         [[ 1.2457,  0.8797,  0.4962,  ..., -1.5430, -1.2467, -0.5321],
          [ 1.2108,  0.9319,  

RuntimeError: CUDA error: device-side assert triggered

In [32]:
# Checking Test Performence
test_accuracy = 0
model.eval() # prep model for evaluation
for data, target in test_loader:
    if train_on_gpu:
        data, target = Variable(data.cuda()), Variable(target.cuda())
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    # calculate the loss
    loss = criterion(output, target)
    #calculate accuracy
    ps = torch.exp(output)
    top_p, top_class = ps.topk(1, dim=1)
    equals = top_class == target.view(*top_class.shape)
    test_accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

print(f"Test accuracy: {test_accuracy/len(test_loader):.3f}")

RuntimeError: CUDA error: device-side assert triggered

Accuracy can be improved by changing the classifer archietecture !! 