## Creating Custom Datasets and Finetuning Pre-trained Networks
In this notebook you have to create custom datasets for PyTorch and use this dataset to finetune certain pre-trained neural networks and observe the results.

In [76]:
# Import Statements
#
# Several of the imports you will need have been added but you will need to provide the
# rest yourself; you should be able to figure out most of the imports as you go through
# the notebook since without proper imports your code will fail to run
#
# All import statements go in this block

from __future__ import division, print_function, unicode_literals
import numpy as np
import torch
import torch.utils.data
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import glob
from scipy import misc
from PIL import Image
import torch.nn as nn

%matplotlib inline
import matplotlib.pyplot as plt

All hyper parameters go in the next block

In [47]:
batch_size = 100
num_epochs = 5
learning_rate = 0.01
filelist = glob.glob('./notMNIST_small/train/A/*.png')
images_0 = np.array([np.array(Image.open(fname)) for fname in filelist])
print(images_0.shape[0])

# images_0 = np.fromiter((misc.imread(path) for path in glob.glob('./notMNIST_small/train/A/*.png')), np.float)

1685


### Creating Custom Datasets
Your first task is to create a pipeline for the custom dataset so that you can load it using a dataloader. Download the dataset provided in the assignment webpage and complete the following block of code so that you can load it as if it was a standard dataset.

In [73]:
class CDATA(torch.utils.data.Dataset): # Extend PyTorch's Dataset class
    def __init__(self, root_dir, train, transform=None):
        # root_dir  - the root directory of the dataset
        # train     - a boolean parameter representing whether to return the training set or the test set
        # transform - the transforms to be applied on the images before returning them
        #
        # In this function store the parameters in instance variables and make a mapping
        # from images to labels and keep it as an instance variable. Make sure to check which
        # dataset is required; train or test; and create the mapping accordingly.
        self.root_dir = root_dir
        self.train = train
        self.transform = transform
        if(self.train):
            filelist = glob.glob(root_dir+'train/A/*.png')
            images_0 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'train/B/*.png')
            images_1 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'train/C/*.png')
            images_2 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'train/D/*.png')
            images_3 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'train/E/*.png')
            images_4 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'train/F/*.png')
            images_5 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'train/G/*.png')
            images_6 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'train/H/*.png')
            images_7 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'train/I/*.png')
            images_8 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'train/J/*.png')
            images_9 = np.array([np.array(Image.open(fname)) for fname in filelist])
        else:
            filelist = glob.glob(root_dir+'test/A/*.png')
            images_0 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'test/B/*.png')
            images_1 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'test/C/*.png')
            images_2 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'test/D/*.png')
            images_3 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'test/E/*.png')
            images_4 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'test/F/*.png')
            images_5 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'test/G/*.png')
            images_6 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'test/H/*.png')
            images_7 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'test/I/*.png')
            images_8 = np.array([np.array(Image.open(fname)) for fname in filelist])
            filelist = glob.glob(root_dir+'test/J/*.png')
            images_9 = np.array([np.array(Image.open(fname)) for fname in filelist])
            
        images = np.vstack((images_0, images_1, images_2, images_3, images_4, images_5, images_6, images_7, images_8, images_9))
        self.images = images
        print(images.shape)
            
        cat = np.zeros((images.shape[0],1))
        test = np.zeros((images_0.shape[0],1))
        cat[0:images_0.shape[0],:] = test
        test = np.ones((images_1.shape[0],1))
        cat[images_0.shape[0]:images_0.shape[0]+images_1.shape[0],:] = test
        test = np.ones((images_2.shape[0],1))*2
        cat[images_0.shape[0]+images_1.shape[0]:images_0.shape[0]+images_1.shape[0]+images_2.shape[0],:] = test
        test = np.ones((images_3.shape[0],1))*3
        cat[images_0.shape[0]+images_1.shape[0]+images_2.shape[0]:images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0],:] = test
        test = np.ones((images_4.shape[0],1))*4
        cat[images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]:images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0],:] = test
        test = np.ones((images_5.shape[0],1))*5
        cat[images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]:images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]+images_5.shape[0],:] = test
        test = np.ones((images_6.shape[0],1))*6
        cat[images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]+images_5.shape[0]:images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]+images_5.shape[0]+images_6.shape[0],:] = test
        test = np.ones((images_7.shape[0],1))*7
        cat[images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]+images_5.shape[0]+images_6.shape[0]:images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]+images_5.shape[0]+images_6.shape[0]+images_7.shape[0],:] = test
        test = np.ones((images_8.shape[0],1))*8
        cat[images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]+images_5.shape[0]+images_6.shape[0]+images_7.shape[0]:images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]+images_5.shape[0]+images_6.shape[0]+images_7.shape[0]+images_8.shape[0],:] = test
        test = np.ones((images_9.shape[0],1))*9
        cat[images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]+images_5.shape[0]+images_6.shape[0]+images_7.shape[0]+images_8.shape[0]:images_0.shape[0]+images_1.shape[0]+images_2.shape[0]+images_3.shape[0]+images_4.shape[0]+images_5.shape[0]+images_6.shape[0]+images_7.shape[0]+images_8.shape[0]+images_9.shape[0],:] = test
        self.cat = cat
        
    def __len__(self):
        # return the size of the dataset (total number of images) as an integer
        # this should be rather easy if you created a mapping in __init__
        return self.images.shape[0]
        
    def __getitem__(self, idx):
        # idx - the index of the sample requested
        #
        # Open the image correspoding to idx, apply transforms on it and return a tuple (image, label)
        # where label is an integer from 0-9 (since notMNIST has 10 classes)
        sample = self.images[idx,:,:]
        print(sample.shape)
        scale = transforms.Scale((224,224))
        toTensor = transforms.ToTensor()
        if self.transform:
            for i, tsfrm in enumerate([toTensor]):
                transformed_sample = tsfrm(sample)
#             transformed_sample = self.transform(sample)
        return transformed_sample, self.cat[idx,:]
            

We shall now load the dataset. You just need to supply the `root_dir` in the block below and if you implemented the above block correctly, it should work without any issues.

In [75]:
composed_transform = transforms.Compose([transforms.Scale((224,224)),transforms.ToTensor()])
print(composed_transform)
train_dataset = CDATA(root_dir='./notMNIST_small/', train=True, transform=composed_transform) # Supply proper root_dir
test_dataset = CDATA(root_dir='./notMNIST_small/', train=False, transform=composed_transform) # Supply proper root_dir

# Let's check the size of the datasets, if implemented correctly they should be 16854 and 1870 respectively
print('Size of train dataset: %d' % len(train_dataset))
print('Size of test dataset: %d' % len(test_dataset))

# Create loaders for the dataset
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Let's look at one batch of train and test images
def imshow(img):
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    
train_dataiter = iter(train_loader)
train_images, train_labels = train_dataiter.next()
print("Train images")
imshow(torchvision.utils.make_grid(train_images))

test_dataiter = iter(test_loader)
test_images, test_labels = test_dataiter.next()
print("Test images")
imshow(torchvision.utils.make_grid(test_images))

<torchvision.transforms.Compose object at 0x7f3d561ab110>
(16854, 28, 28)
(1870, 28, 28)
Size of train dataset: 16854
Size of test dataset: 1870
(28, 28)


ValueError: axes don't match array

### VGG-16 and Resnet-18
Now that you have created the dataset we can use it for training and testing neural networks. VGG-16 and Resnet-18 are both well-known deep-net architectures. VGG-16 is named as such since it has 16 layers in total (13 convolution and 3 fully-connected). Resnet-18 on the other hand is a Resnet architecture that uses skip-connections. PyTorch provides pre-trained models of both these architectures and we shall be using them directly. If you are interested in knowing how they have been defined do take a look at the source, [VGG](https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py), [Resnet](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py)

In [85]:
vgg16 = models.vgg16(pretrained=True)
resnet18 = models.resnet18(pretrained=True)

# Code to change the last layers so that they only have 10 classes as output
vgg16.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(4096, 10),
)
resnet18.fc = nn.Linear(resnet18.fc.in_features, 10)

# Add code for using CUDA here if it is available

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /home/abhishek/.torch/models/vgg16-397923af.pth


URLError: <urlopen error [Errno 1] _ssl.c:510: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure>

Define loss functions and optimizers

In [None]:
criterion = nn.CrossEntropyLoss()# Define cross-entropy loss
optimizer_vgg16 = torch.optim.Adam(vgg16.classifier.parameters(), lr=learning_rate)# Use Adam optimizer, use learning_rate hyper parameter
optimizer_resnet18 = torch.optim.Adam(resnet18.fc.parameters(), lr=learning_rate)# Use Adam optimizer, use learning_rate hyper parameter

#### Finetuning
Finetuning is nothing but training models after their weights have been loaded. This allows us to start at a better position than training from scratch. Since the models created already have weights loaded, you simply need to write a training loop.

In [86]:
def train_vgg16():
    # Write loops so as to train the model for N epochs, use num_epochs hyper parameter
    # Train/finetune the VGG-16 network
    # Store the losses for every epoch and generate a graph using matplotlib
    loss_store = nn.zeros(num_epochs)
    for epoch in range(num_epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            # get the inputs
            inputs, labels = data

            # wrap them in Variable
            inputs, labels = Variable(inputs), Variable(labels)

            # zero the parameter gradients
            optimizer_vgg16.zero_grad()

            # forward + backward + optimize
            outputs = vgg16(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer_vgg16.step()

            # print statistics
            running_loss += loss.data[0]
#             if i % 2000 == 1999:    # print every 2000 mini-batches
#                 print('[%d, %5d] loss: %.3f' %
#                       (epoch + 1, i + 1, running_loss / 2000))
#                 running_loss = 0.0
        loss_store[epoch] = running_loss
    plt.plot(loss_store)
    plt.show()
            

    print('Finished Training')
    
def train_resnet18():
    # Same as above except now using the Resnet-18 network
    # Write loops so as to train the model for N epochs, use num_epochs hyper parameter
    # Train/finetune the VGG-16 network
    # Store the losses for every epoch and generate a graph using matplotlib
    loss_store = nn.zeros(num_epochs)
    for epoch in range(num_epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            # get the inputs
            inputs, labels = data

            # wrap them in Variable
            inputs, labels = Variable(inputs), Variable(labels)

            # zero the parameter gradients
            optimizer_resnet18.zero_grad()

            # forward + backward + optimize
            outputs = resnet18(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer_resnet18.step()

            # print statistics
            running_loss += loss.data[0]
            if i % 50 == 49:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss))
#                 running_loss = 0.0
        loss_store[epoch] = running_loss
    plt.plot(loss_store)
    plt.show()
    print('Finished Training')

Now let us start the training/finetuning

In [None]:
%time train_vgg16()
%time train_resnet18()

#### Testing
Once finetuning is done we need to test it on the test set.

In [83]:
def test(model):
    # Write loops for testing the model on the test set
    # You should also print out the accuracy of the model
    correct = 0
    total = 0
    for data in test_loader:
        images, labels = data
        outputs = model(Variable(images))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()

    print('Accuracy of the network: %d %%' % (
        100 * correct / total))

Test the models

In [84]:
%time test(vgg16)
%time test(resnet18)

NameError: name 'vgg16' is not defined

You can add more code to save the models if you want but otherwise this notebook is complete