In [1]:
import torch
import torchvision
import sklearn.metrics as metrics
import numpy as np
import sys
from torch.utils.data import Dataset, random_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import time
import datetime
from helper import get_cat_count, count_parameters, compute_confusion_matrix, show_examples, plot_training_loss, plot_accuracy, plot_confusion_matrix

# COMP9444 Assignment 2

## Cat breed classification

## Neural Network

**student.py**

UNSW COMP9444 Neural Networks and Deep Learning

You may modify this file however you wish, including creating additional
variables, functions, classes, etc., so long as your code runs with the
hw2main.py file unmodified, and you are only using the approved packages.

You have been given some default values for the variables train_val_split,
batch_size as well as the transform function.
You are encouraged to modify these to improve the performance of your model.

**Answer to Question:**

Briefly describe how your program works, and explain any design and training
decisions you made along the way.

In [2]:
############################################################################
######     Specify transform(s) to be applied to the input images     ######
############################################################################

def transform(mode):
    """
    Called when loading the data. Visit this URL for more information:
    https://pytorch.org/vision/stable/transforms.html
    You may specify different transforms for training and testing
    """

    # channel size = 3

def transform(mode):
    """
    Called when loading the data. Visit this URL for more information:
    https://pytorch.org/vision/stable/transforms.html
    You may specify different transforms for training and testing
    """
    # Data Augmentation
    if mode == 'train':
        return transforms.Compose(
            [   
                transforms.RandomResizedCrop(size=80, scale=(0.55, 1.0), ratio=(0.75, 1.3)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomPerspective(p=0.2),
                transforms.RandomAffine(degrees=(-15, 15), translate=(0.0, 0.5)),
                transforms.RandomRotation((-10,10)),
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.1, hue=0.02),
                transforms.RandomPosterize(bits=3, p=0.3),
                transforms.RandomEqualize(p=0.1),
                transforms.RandomGrayscale(p=0.01),
                transforms.RandomPerspective(distortion_scale=0.05, p=0.1, fill=0),
                transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.5),
                transforms.ToTensor()
            ]
        )
    # Keep the testing data original to ensure accuracy
    elif mode == 'test':
        return transforms.Compose(
            [   
                transforms.ToTensor()
            ]
        )

In [3]:
#################################################################################
#####                      Specify NN to be used                           ######
#################################################################################

### Simplified implementation of VGG16 with 12 layers instead of 16.
### Cut layer = 256 - 256 conv layer. 512-512 * 3 conv layers at the end.
### Reduced number of nodes on FC layer from 4096 to 1024.
vgg_12 = [64, 64, 'maxpool', 128, 128, 'maxpool', 256, 256, 'maxpool', 512, 512, 512, 'maxpool', 'avgpool', 'fc1', 'fc2', 'fc3']    
##########################################################################################
# trying to take some inspirations from vgg16 but with less channels and fc layer nodes. #
##########################################################################################
class Network(nn.Module):

    def __init__(self):
        super().__init__()
        
        self.conv_layers = nn.Sequential(
            ######### block 1 #########
            nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ELU(inplace=True),
            
            nn.Conv2d(64, 64, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ELU(inplace=True),
            
            nn.MaxPool2d((2, 2), stride=(2, 2)),
            
            
            ######### block 2 #########
            nn.Conv2d(64, 128, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(128),
            nn.ELU(inplace=True),
            
            nn.Conv2d(128, 128, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(128),
            nn.ELU(inplace=True),
            
            nn.MaxPool2d((2, 2), stride=(2, 2)),
            
            ######### block 3 #########   
            nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ELU(inplace=True),
        
            
            nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ELU(inplace=True),
            
            nn.MaxPool2d((2, 2), stride=(2, 2)),
            
            
            ######### block 4 #########
            nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ELU(inplace=True),
            
            nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ELU(inplace=True),
            
            nn.Conv2d(256, 192, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(192),
            nn.ELU(inplace=True),
            
            nn.MaxPool2d((2, 2), stride=(2, 2))
        )
        
        # shrink final conv layer width to 4
        self.avgpool = nn.AdaptiveAvgPool2d((4,4))

        self.fc_layers = nn.Sequential(
            nn.Flatten(),  # Flatten from conv layers

            nn.Dropout(p=0.3),
            nn.Linear(192*4*4, 2048),
            nn.BatchNorm1d(2048),
            nn.ReLU(),
            
            nn.Dropout(p=0.6),
            nn.Linear(2048, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
        
            nn.Dropout(p=0.4),
            nn.Linear(1024, 8)
        )
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = self.avgpool(x)       
        x = self.fc_layers(x)
        return F.log_softmax(x, dim=1)

net = Network()

############################################################################
######      Specify the optimizer and loss function                   ######
############################################################################
learning_rate = 0.0005
# optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=learning_rate)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
# loss_func = F.nll_loss
loss_func = nn.CrossEntropyLoss()

############################################################################
######  Custom weight initialization and lr scheduling are optional   ######
############################################################################

# Normally, the default weight initialization and fixed learing rate
# should work fine. But, we have made it possible for you to define
# your own custom weight initialization and lr scheduler, if you wish.
def weights_init(m):
    return

scheduler = None


############################################################################
#######              Metaparameters and training options              ######
############################################################################
dataset = "./data"
train_val_split = 1
batch_size = 256 
epochs = 500


###############################################
#**          Print Network Information      **#
###############################################
print(transform('train'))
print(net)
count_parameters(net)

Compose(
    RandomResizedCrop(size=(80, 80), scale=(0.55, 1.0), ratio=(0.75, 1.3), interpolation=bilinear)
    RandomHorizontalFlip(p=0.5)
    RandomPerspective(p=0.2)
    RandomAffine(degrees=[-15.0, 15.0], translate=(0.0, 0.5))
    RandomRotation(degrees=[-10.0, 10.0], interpolation=nearest, expand=False, fill=0)
    ColorJitter(brightness=[0.6, 1.4], contrast=[0.6, 1.4], saturation=[0.9, 1.1], hue=[-0.02, 0.02])
    RandomPosterize(bits=3,p=0.3)
    RandomEqualize(p=0.1)
    RandomGrayscale(p=0.01)
    RandomPerspective(p=0.1)
    RandomAdjustSharpness(sharpness_factor=2,p=0.5)
    ToTensor()
)
Network(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ELU(alpha=1.0, inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_sta

11177352

### Plotting Helper functions

## HW 2 Main

In [4]:
# Use a GPU if available, as it should be faster.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

###########################
## Cat breed dictionary  ##
###########################
cat_dict = {
    0: 'bombay',
    1: 'calico',
    2: 'persian',
    3: 'russianblue',
    4: 'siamese',
    5: 'tiger',
    6: 'tortoiseshell',
    7: 'tuxedo'
}

class DatasetFromSubset(Dataset):
    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform

    def __getitem__(self, index):
        x, y = self.subset[index]
        if self.transform:
            x = self.transform(x)
        return x, y

    def __len__(self):
        return len(self.subset)


# Test network on validation set, if it exists.
## Added params
def test_network(net,testloader,test_accuracy_list,print_confusion=False):
    net.eval()
    total_images = 0
    total_correct = 0
    conf_matrix = np.zeros((8,8))
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total_images += labels.size(0)
            total_correct += (predicted == labels).sum().item()
            conf_matrix = conf_matrix + metrics.confusion_matrix(
                labels.cpu(),predicted.cpu(),labels=[0,1,2,3,4,5,6,7])

    model_accuracy = total_correct / total_images * 100
    test_accuracy_list.append(model_accuracy)
    print(', {0} test {1:.2f}%'.format(total_images,model_accuracy))
    if print_confusion:
        np.set_printoptions(precision=2, suppress=True)
        print(conf_matrix)
    net.train()

In [5]:
##############################
### Tracking training time ###
##############################
start_time = time.time() ## Added
time_elapsed = 0  ## Added Line
##############################

###############################
### Tracking nn performance ###
###############################
minibatch_loss_list, train_accuracy_list, test_accuracy_list = [], [], [] ## Added
###############################



# Main
print("Using device: {}"
      "\n".format(str(device)))
########################################################################
#######                      Loading Data                        #######
########################################################################
data = torchvision.datasets.ImageFolder(root=dataset)

if train_val_split == 1:
    # Train on the entire dataset
    data = torchvision.datasets.ImageFolder(root=dataset,
                        transform=transform('train'))
    trainloader = torch.utils.data.DataLoader(data,
                        batch_size=batch_size, shuffle=True);
else:
    # Split the dataset into trainset and testset
    data = torchvision.datasets.ImageFolder(root=dataset)
    data.len=len(data)
    train_len = int((train_val_split)*data.len)
    test_len = data.len - train_len
    train_subset, test_subset = random_split(data, [train_len, test_len])
    trainset = DatasetFromSubset(
        train_subset, transform=transform('train'))
    testset = DatasetFromSubset(
        test_subset, transform=transform('test'))

    trainloader = torch.utils.data.DataLoader(trainset, 
                        batch_size=batch_size, shuffle=False)
    testloader = torch.utils.data.DataLoader(testset, 
                        batch_size=batch_size, shuffle=False)


# Get model, loss criterion and optimizer from student
net = net.to(device)
criterion = loss_func
optimizer = optimizer
# get weight initialization and lr scheduler, if appropriate
weights_init = weights_init
scheduler = scheduler

# apply custom weight initialization, if it exists
net.apply(weights_init)

########################################################################
#######                        Training                          #######
########################################################################
print("Start training...")
for epoch in range(1,epochs+1):
    total_loss = 0
    total_images = 0
    total_correct = 0

    for batch in trainloader:           # Load batch
        images, labels = batch 
        images = images.to(device)
        labels = labels.to(device)

        preds = net(images)             # Process batch

        loss = criterion(preds, labels) # Calculate loss

        optimizer.zero_grad()
        loss.backward()                 # Calculate gradients
        optimizer.step()                # Update weights

        output = preds.argmax(dim=1)

        total_loss += loss.item()
        total_images += labels.size(0)
        total_correct += output.eq(labels).sum().item()
        minibatch_loss_list.append(loss.item())  ## Added

    # apply lr schedule, if it exists
    if scheduler is not None:
        scheduler.step()

    model_accuracy = total_correct / total_images * 100 
    train_accuracy_list.append(model_accuracy)  ## Added
    print('ep {0}, loss: {1:.2f}, {2} train {3:.2f}%'.format(
           epoch, total_loss, total_images, model_accuracy), end='')

    if train_val_split < 1:
        test_network(net,testloader, test_accuracy_list,
                     print_confusion=(epoch % 10 == 0)) ## Added
    else:
        print()
    
   
    if epoch % 10 == 0:
        torch.save(net.state_dict(),'v3_check.pth')
        print("   Model saved to checkModel.pth")
        time_elapsed = time.time() - start_time  ## Added Line
        print(f'Time elapsed: {str(datetime.timedelta(seconds = time_elapsed))}') ## TIME
    

    sys.stdout.flush()


torch.save(net.state_dict(),'v3_saved.pth')
print("   Model saved to savedModel.pth")
time_elapsed = time.time() - start_time ## Added Line
print(f'total time needed to train network: \
        {str(datetime.timedelta(seconds = time_elapsed))}\ntotal time in seconds: {time_elapsed}') ## TIME



Using device: cuda:0

Start training...
ep 1, loss: 63.33, 8000 train 23.49%
ep 2, loss: 58.75, 8000 train 29.96%
ep 3, loss: 55.28, 8000 train 34.45%
ep 4, loss: 53.20, 8000 train 37.48%
ep 5, loss: 52.25, 8000 train 39.81%
ep 6, loss: 51.31, 8000 train 40.23%
ep 7, loss: 49.79, 8000 train 40.85%
ep 8, loss: 49.53, 8000 train 42.50%
ep 9, loss: 49.61, 8000 train 42.51%
ep 10, loss: 47.62, 8000 train 45.62%
   Model saved to checkModel.pth
Time elapsed: 0:04:18.772727
ep 11, loss: 46.33, 8000 train 46.74%
ep 12, loss: 45.90, 8000 train 47.25%
ep 13, loss: 45.37, 8000 train 48.00%
ep 14, loss: 44.29, 8000 train 48.96%
ep 15, loss: 43.55, 8000 train 50.19%
ep 16, loss: 43.20, 8000 train 50.75%
ep 17, loss: 42.32, 8000 train 52.21%
ep 18, loss: 41.62, 8000 train 52.59%
ep 19, loss: 41.66, 8000 train 53.10%
ep 20, loss: 40.44, 8000 train 54.64%
   Model saved to checkModel.pth
Time elapsed: 0:08:31.478828
ep 21, loss: 40.47, 8000 train 53.91%
ep 22, loss: 39.84, 8000 train 54.71%
ep 23, lo

ep 185, loss: 17.73, 8000 train 80.83%
ep 186, loss: 17.82, 8000 train 80.41%
ep 187, loss: 18.17, 8000 train 80.60%
ep 188, loss: 17.92, 8000 train 80.42%
ep 189, loss: 17.13, 8000 train 80.95%
ep 190, loss: 17.58, 8000 train 81.40%
   Model saved to checkModel.pth
Time elapsed: 1:19:33.588451
ep 191, loss: 17.48, 8000 train 80.85%
ep 192, loss: 17.48, 8000 train 81.46%
ep 193, loss: 18.04, 8000 train 80.60%
ep 194, loss: 18.04, 8000 train 80.71%
ep 195, loss: 17.51, 8000 train 81.21%
ep 196, loss: 17.49, 8000 train 80.83%
ep 197, loss: 17.04, 8000 train 81.54%
ep 198, loss: 17.22, 8000 train 81.42%
ep 199, loss: 17.16, 8000 train 80.95%
ep 200, loss: 17.37, 8000 train 80.69%
   Model saved to checkModel.pth
Time elapsed: 1:23:40.193217
ep 201, loss: 17.20, 8000 train 81.54%
ep 202, loss: 17.01, 8000 train 81.44%
ep 203, loss: 17.62, 8000 train 81.29%
ep 204, loss: 16.88, 8000 train 81.74%
ep 205, loss: 16.97, 8000 train 81.34%
ep 206, loss: 17.31, 8000 train 81.40%
ep 207, loss: 17.2

ep 367, loss: 12.11, 8000 train 86.51%
ep 368, loss: 11.86, 8000 train 87.05%
ep 369, loss: 11.82, 8000 train 87.04%
ep 370, loss: 11.43, 8000 train 87.71%
   Model saved to checkModel.pth
Time elapsed: 2:32:29.591019
ep 371, loss: 12.29, 8000 train 86.46%
ep 372, loss: 11.30, 8000 train 87.67%
ep 373, loss: 12.50, 8000 train 85.69%
ep 374, loss: 10.89, 8000 train 88.33%
ep 375, loss: 11.81, 8000 train 87.22%
ep 376, loss: 11.69, 8000 train 87.15%
ep 377, loss: 11.42, 8000 train 87.83%
ep 378, loss: 11.58, 8000 train 87.11%
ep 379, loss: 11.01, 8000 train 87.79%
ep 380, loss: 11.80, 8000 train 87.36%
   Model saved to checkModel.pth
Time elapsed: 2:36:42.180755
ep 381, loss: 11.32, 8000 train 87.56%
ep 382, loss: 11.11, 8000 train 87.72%
ep 383, loss: 11.52, 8000 train 87.06%
ep 384, loss: 11.23, 8000 train 88.35%
ep 385, loss: 11.61, 8000 train 87.49%
ep 386, loss: 11.28, 8000 train 87.56%
ep 387, loss: 11.58, 8000 train 87.22%
ep 388, loss: 11.84, 8000 train 86.91%
ep 389, loss: 11.0

### Analysis

In [6]:
###################################
#**        Data Information     **#
###################################
print(f'batch size: {batch_size}')
print(f'learning rate: {learning_rate}')
print(f'train_val_split: {train_val_split}')
print(f'epochs: {epochs}')


#############################
#**         END           **#
#############################


# Getting count of each cat breed, should be close to 8*0.8*1000 initially..
train_data_distribution = get_cat_count(trainloader, 'training data')
# Getting count of each cat breed, should be close to 8*0.2*1000 initially..
test_data_distribution = get_cat_count(testloader, 'test data')

print(f'training data distribution - {train_data_distribution}')
print(f'test data distribution - {test_data_distribution}')

plot_training_loss(minibatch_loss_list=minibatch_loss_list,
                   num_epochs=epochs,
                   iter_per_epoch=len(trainloader),
                   results_dir=None,
                   averaging_iterations=10)
plt.show()


plot_accuracy(train_acc_list=train_accuracy_list,
              test_acc_list=test_accuracy_list,
              results_dir=None)
plt.show()

net.cpu()
show_examples(model=net, data_loader=testloader, class_dict=cat_dict)

conf_matrix = compute_confusion_matrix(model=net, data_loader=testloader, device=torch.device('cpu'))
print(conf_matrix)
plot_confusion_matrix(conf_matrix, class_names=cat_dict.values(), test_data_distribution=test_data_distribution)
plt.show()

batch size: 256
learning rate: 0.0005
train_val_split: 1
epochs: 500
training data - total instances = 8000


NameError: name 'testloader' is not defined