In [1]:
import torch
import torchvision
import sklearn.metrics as metrics
import numpy as np
import sys
from torch.utils.data import Dataset, random_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import time
import datetime
from helper import get_cat_count, count_parameters, compute_confusion_matrix, show_examples, plot_training_loss, plot_accuracy, plot_confusion_matrix

# COMP9444 Assignment 2

## Cat breed classification

## Neural Network

**student.py**

UNSW COMP9444 Neural Networks and Deep Learning

You may modify this file however you wish, including creating additional
variables, functions, classes, etc., so long as your code runs with the
hw2main.py file unmodified, and you are only using the approved packages.

You have been given some default values for the variables train_val_split,
batch_size as well as the transform function.
You are encouraged to modify these to improve the performance of your model.

**Answer to Question:**

Briefly describe how your program works, and explain any design and training
decisions you made along the way.

In [2]:
############################################################################
######     Specify transform(s) to be applied to the input images     ######
############################################################################

def transform(mode):
    """
    Called when loading the data. Visit this URL for more information:
    https://pytorch.org/vision/stable/transforms.html
    You may specify different transforms for training and testing
    """

    # channel size = 3

def transform(mode):
    """
    Called when loading the data. Visit this URL for more information:
    https://pytorch.org/vision/stable/transforms.html
    You may specify different transforms for training and testing
    """
    # Data Augmentation
    if mode == 'train':
        return transforms.Compose(
            [   
                transforms.RandomResizedCrop(size=80, scale=(0.45, 1.0), ratio=(0.70, 1.4)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomPerspective(p=0.2),
                transforms.RandomAffine(degrees=(-15, 15), translate=(0.0, 0.5)),
                transforms.RandomRotation((-10,10)),
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.1, hue=0.02),
                transforms.RandomPosterize(bits=3, p=0.3),
                transforms.RandomEqualize(p=0.1),
                transforms.RandomGrayscale(p=0.01),
                transforms.RandomPerspective(distortion_scale=0.05, p=0.15, fill=0),
                transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.5),
                transforms.ToTensor()
            ]
        )
    # Keep the testing data original to ensure accuracy
    elif mode == 'test':
        return transforms.Compose(
            [   
                transforms.ToTensor()
            ]
        )

In [3]:
#################################################################################
#####                      Specify NN to be used                           ######
#################################################################################

### Simplified implementation of VGG16 with 12 layers instead of 16.
### Cut layer = 256 - 256 conv layer. 512-512 * 3 conv layers at the end.
### Reduced number of nodes on FC layer from 4096 to 1024.
vgg_12 = [64, 64, 'maxpool', 128, 128, 'maxpool', 256, 256, 'maxpool', 512, 512, 512, 'maxpool', 'avgpool', 'fc1', 'fc2', 'fc3']    
##########################################################################################
# trying to take some inspirations from vgg16 but with less channels and fc layer nodes. #
##########################################################################################
class Network(nn.Module):

    def __init__(self):
        super().__init__()
        
        self.conv_layers = nn.Sequential(
            ######### block 1 #########
            nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ELU(inplace=True),
            
            nn.Conv2d(64, 64, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ELU(inplace=True),
            
            nn.MaxPool2d((2, 2), stride=(2, 2)),
            
            
            ######### block 2 #########
            nn.Conv2d(64, 128, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(128),
            nn.ELU(inplace=True),
            
            nn.Conv2d(128, 128, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(128),
            nn.ELU(inplace=True),
            
            nn.MaxPool2d((2, 2), stride=(2, 2)),
            
            ######### block 3 #########   
            nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ELU(inplace=True),
        
            
            nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ELU(inplace=True),
            
            nn.MaxPool2d((2, 2), stride=(2, 2)),
            
            
            ######### block 4 #########
            nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ELU(inplace=True),
            
            nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ELU(inplace=True),
            
            nn.Conv2d(256, 192, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(192),
            nn.ELU(inplace=True),
            
            nn.MaxPool2d((2, 2), stride=(2, 2))
        )
        
        # shrink final conv layer width to 4
        self.avgpool = nn.AdaptiveAvgPool2d((4,4))

        self.fc_layers = nn.Sequential(
            nn.Flatten(),  # Flatten from conv layers

            nn.Dropout(p=0.3),
            nn.Linear(192*4*4, 2400),
            nn.BatchNorm1d(2400),
            nn.ReLU(),
            
            nn.Dropout(p=0.6),
            nn.Linear(2400, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
        
            nn.Dropout(p=0.4),
            nn.Linear(1024, 8)
        )
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = self.avgpool(x)       
        x = self.fc_layers(x)
        return F.log_softmax(x, dim=1)

net = Network()

############################################################################
######      Specify the optimizer and loss function                   ######
############################################################################
learning_rate = 0.0005
# optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=learning_rate)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
# loss_func = F.nll_loss
loss_func = nn.CrossEntropyLoss()

############################################################################
######  Custom weight initialization and lr scheduling are optional   ######
############################################################################

# Normally, the default weight initialization and fixed learing rate
# should work fine. But, we have made it possible for you to define
# your own custom weight initialization and lr scheduler, if you wish.
def weights_init(m):
    return

scheduler = None


############################################################################
#######              Metaparameters and training options              ######
############################################################################
dataset = "./data"
train_val_split = 1
batch_size = 256 
epochs = 1500


###############################################
#**          Print Network Information      **#
###############################################
print(transform('train'))
print(net)
count_parameters(net)

Compose(
    RandomResizedCrop(size=(80, 80), scale=(0.45, 1.0), ratio=(0.7, 1.4), interpolation=bilinear)
    RandomHorizontalFlip(p=0.5)
    RandomPerspective(p=0.2)
    RandomAffine(degrees=[-15.0, 15.0], translate=(0.0, 0.5))
    RandomRotation(degrees=[-10.0, 10.0], interpolation=nearest, expand=False, fill=0)
    ColorJitter(brightness=[0.6, 1.4], contrast=[0.6, 1.4], saturation=[0.9, 1.1], hue=[-0.02, 0.02])
    RandomPosterize(bits=3,p=0.3)
    RandomEqualize(p=0.1)
    RandomGrayscale(p=0.01)
    RandomPerspective(p=0.15)
    RandomAdjustSharpness(sharpness_factor=2,p=0.5)
    ToTensor()
)
Network(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ELU(alpha=1.0, inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_sta

12620200

### Plotting Helper functions

## HW 2 Main

In [4]:
# Use a GPU if available, as it should be faster.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

###########################
## Cat breed dictionary  ##
###########################
cat_dict = {
    0: 'bombay',
    1: 'calico',
    2: 'persian',
    3: 'russianblue',
    4: 'siamese',
    5: 'tiger',
    6: 'tortoiseshell',
    7: 'tuxedo'
}

class DatasetFromSubset(Dataset):
    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform

    def __getitem__(self, index):
        x, y = self.subset[index]
        if self.transform:
            x = self.transform(x)
        return x, y

    def __len__(self):
        return len(self.subset)


# Test network on validation set, if it exists.
## Added params
def test_network(net,testloader,test_accuracy_list,print_confusion=False):
    net.eval()
    total_images = 0
    total_correct = 0
    conf_matrix = np.zeros((8,8))
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total_images += labels.size(0)
            total_correct += (predicted == labels).sum().item()
            conf_matrix = conf_matrix + metrics.confusion_matrix(
                labels.cpu(),predicted.cpu(),labels=[0,1,2,3,4,5,6,7])

    model_accuracy = total_correct / total_images * 100
    test_accuracy_list.append(model_accuracy)
    print(', {0} test {1:.2f}%'.format(total_images,model_accuracy))
    if print_confusion:
        np.set_printoptions(precision=2, suppress=True)
        print(conf_matrix)
    net.train()

In [5]:
##############################
### Tracking training time ###
##############################
start_time = time.time() ## Added
time_elapsed = 0  ## Added Line
##############################

###############################
### Tracking nn performance ###
###############################
minibatch_loss_list, train_accuracy_list, test_accuracy_list = [], [], [] ## Added
###############################



# Main
print("Using device: {}"
      "\n".format(str(device)))
########################################################################
#######                      Loading Data                        #######
########################################################################
data = torchvision.datasets.ImageFolder(root=dataset)

if train_val_split == 1:
    # Train on the entire dataset
    data = torchvision.datasets.ImageFolder(root=dataset,
                        transform=transform('train'))
    trainloader = torch.utils.data.DataLoader(data,
                        batch_size=batch_size, shuffle=True);
else:
    # Split the dataset into trainset and testset
    data = torchvision.datasets.ImageFolder(root=dataset)
    data.len=len(data)
    train_len = int((train_val_split)*data.len)
    test_len = data.len - train_len
    train_subset, test_subset = random_split(data, [train_len, test_len])
    trainset = DatasetFromSubset(
        train_subset, transform=transform('train'))
    testset = DatasetFromSubset(
        test_subset, transform=transform('test'))

    trainloader = torch.utils.data.DataLoader(trainset, 
                        batch_size=batch_size, shuffle=False)
    testloader = torch.utils.data.DataLoader(testset, 
                        batch_size=batch_size, shuffle=False)


# Get model, loss criterion and optimizer from student
net = net.to(device)
criterion = loss_func
optimizer = optimizer
# get weight initialization and lr scheduler, if appropriate
weights_init = weights_init
scheduler = scheduler

# apply custom weight initialization, if it exists
net.apply(weights_init)

########################################################################
#######                        Training                          #######
########################################################################
print("Start training...")
for epoch in range(1,epochs+1):
    total_loss = 0
    total_images = 0
    total_correct = 0

    for batch in trainloader:           # Load batch
        images, labels = batch 
        images = images.to(device)
        labels = labels.to(device)

        preds = net(images)             # Process batch

        loss = criterion(preds, labels) # Calculate loss

        optimizer.zero_grad()
        loss.backward()                 # Calculate gradients
        optimizer.step()                # Update weights

        output = preds.argmax(dim=1)

        total_loss += loss.item()
        total_images += labels.size(0)
        total_correct += output.eq(labels).sum().item()
        minibatch_loss_list.append(loss.item())  ## Added

    # apply lr schedule, if it exists
    if scheduler is not None:
        scheduler.step()

    model_accuracy = total_correct / total_images * 100 
    train_accuracy_list.append(model_accuracy)  ## Added
    print('ep {0}, loss: {1:.2f}, {2} train {3:.2f}%'.format(
           epoch, total_loss, total_images, model_accuracy), end='')

    if train_val_split < 1:
        test_network(net,testloader, test_accuracy_list,
                     print_confusion=(epoch % 10 == 0)) ## Added
    else:
        print()
    
   
    if epoch % 10 == 0:
        torch.save(net.state_dict(),'v4_check.pth')
        print("   Model saved to checkModel.pth")
        time_elapsed = time.time() - start_time  ## Added Line
        print(f'Time elapsed: {str(datetime.timedelta(seconds = time_elapsed))}') ## TIME
    
    if epoch % 50 == 0:
        torch.save(net.state_dict(), f'v4_{model_accuracy}_{epoch}_saved.pth')
        print(f"   Model saved to v4_{epoch}_saved.pth")
    

    sys.stdout.flush()


torch.save(net.state_dict(),'v4_final_saved.pth')
print("   Model saved to savedModel.pth")
time_elapsed = time.time() - start_time ## Added Line
print(f'total time needed to train network: \
        {str(datetime.timedelta(seconds = time_elapsed))}\ntotal time in seconds: {time_elapsed}') ## TIME



Using device: cuda:0

Start training...
ep 1, loss: 63.80, 8000 train 23.71%
ep 2, loss: 59.27, 8000 train 29.90%
ep 3, loss: 56.98, 8000 train 31.71%
ep 4, loss: 55.36, 8000 train 34.88%
ep 5, loss: 53.14, 8000 train 37.70%
ep 6, loss: 52.08, 8000 train 38.94%
ep 7, loss: 51.09, 8000 train 39.84%
ep 8, loss: 50.31, 8000 train 40.77%
ep 9, loss: 50.23, 8000 train 41.36%
ep 10, loss: 48.29, 8000 train 44.30%
   Model saved to checkModel.pth
Time elapsed: 0:03:50.805036
ep 11, loss: 47.82, 8000 train 44.39%
ep 12, loss: 47.18, 8000 train 45.75%
ep 13, loss: 46.06, 8000 train 47.42%
ep 14, loss: 46.27, 8000 train 47.36%
ep 15, loss: 45.08, 8000 train 49.06%
ep 16, loss: 44.74, 8000 train 49.36%
ep 17, loss: 44.66, 8000 train 49.38%
ep 18, loss: 45.29, 8000 train 48.20%
ep 19, loss: 43.40, 8000 train 50.58%
ep 20, loss: 42.62, 8000 train 51.04%
   Model saved to checkModel.pth
Time elapsed: 0:07:44.648464
ep 21, loss: 41.35, 8000 train 53.05%
ep 22, loss: 42.11, 8000 train 52.95%
ep 23, lo

ep 182, loss: 19.84, 8000 train 78.81%
ep 183, loss: 19.66, 8000 train 78.67%
ep 184, loss: 19.75, 8000 train 78.50%
ep 185, loss: 18.99, 8000 train 79.35%
ep 186, loss: 18.74, 8000 train 80.09%
ep 187, loss: 19.36, 8000 train 78.92%
ep 188, loss: 19.33, 8000 train 79.04%
ep 189, loss: 19.68, 8000 train 78.80%
ep 190, loss: 20.83, 8000 train 77.42%
   Model saved to checkModel.pth
Time elapsed: 1:21:20.818719
ep 191, loss: 18.93, 8000 train 79.09%
ep 192, loss: 18.80, 8000 train 79.71%
ep 193, loss: 19.00, 8000 train 79.01%
ep 194, loss: 18.93, 8000 train 79.33%
ep 195, loss: 18.81, 8000 train 79.90%
ep 196, loss: 18.69, 8000 train 79.11%
ep 197, loss: 18.42, 8000 train 80.04%
ep 198, loss: 19.12, 8000 train 78.92%
ep 199, loss: 18.90, 8000 train 79.24%
ep 200, loss: 17.85, 8000 train 80.80%
   Model saved to checkModel.pth
Time elapsed: 1:25:37.234249
   Model saved to v4_200_saved.pth
ep 201, loss: 18.61, 8000 train 79.86%
ep 202, loss: 19.18, 8000 train 79.04%
ep 203, loss: 18.44, 8

ep 361, loss: 13.26, 8000 train 85.28%
ep 362, loss: 12.98, 8000 train 86.12%
ep 363, loss: 13.63, 8000 train 84.92%
ep 364, loss: 13.65, 8000 train 85.25%
ep 365, loss: 13.50, 8000 train 85.54%
ep 366, loss: 13.98, 8000 train 84.97%
ep 367, loss: 13.86, 8000 train 84.97%
ep 368, loss: 13.56, 8000 train 85.54%
ep 369, loss: 13.46, 8000 train 85.36%
ep 370, loss: 13.36, 8000 train 85.80%
   Model saved to checkModel.pth
Time elapsed: 2:39:23.514740
ep 371, loss: 13.54, 8000 train 85.08%
ep 372, loss: 13.62, 8000 train 85.20%
ep 373, loss: 13.16, 8000 train 85.55%
ep 374, loss: 13.37, 8000 train 85.49%
ep 375, loss: 13.65, 8000 train 84.96%
ep 376, loss: 13.17, 8000 train 85.46%
ep 377, loss: 13.15, 8000 train 86.10%
ep 378, loss: 12.80, 8000 train 86.26%
ep 379, loss: 13.40, 8000 train 85.75%
ep 380, loss: 13.14, 8000 train 85.54%
   Model saved to checkModel.pth
Time elapsed: 2:43:39.575371
ep 381, loss: 12.88, 8000 train 86.21%
ep 382, loss: 12.89, 8000 train 85.86%
ep 383, loss: 13.3

ep 541, loss: 11.18, 8000 train 87.71%
ep 542, loss: 10.66, 8000 train 88.29%
ep 543, loss: 10.37, 8000 train 88.52%
ep 544, loss: 9.90, 8000 train 89.08%
ep 545, loss: 10.68, 8000 train 88.42%
ep 546, loss: 10.52, 8000 train 88.52%
ep 547, loss: 10.43, 8000 train 88.64%
ep 548, loss: 10.28, 8000 train 88.96%
ep 549, loss: 10.59, 8000 train 88.79%
ep 550, loss: 11.31, 8000 train 87.83%
   Model saved to checkModel.pth
Time elapsed: 4:02:34.758165
   Model saved to v4_550_saved.pth
ep 551, loss: 10.50, 8000 train 89.16%
ep 552, loss: 9.88, 8000 train 89.42%
ep 553, loss: 9.63, 8000 train 89.36%
ep 554, loss: 10.29, 8000 train 89.12%
ep 555, loss: 9.93, 8000 train 89.21%
ep 556, loss: 10.68, 8000 train 88.33%
ep 557, loss: 10.24, 8000 train 88.83%
ep 558, loss: 9.81, 8000 train 89.28%
ep 559, loss: 10.46, 8000 train 89.10%
ep 560, loss: 9.63, 8000 train 89.46%
   Model saved to checkModel.pth
Time elapsed: 4:07:33.488529
ep 561, loss: 10.25, 8000 train 88.89%
ep 562, loss: 10.79, 8000 tr

ep 723, loss: 8.34, 8000 train 91.15%
ep 724, loss: 8.19, 8000 train 90.96%
ep 725, loss: 8.46, 8000 train 90.40%
ep 726, loss: 8.76, 8000 train 90.70%
ep 727, loss: 8.45, 8000 train 90.99%
ep 728, loss: 9.01, 8000 train 90.48%
ep 729, loss: 8.77, 8000 train 90.48%
ep 730, loss: 8.37, 8000 train 90.94%
   Model saved to checkModel.pth
Time elapsed: 5:31:06.568544
ep 731, loss: 8.84, 8000 train 90.79%
ep 732, loss: 8.00, 8000 train 91.07%
ep 733, loss: 8.10, 8000 train 91.21%
ep 734, loss: 7.86, 8000 train 91.49%
ep 735, loss: 8.20, 8000 train 91.03%
ep 736, loss: 8.45, 8000 train 91.05%
ep 737, loss: 8.51, 8000 train 90.85%
ep 738, loss: 8.52, 8000 train 90.76%
ep 739, loss: 8.81, 8000 train 90.39%
ep 740, loss: 8.29, 8000 train 91.05%
   Model saved to checkModel.pth
Time elapsed: 5:36:02.599024
ep 741, loss: 7.97, 8000 train 91.29%
ep 742, loss: 7.99, 8000 train 91.20%
ep 743, loss: 8.23, 8000 train 91.19%
ep 744, loss: 8.01, 8000 train 91.09%
ep 745, loss: 8.38, 8000 train 90.97%
ep

ep 906, loss: 7.28, 8000 train 92.24%
ep 907, loss: 7.27, 8000 train 92.27%
ep 908, loss: 7.47, 8000 train 91.77%
ep 909, loss: 6.90, 8000 train 92.65%
ep 910, loss: 7.02, 8000 train 92.25%
   Model saved to checkModel.pth
Time elapsed: 7:00:44.175190
ep 911, loss: 6.86, 8000 train 92.31%
ep 912, loss: 7.28, 8000 train 92.45%
ep 913, loss: 7.61, 8000 train 92.05%
ep 914, loss: 7.56, 8000 train 91.66%
ep 915, loss: 7.06, 8000 train 92.69%
ep 916, loss: 7.46, 8000 train 92.44%
ep 917, loss: 7.72, 8000 train 91.76%
ep 918, loss: 7.02, 8000 train 92.71%
ep 919, loss: 7.46, 8000 train 92.10%
ep 920, loss: 7.30, 8000 train 92.22%
   Model saved to checkModel.pth
Time elapsed: 7:05:57.043371
ep 921, loss: 6.88, 8000 train 92.67%
ep 922, loss: 6.89, 8000 train 92.56%
ep 923, loss: 7.48, 8000 train 91.85%
ep 924, loss: 7.04, 8000 train 92.49%
ep 925, loss: 7.27, 8000 train 92.33%
ep 926, loss: 7.41, 8000 train 91.92%
ep 927, loss: 7.00, 8000 train 92.29%
ep 928, loss: 7.21, 8000 train 91.94%
ep

ep 1088, loss: 6.50, 8000 train 93.16%
ep 1089, loss: 6.76, 8000 train 92.50%
ep 1090, loss: 6.82, 8000 train 92.70%
   Model saved to checkModel.pth
Time elapsed: 8:30:52.615350
ep 1091, loss: 6.48, 8000 train 93.03%
ep 1092, loss: 6.26, 8000 train 93.17%
ep 1093, loss: 6.57, 8000 train 93.01%
ep 1094, loss: 6.82, 8000 train 92.73%
ep 1095, loss: 6.76, 8000 train 92.99%
ep 1096, loss: 6.06, 8000 train 93.54%
ep 1097, loss: 6.40, 8000 train 93.24%
ep 1098, loss: 6.60, 8000 train 93.01%
ep 1099, loss: 6.36, 8000 train 93.24%
ep 1100, loss: 6.40, 8000 train 93.17%
   Model saved to checkModel.pth
Time elapsed: 8:35:54.739519
   Model saved to v4_1100_saved.pth
ep 1101, loss: 5.94, 8000 train 93.61%
ep 1102, loss: 6.27, 8000 train 92.94%
ep 1103, loss: 6.65, 8000 train 93.14%
ep 1104, loss: 6.66, 8000 train 92.89%
ep 1105, loss: 6.39, 8000 train 92.79%
ep 1106, loss: 5.86, 8000 train 93.45%
ep 1107, loss: 6.55, 8000 train 93.08%
ep 1108, loss: 6.70, 8000 train 92.77%
ep 1109, loss: 6.12, 

ep 1266, loss: 5.50, 8000 train 93.76%
ep 1267, loss: 5.54, 8000 train 94.24%
ep 1268, loss: 5.84, 8000 train 93.86%
ep 1269, loss: 6.27, 8000 train 93.09%
ep 1270, loss: 5.23, 8000 train 94.30%
   Model saved to checkModel.pth
Time elapsed: 10:02:07.675100
ep 1271, loss: 5.96, 8000 train 93.67%
ep 1272, loss: 5.69, 8000 train 93.99%
ep 1273, loss: 6.02, 8000 train 93.47%
ep 1274, loss: 5.71, 8000 train 93.64%
ep 1275, loss: 5.71, 8000 train 94.11%
ep 1276, loss: 5.66, 8000 train 94.10%
ep 1277, loss: 5.66, 8000 train 93.97%
ep 1278, loss: 5.90, 8000 train 93.62%
ep 1279, loss: 5.82, 8000 train 93.81%
ep 1280, loss: 5.99, 8000 train 93.46%
   Model saved to checkModel.pth
Time elapsed: 10:07:08.472795
ep 1281, loss: 5.60, 8000 train 94.20%
ep 1282, loss: 5.66, 8000 train 93.95%
ep 1283, loss: 5.54, 8000 train 94.04%
ep 1284, loss: 5.66, 8000 train 93.91%
ep 1285, loss: 5.62, 8000 train 93.88%
ep 1286, loss: 5.85, 8000 train 93.80%
ep 1287, loss: 6.18, 8000 train 93.60%
ep 1288, loss: 6

ep 1445, loss: 5.59, 8000 train 93.92%
ep 1446, loss: 5.93, 8000 train 93.70%
ep 1447, loss: 5.19, 8000 train 94.40%
ep 1448, loss: 5.60, 8000 train 94.14%
ep 1449, loss: 5.17, 8000 train 94.36%
ep 1450, loss: 4.86, 8000 train 94.59%
   Model saved to checkModel.pth
Time elapsed: 11:30:06.485070
   Model saved to v4_1450_saved.pth
ep 1451, loss: 5.09, 8000 train 94.65%
ep 1452, loss: 4.82, 8000 train 94.95%
ep 1453, loss: 4.95, 8000 train 95.00%
ep 1454, loss: 5.60, 8000 train 94.12%
ep 1455, loss: 5.09, 8000 train 94.61%
ep 1456, loss: 4.97, 8000 train 94.27%
ep 1457, loss: 5.35, 8000 train 94.17%
ep 1458, loss: 4.96, 8000 train 94.44%
ep 1459, loss: 5.05, 8000 train 94.64%
ep 1460, loss: 5.26, 8000 train 94.50%
   Model saved to checkModel.pth
Time elapsed: 11:34:41.833165
ep 1461, loss: 5.44, 8000 train 94.15%
ep 1462, loss: 4.77, 8000 train 95.11%
ep 1463, loss: 5.18, 8000 train 94.26%
ep 1464, loss: 5.53, 8000 train 94.46%
ep 1465, loss: 5.00, 8000 train 94.67%
ep 1466, loss: 5.11

### Analysis

In [6]:
###################################
#**        Data Information     **#
###################################
print(f'batch size: {batch_size}')
print(f'learning rate: {learning_rate}')
print(f'train_val_split: {train_val_split}')
print(f'epochs: {epochs}')


#############################
#**         END           **#
#############################


# Getting count of each cat breed, should be close to 8*0.8*1000 initially..
train_data_distribution = get_cat_count(trainloader, 'training data')
# Getting count of each cat breed, should be close to 8*0.2*1000 initially..
test_data_distribution = get_cat_count(testloader, 'test data')

print(f'training data distribution - {train_data_distribution}')
print(f'test data distribution - {test_data_distribution}')

plot_training_loss(minibatch_loss_list=minibatch_loss_list,
                   num_epochs=epochs,
                   iter_per_epoch=len(trainloader),
                   results_dir=None,
                   averaging_iterations=10)
plt.show()


plot_accuracy(train_acc_list=train_accuracy_list,
              test_acc_list=test_accuracy_list,
              results_dir=None)
plt.show()

net.cpu()
show_examples(model=net, data_loader=testloader, class_dict=cat_dict)

conf_matrix = compute_confusion_matrix(model=net, data_loader=testloader, device=torch.device('cpu'))
print(conf_matrix)
plot_confusion_matrix(conf_matrix, class_names=cat_dict.values(), test_data_distribution=test_data_distribution)
plt.show()

batch size: 256
learning rate: 0.0005
train_val_split: 1
epochs: 1500
training data - total instances = 8000


NameError: name 'testloader' is not defined