In [1]:
from __future__ import print_function, division

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, utils
import time
import os
import copy
import torch.nn as nn
import torch.nn.functional as F
import datetime

# TODO: Implement a convolutional neural network (https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html)
class Net(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    """
    def __init__(self):
        super(Net, self).__init__()
        
        self.params = {'conv':[(), 
                               (3, 16, 5, 1, 1), 
                               (16, 32, 3, 1, 1),
                               (32, 32, 3, 1, 0),
                               (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (2, 2, 0)], # kernel_size, stride, padding
                       'fc':[(), 
                             (16*6*6, 120),
                             (120, 90), 
                             (90, 10)], # in_channels, out_channels
                       'drop':[0, 
                               0.25, 
                               0.25]
                      }
        
        self.conv1 = nn.Conv2d(*self.params['conv'][1])
        self.conv2 = nn.Conv2d(*self.params['conv'][2])
        self.conv3 = nn.Conv2d(*self.params['conv'][3])
        self.conv4 = nn.Conv2d(*self.params['conv'][4])
        
        self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
        self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
        
        self.fc1 = nn.Linear(*self.params['fc'][1])
        self.fc2 = nn.Linear(*self.params['fc'][2])
        self.fc3 = nn.Linear(*self.params['fc'][3])
        
        # self.drop1 = nn.Dropout2d(self.params['drop'][1])
        # self.drop2 = nn.Dropout2d(self.params['drop'][2])
        
        self.printed = False

        # TODO: Initialize layers

    def forward(self, img):

        # TODO: Implement forward pass
        x = img
        
        x = F.relu(self.conv1(x))
        if not self.printed: 
            print("CONV1", x.size())
        x = F.relu(self.conv2(x))
        if not self.printed: 
            print("CONV2", x.size())
        x = self.pool1(x)
        if not self.printed: 
            print("POOL1", x.size())
        
        x = F.relu(self.conv3(x))
        if not self.printed: 
            print("CONV3", x.size())
        x = F.relu(self.conv4(x))
        if not self.printed: 
            print("CONV4", x.size())
        x = self.pool2(x)
        if not self.printed: 
            print("POOL2", x.size())
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        if not self.printed: 
            print("FC1", x.size())
        '''x = self.drop1(x)
        if not self.printed: 
            print("DROP1", x.size())'''
        x = F.relu(self.fc2(x))
        if not self.printed: 
            print("FC2", x.size())
        x = self.fc3(x)
        if not self.printed: 
            print("FC3", x.size())
            self.printed = True

        return x

# TODO: You can change these data augmentation and normalization strategies for
#  better training and testing (https://pytorch.org/vision/stable/transforms.html)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset initialization
data_dir = 'data' # Suppose the dataset is stored under this folder
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # Read train and test sets, respectively.

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0) for x in ['train', 'test']}
# trainloader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=4, shuffle=True, num_workers=2)
# teatloader = torch.utils.data.DataLoader(image_datasets['test'], batch_size=4, shuffle=True, num_workers=2)

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu

# TODO: Implement training and testing procedures (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    
    for epoch in range(num_epochs):  

        running_loss = 0.0
        loss_record=[]
        for i, data in enumerate(dataloaders['train'], 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                loss_record.append(running_loss / 2000)
                # print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                
        print(datetime.datetime.now(), ' Epoch', (epoch + 1), ': Average Loss', loss_record)

    print('Finished Training')
    
    
    # save training results
    PATH = './cifar_net.pth'
    torch.save(model.state_dict(), PATH)
    
    
    # testing overall correct rate
    correct = 0
    total = 0
    
    with torch.no_grad():
        for i, data in enumerate(dataloaders['train'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Training accuracy: %d %%' % (100 * correct / total))
    
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Testing accuracy: %d %%' % (100 * correct / total))
    
    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}

    # again no gradients needed
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[class_names[label]] += 1
                total_pred[class_names[label]] += 1


    # print accuracy for each class
    print("Testing accuracy (each class): ")
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
    print()    
    
    return None

model_ft = Net() # Model initialization

model_ft = model_ft.to(device) # Move model to cpu

criterion = nn.CrossEntropyLoss() # Loss function initialization

# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-4) # Optimizer initialization

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy

for n in range(6):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

2021-05-06 20:49:14.772196
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30])
CONV2 torch.Size([4, 32, 30, 30])
POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13])
CONV4 torch.Size([4, 16, 13, 13])
POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
2021-05-06 20:50:34.252678  Epoch 1 : Average Loss [2.256450728416443, 1.4969576964974403, 1.2289507491793483]
2021-05-06 20:51:24.518523  Epoch 2 : Average Loss [1.0001430722074582, 0.9434580509588122, 0.8630769251538223]
2021-05-06 20:52:17.554406  Epoch 3 : Average Loss [0.7628402986791916, 0.7584733539889567, 0.7047955251077656]
2021-05-06 20:53:09.944474  Epoch 4 : Average Loss [0.6560948984310963, 0.6334348885513609, 0.6289269694458927]
2021-05-06 20:54:02.320431  Epoch 5 : Average Loss [0.5779247439355532, 0.562194696746068, 0.550704367017257]
Finished Training
Training accuracy: 83 %
Testing accuracy: 82 %
Testing accuracy (each class): 
0: 89.8%;   1: 86.6%;

In [15]:
# optim.Adadelta
from __future__ import print_function, division

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, utils
import time
import os
import copy
import torch.nn as nn
import torch.nn.functional as F
import datetime

# TODO: Implement a convolutional neural network (https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html)
class Net(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    """
    def __init__(self):
        super(Net, self).__init__()
        
        self.params = {'conv':[(), 
                               (3, 16, 5, 1, 1), 
                               (16, 32, 3, 1, 1),
                               (32, 32, 3, 1, 0),
                               (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (2, 2, 0)], # kernel_size, stride, padding
                       'fc':[(), 
                             (16*6*6, 120),
                             (120, 90), 
                             (90, 10)], # in_channels, out_channels
                       'drop':[0, 
                               0.25, 
                               0.25]
                      }
        
        self.conv1 = nn.Conv2d(*self.params['conv'][1])
        self.conv2 = nn.Conv2d(*self.params['conv'][2])
        self.conv3 = nn.Conv2d(*self.params['conv'][3])
        self.conv4 = nn.Conv2d(*self.params['conv'][4])
        
        self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
        self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
        
        self.fc1 = nn.Linear(*self.params['fc'][1])
        self.fc2 = nn.Linear(*self.params['fc'][2])
        self.fc3 = nn.Linear(*self.params['fc'][3])
        
        # self.drop1 = nn.Dropout2d(self.params['drop'][1])
        # self.drop2 = nn.Dropout2d(self.params['drop'][2])
        
        self.printed = False

        # TODO: Initialize layers

    def forward(self, img):

        # TODO: Implement forward pass
        x = img
        
        x = F.relu(self.conv1(x))
        if not self.printed: 
            print("CONV1", x.size())
        x = F.relu(self.conv2(x))
        if not self.printed: 
            print("CONV2", x.size())
        x = self.pool1(x)
        if not self.printed: 
            print("POOL1", x.size())
        
        x = F.relu(self.conv3(x))
        if not self.printed: 
            print("CONV3", x.size())
        x = F.relu(self.conv4(x))
        if not self.printed: 
            print("CONV4", x.size())
        x = self.pool2(x)
        if not self.printed: 
            print("POOL2", x.size())
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        if not self.printed: 
            print("FC1", x.size())
        '''x = self.drop1(x)
        if not self.printed: 
            print("DROP1", x.size())'''
        x = F.relu(self.fc2(x))
        if not self.printed: 
            print("FC2", x.size())
        x = self.fc3(x)
        if not self.printed: 
            print("FC3", x.size())
            self.printed = True

        return x

# TODO: You can change these data augmentation and normalization strategies for
#  better training and testing (https://pytorch.org/vision/stable/transforms.html)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset initialization
data_dir = 'data' # Suppose the dataset is stored under this folder
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # Read train and test sets, respectively.

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0) for x in ['train', 'test']}
# trainloader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=4, shuffle=True, num_workers=2)
# teatloader = torch.utils.data.DataLoader(image_datasets['test'], batch_size=4, shuffle=True, num_workers=2)

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu

# TODO: Implement training and testing procedures (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    
    for epoch in range(num_epochs):  

        running_loss = 0.0
        loss_record=[]
        for i, data in enumerate(dataloaders['train'], 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                loss_record.append(running_loss / 2000)
                # print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                
        print(datetime.datetime.now(), ' Epoch', (epoch + 1), ': Average Loss', loss_record)

    print('Finished Training')
    
    
    # save training results
    PATH = './cifar_net.pth'
    torch.save(model.state_dict(), PATH)
    
    
    # testing overall correct rate
    correct = 0
    total = 0
    
    with torch.no_grad():
        for i, data in enumerate(dataloaders['train'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Training accuracy: %d %%' % (100 * correct / total))
    
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Testing accuracy: %d %%' % (100 * correct / total))
    
    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}

    # again no gradients needed
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[class_names[label]] += 1
                total_pred[class_names[label]] += 1


    # print accuracy for each class
    print("Testing accuracy (each class): ")
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
    print()    
    
    return None

model_ft = Net() # Model initialization

model_ft = model_ft.to(device) # Move model to cpu

criterion = nn.CrossEntropyLoss() # Loss function initialization

# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adadelta(model_ft.parameters(), lr=1e-4) # Optimizer initialization

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy

for n in range(6):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

2021-05-06 16:40:06.488670
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30])
CONV2 torch.Size([4, 32, 30, 30])
POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13])
CONV4 torch.Size([4, 16, 13, 13])
POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
2021-05-06 16:41:05.581086  Epoch 1 : Average Loss [2.3043154529333116, 2.3045051860809327, 2.3042086980342864]
2021-05-06 16:41:59.618088  Epoch 2 : Average Loss [2.3027573450803756, 2.304300184249878, 2.3047116711139677]
2021-05-06 16:43:10.421231  Epoch 3 : Average Loss [2.3051510660648344, 2.303858541727066, 2.304147281527519]
2021-05-06 16:44:17.686033  Epoch 4 : Average Loss [2.303972874045372, 2.3040791311264037, 2.3041595437526703]
2021-05-06 16:45:16.387825  Epoch 5 : Average Loss [2.3041162202358247, 2.3037228739261626, 2.3041245189905166]
Finished Training
Training accuracy: 10 %
Testing accuracy: 10 %
Testing accuracy (each class): 
0: 0.0%;   1: 0.0%;   

In [17]:
# 1e-5
from __future__ import print_function, division

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, utils
import time
import os
import copy
import torch.nn as nn
import torch.nn.functional as F
import datetime

# TODO: Implement a convolutional neural network (https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html)
class Net(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    """
    def __init__(self):
        super(Net, self).__init__()
        
        self.params = {'conv':[(), 
                               (3, 16, 5, 1, 1), 
                               (16, 32, 3, 1, 1),
                               (32, 32, 3, 1, 0),
                               (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (2, 2, 0)], # kernel_size, stride, padding
                       'fc':[(), 
                             (16*6*6, 120),
                             (120, 90), 
                             (90, 10)], # in_channels, out_channels
                       'drop':[0, 
                               0.25, 
                               0.25]
                      }
        
        self.conv1 = nn.Conv2d(*self.params['conv'][1])
        self.conv2 = nn.Conv2d(*self.params['conv'][2])
        self.conv3 = nn.Conv2d(*self.params['conv'][3])
        self.conv4 = nn.Conv2d(*self.params['conv'][4])
        
        self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
        self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
        
        self.fc1 = nn.Linear(*self.params['fc'][1])
        self.fc2 = nn.Linear(*self.params['fc'][2])
        self.fc3 = nn.Linear(*self.params['fc'][3])
        
        # self.drop1 = nn.Dropout2d(self.params['drop'][1])
        # self.drop2 = nn.Dropout2d(self.params['drop'][2])
        
        self.printed = False

        # TODO: Initialize layers

    def forward(self, img):

        # TODO: Implement forward pass
        x = img
        
        x = F.relu(self.conv1(x))
        if not self.printed: 
            print("CONV1", x.size())
        x = F.relu(self.conv2(x))
        if not self.printed: 
            print("CONV2", x.size())
        x = self.pool1(x)
        if not self.printed: 
            print("POOL1", x.size())
        
        x = F.relu(self.conv3(x))
        if not self.printed: 
            print("CONV3", x.size())
        x = F.relu(self.conv4(x))
        if not self.printed: 
            print("CONV4", x.size())
        x = self.pool2(x)
        if not self.printed: 
            print("POOL2", x.size())
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        if not self.printed: 
            print("FC1", x.size())
        '''x = self.drop1(x)
        if not self.printed: 
            print("DROP1", x.size())'''
        x = F.relu(self.fc2(x))
        if not self.printed: 
            print("FC2", x.size())
        x = self.fc3(x)
        if not self.printed: 
            print("FC3", x.size())
            self.printed = True

        return x

# TODO: You can change these data augmentation and normalization strategies for
#  better training and testing (https://pytorch.org/vision/stable/transforms.html)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset initialization
data_dir = 'data' # Suppose the dataset is stored under this folder
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # Read train and test sets, respectively.

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0) for x in ['train', 'test']}
# trainloader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=4, shuffle=True, num_workers=2)
# teatloader = torch.utils.data.DataLoader(image_datasets['test'], batch_size=4, shuffle=True, num_workers=2)

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu

# TODO: Implement training and testing procedures (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    
    for epoch in range(num_epochs):  

        running_loss = 0.0
        loss_record=[]
        for i, data in enumerate(dataloaders['train'], 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                loss_record.append(running_loss / 2000)
                # print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                
        print(datetime.datetime.now(), ' Epoch', (epoch + 1), ': Average Loss', loss_record)

    print('Finished Training')
    
    
    # save training results
    PATH = './cifar_net.pth'
    torch.save(model.state_dict(), PATH)
    
    
    # testing overall correct rate
    correct = 0
    total = 0
    
    with torch.no_grad():
        for i, data in enumerate(dataloaders['train'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Training accuracy: %d %%' % (100 * correct / total))
    
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Testing accuracy: %d %%' % (100 * correct / total))
    
    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}

    # again no gradients needed
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[class_names[label]] += 1
                total_pred[class_names[label]] += 1


    # print accuracy for each class
    print("Testing accuracy (each class): ")
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
    print()    
    
    return None

model_ft = Net() # Model initialization

model_ft = model_ft.to(device) # Move model to cpu

criterion = nn.CrossEntropyLoss() # Loss function initialization

# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-3) # Optimizer initialization

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy

for n in range(6):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

2021-05-06 18:13:19.463533
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30])
CONV2 torch.Size([4, 32, 30, 30])
POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13])
CONV4 torch.Size([4, 16, 13, 13])
POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
2021-05-06 18:14:12.039795  Epoch 1 : Average Loss [2.3046225954294206, 2.3030373970270155, 2.3034356145858763]
2021-05-06 18:15:10.813733  Epoch 2 : Average Loss [2.3033910413980485, 2.302909638404846, 2.303143483519554]
2021-05-06 18:16:11.405797  Epoch 3 : Average Loss [2.303170191168785, 2.3029901208877566, 2.3031493718624114]
2021-05-06 18:17:11.464257  Epoch 4 : Average Loss [2.3031673308610916, 2.302951886296272, 2.3031302314996718]
2021-05-06 18:18:09.657266  Epoch 5 : Average Loss [2.303080519795418, 2.303159546971321, 2.302984843850136]
Finished Training
Training accuracy: 10 %
Testing accuracy: 10 %
Testing accuracy (each class): 
0: 0.0%;   1: 0.0%;   2: 

KeyboardInterrupt: 

In [2]:
# ExponentialLR

from __future__ import print_function, division

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, utils
import time
import os
import copy
import torch.nn as nn
import torch.nn.functional as F
import datetime

# TODO: Implement a convolutional neural network (https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html)
class Net(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    """
    def __init__(self):
        super(Net, self).__init__()
        
        self.params = {'conv':[(), 
                               (3, 16, 5, 1, 1), 
                               (16, 32, 3, 1, 1),
                               (32, 32, 3, 1, 0),
                               (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (2, 2, 0)], # kernel_size, stride, padding
                       'fc':[(), 
                             (16*6*6, 120),
                             (120, 90), 
                             (90, 10)], # in_channels, out_channels
                       'drop':[0, 
                               0.25, 
                               0.25]
                      }
        
        self.conv1 = nn.Conv2d(*self.params['conv'][1])
        self.conv2 = nn.Conv2d(*self.params['conv'][2])
        self.conv3 = nn.Conv2d(*self.params['conv'][3])
        self.conv4 = nn.Conv2d(*self.params['conv'][4])
        
        self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
        self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
        
        self.fc1 = nn.Linear(*self.params['fc'][1])
        self.fc2 = nn.Linear(*self.params['fc'][2])
        self.fc3 = nn.Linear(*self.params['fc'][3])
        
        # self.drop1 = nn.Dropout2d(self.params['drop'][1])
        # self.drop2 = nn.Dropout2d(self.params['drop'][2])
        
        self.printed = False

        # TODO: Initialize layers

    def forward(self, img):

        # TODO: Implement forward pass
        x = img
        
        x = F.relu(self.conv1(x))
        if not self.printed: 
            print("CONV1", x.size())
        x = F.relu(self.conv2(x))
        if not self.printed: 
            print("CONV2", x.size())
        x = self.pool1(x)
        if not self.printed: 
            print("POOL1", x.size())
        
        x = F.relu(self.conv3(x))
        if not self.printed: 
            print("CONV3", x.size())
        x = F.relu(self.conv4(x))
        if not self.printed: 
            print("CONV4", x.size())
        x = self.pool2(x)
        if not self.printed: 
            print("POOL2", x.size())
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        if not self.printed: 
            print("FC1", x.size())
        '''x = self.drop1(x)
        if not self.printed: 
            print("DROP1", x.size())'''
        x = F.relu(self.fc2(x))
        if not self.printed: 
            print("FC2", x.size())
        x = self.fc3(x)
        if not self.printed: 
            print("FC3", x.size())
            self.printed = True

        return x

# TODO: You can change these data augmentation and normalization strategies for
#  better training and testing (https://pytorch.org/vision/stable/transforms.html)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset initialization
data_dir = 'data' # Suppose the dataset is stored under this folder
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # Read train and test sets, respectively.

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0) for x in ['train', 'test']}
# trainloader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=4, shuffle=True, num_workers=2)
# teatloader = torch.utils.data.DataLoader(image_datasets['test'], batch_size=4, shuffle=True, num_workers=2)

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu

# TODO: Implement training and testing procedures (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    
    for epoch in range(num_epochs):  

        running_loss = 0.0
        loss_record=[]
        for i, data in enumerate(dataloaders['train'], 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                loss_record.append(running_loss / 2000)
                # print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                
        print(datetime.datetime.now(), ' Epoch', (epoch + 1), ': Average Loss', loss_record)

    print('Finished Training')
    
    
    # save training results
    PATH = './cifar_net.pth'
    torch.save(model.state_dict(), PATH)
    
    
    # testing overall correct rate
    correct = 0
    total = 0
    
    with torch.no_grad():
        for i, data in enumerate(dataloaders['train'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Training accuracy: %d %%' % (100 * correct / total))
    
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Testing accuracy: %d %%' % (100 * correct / total))
    
    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}

    # again no gradients needed
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[class_names[label]] += 1
                total_pred[class_names[label]] += 1


    # print accuracy for each class
    print("Testing accuracy (each class): ")
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
    print()    
    
    return None

model_ft = Net() # Model initialization

model_ft = model_ft.to(device) # Move model to cpu

criterion = nn.CrossEntropyLoss() # Loss function initialization

# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-4) # Optimizer initialization

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy
#cyc_lr_scheduler = lr_scheduler.CyclicLR(optimizer_ft, base_lr=1e-4, max_lr=5e-4, step_size_up=21, step_size_down=19, mode='triangular', gamma=0.1, scale_fn=None, scale_mode='cycle', cycle_momentum=True, base_momentum=0.8, max_momentum=0.9, last_epoch=-1)
expon_lr_scheduler = lr_scheduler.ExponentialLR(optimizer_ft, gamma=0.1)

'''print("cyclic")
for n in range(6):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, cyc_lr_scheduler, num_epochs=5)'''
print("exponential")
for n in range(6):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, expon_lr_scheduler, num_epochs=5)

exponential
2021-05-06 21:21:43.212723
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30])
CONV2 torch.Size([4, 32, 30, 30])
POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13])
CONV4 torch.Size([4, 16, 13, 13])
POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
2021-05-06 21:22:41.814435  Epoch 1 : Average Loss [2.018582885324955, 1.3273267338052392, 1.111500558863394]
2021-05-06 21:23:49.766135  Epoch 2 : Average Loss [0.9188609193004668, 0.9009850593106821, 0.8203994854008779]
2021-05-06 21:24:47.068376  Epoch 3 : Average Loss [0.7436504960694583, 0.7164136390537024, 0.6825564343965379]
2021-05-06 21:25:48.390452  Epoch 4 : Average Loss [0.6393269818884146, 0.5889167256407091, 0.593654383557936]
2021-05-06 21:26:48.614278  Epoch 5 : Average Loss [0.5642612914105121, 0.5381727868962626, 0.5376628556099459]
Finished Training
Training accuracy: 84 %
Testing accuracy: 82 %
Testing accuracy (each class): 
0: 88.4%;

In [3]:
# different CONV1 ExponentialLR

from __future__ import print_function, division

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, utils
import time
import os
import copy
import torch.nn as nn
import torch.nn.functional as F
import datetime

# TODO: Implement a convolutional neural network (https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html)
class Net(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    """
    def __init__(self):
        super(Net, self).__init__()
        
        self.params = {'conv':[(), 
                               (3, 16, 3, 1, 0), 
                               (16, 32, 3, 1, 1),
                               (32, 32, 3, 1, 0),
                               (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (2, 2, 0)], # kernel_size, stride, padding
                       'fc':[(), 
                             (16*6*6, 120),
                             (120, 90), 
                             (90, 10)], # in_channels, out_channels
                       'drop':[0, 
                               0.25, 
                               0.25]
                      }
        
        self.conv1 = nn.Conv2d(*self.params['conv'][1])
        self.conv2 = nn.Conv2d(*self.params['conv'][2])
        self.conv3 = nn.Conv2d(*self.params['conv'][3])
        self.conv4 = nn.Conv2d(*self.params['conv'][4])
        
        self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
        self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
        
        self.fc1 = nn.Linear(*self.params['fc'][1])
        self.fc2 = nn.Linear(*self.params['fc'][2])
        self.fc3 = nn.Linear(*self.params['fc'][3])
        
        # self.drop1 = nn.Dropout2d(self.params['drop'][1])
        # self.drop2 = nn.Dropout2d(self.params['drop'][2])
        
        self.printed = False

        # TODO: Initialize layers

    def forward(self, img):

        # TODO: Implement forward pass
        x = img
        
        x = F.relu(self.conv1(x))
        if not self.printed: 
            print("CONV1", x.size())
        x = F.relu(self.conv2(x))
        if not self.printed: 
            print("CONV2", x.size())
        x = self.pool1(x)
        if not self.printed: 
            print("POOL1", x.size())
        
        x = F.relu(self.conv3(x))
        if not self.printed: 
            print("CONV3", x.size())
        x = F.relu(self.conv4(x))
        if not self.printed: 
            print("CONV4", x.size())
        x = self.pool2(x)
        if not self.printed: 
            print("POOL2", x.size())
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        if not self.printed: 
            print("FC1", x.size())
        '''x = self.drop1(x)
        if not self.printed: 
            print("DROP1", x.size())'''
        x = F.relu(self.fc2(x))
        if not self.printed: 
            print("FC2", x.size())
        x = self.fc3(x)
        if not self.printed: 
            print("FC3", x.size())
            self.printed = True

        return x

# TODO: You can change these data augmentation and normalization strategies for
#  better training and testing (https://pytorch.org/vision/stable/transforms.html)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset initialization
data_dir = 'data' # Suppose the dataset is stored under this folder
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # Read train and test sets, respectively.

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0) for x in ['train', 'test']}
# trainloader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=4, shuffle=True, num_workers=2)
# teatloader = torch.utils.data.DataLoader(image_datasets['test'], batch_size=4, shuffle=True, num_workers=2)

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu

# TODO: Implement training and testing procedures (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    
    for epoch in range(num_epochs):  

        running_loss = 0.0
        loss_record=[]
        for i, data in enumerate(dataloaders['train'], 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                loss_record.append(running_loss / 2000)
                # print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                
        print(datetime.datetime.now(), ' Epoch', (epoch + 1), ': Average Loss', loss_record)

    print('Finished Training')
    
    
    # save training results
    PATH = './cifar_net.pth'
    torch.save(model.state_dict(), PATH)
    
    
    # testing overall correct rate
    correct = 0
    total = 0
    
    with torch.no_grad():
        for i, data in enumerate(dataloaders['train'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Training accuracy: %d %%' % (100 * correct / total))
    
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Testing accuracy: %d %%' % (100 * correct / total))
    
    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}

    # again no gradients needed
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[class_names[label]] += 1
                total_pred[class_names[label]] += 1


    # print accuracy for each class
    print("Testing accuracy (each class): ")
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
    print()    
    
    return None

model_ft = Net() # Model initialization

model_ft = model_ft.to(device) # Move model to cpu

criterion = nn.CrossEntropyLoss() # Loss function initialization

# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-4) # Optimizer initialization

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy
#cyc_lr_scheduler = lr_scheduler.CyclicLR(optimizer_ft, base_lr=1e-4, max_lr=5e-4, step_size_up=21, step_size_down=19, mode='triangular', gamma=0.1, scale_fn=None, scale_mode='cycle', cycle_momentum=True, base_momentum=0.8, max_momentum=0.9, last_epoch=-1)
expon_lr_scheduler = lr_scheduler.ExponentialLR(optimizer_ft, gamma=0.1)

'''print("cyclic")
for n in range(6):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, cyc_lr_scheduler, num_epochs=5)'''
print("exponential")
for n in range(6):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, expon_lr_scheduler, num_epochs=5)

exponential
2021-05-07 15:58:43.725625
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30])
CONV2 torch.Size([4, 32, 30, 30])
POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13])
CONV4 torch.Size([4, 16, 13, 13])
POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
2021-05-07 15:59:32.621721  Epoch 1 : Average Loss [2.140716392248869, 1.432954012028873, 1.1940439390111715]
2021-05-07 16:00:41.053668  Epoch 2 : Average Loss [0.9760816143546254, 0.8912500327262096, 0.8436358041013591]
2021-05-07 16:01:52.143576  Epoch 3 : Average Loss [0.7530516283386387, 0.7475160048725665, 0.7027701566662872]
2021-05-07 16:02:49.144576  Epoch 4 : Average Loss [0.6486936410089256, 0.6308916582132224, 0.6333620286413061]
2021-05-07 16:03:48.438969  Epoch 5 : Average Loss [0.5813864413538249, 0.5565790085202897, 0.55848952240785]
Finished Training
Training accuracy: 83 %
Testing accuracy: 81 %
Testing accuracy (each class): 
0: 85.0%; 

In [10]:
from __future__ import print_function, division

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, utils
import time
import os
import copy
import torch.nn as nn
import torch.nn.functional as F
import datetime

# TODO: Implement a convolutional neural network (https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html)
class Net(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    """
    def __init__(self):
        super(Net, self).__init__()
        
        self.params = {'conv':[(), 
                               (3, 12, 5, 1, 0), 
                               (12, 36, 5, 1, 0)], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (2, 2, 0)], # kernel_size, stride, padding
                       'fc':[(), 
                             (36*4*4, 120),
                             (120, 90), 
                             (90, 10)], # in_channels, out_channels
                      }
        
        self.conv1 = nn.Conv2d(*self.params['conv'][1])
        self.conv2 = nn.Conv2d(*self.params['conv'][2])
        
        self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
        self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
        
        self.fc1 = nn.Linear(*self.params['fc'][1])
        self.fc2 = nn.Linear(*self.params['fc'][2])
        self.fc3 = nn.Linear(*self.params['fc'][3])
        
        # self.drop1 = nn.Dropout2d(self.params['drop'][1])
        # self.drop2 = nn.Dropout2d(self.params['drop'][2])
        
        self.printed = False

        # TODO: Initialize layers

    def forward(self, img):

        # TODO: Implement forward pass
        x = img
        
        x = F.relu(self.conv1(x))
        if not self.printed: 
            print("CONV1", x.size())
        x = self.pool1(x)
        if not self.printed: 
            print("POOL1", x.size())
        
        x = F.relu(self.conv2(x))
        if not self.printed: 
            print("CONV2", x.size())
        x = self.pool2(x)
        if not self.printed: 
            print("POOL2", x.size())
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        if not self.printed: 
            print("FC1", x.size())
        x = F.relu(self.fc2(x))
        if not self.printed: 
            print("FC2", x.size())
        x = self.fc3(x)
        if not self.printed: 
            print("FC3", x.size())
            self.printed = True

        return x

# TODO: You can change these data augmentation and normalization strategies for
#  better training and testing (https://pytorch.org/vision/stable/transforms.html)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((28, 28)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset initialization
data_dir = 'data' # Suppose the dataset is stored under this folder
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # Read train and test sets, respectively.

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0) for x in ['train', 'test']}
# trainloader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=4, shuffle=True, num_workers=2)
# teatloader = torch.utils.data.DataLoader(image_datasets['test'], batch_size=4, shuffle=True, num_workers=2)

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu

# TODO: Implement training and testing procedures (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    
    for epoch in range(num_epochs):  

        running_loss = 0.0
        loss_record=[]
        for i, data in enumerate(dataloaders['train'], 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                loss_record.append(running_loss / 2000)
                # print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                
        print(datetime.datetime.now(), ' Epoch', (epoch + 1), ': Average Loss', loss_record)

    print('Finished Training')
    
    
    # save training results
    PATH = './cifar_net.pth'
    torch.save(model.state_dict(), PATH)
    
    
    # testing overall correct rate
    correct = 0
    total = 0
    
    with torch.no_grad():
        for i, data in enumerate(dataloaders['train'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Training accuracy: %d %%' % (100 * correct / total))
    
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Testing accuracy: %d %%' % (100 * correct / total))
    
    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}

    # again no gradients needed
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[class_names[label]] += 1
                total_pred[class_names[label]] += 1


    # print accuracy for each class
    print("Testing accuracy (each class): ")
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
    print()    
    
    return None

model_ft = Net() # Model initialization

model_ft = model_ft.to(device) # Move model to cpu

criterion = nn.CrossEntropyLoss() # Loss function initialization

# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-4) # Optimizer initialization

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy

for n in range(6):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

2021-05-07 22:11:00.303391
epoch range:  1  to  5
CONV1 torch.Size([4, 12, 24, 24])
POOL1 torch.Size([4, 12, 12, 12])
CONV2 torch.Size([4, 36, 8, 8])
POOL2 torch.Size([4, 36, 4, 4])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
2021-05-07 22:11:47.097464  Epoch 1 : Average Loss [2.112942450135946, 1.5385193206220866, 1.361519385561347]
2021-05-07 22:12:44.365842  Epoch 2 : Average Loss [1.0534058191366493, 0.9741368007035927, 0.889476210184861]
2021-05-07 22:13:41.394395  Epoch 3 : Average Loss [0.814332571022911, 0.7984415438361466, 0.7524605754625517]
2021-05-07 22:14:34.687408  Epoch 4 : Average Loss [0.7169015335887671, 0.6947488844858016, 0.6904889244963415]
2021-05-07 22:15:23.488324  Epoch 5 : Average Loss [0.6492560619564319, 0.6484069976648897, 0.6209657117625101]
Finished Training
Training accuracy: 82 %
Testing accuracy: 79 %
Testing accuracy (each class): 
0: 87.8%;   1: 87.2%;   2: 79.8%;   3: 68.0%;   4: 88.4%;   5: 75.6%;   6: 66.8%;   7: 85.6%

KeyboardInterrupt: 

In [11]:
PATH = './cifar_net.pth'
net = Net()
net.load_state_dict(torch.load(PATH))

correct = 0
total = 0
    
with torch.no_grad():
    for i, data in enumerate(dataloaders['train'], 0):
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Training accuracy: %d %%' % (100 * correct / total))
    
correct = 0
total = 0
with torch.no_grad():
    for i, data in enumerate(dataloaders['test'], 0):
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Testing accuracy: %d %%' % (100 * correct / total))
    
# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in class_names}
total_pred = {classname: 0 for classname in class_names}

# again no gradients needed
with torch.no_grad():
    for i, data in enumerate(dataloaders['test'], 0):
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[class_names[label]] += 1
            total_pred[class_names[label]] += 1


# print accuracy for each class
print("Testing accuracy (each class): ")
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
print()

CONV1 torch.Size([4, 12, 24, 24])
POOL1 torch.Size([4, 12, 12, 12])
CONV2 torch.Size([4, 36, 8, 8])
POOL2 torch.Size([4, 36, 4, 4])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
Training accuracy: 89 %
Testing accuracy: 84 %
Testing accuracy (each class): 
0: 82.2%;   1: 86.4%;   2: 82.8%;   3: 75.0%;   4: 88.2%;   5: 84.8%;   6: 82.8%;   7: 88.6%;   8: 86.4%;   9: 88.8%;   
