In [42]:
from __future__ import print_function, division

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, utils
import time
import os
import copy
import torch.nn as nn
import torch.nn.functional as F
import datetime

net_param = {'conv':[(),
                          (3, 16, 5, 1, 0), 
                          (16, 32, 3, 1, 1),
                          (32, 32, 3, 1, 0),
                          (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
              'pool':[(), 
                      (2, 2, 0),
                      (2, 2, 0)], # kernel_size, stride, padding
              'fc':[(), 
                    (16*6*6, 120),
                    (120, 60), 
                    (60, 10)], # in_channels, out_channels
              'drop':[0, 
                      0.25, 
                      0.5]
             }
             
epoch = 30

optimizer_param_set = {'1e-4_expo': (1e-4, 'expo'),
                       '5e-4_expo': (5e-4, 'expo'),
                       '1e-5_step': (1e-5, 'step'), 
                       '1e-4_step': (1e-4, 'step'), 
                       '5e-4_step': (5e-4, 'step'), 
                       '1e-4_mult': (1e-4, 'mult'),
                       '5e-4_mult': (5e-4, 'mult')}

In [43]:
def test(model):
    
    # overall training correct rate
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['train'], 0):
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Training accuracy: %.2f %%' % (100 * correct / total))
    
    # overall testing correct rate
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Testing accuracy: %.2f %% (%d / %d)' % ((100 * correct / total), correct, total))
    
    # count testing predictions for each class
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[class_names[label]] += 1
                total_pred[class_names[label]] += 1

    # print accuracy for each class
    print("Testing accuracy (each class): ")
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
        if classname == "5":
            print()
    print()
    
    return


def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    
    for epoch in range(num_epochs):  

        running_loss = 0.0
        for i, data in enumerate(dataloaders['train'], 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()                
                
        print(datetime.datetime.now(), ' Epoch', (epoch + 1), ': Average Loss', round(running_loss / 3000, 8))
        model.loss_.append(round(running_loss / 3000, 8))
        running_loss = 0.0
        
        '''if epoch % 5 == 4:
            print('epoch', (epoch+1))
            test(model)'''
    
    print('Finished Training')
    
    test(model)
    
    return None

# Data transformer
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset initialization
data_dir = 'data' 
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # Read train and test sets, respectively.

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0) for x in ['train', 'test']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu
paths = []

In [54]:
class Net(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    CONV1->CONV2->POOL1->CONV3->CONV4->POOL2->FC1->FC2->FC3
    """
    def __init__(self, params):
        super(Net, self).__init__()
        
        #Initialize layers
        self.params = params
        self.n_layers = len(params['conv'])+len(params['pool'])+len(params['fc'])+len(params['drop'])-4
        self.printed = False
        self.loss_ = []
        
        if self.n_layers == 11:
            self.conv1 = nn.Conv2d(*self.params['conv'][1])
            self.conv2 = nn.Conv2d(*self.params['conv'][2])
            self.conv3 = nn.Conv2d(*self.params['conv'][3])
            self.conv4 = nn.Conv2d(*self.params['conv'][4])
            
            self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
            self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
            
            self.fc1 = nn.Linear(*self.params['fc'][1])
            self.fc2 = nn.Linear(*self.params['fc'][2])
            self.fc3 = nn.Linear(*self.params['fc'][3])
            
            self.dropout1 = nn.Dropout(self.params['drop'][1])
            self.dropout2 = nn.Dropout(self.params['drop'][2])
            
        elif self.n_layers == 10:
            self.conv1 = nn.Conv2d(*self.params['conv'][1])
            self.conv2 = nn.Conv2d(*self.params['conv'][2])
            self.conv3 = nn.Conv2d(*self.params['conv'][3])
            
            self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
            self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
            
            self.fc1 = nn.Linear(*self.params['fc'][1])
            self.fc2 = nn.Linear(*self.params['fc'][2])
            self.fc3 = nn.Linear(*self.params['fc'][3])
            
            self.dropout1 = nn.Dropout(self.params['drop'][1])
            self.dropout2 = nn.Dropout(self.params['drop'][2])
        
        elif self.n_layers == 9:
            self.conv1 = nn.Conv2d(*self.params['conv'][1])
            self.conv2 = nn.Conv2d(*self.params['conv'][2])
            
            self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
            self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
            
            self.fc1 = nn.Linear(*self.params['fc'][1])
            self.fc2 = nn.Linear(*self.params['fc'][2])
            self.fc3 = nn.Linear(*self.params['fc'][3])
            
            self.dropout1 = nn.Dropout(self.params['drop'][1])
            self.dropout2 = nn.Dropout(self.params['drop'][2])
            

    def forward(self, img):
        # Implement forward pass
        x = img
        
        if self.n_layers == 11:
            x = F.relu(self.conv1(x))
            if not self.printed: 
                print("CONV1", x.size(), end=" || ")
            x = F.relu(self.conv2(x))
            if not self.printed: 
                print("CONV2", x.size(), end=" || ")
            x = self.pool1(x)
            if not self.printed: 
                print("POOL1", x.size())
        
            x = F.relu(self.conv3(x))
            if not self.printed: 
                print("CONV3", x.size(), end=" || ")
            x = F.relu(self.conv4(x))
            if not self.printed: 
                print("CONV4", x.size(), end=" || ")
            x = self.pool2(x)
            if not self.printed: 
                print("POOL2", x.size())
        
            x = x.view(x.size(0), -1)
            x = F.relu(self.fc1(x))
            if not self.printed: 
                print("FC1", x.size(), end=" || ")
            x = F.relu(self.fc2(x))
            if not self.printed: 
                print("FC2", x.size(), end=" || ")
            x = self.fc3(x)
            if not self.printed: 
                print("FC3", x.size())
                
        elif self.n_layers == 10:
            x = F.relu(self.conv1(x))
            if not self.printed: 
                print("CONV1", x.size(), end=" || ")
            x = self.pool1(x)
            if not self.printed: 
                print("POOL1", x.size())
        
            x = F.relu(self.conv2(x))
            if not self.printed: 
                print("CONV2", x.size(), end=" || ")
            x = F.relu(self.conv3(x))
            if not self.printed: 
                print("CONV3", x.size(), end=" || ")
            x = self.pool2(x)
            if not self.printed: 
                print("POOL2", x.size())
        
            x = x.view(x.size(0), -1)
            x = F.relu(self.fc1(x))
            if not self.printed: 
                print("FC1", x.size(), end=" || ")
            x = F.relu(self.fc2(x))
            if not self.printed: 
                print("FC2", x.size(), end=" || ")
            x = self.fc3(x)
            if not self.printed: 
                print("FC3", x.size())
        
        elif self.n_layers == 9:
            x = F.relu(self.conv1(x))
            if not self.printed: 
                print("CONV1", x.size(), end=" || ")
            x = self.pool1(x)
            if not self.printed: 
                print("POOL1", x.size())
        
            x = F.relu(self.conv2(x))
            if not self.printed: 
                print("CONV2", x.size(), end=" || ")
            x = self.pool2(x)
            if not self.printed: 
                print("POOL2", x.size())
        
            x = x.view(x.size(0), -1)
            x = F.relu(self.fc1(x))
            if not self.printed: 
                print("FC1", x.size(), end=" || ")
            x = F.relu(self.fc2(x))
            if not self.printed: 
                print("FC2", x.size(), end=" || ")
            x = self.fc3(x)
            if not self.printed: 
                print("FC3", x.size())
        
        self.printed = True

        return x


In [47]:
loss_col = []
for opt in ('1e-4_mult','1e-4expo','1e-4_step'):
    model_ft = Net(net_param)
    model_ft = model_ft.to(device)
    criterion = nn.CrossEntropyLoss()
    learning_rate, decay_strategy = optimizer_param_set[opt]
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)
    if decay_strategy=='expo':
        lr_scheduler_ft = lr_scheduler.ExponentialLR(optimizer_ft, gamma=0.2)
    elif decay_strategy=='step':
        lr_scheduler_ft = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.2)
    elif decay_strategy=='mult':
        lr_scheduler_ft = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[10,25,30], gamma=0.2)
    else:
        print("ERROR 2")
            
    print("optimizer parameter set: ", opt)
    print(datetime.datetime.now())
    epo = epoch
    train_test(model_ft, criterion, optimizer_ft, lr_scheduler_ft, num_epochs=epo)
        
    loss_col.append(model_ft.loss_)
    PATH = "./cnn_chosen_"+opt+"_.pth"
    paths.append(PATH)
    torch.save({'epoch': epoch,
                'model_state_dict': model_ft.state_dict(),
                'optimizer_state_dict': optimizer_ft.state_dict()
               }, PATH)
        
    print()
    

optimizer parameter set:  1e-4_mult
2021-05-09 20:04:09.520563
CONV1 torch.Size([4, 16, 28, 28]) || CONV2 torch.Size([4, 32, 28, 28]) || POOL1 torch.Size([4, 32, 14, 14])
CONV3 torch.Size([4, 32, 12, 12]) || CONV4 torch.Size([4, 16, 12, 12]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 60]) || FC3 torch.Size([4, 10])
2021-05-09 20:05:02.297021  Epoch 1 : Average Loss 3.71754031
2021-05-09 20:05:49.278300  Epoch 2 : Average Loss 2.16361152
2021-05-09 20:06:45.339119  Epoch 3 : Average Loss 1.74501321
2021-05-09 20:07:44.644929  Epoch 4 : Average Loss 1.50915171
2021-05-09 20:08:36.352695  Epoch 5 : Average Loss 1.35554418
2021-05-09 20:09:29.163194  Epoch 6 : Average Loss 1.23776768
2021-05-09 20:10:26.967405  Epoch 7 : Average Loss 1.15838255
2021-05-09 20:11:23.230014  Epoch 8 : Average Loss 1.07924611
2021-05-09 20:12:15.077418  Epoch 9 : Average Loss 1.03045697
2021-05-09 20:13:11.975314  Epoch 10 : Average Loss 0.9792995
2021-05-09 20:14:05.0305

KeyError: '1e-4expo'

In [51]:
for opt in ('1e-4_expo','1e-4_step'):
    model_ft = Net(net_param)
    model_ft = model_ft.to(device)
    criterion = nn.CrossEntropyLoss()
    learning_rate, decay_strategy = optimizer_param_set[opt]
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)
    if decay_strategy=='expo':
        lr_scheduler_ft = lr_scheduler.ExponentialLR(optimizer_ft, gamma=0.2)
    elif decay_strategy=='step':
        lr_scheduler_ft = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.2)
    elif decay_strategy=='mult':
        lr_scheduler_ft = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[10,25,30], gamma=0.2)
    else:
        print("ERROR 2")
            
    print("optimizer parameter set: ", opt)
    print(datetime.datetime.now())
    epo = epoch
    train_test(model_ft, criterion, optimizer_ft, lr_scheduler_ft, num_epochs=epo)
        
    loss_col.append(model_ft.loss_)
    PATH = "./cnn_chosen_"+opt+"_.pth"
    paths.append(PATH)
    torch.save({'epoch': epoch,
                'model_state_dict': model_ft.state_dict(),
                'optimizer_state_dict': optimizer_ft.state_dict()
               }, PATH)
        
    print()

optimizer parameter set:  1e-4_expo
2021-05-09 20:53:02.613134
CONV1 torch.Size([4, 16, 28, 28]) || CONV2 torch.Size([4, 32, 28, 28]) || POOL1 torch.Size([4, 32, 14, 14])
CONV3 torch.Size([4, 32, 12, 12]) || CONV4 torch.Size([4, 16, 12, 12]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 60]) || FC3 torch.Size([4, 10])
2021-05-09 20:53:51.070341  Epoch 1 : Average Loss 3.90760594
2021-05-09 20:54:37.831289  Epoch 2 : Average Loss 2.25920461
2021-05-09 20:55:25.889145  Epoch 3 : Average Loss 1.82911768
2021-05-09 20:56:17.023429  Epoch 4 : Average Loss 1.57326537
2021-05-09 20:57:03.012910  Epoch 5 : Average Loss 1.41169154
2021-05-09 20:57:48.154318  Epoch 6 : Average Loss 1.30345533
2021-05-09 20:58:37.629758  Epoch 7 : Average Loss 1.20659882
2021-05-09 20:59:29.338506  Epoch 8 : Average Loss 1.11869972
2021-05-09 21:00:15.387889  Epoch 9 : Average Loss 1.05078785
2021-05-09 21:01:03.143589  Epoch 10 : Average Loss 1.00435304
2021-05-09 21:01:55.152

In [55]:
opt = '1e-4_expo'
model_ft = Net(net_param)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
learning_rate, decay_strategy = optimizer_param_set[opt]
optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)
if decay_strategy=='expo':
    lr_scheduler_ft = lr_scheduler.ExponentialLR(optimizer_ft, gamma=0.2)
elif decay_strategy=='step':
    lr_scheduler_ft = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.2)
# elif decay_strategy=='lamb':
    # lanbda1 = lambda epoch: 0.0001**epoch
    # lr_scheduler_ft = lr_scheduler.LambdaLR(optimizer_ft, lr_lambda=lanbda1, last_epoch=-1)
elif decay_strategy=='mult':
    lr_scheduler_ft = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[10,25,30], gamma=0.2)
else:
    print("ERROR 2")
        
print("optimizer parameter set: ", opt)
print(datetime.datetime.now())
epo = epoch
train_test(model_ft, criterion, optimizer_ft, lr_scheduler_ft, num_epochs=epo)
    
PATH = "./final_model_.pth"
torch.save({'epoch': epoch,
            'model_state_dict': model_ft.state_dict(),
            'optimizer_state_dict': optimizer_ft.state_dict()
           }, PATH)
        
print()


optimizer parameter set:  1e-4_expo
2021-05-09 22:29:00.688938
CONV1 torch.Size([4, 16, 28, 28]) || CONV2 torch.Size([4, 32, 28, 28]) || POOL1 torch.Size([4, 32, 14, 14])
CONV3 torch.Size([4, 32, 12, 12]) || CONV4 torch.Size([4, 16, 12, 12]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 60]) || FC3 torch.Size([4, 10])
2021-05-09 22:29:48.142142  Epoch 1 : Average Loss 3.79017468
2021-05-09 22:30:36.748259  Epoch 2 : Average Loss 2.24843753
2021-05-09 22:31:34.084822  Epoch 3 : Average Loss 1.83018891
2021-05-09 22:32:28.511608  Epoch 4 : Average Loss 1.58050202
2021-05-09 22:33:18.313859  Epoch 5 : Average Loss 1.39712898
2021-05-09 22:34:17.373930  Epoch 6 : Average Loss 1.26700837
2021-05-09 22:35:11.916874  Epoch 7 : Average Loss 1.17886284
2021-05-09 22:35:58.239016  Epoch 8 : Average Loss 1.09539515
2021-05-09 22:36:48.165340  Epoch 9 : Average Loss 1.02682403
2021-05-09 22:37:42.452587  Epoch 10 : Average Loss 0.97644355
2021-05-09 22:38:31.151