In [None]:
# this document is for the trial CNN structure

In [1]:
from __future__ import print_function, division

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, utils
import time
import os
import copy
import torch.nn as nn
import torch.nn.functional as F
import datetime

# TODO: Implement a convolutional neural network (https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html)
class Net(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    CONV1->CONV2->POOL1->CONV3->CONV4->POOL2->FC1->FC2->FC3
    """
    def __init__(self, params):
        super(Net, self).__init__()
        
        #Initialize layers
        self.params = params
        
        self.conv1 = nn.Conv2d(*self.params['conv'][1])
        self.conv2 = nn.Conv2d(*self.params['conv'][2])
        self.conv3 = nn.Conv2d(*self.params['conv'][3])
        self.conv4 = nn.Conv2d(*self.params['conv'][4])
        
        self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
        self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
        
        self.fc1 = nn.Linear(*self.params['fc'][1])
        self.fc2 = nn.Linear(*self.params['fc'][2])
        self.fc3 = nn.Linear(*self.params['fc'][3])
        
        self.printed = False

    def forward(self, img):
        # Implement forward pass
        x = img
        
        x = F.relu(self.conv1(x))
        if not self.printed: 
            print("CONV1", x.size(), end=" || ")
        x = F.relu(self.conv2(x))
        if not self.printed: 
            print("CONV2", x.size(), end=" || ")
        x = self.pool1(x)
        if not self.printed: 
            print("POOL1", x.size())
        
        x = F.relu(self.conv3(x))
        if not self.printed: 
            print("CONV3", x.size(), end=" || ")
        x = F.relu(self.conv4(x))
        if not self.printed: 
            print("CONV4", x.size(), end=" || ")
        x = self.pool2(x)
        if not self.printed: 
            print("POOL2", x.size())
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        if not self.printed: 
            print("FC1", x.size(), end=" || ")
        x = F.relu(self.fc2(x))
        if not self.printed: 
            print("FC2", x.size(), end=" || ")
        x = self.fc3(x)
        if not self.printed: 
            print("FC3", x.size())
            self.printed = True

        return x

In [2]:
def test(model):
    
    # overall training correct rate
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['train'], 0):
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Training accuracy: %d %%' % (100 * correct / total))
    
    # overall testing correct rate
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Testing accuracy: %d %%' % (100 * correct / total))
    
    # count testing predictions for each class
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[class_names[label]] += 1
                total_pred[class_names[label]] += 1

    # print accuracy for each class
    print("Testing accuracy (each class): ")
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
        if classname == "5":
            print()
    print()
    
    return


def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    
    for epoch in range(num_epochs):  

        running_loss = 0.0
        loss_record=[]
        for i, data in enumerate(dataloaders['train'], 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                loss_record.append(round(running_loss / 2000, 4))
                # print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                
        print(datetime.datetime.now(), ' Epoch', (epoch + 1), ': Average Loss', loss_record)

    print('Finished Training')
    
    test(model)
    
    return None

# Data transformer
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset initialization
data_dir = 'data' 
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # Read train and test sets, respectively.

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0) for x in ['train', 'test']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu

paths = []
params = []

In [3]:
param = {'conv':[(), 
                  (3, 16, 5, 1, 1), 
                  (16, 32, 3, 1, 1),
                  (32, 32, 3, 1, 0),
                  (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
          'pool':[(), 
                  (2, 2, 0),
                  (2, 2, 0)], # kernel_size, stride, padding
          'fc':[(), 
                (16*6*6, 120),
                (120, 90), 
                (90, 10)] # in_channels, out_channels
         }
params.append(param)

model_ft = Net(param) # Model initialization
model_ft = model_ft.to(device) # Move model to cpu
criterion = nn.CrossEntropyLoss() # Loss function initialization
# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-4) # Optimizer initialization

exponen_lr_scheduler = lr_scheduler.ExponentialLR(optimizer_ft, gamma=0.1)
epoch = 30

for n in range((epoch // 5)):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, exponen_lr_scheduler, num_epochs=5)

PATH = './9layer_30round_adam1e-4_exponlr.pth'
paths.append(PATH)
torch.save({
        'epoch': epoch,
        'model_state_dict': model_ft.state_dict(),
        'optimizer_state_dict': optimizer_ft.state_dict()
        }, PATH)

2021-05-08 12:11:26.344470
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30]) || CONV2 torch.Size([4, 32, 30, 30]) || POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13]) || CONV4 torch.Size([4, 16, 13, 13]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 90]) || FC3 torch.Size([4, 10])
2021-05-08 12:12:25.040003  Epoch 1 : Average Loss [2.1886, 1.4595, 1.2883]
2021-05-08 12:13:16.364284  Epoch 2 : Average Loss [1.0321, 0.9216, 0.8706]
2021-05-08 12:14:09.959245  Epoch 3 : Average Loss [0.7593, 0.742, 0.7072]
2021-05-08 12:14:59.844014  Epoch 4 : Average Loss [0.6387, 0.6195, 0.6]
2021-05-08 12:15:49.619419  Epoch 5 : Average Loss [0.5561, 0.5468, 0.54]
Finished Training
Training accuracy: 84 %
Testing accuracy: 81 %
Testing accuracy (each class): 
0: 87.2%;   1: 90.4%;   2: 86.2%;   3: 65.6%;   4: 86.4%;   5: 81.2%;   
6: 78.8%;   7: 87.0%;   8: 71.2%;   9: 81.8%;   
2021-05-08 12:16:23.105982
epoch range:  6  to  10
2021-05-08 12

In [4]:
param = {'conv':[(), 
                  (3, 16, 5, 1, 1), 
                  (16, 32, 3, 1, 1),
                  (32, 32, 3, 1, 0),
                  (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
          'pool':[(), 
                  (2, 2, 0),
                  (2, 2, 0)], # kernel_size, stride, padding
          'fc':[(), 
                (16*6*6, 120),
                (120, 90), 
                (90, 10)] # in_channels, out_channels
         }
params.append(param)

model_ft = Net(param) # Model initialization
model_ft = model_ft.to(device) # Move model to cpu
criterion = nn.CrossEntropyLoss() # Loss function initialization
# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-4) # Optimizer initialization
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy

epoch = 30

for n in range((epoch // 5)):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

PATH = './9layer_30round_adam1e-4_steplr.pth'
paths.append(PATH)
torch.save({
        'epoch': epoch,
        'model_state_dict': model_ft.state_dict(),
        'optimizer_state_dict': optimizer_ft.state_dict()
        }, PATH)

2021-05-08 12:40:35.209238
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30]) || CONV2 torch.Size([4, 32, 30, 30]) || POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13]) || CONV4 torch.Size([4, 16, 13, 13]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 90]) || FC3 torch.Size([4, 10])
2021-05-08 12:41:28.348102  Epoch 1 : Average Loss [2.2071, 1.3967, 1.1092]
2021-05-08 12:42:15.807525  Epoch 2 : Average Loss [0.9484, 0.8519, 0.7937]
2021-05-08 12:43:03.267877  Epoch 3 : Average Loss [0.7265, 0.6834, 0.6487]
2021-05-08 12:43:54.520933  Epoch 4 : Average Loss [0.5878, 0.599, 0.5714]
2021-05-08 12:44:46.268942  Epoch 5 : Average Loss [0.5317, 0.5088, 0.5096]
Finished Training
Training accuracy: 85 %
Testing accuracy: 83 %
Testing accuracy (each class): 
0: 87.6%;   1: 85.2%;   2: 87.2%;   3: 73.0%;   4: 92.0%;   5: 81.0%;   
6: 76.2%;   7: 89.6%;   8: 80.0%;   9: 84.0%;   
2021-05-08 12:45:13.740000
epoch range:  6  to  10
2021-05-

In [5]:
param = {'conv':[(), 
                  (3, 16, 5, 1, 1), 
                  (16, 32, 3, 1, 1),
                  (32, 32, 3, 1, 0),
                  (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
          'pool':[(), 
                  (2, 2, 0),
                  (2, 2, 0)], # kernel_size, stride, padding
          'fc':[(), 
                (16*6*6, 120),
                (120, 90), 
                (90, 10)] # in_channels, out_channels
         }
params.append(param)

model_ft = Net(param) # Model initialization
model_ft = model_ft.to(device) # Move model to cpu
criterion = nn.CrossEntropyLoss() # Loss function initialization
# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-5) # Optimizer initialization
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy

epoch = 30
for n in range((epoch // 5)):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

PATH = './9layer_30round_adam1e-5_steplr.pth'
paths.append(PATH)
torch.save({
        'epoch': epoch,
        'model_state_dict': model_ft.state_dict(),
        'optimizer_state_dict': optimizer_ft.state_dict()
        }, PATH)

2021-05-08 13:09:28.653811
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30]) || CONV2 torch.Size([4, 32, 30, 30]) || POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13]) || CONV4 torch.Size([4, 16, 13, 13]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 90]) || FC3 torch.Size([4, 10])
2021-05-08 13:10:28.795619  Epoch 1 : Average Loss [2.3042, 2.3033, 2.2931]
2021-05-08 13:11:22.631020  Epoch 2 : Average Loss [2.0532, 1.8665, 1.7453]
2021-05-08 13:12:21.131912  Epoch 3 : Average Loss [1.6027, 1.5782, 1.5474]
2021-05-08 13:13:12.941022  Epoch 4 : Average Loss [1.4713, 1.4471, 1.435]
2021-05-08 13:13:59.063583  Epoch 5 : Average Loss [1.4231, 1.3721, 1.3719]
Finished Training
Training accuracy: 55 %
Testing accuracy: 55 %
Testing accuracy (each class): 
0: 66.6%;   1: 72.2%;   2: 50.8%;   3: 32.8%;   4: 66.0%;   5: 47.4%;   
6: 48.8%;   7: 75.0%;   8: 39.2%;   9: 51.4%;   
2021-05-08 13:14:26.838408
epoch range:  6  to  10
2021-05-

In [6]:
for i in range(len(paths)):
    model = Net(params[i])
    optimizer = optim.Adam(model_ft.parameters(), lr=1e-4)
    PATH = paths[i]
    print(PATH)
    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']

    model.eval()
    test(model)
    print()

./9layer_30round_adam1e-4_exponlr.pth
CONV1 torch.Size([4, 16, 30, 30]) || CONV2 torch.Size([4, 32, 30, 30]) || POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13]) || CONV4 torch.Size([4, 16, 13, 13]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 90]) || FC3 torch.Size([4, 10])
Training accuracy: 96 %
Testing accuracy: 86 %
Testing accuracy (each class): 
0: 86.6%;   1: 87.0%;   2: 90.2%;   3: 83.0%;   4: 91.2%;   5: 85.0%;   
6: 85.0%;   7: 89.0%;   8: 86.6%;   9: 84.8%;   

./9layer_30round_adam1e-4_steplr.pth
CONV1 torch.Size([4, 16, 30, 30]) || CONV2 torch.Size([4, 32, 30, 30]) || POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13]) || CONV4 torch.Size([4, 16, 13, 13]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 90]) || FC3 torch.Size([4, 10])
Training accuracy: 96 %
Testing accuracy: 86 %
Testing accuracy (each class): 
0: 91.0%;   1: 87.4%;   2: 86.4%;   3: 85.0%;   4: 87.6%; 

In [8]:
param = {'conv':[(), 
                  (3, 16, 5, 1, 1), 
                  (16, 32, 3, 1, 1),
                  (32, 32, 3, 1, 0),
                  (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
          'pool':[(), 
                  (2, 2, 0),
                  (2, 2, 0)], # kernel_size, stride, padding
          'fc':[(), 
                (16*6*6, 120),
                (120, 90), 
                (90, 10)] # in_channels, out_channels
         }
params.append(param)

model_ft = Net(param) # Model initialization
model_ft = model_ft.to(device) # Move model to cpu
criterion = nn.CrossEntropyLoss() # Loss function initialization
# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=5e-4) # Optimizer initialization
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy

epoch = 30
for n in range((epoch // 5)):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

PATH = './9layer_30round_adam5e-4_steplr.pth'
paths.append(PATH)
torch.save({
        'epoch': epoch,
        'model_state_dict': model_ft.state_dict(),
        'optimizer_state_dict': optimizer_ft.state_dict()
        }, PATH)

2021-05-08 13:44:30.875731
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30]) || CONV2 torch.Size([4, 32, 30, 30]) || POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13]) || CONV4 torch.Size([4, 16, 13, 13]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 90]) || FC3 torch.Size([4, 10])
2021-05-08 13:45:21.591829  Epoch 1 : Average Loss [1.8685, 1.0733, 0.8645]
2021-05-08 13:46:17.655272  Epoch 2 : Average Loss [0.6927, 0.6657, 0.6519]
2021-05-08 13:47:09.573017  Epoch 3 : Average Loss [0.5697, 0.5904, 0.5428]
2021-05-08 13:48:01.873023  Epoch 4 : Average Loss [0.499, 0.5101, 0.4774]
2021-05-08 13:48:53.024517  Epoch 5 : Average Loss [0.4742, 0.4638, 0.4699]
Finished Training
Training accuracy: 87 %
Testing accuracy: 84 %
Testing accuracy (each class): 
0: 87.2%;   1: 86.0%;   2: 84.6%;   3: 71.2%;   4: 86.4%;   5: 89.8%;   
6: 84.4%;   7: 92.4%;   8: 82.6%;   9: 84.4%;   
2021-05-08 13:49:23.916877
epoch range:  6  to  10
2021-05-

In [9]:
param = {'conv':[(), 
                  (3, 16, 3, 1, 0), 
                  (16, 32, 3, 1, 1),
                  (32, 32, 3, 1, 0),
                  (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
          'pool':[(), 
                  (2, 2, 0),
                  (2, 2, 0)], # kernel_size, stride, padding
          'fc':[(), 
                (16*6*6, 120),
                (120, 90), 
                (90, 10)] # in_channels, out_channels
         }
params.append(param)

model_ft = Net(param) # Model initialization
model_ft = model_ft.to(device) # Move model to cpu
criterion = nn.CrossEntropyLoss() # Loss function initialization
# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-4) # Optimizer initialization
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy

epoch = 30
for n in range((epoch // 5)):
    print(datetime.datetime.now())
    epo = 5*n+5
    print("epoch range: ", epo-4, " to ", epo)
    train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

PATH = './9layer_30round_adam1e-4_steplr_310.pth'
paths.append(PATH)
torch.save({
        'epoch': epoch,
        'model_state_dict': model_ft.state_dict(),
        'optimizer_state_dict': optimizer_ft.state_dict()
        }, PATH)

2021-05-08 15:22:35.418812
epoch range:  1  to  5
CONV1 torch.Size([4, 16, 30, 30]) || CONV2 torch.Size([4, 32, 30, 30]) || POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13]) || CONV4 torch.Size([4, 16, 13, 13]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 90]) || FC3 torch.Size([4, 10])
2021-05-08 15:23:20.967254  Epoch 1 : Average Loss [2.3018, 1.7417, 1.431]
2021-05-08 15:24:09.922952  Epoch 2 : Average Loss [1.2775, 1.2028, 1.1288]
2021-05-08 15:25:03.300994  Epoch 3 : Average Loss [1.0, 0.9492, 0.9183]
2021-05-08 15:25:51.106545  Epoch 4 : Average Loss [0.8433, 0.7675, 0.7655]
2021-05-08 15:26:37.336319  Epoch 5 : Average Loss [0.6912, 0.6726, 0.6586]
Finished Training
Training accuracy: 80 %
Testing accuracy: 77 %
Testing accuracy (each class): 
0: 85.2%;   1: 83.2%;   2: 82.6%;   3: 66.8%;   4: 82.6%;   5: 73.4%;   
6: 66.6%;   7: 90.0%;   8: 73.8%;   9: 71.8%;   
2021-05-08 15:27:08.380409
epoch range:  6  to  10
2021-05-08 

In [18]:
for i in range(3, len(paths)):
    model = Net(params[i+1])
    optimizer = optim.Adam(model_ft.parameters(), lr=1e-4)
    PATH = paths[i]
    print(PATH)
    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']

    model.eval()
    test(model)
    print()

./9layer_30round_adam5e-4_steplr.pth
CONV1 torch.Size([4, 16, 30, 30]) || CONV2 torch.Size([4, 32, 30, 30]) || POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13]) || CONV4 torch.Size([4, 16, 13, 13]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 90]) || FC3 torch.Size([4, 10])
Training accuracy: 93 %
Testing accuracy: 87 %
Testing accuracy (each class): 
0: 88.2%;   1: 89.6%;   2: 86.6%;   3: 81.2%;   4: 89.4%;   5: 87.6%;   
6: 83.8%;   7: 90.8%;   8: 88.0%;   9: 85.2%;   

./9layer_30round_adam1e-4_steplr_310.pth
CONV1 torch.Size([4, 16, 30, 30]) || CONV2 torch.Size([4, 32, 30, 30]) || POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13]) || CONV4 torch.Size([4, 16, 13, 13]) || POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120]) || FC2 torch.Size([4, 90]) || FC3 torch.Size([4, 10])
Training accuracy: 95 %
Testing accuracy: 85 %
Testing accuracy (each class): 
0: 88.8%;   1: 89.2%;   2: 80.6%;   3: 81.8%;   4: 87.8