In [33]:
from __future__ import print_function, division

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, utils
import time
import os
import copy
import torch.nn as nn
import torch.nn.functional as F

# TODO: Implement a convolutional neural network (https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html)
class Net(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    """
    def __init__(self):
        super(Net, self).__init__()
        
        self.params = {'conv':[(), 
                               (3, 16, 5, 1, 1), 
                               (16, 32, 3, 1, 1),
                               (32, 32, 3, 1, 0),
                               (32, 16, 3, 1, 1), 
                               '''(16, 20, 2, 1, 0)'''], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (2, 2, 0),
                               '''(3, 2, 1)'''], # kernel_size, stride, padding
                       'fc':[(), 
                             (16*6*6, 120),
                             (120, 90), 
                             (90, 10)] # in_channels, out_channels
                      }
        
        self.conv1 = nn.Conv2d(*self.params['conv'][1])
        self.conv2 = nn.Conv2d(*self.params['conv'][2])
        self.conv3 = nn.Conv2d(*self.params['conv'][3])
        self.conv4 = nn.Conv2d(*self.params['conv'][4])
        # self.conv5 = nn.Conv2d(*self.params['conv'][5])
        
        self.pool1 = nn.MaxPool2d(*self.params['pool'][1])
        self.pool2 = nn.MaxPool2d(*self.params['pool'][2])
        # self.pool3 = nn.MaxPool2d(*self.params['pool'][3])
        
        self.fc1 = nn.Linear(*self.params['fc'][1])
        self.fc2 = nn.Linear(*self.params['fc'][2])
        self.fc3 = nn.Linear(*self.params['fc'][3])
        
        self.printed = False

        # TODO: Initialize layers

    def forward(self, img):

        # TODO: Implement forward pass
        x = img
        x = F.relu(self.conv1(x))
        if not self.printed: 
            print("CONV1", x.size())
        x = F.relu(self.conv2(x))
        if not self.printed: 
            print("CONV2", x.size())
        x = self.pool1(x)
        if not self.printed: 
            print("POOL1", x.size())
        x = F.relu(self.conv3(x))
        if not self.printed: 
            print("CONV3", x.size())
        x = F.relu(self.conv4(x))
        if not self.printed: 
            print("CONV4", x.size())
        x = self.pool2(x)
        if not self.printed: 
            print("POOL2", x.size())
        # x = F.relu(self.conv5(x))
        # if not self.printed: 
            # print("CONV5", x.size())
        # x = self.pool3(x)
        # if not self.printed: 
            # print("POOL3", x.size())
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        if not self.printed: 
            print("FC1", x.size())
        x = F.relu(self.fc2(x))
        if not self.printed: 
            print("FC2", x.size())
        x = self.fc3(x)
        if not self.printed: 
            print("FC3", x.size())
            self.printed = True

        return x

# TODO: You can change these data augmentation and normalization strategies for
#  better training and testing (https://pytorch.org/vision/stable/transforms.html)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset initialization
data_dir = 'data' # Suppose the dataset is stored under this folder
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # Read train and test sets, respectively.

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0) for x in ['train', 'test']}
# trainloader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=4, shuffle=True, num_workers=2)
# teatloader = torch.utils.data.DataLoader(image_datasets['test'], batch_size=4, shuffle=True, num_workers=2)

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu

# TODO: Implement training and testing procedures (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    
    for epoch in range(num_epochs):  

        running_loss = 0.0
        loss_record=[]
        for i, data in enumerate(dataloaders['train'], 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                loss_record.append(running_loss / 2000)
                # print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                
        print('Round', (epoch + 1), ': Average Loss', loss_record)

    print('Finished Training')
    
    
    # save training results
    PATH = './cifar_net.pth'
    torch.save(model.state_dict(), PATH)
    
    
    # testing overall correct rate
    correct = 0
    total = 0
    
    with torch.no_grad():
        for i, data in enumerate(dataloaders['train'], 0):
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
    
    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}

    # again no gradients needed
    with torch.no_grad():
        for i, data in enumerate(dataloaders['test'], 0):
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[class_names[label]] += 1
                total_pred[class_names[label]] += 1


    # print accuracy for each class
    print("Accuracy: ")
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("{:1s}: {:.1f}%;  ".format(classname, accuracy), end=' ')
    print()    
    
    return None

model_ft = Net() # Model initialization

model_ft = model_ft.to(device) # Move model to cpu

criterion = nn.CrossEntropyLoss() # Loss function initialization

# TODO: Adjust the following hyper-parameters: learning rate, decay strategy, number of training epochs.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-4) # Optimizer initialization

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1) # Learning rate decay strategy

train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=30)

CONV1 torch.Size([4, 16, 30, 30])
CONV2 torch.Size([4, 32, 30, 30])
POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13])
CONV4 torch.Size([4, 16, 13, 13])
POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
Round 1 : Average Loss [2.0902830236256125, 1.4333030820563435, 1.2140446663387119]
Round 2 : Average Loss [0.9765195410065353, 0.8964032060522586, 0.8462817536561633]
Round 3 : Average Loss [0.7585549725522287, 0.7380199346821755, 0.7113457492097514]
Round 4 : Average Loss [0.6692875858271727, 0.6238972264256445, 0.6183585954619921]
Round 5 : Average Loss [0.5900628576474264, 0.5640837135501788, 0.5612593893792364]
Round 6 : Average Loss [0.5207931146665942, 0.5245295187898082, 0.5186015082289814]
Round 7 : Average Loss [0.4854678031520307, 0.49408743277014583, 0.47976339819948044]
Round 8 : Average Loss [0.44698137000064164, 0.4465921572317602, 0.4500701465805723]
Round 9 : Average Loss [0.42296299894234923, 0.423

In [26]:
'''
RECORD 1. 
self.params = {'conv':[(), (3, 6, 3, 1, 0), (6, 8, 3, 1, 1), (8, 12, 4, 1, 0), (12, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
               'pool':[(), (2, 2, 0), (3, 2, 0)], # kernel_size, stride, padding
               'fc':[(), (16*5*5, 120), (120, 84), (84, 10)] # in_channels, out_channels
               }

CONV1 torch.Size([4, 6, 30, 30])
CONV2 torch.Size([4, 8, 30, 30])
POOL1 torch.Size([4, 8, 15, 15])
CONV3 torch.Size([4, 12, 12, 12])
CONV4 torch.Size([4, 16, 12, 12])
POOL2 torch.Size([4, 16, 5, 5])

[1,  2000] loss: 2.301
[1,  4000] loss: 1.912
[1,  6000] loss: 1.412
[2,  2000] loss: 1.164
[2,  4000] loss: 1.124
[2,  6000] loss: 1.064
[3,  2000] loss: 0.999
[3,  4000] loss: 0.926
[3,  6000] loss: 0.889
[4,  2000] loss: 0.817
[4,  4000] loss: 0.803
[4,  6000] loss: 0.762
[5,  2000] loss: 0.735
[5,  4000] loss: 0.698
[5,  6000] loss: 0.693
Finished Training
Accuracy of the network on the 10000 test images: 79 %
Accuracy for class 0  is: 84.6 %
Accuracy for class 1  is: 84.4 %
Accuracy for class 2  is: 77.6 %
Accuracy for class 3  is: 63.4 %
Accuracy for class 4  is: 81.6 %
Accuracy for class 5  is: 76.6 %
Accuracy for class 6  is: 79.0 %
Accuracy for class 7  is: 85.6 %
Accuracy for class 8  is: 63.8 %
Accuracy for class 9  is: 82.4 %
'''

"\nRECORD 1. \nself.params = {'conv':[(), (3, 6, 3, 1, 0), (6, 8, 3, 1, 1), (8, 12, 4, 1, 0), (12, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding\n               'pool':[(), (2, 2, 0), (3, 2, 0)], # kernel_size, stride, padding\n               'fc':[(), (16*5*5, 120), (120, 84), (84, 10)] # in_channels, out_channels\n               }\n\nCONV1 torch.Size([4, 6, 30, 30])\nCONV2 torch.Size([4, 8, 30, 30])\nPOOL1 torch.Size([4, 8, 15, 15])\nCONV3 torch.Size([4, 12, 12, 12])\nCONV4 torch.Size([4, 16, 12, 12])\nPOOL2 torch.Size([4, 16, 5, 5])\n\n[1,  2000] loss: 2.301\n[1,  4000] loss: 1.912\n[1,  6000] loss: 1.412\n[2,  2000] loss: 1.164\n[2,  4000] loss: 1.124\n[2,  6000] loss: 1.064\n[3,  2000] loss: 0.999\n[3,  4000] loss: 0.926\n[3,  6000] loss: 0.889\n[4,  2000] loss: 0.817\n[4,  4000] loss: 0.803\n[4,  6000] loss: 0.762\n[5,  2000] loss: 0.735\n[5,  4000] loss: 0.698\n[5,  6000] loss: 0.693\nFinished Training\nAccuracy of the network on the 10000 test images:

In [None]:
'''
RECORD 2: 
self.params = {'conv':[(), 
                               (3, 6, 3, 1, 0), 
                               (6, 8, 3, 1, 1),
                               (8, 12, 4, 1, 0),
                               (12, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (3, 2, 0)], # kernel_size, stride, padding
                       'fc':[(), 
                             (16*5*5, 120),
                             (120, 84), 
                             (84, 10)] # in_channels, out_channels
                      }
CONV1 torch.Size([4, 6, 30, 30])
CONV2 torch.Size([4, 8, 30, 30])
POOL1 torch.Size([4, 8, 15, 15])
CONV3 torch.Size([4, 12, 12, 12])
CONV4 torch.Size([4, 16, 12, 12])
POOL2 torch.Size([4, 16, 5, 5])
[1,  2000] loss: 2.188
[1,  4000] loss: 1.593
[1,  6000] loss: 1.461
[2,  2000] loss: 1.226
[2,  4000] loss: 1.093
[2,  6000] loss: 1.070
[3,  2000] loss: 0.932
[3,  4000] loss: 0.908
[3,  6000] loss: 0.872
[4,  2000] loss: 0.788
[4,  4000] loss: 0.776
[4,  6000] loss: 0.757
[5,  2000] loss: 0.730
[5,  4000] loss: 0.696
[5,  6000] loss: 0.702
[6,  2000] loss: 0.632
[6,  4000] loss: 0.656
[6,  6000] loss: 0.638
[7,  2000] loss: 0.597
[7,  4000] loss: 0.594
[7,  6000] loss: 0.589
[8,  2000] loss: 0.567
[8,  4000] loss: 0.559
[8,  6000] loss: 0.537
[9,  2000] loss: 0.532
[9,  4000] loss: 0.531
[9,  6000] loss: 0.520
[10,  2000] loss: 0.489
[10,  4000] loss: 0.495
[10,  6000] loss: 0.504
[11,  2000] loss: 0.472
[11,  4000] loss: 0.458
[11,  6000] loss: 0.485
[12,  2000] loss: 0.467
[12,  4000] loss: 0.460
[12,  6000] loss: 0.442
[13,  2000] loss: 0.432
[13,  4000] loss: 0.452
[13,  6000] loss: 0.434
[14,  2000] loss: 0.408
[14,  4000] loss: 0.415
[14,  6000] loss: 0.429
[15,  2000] loss: 0.404
[15,  4000] loss: 0.417
[15,  6000] loss: 0.406
[16,  2000] loss: 0.379
[16,  4000] loss: 0.389
[16,  6000] loss: 0.399
[17,  2000] loss: 0.373
[17,  4000] loss: 0.366
[17,  6000] loss: 0.395
[18,  2000] loss: 0.358
[18,  4000] loss: 0.372
[18,  6000] loss: 0.376
[19,  2000] loss: 0.351
[19,  4000] loss: 0.346
[19,  6000] loss: 0.376
[20,  2000] loss: 0.331
[20,  4000] loss: 0.325
[20,  6000] loss: 0.364
[21,  2000] loss: 0.321
[21,  4000] loss: 0.349
[21,  6000] loss: 0.332
[22,  2000] loss: 0.334
[22,  4000] loss: 0.303
[22,  6000] loss: 0.329
[23,  2000] loss: 0.304
[23,  4000] loss: 0.310
[23,  6000] loss: 0.320
[24,  2000] loss: 0.305
[24,  4000] loss: 0.300
[24,  6000] loss: 0.304
[25,  2000] loss: 0.297
[25,  4000] loss: 0.292
[25,  6000] loss: 0.307
[26,  2000] loss: 0.284
[26,  4000] loss: 0.293
[26,  6000] loss: 0.287
[27,  2000] loss: 0.260
[27,  4000] loss: 0.287
[27,  6000] loss: 0.279
[28,  2000] loss: 0.276
[28,  4000] loss: 0.258
[28,  6000] loss: 0.272
[29,  2000] loss: 0.260
[29,  4000] loss: 0.262
[29,  6000] loss: 0.250
[30,  2000] loss: 0.257
[30,  4000] loss: 0.260
[30,  6000] loss: 0.266
Finished Training
Accuracy of the network on the 10000 test images: 92 %
Accuracy: 
0: 89.0%;   1: 89.0%;   2: 85.2%;   3: 80.2%;   4: 91.4%;   5: 83.2%;   6: 84.6%;   7: 88.0%;   8: 84.0%;   9: 85.4%;
'''

In [None]:
'''
RECORD 3:
self.params = {'conv':[(), 
                               (3, 6, 3, 1, 0), 
                               (6, 8, 3, 1, 1),
                               (8, 12, 4, 1, 0),
                               (12, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (3, 2, 0)], # kernel_size, stride, padding
                       'fc':[(), 
                             (16*5*5, 120),
                             (120, 90), 
                             (90, 10)] # in_channels, out_channels
                      }

CONV1 torch.Size([4, 6, 30, 30])
CONV2 torch.Size([4, 8, 30, 30])
POOL1 torch.Size([4, 8, 15, 15])
CONV3 torch.Size([4, 12, 12, 12])
CONV4 torch.Size([4, 16, 12, 12])
POOL2 torch.Size([4, 16, 5, 5])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
Round 1 : Average Loss [2.3035713073015214, 1.941719051003456, 1.56213891223073]
Round 2 : Average Loss [1.3903057081401349, 1.2865861642062664, 1.1882624207399786]
Round 3 : Average Loss [1.0652292382083832, 0.9908361865589396, 0.9391875851140358]
Round 4 : Average Loss [0.8548389935982413, 0.858120162406005, 0.8125418075383641]
Round 5 : Average Loss [0.7744570170817897, 0.7346646904296941, 0.7285068871197291]
Round 6 : Average Loss [0.6911415895468672, 0.683144955642405, 0.6658892105263657]
Round 7 : Average Loss [0.6423454087071295, 0.6346845802067473, 0.6316136937843694]
Round 8 : Average Loss [0.5956207122708874, 0.5900790372961692, 0.6225956782835529]
Round 9 : Average Loss [0.583510483637685, 0.571111173184734, 0.5430697990426706]
Round 10 : Average Loss [0.5331892936633085, 0.5573574690534733, 0.5294858090197958]
Round 11 : Average Loss [0.5093583702501637, 0.5255567904025665, 0.5150109651550593]
Round 12 : Average Loss [0.4898692976605671, 0.4954890231616737, 0.5066937725588941]
Round 13 : Average Loss [0.4749514111582248, 0.4750031013791304, 0.480081861673214]
Round 14 : Average Loss [0.4490284444755307, 0.49280197398806924, 0.45889962228435616]
Round 15 : Average Loss [0.431466754503097, 0.4654086917454406, 0.4183361925207646]
Round 16 : Average Loss [0.4070937013413386, 0.44256445748795614, 0.43112632668870354]
Round 17 : Average Loss [0.41363448423857335, 0.4246785718409774, 0.4274357688373966]
Round 18 : Average Loss [0.40053398570239007, 0.4087307949188489, 0.40943692623498645]
Round 19 : Average Loss [0.378528647976349, 0.39415539441806324, 0.4042966086589404]
Round 20 : Average Loss [0.37274205144034933, 0.3812238389723789, 0.3808740442254134]
Round 21 : Average Loss [0.36234476141276356, 0.3695408939281333, 0.3731981188849495]
Round 22 : Average Loss [0.33739207447494846, 0.3651314401366089, 0.36816517102223406]
Round 23 : Average Loss [0.35591416240938634, 0.34965600703982275, 0.34907923609313424]
Round 24 : Average Loss [0.33684175657530635, 0.3335704752922975, 0.34053686629116875]
Round 25 : Average Loss [0.3398178566102276, 0.3381331713457248, 0.3287389011128289]
Round 26 : Average Loss [0.3122837609841372, 0.31947983536304125, 0.3255162499377451]
Round 27 : Average Loss [0.3048614081873096, 0.32810262880601976, 0.3216131290484686]
Round 28 : Average Loss [0.32595594949402357, 0.29832948207880405, 0.3169481174704097]
Round 29 : Average Loss [0.2748509281708029, 0.30432500990091704, 0.31586595785092025]
Round 30 : Average Loss [0.2965935041750317, 0.3015805566419476, 0.2882957287800539]
Finished Training
Accuracy of the network on the 10000 test images: 91 %
Accuracy: 
0: 89.6%;   1: 86.2%;   2: 84.0%;   3: 82.0%;   4: 91.2%;   5: 79.4%;   6: 84.4%;   7: 89.6%;   8: 83.2%;   9: 88.0%;
'''

In [None]:
'''32min
RECORD 4
self.params = {'conv':[(), 
                               (3, 16, 5, 1, 1), 
                               (16, 32, 3, 1, 1),
                               (32, 32, 3, 1, 0),
                               (32, 16, 3, 1, 1)], # in_channels, out_channels, kernel_size, stride, padding
                       'pool':[(), 
                               (2, 2, 0),
                               (2, 2, 0)], # kernel_size, stride, padding
                       'fc':[(), 
                             (16*6*6, 120),
                             (120, 90), 
                             (90, 10)] # in_channels, out_channels
                      }

CONV1 torch.Size([4, 16, 30, 30])
CONV2 torch.Size([4, 32, 30, 30])
POOL1 torch.Size([4, 32, 15, 15])
CONV3 torch.Size([4, 32, 13, 13])
CONV4 torch.Size([4, 16, 13, 13])
POOL2 torch.Size([4, 16, 6, 6])
FC1 torch.Size([4, 120])
FC2 torch.Size([4, 90])
FC3 torch.Size([4, 10])
Round 1 : Average Loss [2.0902830236256125, 1.4333030820563435, 1.2140446663387119]
Round 2 : Average Loss [0.9765195410065353, 0.8964032060522586, 0.8462817536561633]
Round 3 : Average Loss [0.7585549725522287, 0.7380199346821755, 0.7113457492097514]
Round 4 : Average Loss [0.6692875858271727, 0.6238972264256445, 0.6183585954619921]
Round 5 : Average Loss [0.5900628576474264, 0.5640837135501788, 0.5612593893792364]
Round 6 : Average Loss [0.5207931146665942, 0.5245295187898082, 0.5186015082289814]
Round 7 : Average Loss [0.4854678031520307, 0.49408743277014583, 0.47976339819948044]
Round 8 : Average Loss [0.44698137000064164, 0.4465921572317602, 0.4500701465805723]
Round 9 : Average Loss [0.42296299894234923, 0.42327295894821143, 0.42104862778229835]
Round 10 : Average Loss [0.3944297524269759, 0.40249153210778604, 0.4009978407304006]
Round 11 : Average Loss [0.39751342270755774, 0.37654696094483736, 0.3559898646210695]
Round 12 : Average Loss [0.3503610491658437, 0.3480691869983109, 0.37059205556271807]
Round 13 : Average Loss [0.3346828219392792, 0.34588938937423064, 0.3367942728897972]
Round 14 : Average Loss [0.3160436117704885, 0.31082409932849986, 0.33992924181894957]
Round 15 : Average Loss [0.31177256046784535, 0.31448319979787537, 0.30790137863258965]
Round 16 : Average Loss [0.30004331115045213, 0.290345019038994, 0.3121658303022187]
Round 17 : Average Loss [0.29035273746625173, 0.28692193133823163, 0.2902792229271745]
Round 18 : Average Loss [0.27063419592217863, 0.2741615871034728, 0.2780295788845381]
Round 19 : Average Loss [0.26052654653166746, 0.273553431435128, 0.2617846004154726]
Round 20 : Average Loss [0.24709658144082028, 0.25059838774510934, 0.2516952038085965]
Round 21 : Average Loss [0.24365292568424934, 0.24378589261026126, 0.24659755952176998]
Round 22 : Average Loss [0.21218533174729684, 0.23864599350489774, 0.23116306565061792]
Round 23 : Average Loss [0.19022442725079877, 0.2400877691992176, 0.22422213045793904]
Round 24 : Average Loss [0.2008687358657496, 0.21801177830314217, 0.22202780519331292]
Round 25 : Average Loss [0.20461415364365973, 0.20306734254493153, 0.20149799013651068]
Round 26 : Average Loss [0.18161173680468568, 0.2019593750027281, 0.1973547676584371]
Round 27 : Average Loss [0.18910754263374777, 0.1689179060246418, 0.19448721481491632]
Round 28 : Average Loss [0.154682144312321, 0.17973288546768032, 0.18383646660964179]
Round 29 : Average Loss [0.1636580354213675, 0.1760761903548544, 0.17663645159545988]
Round 30 : Average Loss [0.1531912359708051, 0.15565126023796816, 0.16180228203233243]
Finished Training
Accuracy of the network on the 10000 test images: 95 %
Accuracy: 
0: 88.4%;   1: 91.2%;   2: 82.2%;   3: 81.8%;   4: 87.4%;   5: 79.6%;   6: 87.4%;   7: 87.8%;   8: 85.6%;   9: 89.8%; '''