## Import torch and model

In [1]:
import os
# Expose only GPU 0 to this process. This is set here, before torch is
# imported in the next cell, so the setting is in place when CUDA is first used.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

In [3]:
import sys
# Add the sibling "common" directory to the module search path.
sys.path.append("../common")

# Project-local helpers (presumably located in ../common — confirm).
from model_generator import ModelGenerator
from net import Net

## Set hyperparameters

In [4]:
# ---- settings shared by every stage ----
batch_size = 64                 # mini-batch size of the train and test loaders

dropout_on = True               # forwarded to ModelGenerator(dropout=...)
batchnorm_on = True             # forwarded to ModelGenerator(batchnorm=...)

scheduler_step_size = 20        # StepLR period (in epochs) for both stages

# ---- block-wise recasting stage ----
lr_recasting = 1e-3
num_epoch_recasting = 60        # epochs spent on each recast block

# ---- final fine-tuning stage ----
lr_fine_tune = 1e-3
num_epoch_fine_tune = 100



In [5]:
# One generator is shared by the teacher and the student networks.
model_gen = ModelGenerator(dropout=dropout_on, batchnorm=batchnorm_on)

# Configure a CIFAR-10 ResNet: 83 layers, bottleneck blocks.
model_gen.CifarResnetConfig(num_layers=83, block_type='Bottleneck', cifar=10)

# Blocks to recast.
# Index 0 is the first conv layer; indices 1-27 are the residual blocks.
recasting_block_indices = range(1, 28)
target_block_type = 'ConvBlock'

# Compression ratio.
# Multiplier applied to the block-config dimensions during recasting
# (presumably filter counts — confirm in ModelGenerator); 1 keeps them unchanged.
compression_ratio = 1

# Checkpoint file paths.
pretrained_model = './cifar10_resnet83_pretrained.pth'
compressed_model = './cifar10_resnet83_to_convenet.pth'

## Load dataset

In [6]:
# Per-channel statistics used to normalise the images in both pipelines.
_CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
_CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Evaluation pipeline: tensor conversion followed by normalisation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=_CIFAR10_MEAN, std=_CIFAR10_STD),
])

# Training pipeline: random horizontal flip and random crop as augmentation,
# then the same tensor conversion and normalisation as above.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CIFAR10_MEAN, std=_CIFAR10_STD),
])

# Options common to both loaders.
_loader_options = dict(batch_size=batch_size, num_workers=2)

# Training split: augmented and shuffled.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **_loader_options)

# Test split: no augmentation, fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **_loader_options)


Files already downloaded and verified
Files already downloaded and verified


## Load pre-trained model (teacher network)

In [7]:
# Build the teacher network and restore its pre-trained weights.
model = model_gen.GetCifarResnet()
teacher = Net(model)

state = torch.load(pretrained_model)
teacher.LoadFromStateDict(state)

# NOTE(review): presumably moves the network to the GPU — confirm in Net.
teacher.Gpu()

# Measure the teacher's top-1 accuracy on the test loader.
correct = 0
total = 0
teacher.TestMode()
for data in testloader:
    images, labels = data
    outputs = teacher(Variable(images.cuda()))
    # Index of the largest output along dim 1 is the predicted class.
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    # int(...) keeps `correct` a plain Python integer whether .sum() returns a
    # Python number or a 0-dim tensor, so the percentage below is computed with
    # ordinary float arithmetic regardless of the PyTorch version.
    correct += int((predicted == labels.cuda()).sum())

print('Accuracy of the network on the 10000 test images: %4.2f %%' % (100.0 * correct / total))

Accuracy of the network on the 10000 test images: 93.18 %


## Define student network

In [8]:
# The student is built from the same generator and loaded from the same
# pre-trained checkpoint as the teacher; its blocks are replaced later,
# in the sequential recasting cell.
model = model_gen.GetCifarResnet()
student = Net(model)

state = torch.load(pretrained_model)
student.LoadFromStateDict(state)

# NOTE(review): presumably moves the network to the GPU — confirm in Net.
student.Gpu()

In [9]:
# Sanity check: top-1 accuracy of the student on the test loader before any
# block is recast.
correct = 0
total = 0
student.TestMode()
for data in testloader:
    images, labels = data
    outputs = student(Variable(images.cuda()))
    # Index of the largest output along dim 1 is the predicted class.
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    # int(...) keeps `correct` a plain Python integer whether .sum() returns a
    # Python number or a 0-dim tensor, so the percentage below is computed with
    # ordinary float arithmetic regardless of the PyTorch version.
    correct += int((predicted == labels.cuda()).sum())

print('Accuracy of the network on the 10000 test images: %4.2f %%' % (100.0 * correct / total))

Accuracy of the network on the 10000 test images: 93.18 %


## Sequential recasting

In [10]:
# Sequential recasting: for every index in `recasting_block_indices`, replace
# that student block with a `target_block_type` block, rebuild the following
# block so its input dimension matches, then train the student to reproduce
# the teacher's output at `next_block = block_idx + 1` with an MSE loss.

# define MSE loss
MSE = nn.MSELoss()

# The teacher only supplies regression targets; it is never optimised here.
teacher.TestMode()

for block_idx in recasting_block_indices:

    # ---------------------------- Recasting process ----------------------------
    # current block recasting

    config = student.GetBlockConfig(block_idx)

    config[2] = round(config[2] * compression_ratio)    # apply compression ratio

    # Handling corner case: bottleneck block recasting (5-entry config).
    is_bottleneck = len(config) == 5
    if is_bottleneck:
        mid_feature = config[4]
        # We reduce the output dimension of the bottleneck block: the output
        # dimension of the new block is the same as the output dimension of
        # the 3x3 conv in the bottleneck block.
        config[4] = round(mid_feature * compression_ratio)

    new_block = model_gen.GenNewBlock([target_block_type, config])
    # Read after GenNewBlock, as in the original ordering, in case that call
    # modifies `config`.
    source_block_type = config[0]

    student.Recasting(block_idx, new_block)

    # next block recasting: same block type, adjusted input dimension

    config = student.GetBlockConfig(block_idx + 1)

    if is_bottleneck:
        # Change next input dim to the output dim of the target block.
        config[1] = round(mid_feature * compression_ratio)
    else:
        config[1] = round(config[1] * compression_ratio)    # apply compression ratio

    new_block = model_gen.GenNewBlock([config[0], config])
    student.Recasting(block_idx + 1, new_block)

    # -------------------------- Recasting process end --------------------------

    # NOTE(review): presumably moves the freshly created blocks to the GPU — confirm in Net.
    student.Gpu()

    # NOTE(review): presumably the parameters affected by this recasting step — confirm in Net.
    params = student.GetCurrParams(block_idx)

    # A fresh optimizer and LR schedule are created for every recast block.
    optimizer = optim.Adam(params, lr = lr_recasting)
    scheduler = lr_scheduler.StepLR(optimizer, step_size = scheduler_step_size)

    print('\nBlock %d recasting is done (%s -> %s).' %(block_idx, source_block_type, target_block_type))
    print('Training start\n')
    for epoch in range(num_epoch_recasting):  # loop over the dataset multiple times

        running_loss = 0.0
        # NOTE(review): the scheduler is stepped at the top of the epoch, which
        # matches the recorded run (loss drops at epoch 21). PyTorch >= 1.1
        # expects scheduler.step() after optimizer.step(); revisit when upgrading.
        scheduler.step()

        student.TrainMode()

        for i, data in enumerate(trainloader, 0):
            # Only the images are needed: the loss compares student and
            # teacher outputs, not class labels.
            inputs, _ = data

            # wrap them in Variable
            inputs = Variable(inputs.cuda())

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            corrects = teacher(inputs, next_block= block_idx + 1)
            outputs = student(inputs, next_block = block_idx + 1)

            # Wrap a copy of the teacher output's data so it is used as a
            # fixed target.
            targets = Variable(corrects.data.clone())

            loss = MSE(outputs, targets)
            loss.backward()
            optimizer.step()

            # Incremental mean of the per-batch loss over the epoch.
            running_loss = (running_loss * i + loss.cpu().data.numpy()) / (i+1)

        # End-of-epoch accuracy of the whole student on the test loader.
        correct = 0
        total = 0
        student.TestMode()
        for data in testloader:
            images, labels = data
            outputs = student(Variable(images.cuda()))
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            # int(...) keeps `correct` a plain Python integer whether .sum()
            # returns a Python number or a 0-dim tensor.
            correct += int((predicted == labels.cuda()).sum())

        test_acc = 100.0 * correct / total

        print('(%d/%d) epoch end, loss: %3.6f, Test Acc: %4.2f %%' %(epoch + 1, num_epoch_recasting, running_loss, test_acc))


# This cell performs the sequential recasting stage, so report that stage
# (the message previously announced the end of fine-tuning).
print('\nSequential recasting is finished')


Block 1 recasting is done (ResidualBlock -> ConvBlock).
Training start

(1/60) epoch end, loss: 0.075992, Test Acc: 92.11 %
(2/60) epoch end, loss: 0.007733, Test Acc: 92.84 %
(3/60) epoch end, loss: 0.004199, Test Acc: 93.06 %
(4/60) epoch end, loss: 0.002575, Test Acc: 93.10 %
(5/60) epoch end, loss: 0.001912, Test Acc: 93.21 %
(6/60) epoch end, loss: 0.001709, Test Acc: 93.11 %
(7/60) epoch end, loss: 0.001458, Test Acc: 93.15 %
(8/60) epoch end, loss: 0.001361, Test Acc: 93.23 %
(9/60) epoch end, loss: 0.001291, Test Acc: 93.14 %
(10/60) epoch end, loss: 0.001226, Test Acc: 93.27 %
(11/60) epoch end, loss: 0.001175, Test Acc: 93.15 %
(12/60) epoch end, loss: 0.001207, Test Acc: 92.94 %
(13/60) epoch end, loss: 0.001362, Test Acc: 93.11 %
(14/60) epoch end, loss: 0.001055, Test Acc: 93.10 %
(15/60) epoch end, loss: 0.001045, Test Acc: 93.24 %
(16/60) epoch end, loss: 0.001000, Test Acc: 93.18 %
(17/60) epoch end, loss: 0.001015, Test Acc: 93.07 %
(18/60) epoch end, loss: 0.001000, 

(32/60) epoch end, loss: 0.003663, Test Acc: 93.09 %
(33/60) epoch end, loss: 0.003670, Test Acc: 93.06 %
(34/60) epoch end, loss: 0.003665, Test Acc: 93.10 %
(35/60) epoch end, loss: 0.003648, Test Acc: 93.10 %
(36/60) epoch end, loss: 0.003629, Test Acc: 93.11 %
(37/60) epoch end, loss: 0.003644, Test Acc: 93.18 %
(38/60) epoch end, loss: 0.003625, Test Acc: 93.05 %
(39/60) epoch end, loss: 0.003608, Test Acc: 93.09 %
(40/60) epoch end, loss: 0.003591, Test Acc: 93.14 %
(41/60) epoch end, loss: 0.003569, Test Acc: 93.04 %
(42/60) epoch end, loss: 0.003588, Test Acc: 93.08 %
(43/60) epoch end, loss: 0.003574, Test Acc: 93.10 %
(44/60) epoch end, loss: 0.003587, Test Acc: 93.07 %
(45/60) epoch end, loss: 0.003593, Test Acc: 93.06 %
(46/60) epoch end, loss: 0.003572, Test Acc: 93.07 %
(47/60) epoch end, loss: 0.003567, Test Acc: 93.00 %
(48/60) epoch end, loss: 0.003607, Test Acc: 93.11 %
(49/60) epoch end, loss: 0.003581, Test Acc: 93.10 %
(50/60) epoch end, loss: 0.003568, Test Acc: 9

(3/60) epoch end, loss: 0.031963, Test Acc: 91.99 %
(4/60) epoch end, loss: 0.024923, Test Acc: 92.51 %
(5/60) epoch end, loss: 0.021906, Test Acc: 92.38 %
(6/60) epoch end, loss: 0.020848, Test Acc: 92.70 %
(7/60) epoch end, loss: 0.017649, Test Acc: 92.74 %
(8/60) epoch end, loss: 0.014721, Test Acc: 92.80 %
(9/60) epoch end, loss: 0.014370, Test Acc: 92.67 %
(10/60) epoch end, loss: 0.014047, Test Acc: 92.48 %
(11/60) epoch end, loss: 0.013872, Test Acc: 92.75 %
(12/60) epoch end, loss: 0.013666, Test Acc: 92.85 %
(13/60) epoch end, loss: 0.012005, Test Acc: 92.85 %
(14/60) epoch end, loss: 0.009055, Test Acc: 92.80 %
(15/60) epoch end, loss: 0.008898, Test Acc: 92.62 %
(16/60) epoch end, loss: 0.008812, Test Acc: 92.86 %
(17/60) epoch end, loss: 0.008738, Test Acc: 92.67 %
(18/60) epoch end, loss: 0.008610, Test Acc: 92.74 %
(19/60) epoch end, loss: 0.008552, Test Acc: 92.81 %
(20/60) epoch end, loss: 0.008457, Test Acc: 92.98 %
(21/60) epoch end, loss: 0.008169, Test Acc: 92.82 %


(36/60) epoch end, loss: 0.015732, Test Acc: 92.78 %
(37/60) epoch end, loss: 0.015723, Test Acc: 92.92 %
(38/60) epoch end, loss: 0.015688, Test Acc: 92.81 %
(39/60) epoch end, loss: 0.015707, Test Acc: 92.77 %
(40/60) epoch end, loss: 0.015702, Test Acc: 92.77 %
(41/60) epoch end, loss: 0.015651, Test Acc: 92.82 %
(42/60) epoch end, loss: 0.015633, Test Acc: 92.77 %
(43/60) epoch end, loss: 0.015622, Test Acc: 92.82 %
(44/60) epoch end, loss: 0.015632, Test Acc: 92.82 %
(45/60) epoch end, loss: 0.015618, Test Acc: 92.84 %
(46/60) epoch end, loss: 0.015639, Test Acc: 92.82 %
(47/60) epoch end, loss: 0.015618, Test Acc: 92.81 %
(48/60) epoch end, loss: 0.015634, Test Acc: 92.79 %
(49/60) epoch end, loss: 0.015620, Test Acc: 92.79 %
(50/60) epoch end, loss: 0.015620, Test Acc: 92.78 %
(51/60) epoch end, loss: 0.015627, Test Acc: 92.90 %
(52/60) epoch end, loss: 0.015628, Test Acc: 92.88 %
(53/60) epoch end, loss: 0.015639, Test Acc: 92.87 %
(54/60) epoch end, loss: 0.015628, Test Acc: 9

(7/60) epoch end, loss: 0.018918, Test Acc: 92.21 %
(8/60) epoch end, loss: 0.018510, Test Acc: 91.17 %
(9/60) epoch end, loss: 0.018119, Test Acc: 90.81 %
(10/60) epoch end, loss: 0.017765, Test Acc: 91.04 %
(11/60) epoch end, loss: 0.017240, Test Acc: 92.11 %
(12/60) epoch end, loss: 0.017135, Test Acc: 91.59 %
(13/60) epoch end, loss: 0.017048, Test Acc: 91.87 %
(14/60) epoch end, loss: 0.016929, Test Acc: 91.86 %
(15/60) epoch end, loss: 0.016861, Test Acc: 91.73 %
(16/60) epoch end, loss: 0.016765, Test Acc: 92.03 %
(17/60) epoch end, loss: 0.016543, Test Acc: 92.34 %
(18/60) epoch end, loss: 0.016262, Test Acc: 91.66 %
(19/60) epoch end, loss: 0.015983, Test Acc: 91.72 %
(20/60) epoch end, loss: 0.015915, Test Acc: 92.14 %
(21/60) epoch end, loss: 0.015523, Test Acc: 92.65 %
(22/60) epoch end, loss: 0.015524, Test Acc: 92.63 %
(23/60) epoch end, loss: 0.015504, Test Acc: 92.67 %
(24/60) epoch end, loss: 0.015504, Test Acc: 92.71 %
(25/60) epoch end, loss: 0.015489, Test Acc: 92.7

(40/60) epoch end, loss: 0.019764, Test Acc: 92.50 %
(41/60) epoch end, loss: 0.019695, Test Acc: 92.65 %
(42/60) epoch end, loss: 0.019685, Test Acc: 92.58 %
(43/60) epoch end, loss: 0.019703, Test Acc: 92.57 %
(44/60) epoch end, loss: 0.019694, Test Acc: 92.53 %
(45/60) epoch end, loss: 0.019674, Test Acc: 92.54 %
(46/60) epoch end, loss: 0.019669, Test Acc: 92.55 %
(47/60) epoch end, loss: 0.019704, Test Acc: 92.60 %
(48/60) epoch end, loss: 0.019707, Test Acc: 92.62 %
(49/60) epoch end, loss: 0.019662, Test Acc: 92.60 %
(50/60) epoch end, loss: 0.019657, Test Acc: 92.55 %
(51/60) epoch end, loss: 0.019673, Test Acc: 92.60 %
(52/60) epoch end, loss: 0.019667, Test Acc: 92.56 %
(53/60) epoch end, loss: 0.019700, Test Acc: 92.57 %
(54/60) epoch end, loss: 0.019666, Test Acc: 92.56 %
(55/60) epoch end, loss: 0.019660, Test Acc: 92.59 %
(56/60) epoch end, loss: 0.019640, Test Acc: 92.52 %
(57/60) epoch end, loss: 0.019642, Test Acc: 92.58 %
(58/60) epoch end, loss: 0.019635, Test Acc: 9

(11/60) epoch end, loss: 0.032979, Test Acc: 89.92 %
(12/60) epoch end, loss: 0.032352, Test Acc: 90.35 %
(13/60) epoch end, loss: 0.032105, Test Acc: 91.53 %
(14/60) epoch end, loss: 0.031893, Test Acc: 91.07 %
(15/60) epoch end, loss: 0.031722, Test Acc: 91.96 %
(16/60) epoch end, loss: 0.031248, Test Acc: 91.24 %
(17/60) epoch end, loss: 0.030775, Test Acc: 90.99 %
(18/60) epoch end, loss: 0.030324, Test Acc: 91.97 %
(19/60) epoch end, loss: 0.030107, Test Acc: 92.03 %
(20/60) epoch end, loss: 0.029732, Test Acc: 91.58 %
(21/60) epoch end, loss: 0.029054, Test Acc: 92.31 %
(22/60) epoch end, loss: 0.029020, Test Acc: 92.27 %
(23/60) epoch end, loss: 0.029015, Test Acc: 92.33 %
(24/60) epoch end, loss: 0.029035, Test Acc: 92.23 %
(25/60) epoch end, loss: 0.028965, Test Acc: 92.29 %
(26/60) epoch end, loss: 0.028950, Test Acc: 92.20 %
(27/60) epoch end, loss: 0.028930, Test Acc: 92.18 %
(28/60) epoch end, loss: 0.028938, Test Acc: 92.33 %
(29/60) epoch end, loss: 0.028911, Test Acc: 9

(44/60) epoch end, loss: 0.011461, Test Acc: 92.59 %
(45/60) epoch end, loss: 0.011448, Test Acc: 92.63 %
(46/60) epoch end, loss: 0.011439, Test Acc: 92.54 %
(47/60) epoch end, loss: 0.011439, Test Acc: 92.51 %
(48/60) epoch end, loss: 0.011433, Test Acc: 92.49 %
(49/60) epoch end, loss: 0.011456, Test Acc: 92.56 %
(50/60) epoch end, loss: 0.011436, Test Acc: 92.60 %
(51/60) epoch end, loss: 0.011436, Test Acc: 92.63 %
(52/60) epoch end, loss: 0.011425, Test Acc: 92.52 %
(53/60) epoch end, loss: 0.011423, Test Acc: 92.60 %
(54/60) epoch end, loss: 0.011427, Test Acc: 92.56 %
(55/60) epoch end, loss: 0.011423, Test Acc: 92.48 %
(56/60) epoch end, loss: 0.011436, Test Acc: 92.58 %
(57/60) epoch end, loss: 0.011427, Test Acc: 92.61 %
(58/60) epoch end, loss: 0.011425, Test Acc: 92.65 %
(59/60) epoch end, loss: 0.011432, Test Acc: 92.57 %
(60/60) epoch end, loss: 0.011404, Test Acc: 92.61 %

Block 19 recasting is done (ResidualBlock -> ConvBlock).
Training start

(1/60) epoch end, loss: 0

(15/60) epoch end, loss: 0.024824, Test Acc: 91.35 %
(16/60) epoch end, loss: 0.024696, Test Acc: 91.46 %
(17/60) epoch end, loss: 0.024588, Test Acc: 91.67 %
(18/60) epoch end, loss: 0.024470, Test Acc: 90.74 %
(19/60) epoch end, loss: 0.024363, Test Acc: 91.76 %
(20/60) epoch end, loss: 0.024227, Test Acc: 91.19 %
(21/60) epoch end, loss: 0.023500, Test Acc: 92.26 %
(22/60) epoch end, loss: 0.023470, Test Acc: 92.16 %
(23/60) epoch end, loss: 0.023438, Test Acc: 92.34 %
(24/60) epoch end, loss: 0.023434, Test Acc: 92.39 %
(25/60) epoch end, loss: 0.023411, Test Acc: 92.22 %
(26/60) epoch end, loss: 0.023413, Test Acc: 92.21 %
(27/60) epoch end, loss: 0.023380, Test Acc: 92.27 %
(28/60) epoch end, loss: 0.023369, Test Acc: 92.25 %
(29/60) epoch end, loss: 0.023361, Test Acc: 92.20 %
(30/60) epoch end, loss: 0.023357, Test Acc: 92.37 %
(31/60) epoch end, loss: 0.023356, Test Acc: 92.25 %
(32/60) epoch end, loss: 0.023355, Test Acc: 92.46 %
(33/60) epoch end, loss: 0.023347, Test Acc: 9

(48/60) epoch end, loss: 0.027100, Test Acc: 91.90 %
(49/60) epoch end, loss: 0.027085, Test Acc: 91.96 %
(50/60) epoch end, loss: 0.027100, Test Acc: 91.83 %
(51/60) epoch end, loss: 0.027102, Test Acc: 91.95 %
(52/60) epoch end, loss: 0.027088, Test Acc: 91.91 %
(53/60) epoch end, loss: 0.027074, Test Acc: 91.98 %
(54/60) epoch end, loss: 0.027082, Test Acc: 91.95 %
(55/60) epoch end, loss: 0.027098, Test Acc: 91.86 %
(56/60) epoch end, loss: 0.027065, Test Acc: 91.92 %
(57/60) epoch end, loss: 0.027071, Test Acc: 91.92 %
(58/60) epoch end, loss: 0.027065, Test Acc: 91.87 %
(59/60) epoch end, loss: 0.027079, Test Acc: 91.94 %
(60/60) epoch end, loss: 0.027073, Test Acc: 91.93 %

Block 24 recasting is done (ResidualBlock -> ConvBlock).
Training start

(1/60) epoch end, loss: 0.069216, Test Acc: 87.48 %
(2/60) epoch end, loss: 0.038987, Test Acc: 83.92 %
(3/60) epoch end, loss: 0.035400, Test Acc: 87.40 %
(4/60) epoch end, loss: 0.033948, Test Acc: 88.49 %
(5/60) epoch end, loss: 0.032

(19/60) epoch end, loss: 0.115996, Test Acc: 90.03 %
(20/60) epoch end, loss: 0.115427, Test Acc: 88.32 %
(21/60) epoch end, loss: 0.108629, Test Acc: 91.85 %
(22/60) epoch end, loss: 0.107703, Test Acc: 91.95 %
(23/60) epoch end, loss: 0.107286, Test Acc: 91.82 %
(24/60) epoch end, loss: 0.107008, Test Acc: 92.04 %
(25/60) epoch end, loss: 0.106890, Test Acc: 91.78 %
(26/60) epoch end, loss: 0.106650, Test Acc: 91.80 %
(27/60) epoch end, loss: 0.106627, Test Acc: 91.98 %
(28/60) epoch end, loss: 0.106370, Test Acc: 92.04 %
(29/60) epoch end, loss: 0.106366, Test Acc: 92.03 %
(30/60) epoch end, loss: 0.106102, Test Acc: 91.61 %
(31/60) epoch end, loss: 0.106225, Test Acc: 91.86 %
(32/60) epoch end, loss: 0.105893, Test Acc: 91.88 %
(33/60) epoch end, loss: 0.105760, Test Acc: 91.72 %
(34/60) epoch end, loss: 0.105857, Test Acc: 91.66 %
(35/60) epoch end, loss: 0.105771, Test Acc: 92.09 %
(36/60) epoch end, loss: 0.105492, Test Acc: 92.05 %
(37/60) epoch end, loss: 0.105607, Test Acc: 9

## Fine-tuning (KD + Cross-entropy)

In [11]:
# Fine-tuning of the recast student with a combined loss:
# MSE between student and teacher outputs (KD term) + cross-entropy on labels.
# `optim` and `lr_scheduler` are already imported in the notebook's import cell.

# define loss functions
MSE = nn.MSELoss()
criterion = nn.CrossEntropyLoss()

# Optimise the parameters returned by GetTotalParams()
# (presumably every student parameter — confirm in Net).
optimizer = optim.Adam(student.GetTotalParams(), lr = lr_fine_tune)
scheduler = lr_scheduler.StepLR(optimizer, step_size = scheduler_step_size)
teacher.TestMode()
student.Gpu()

print('Fine tuning start\n')

for epoch in range(num_epoch_fine_tune):  # loop over the dataset multiple times

    running_loss = 0.0
    # NOTE(review): the scheduler is stepped at the top of the epoch; PyTorch >= 1.1
    # expects scheduler.step() after optimizer.step() — revisit when upgrading.
    scheduler.step()
    student.TrainMode()
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs = Variable(inputs.cuda())
        labels = Variable(labels.cuda())

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        corrects = teacher(inputs)
        outputs = student(inputs)

        # Wrap a copy of the teacher output's data so it is used as a fixed target.
        targets = Variable(corrects.data.clone())
        loss_KD = MSE(outputs, targets)
        loss_CE = criterion(outputs, labels)

        loss = loss_KD + loss_CE

        loss.backward()
        optimizer.step()

        # Incremental mean of the per-batch loss over the epoch.
        running_loss = (running_loss * i + loss.cpu().data.numpy()) / (i+1)

    # End-of-epoch accuracy of the student on the test loader.
    correct = 0
    total = 0
    student.TestMode()
    for data in testloader:
        images, labels = data
        outputs = student(Variable(images.cuda()))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        # int(...) keeps `correct` a plain Python integer whether .sum()
        # returns a Python number or a 0-dim tensor.
        correct += int((predicted == labels.cuda()).sum())

    print('(%d/%d) epoch end, loss: %3.6f, Test Acc: %4.2f %%' %(epoch + 1, num_epoch_fine_tune, running_loss, 100.0 * correct / total))

# This cell performs the fine-tuning stage, so report that stage
# (the message previously announced the end of sequential recasting).
print('\nFine tuning is finished')

Fine tuning start

(1/100) epoch end, loss: 3.812535, Test Acc: 89.91 %
(2/100) epoch end, loss: 3.813318, Test Acc: 87.07 %
(3/100) epoch end, loss: 3.837231, Test Acc: 90.80 %
(4/100) epoch end, loss: 3.785224, Test Acc: 90.62 %
(5/100) epoch end, loss: 3.804142, Test Acc: 90.09 %
(6/100) epoch end, loss: 3.753816, Test Acc: 87.70 %
(7/100) epoch end, loss: 3.752407, Test Acc: 88.14 %
(8/100) epoch end, loss: 3.715977, Test Acc: 90.63 %
(9/100) epoch end, loss: 3.712279, Test Acc: 90.35 %
(10/100) epoch end, loss: 3.722168, Test Acc: 89.21 %
(11/100) epoch end, loss: 3.712134, Test Acc: 90.94 %
(12/100) epoch end, loss: 3.660364, Test Acc: 89.74 %
(13/100) epoch end, loss: 3.655332, Test Acc: 89.01 %
(14/100) epoch end, loss: 3.638200, Test Acc: 89.92 %
(15/100) epoch end, loss: 3.624550, Test Acc: 90.38 %
(16/100) epoch end, loss: 3.616776, Test Acc: 90.90 %
(17/100) epoch end, loss: 3.572198, Test Acc: 89.10 %
(18/100) epoch end, loss: 3.542428, Test Acc: 89.85 %
(19/100) epoch end

In [12]:
# Show the per-block layer listing of the student after recasting and fine-tuning.
student.PrintBlocksDetail()

[[Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1