## Import torch and model

In [1]:
import os

# Expose only GPU 0 to CUDA. This cell runs before the cell that imports torch.
os.environ.update(CUDA_VISIBLE_DEVICES='0')

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

In [3]:
import sys
sys.path.append("../common")

from model_generator import ModelGenerator
from net import Net

## Set hyper params

In [4]:
# --- Settings shared by both training stages ---
batch_size = 64                          # mini-batch size of the train/test DataLoaders
dropout_on, batchnorm_on = True, True    # forwarded to ModelGenerator(dropout=..., batchnorm=...)
scheduler_step_size = 20                 # StepLR step_size; the scheduler is stepped once per epoch

# --- Stage 1: sequential recasting ---
lr_recasting, num_epoch_recasting = 0.001, 60

# --- Stage 2: fine-tuning ---
lr_fine_tune, num_epoch_fine_tune = 0.001, 100

In [5]:
# Model generator configured for a 56-layer CIFAR ResNet (cifar = 10).
model_gen = ModelGenerator(dropout=dropout_on, batchnorm=batchnorm_on)
model_gen.CifarResnetConfig(num_layers=56, cifar=10)

# Which blocks are recast, and the block type each one is recast into.
# Block indexing: 0 is the conv layer, 1-27 are the residual blocks.
target_block_type = 'ConvBlock'
recasting_block_indices = range(1, 28)

# Compression ratio: block config dimensions are multiplied by this value and
# rounded during recasting (per the original note, it scales the number of
# filters). A value of 1 leaves them unchanged.
compression_ratio = 1

# Checkpoint file paths.
pretrained_model = './cifar10_resnet56_pretrained.pth'
compressed_model = './cifar10_resnet56_to_convenet.pth'

## Load dataset

In [6]:
# Training pipeline: random flip + padded random crop, then tensor conversion
# and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# Evaluation pipeline: no augmentation, same normalisation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# CIFAR-10 training split, reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# CIFAR-10 test split, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


## Load pre-trained model (teacher network)

In [7]:
# Build the configured ResNet and wrap it as the teacher network.
model = model_gen.GetCifarResnet()
teacher = Net(model)

# Restore the pre-trained weights.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
state = torch.load(pretrained_model)
teacher.LoadFromStateDict(state)

teacher.Gpu()

# Sanity check: top-1 accuracy of the teacher on the test set.
correct = 0
total = 0
teacher.TestMode()
for images, labels in testloader:
    outputs = teacher(Variable(images.cuda()))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    # On PyTorch >= 0.4 `.sum()` yields a 0-dim (CUDA) tensor rather than a
    # Python number; accumulate as a Python int so the percentage below is
    # computed with ordinary float division instead of tensor integer division.
    correct += int((predicted == labels.cuda()).sum())

print('Accuracy of the network on the 10000 test images: %4.2f %%' % (100.0 * correct / total))

Accuracy of the network on the 10000 test images: 92.97 %


## Define student network

In [8]:
# The student starts out as a copy of the teacher: same generated
# architecture, same pre-trained checkpoint.
state = torch.load(pretrained_model)
model = model_gen.GetCifarResnet()

student = Net(model)
student.LoadFromStateDict(state)
student.Gpu()

In [9]:
# Top-1 accuracy of the (not yet recast) student on the test set.
correct = 0
total = 0
student.TestMode()
for images, labels in testloader:
    outputs = student(Variable(images.cuda()))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    # On PyTorch >= 0.4 `.sum()` yields a 0-dim (CUDA) tensor rather than a
    # Python number; accumulate as a Python int so the percentage below is
    # computed with ordinary float division instead of tensor integer division.
    correct += int((predicted == labels.cuda()).sum())

print('Accuracy of the network on the 10000 test images: %4.2f %%' % (100.0 * correct / total))

Accuracy of the network on the 10000 test images: 92.97 %


## Sequential recasting

In [10]:
# Recasting loss: MSE between the student's and the teacher's outputs taken
# at the same point of the network (see `next_block` below).
MSE = nn.MSELoss()

# The teacher only supplies regression targets, so it stays in test mode.
teacher.TestMode()

for block_idx in recasting_block_indices:

    ################################################    Recasting process ######################################################
    # Current block: replace it with a block of type `target_block_type`.
    config = student.GetBlockConfig(block_idx)
    config[2] = round(config[2] * compression_ratio)    # apply compression ratio

    # Corner case: a 5-element config denotes a bottleneck block.
    is_bottleneck = len(config) == 5
    if is_bottleneck:
        mid_feature = config[4]
        # We reduce the output dimension of the bottleneck block: the output
        # dimension of the new block equals the output dimension of the 3x3
        # conv inside the bottleneck block.
        config[4] = round(mid_feature * compression_ratio)

    new_block = model_gen.GenNewBlock([target_block_type, config])
    source_block_type = config[0]
    student.Recasting(block_idx, new_block)

    # Next block: regenerate it with its own type so that its input dimension
    # follows the (possibly compressed) block that now precedes it.
    config = student.GetBlockConfig(block_idx + 1)
    config[1] = round(config[1] * compression_ratio)    # apply compression ratio

    if is_bottleneck:
        # Change next input dim to the output dim of the target block.
        config[1] = round(mid_feature * compression_ratio)

    new_block = model_gen.GenNewBlock([config[0], config])
    student.Recasting(block_idx + 1, new_block)
    ################################################    Recasting process end ##################################################

    # Move the student (including the freshly generated blocks) to the GPU.
    student.Gpu()

    # Only the parameters returned for the current block are optimised.
    params = student.GetCurrParams(block_idx)

    optimizer = optim.Adam(params, lr = lr_recasting)
    scheduler = lr_scheduler.StepLR(optimizer, step_size = scheduler_step_size)

    print('\nBlock %d recasting is done (%s -> %s).' %(block_idx, source_block_type, target_block_type))
    print('Training start\n')
    for epoch in range(num_epoch_recasting):  # loop over the dataset multiple times

        running_loss = 0.0
        # NOTE(review): stepping the scheduler at the start of the epoch is the
        # convention of older PyTorch releases; on PyTorch >= 1.1 the documented
        # order is optimizer.step() before scheduler.step(). Confirm against the
        # installed version before moving this call.
        scheduler.step()

        student.TrainMode()

        # Labels are not needed here: the teacher's output is the target.
        for i, (inputs, _) in enumerate(trainloader):
            inputs = Variable(inputs.cuda())

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward both networks with the same `next_block` argument
            corrects = teacher(inputs, next_block = block_idx + 1)
            outputs = student(inputs, next_block = block_idx + 1)

            # Detach so no gradient flows into the teacher.
            targets = corrects.detach()

            loss = MSE(outputs, targets)
            loss.backward()
            optimizer.step()

            # Incremental mean of the batch losses, kept as a plain Python float.
            running_loss = (running_loss * i + float(loss.data.cpu().numpy())) / (i + 1)

        # Evaluate the full student on the test set after every epoch.
        correct = 0
        total = 0
        student.TestMode()
        for images, labels in testloader:
            outputs = student(Variable(images.cuda()))
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            # On PyTorch >= 0.4 `.sum()` yields a 0-dim tensor; accumulate as a
            # Python int so the accuracy is not truncated by tensor integer
            # division.
            correct += int((predicted == labels.cuda()).sum())

        test_acc = 100.0 * correct / total

        print('(%d/%d) epoch end, loss: %3.6f, Test Acc: %4.2f %%' %(epoch + 1, num_epoch_recasting, running_loss, test_acc))


print('\nSequential recasting is finished')


Block 1 recasting is done (ResidualBlock -> ConvBlock).
Training start

(1/60) epoch end, loss: 0.344140, Test Acc: 92.67 %
(2/60) epoch end, loss: 0.026142, Test Acc: 92.78 %
(3/60) epoch end, loss: 0.018260, Test Acc: 92.84 %
(4/60) epoch end, loss: 0.016363, Test Acc: 92.72 %
(5/60) epoch end, loss: 0.015341, Test Acc: 92.71 %
(6/60) epoch end, loss: 0.014612, Test Acc: 92.78 %
(7/60) epoch end, loss: 0.013637, Test Acc: 92.69 %
(8/60) epoch end, loss: 0.013232, Test Acc: 92.87 %
(9/60) epoch end, loss: 0.012734, Test Acc: 92.89 %
(10/60) epoch end, loss: 0.012271, Test Acc: 92.89 %
(11/60) epoch end, loss: 0.011965, Test Acc: 92.94 %
(12/60) epoch end, loss: 0.011936, Test Acc: 92.90 %
(13/60) epoch end, loss: 0.011556, Test Acc: 92.97 %
(14/60) epoch end, loss: 0.011285, Test Acc: 92.93 %
(15/60) epoch end, loss: 0.011128, Test Acc: 92.99 %
(16/60) epoch end, loss: 0.010924, Test Acc: 92.99 %
(17/60) epoch end, loss: 0.010867, Test Acc: 92.89 %
(18/60) epoch end, loss: 0.010928, 

(32/60) epoch end, loss: 0.014658, Test Acc: 92.93 %
(33/60) epoch end, loss: 0.014734, Test Acc: 92.93 %
(34/60) epoch end, loss: 0.014576, Test Acc: 92.94 %
(35/60) epoch end, loss: 0.014520, Test Acc: 92.96 %
(36/60) epoch end, loss: 0.014565, Test Acc: 92.97 %
(37/60) epoch end, loss: 0.014743, Test Acc: 93.01 %
(38/60) epoch end, loss: 0.014629, Test Acc: 92.97 %
(39/60) epoch end, loss: 0.014556, Test Acc: 93.01 %
(40/60) epoch end, loss: 0.014569, Test Acc: 92.91 %
(41/60) epoch end, loss: 0.014434, Test Acc: 93.01 %
(42/60) epoch end, loss: 0.014376, Test Acc: 92.97 %
(43/60) epoch end, loss: 0.014410, Test Acc: 93.00 %
(44/60) epoch end, loss: 0.014522, Test Acc: 92.98 %
(45/60) epoch end, loss: 0.014485, Test Acc: 92.92 %
(46/60) epoch end, loss: 0.014458, Test Acc: 92.94 %
(47/60) epoch end, loss: 0.014581, Test Acc: 92.96 %
(48/60) epoch end, loss: 0.014501, Test Acc: 92.93 %
(49/60) epoch end, loss: 0.014305, Test Acc: 92.95 %
(50/60) epoch end, loss: 0.014354, Test Acc: 9

(3/60) epoch end, loss: 0.069038, Test Acc: 92.67 %
(4/60) epoch end, loss: 0.058303, Test Acc: 92.69 %
(5/60) epoch end, loss: 0.054008, Test Acc: 92.71 %
(6/60) epoch end, loss: 0.051321, Test Acc: 92.68 %
(7/60) epoch end, loss: 0.049582, Test Acc: 92.64 %
(8/60) epoch end, loss: 0.048292, Test Acc: 92.64 %
(9/60) epoch end, loss: 0.047317, Test Acc: 92.53 %
(10/60) epoch end, loss: 0.046338, Test Acc: 92.68 %
(11/60) epoch end, loss: 0.045348, Test Acc: 92.75 %
(12/60) epoch end, loss: 0.044375, Test Acc: 92.58 %
(13/60) epoch end, loss: 0.043918, Test Acc: 92.78 %
(14/60) epoch end, loss: 0.043003, Test Acc: 92.64 %
(15/60) epoch end, loss: 0.042923, Test Acc: 92.76 %
(16/60) epoch end, loss: 0.042350, Test Acc: 92.74 %
(17/60) epoch end, loss: 0.042189, Test Acc: 92.76 %
(18/60) epoch end, loss: 0.041723, Test Acc: 92.80 %
(19/60) epoch end, loss: 0.041595, Test Acc: 92.55 %
(20/60) epoch end, loss: 0.041087, Test Acc: 92.67 %
(21/60) epoch end, loss: 0.039601, Test Acc: 92.83 %


(36/60) epoch end, loss: 0.069530, Test Acc: 92.67 %
(37/60) epoch end, loss: 0.069489, Test Acc: 92.64 %
(38/60) epoch end, loss: 0.069359, Test Acc: 92.59 %
(39/60) epoch end, loss: 0.069271, Test Acc: 92.69 %
(40/60) epoch end, loss: 0.069330, Test Acc: 92.66 %
(41/60) epoch end, loss: 0.068916, Test Acc: 92.59 %
(42/60) epoch end, loss: 0.068976, Test Acc: 92.69 %
(43/60) epoch end, loss: 0.068910, Test Acc: 92.64 %
(44/60) epoch end, loss: 0.068771, Test Acc: 92.57 %
(45/60) epoch end, loss: 0.068976, Test Acc: 92.66 %
(46/60) epoch end, loss: 0.068956, Test Acc: 92.62 %
(47/60) epoch end, loss: 0.069110, Test Acc: 92.65 %
(48/60) epoch end, loss: 0.068962, Test Acc: 92.62 %
(49/60) epoch end, loss: 0.068970, Test Acc: 92.60 %
(50/60) epoch end, loss: 0.068846, Test Acc: 92.67 %
(51/60) epoch end, loss: 0.068890, Test Acc: 92.61 %
(52/60) epoch end, loss: 0.068964, Test Acc: 92.66 %
(53/60) epoch end, loss: 0.068825, Test Acc: 92.65 %
(54/60) epoch end, loss: 0.068848, Test Acc: 9

(7/60) epoch end, loss: 0.030734, Test Acc: 92.23 %
(8/60) epoch end, loss: 0.030067, Test Acc: 92.25 %
(9/60) epoch end, loss: 0.029386, Test Acc: 92.39 %
(10/60) epoch end, loss: 0.028858, Test Acc: 92.26 %
(11/60) epoch end, loss: 0.028290, Test Acc: 92.40 %
(12/60) epoch end, loss: 0.027948, Test Acc: 92.39 %
(13/60) epoch end, loss: 0.027667, Test Acc: 92.48 %
(14/60) epoch end, loss: 0.027402, Test Acc: 92.27 %
(15/60) epoch end, loss: 0.027228, Test Acc: 92.42 %
(16/60) epoch end, loss: 0.026986, Test Acc: 92.53 %
(17/60) epoch end, loss: 0.026722, Test Acc: 92.34 %
(18/60) epoch end, loss: 0.026426, Test Acc: 92.14 %
(19/60) epoch end, loss: 0.026286, Test Acc: 92.53 %
(20/60) epoch end, loss: 0.026212, Test Acc: 92.28 %
(21/60) epoch end, loss: 0.025257, Test Acc: 92.57 %
(22/60) epoch end, loss: 0.025204, Test Acc: 92.45 %
(23/60) epoch end, loss: 0.025153, Test Acc: 92.55 %
(24/60) epoch end, loss: 0.025133, Test Acc: 92.56 %
(25/60) epoch end, loss: 0.025147, Test Acc: 92.6

(40/60) epoch end, loss: 0.037677, Test Acc: 92.41 %
(41/60) epoch end, loss: 0.037563, Test Acc: 92.41 %
(42/60) epoch end, loss: 0.037528, Test Acc: 92.46 %
(43/60) epoch end, loss: 0.037514, Test Acc: 92.45 %
(44/60) epoch end, loss: 0.037463, Test Acc: 92.47 %
(45/60) epoch end, loss: 0.037513, Test Acc: 92.38 %
(46/60) epoch end, loss: 0.037478, Test Acc: 92.47 %
(47/60) epoch end, loss: 0.037451, Test Acc: 92.41 %
(48/60) epoch end, loss: 0.037533, Test Acc: 92.41 %
(49/60) epoch end, loss: 0.037462, Test Acc: 92.48 %
(50/60) epoch end, loss: 0.037449, Test Acc: 92.42 %
(51/60) epoch end, loss: 0.037509, Test Acc: 92.43 %
(52/60) epoch end, loss: 0.037515, Test Acc: 92.49 %
(53/60) epoch end, loss: 0.037533, Test Acc: 92.45 %
(54/60) epoch end, loss: 0.037567, Test Acc: 92.43 %
(55/60) epoch end, loss: 0.037511, Test Acc: 92.40 %
(56/60) epoch end, loss: 0.037519, Test Acc: 92.48 %
(57/60) epoch end, loss: 0.037442, Test Acc: 92.45 %
(58/60) epoch end, loss: 0.037549, Test Acc: 9

(11/60) epoch end, loss: 0.065855, Test Acc: 91.68 %
(12/60) epoch end, loss: 0.065574, Test Acc: 91.79 %
(13/60) epoch end, loss: 0.065274, Test Acc: 91.90 %
(14/60) epoch end, loss: 0.064683, Test Acc: 92.15 %
(15/60) epoch end, loss: 0.064461, Test Acc: 91.53 %
(16/60) epoch end, loss: 0.064139, Test Acc: 92.34 %
(17/60) epoch end, loss: 0.063764, Test Acc: 92.37 %
(18/60) epoch end, loss: 0.063505, Test Acc: 92.16 %
(19/60) epoch end, loss: 0.063191, Test Acc: 90.94 %
(20/60) epoch end, loss: 0.063036, Test Acc: 91.48 %
(21/60) epoch end, loss: 0.061121, Test Acc: 92.36 %
(22/60) epoch end, loss: 0.061028, Test Acc: 92.44 %
(23/60) epoch end, loss: 0.060953, Test Acc: 92.56 %
(24/60) epoch end, loss: 0.061025, Test Acc: 92.39 %
(25/60) epoch end, loss: 0.061045, Test Acc: 92.39 %
(26/60) epoch end, loss: 0.060853, Test Acc: 92.34 %
(27/60) epoch end, loss: 0.061011, Test Acc: 92.49 %
(28/60) epoch end, loss: 0.060852, Test Acc: 92.54 %
(29/60) epoch end, loss: 0.060896, Test Acc: 9

(44/60) epoch end, loss: 0.017809, Test Acc: 92.55 %
(45/60) epoch end, loss: 0.017822, Test Acc: 92.58 %
(46/60) epoch end, loss: 0.017771, Test Acc: 92.54 %
(47/60) epoch end, loss: 0.017792, Test Acc: 92.59 %
(48/60) epoch end, loss: 0.017780, Test Acc: 92.52 %
(49/60) epoch end, loss: 0.017775, Test Acc: 92.45 %
(50/60) epoch end, loss: 0.017751, Test Acc: 92.55 %
(51/60) epoch end, loss: 0.017769, Test Acc: 92.53 %
(52/60) epoch end, loss: 0.017827, Test Acc: 92.54 %
(53/60) epoch end, loss: 0.017756, Test Acc: 92.56 %
(54/60) epoch end, loss: 0.017745, Test Acc: 92.53 %
(55/60) epoch end, loss: 0.017791, Test Acc: 92.58 %
(56/60) epoch end, loss: 0.017768, Test Acc: 92.62 %
(57/60) epoch end, loss: 0.017774, Test Acc: 92.56 %
(58/60) epoch end, loss: 0.017758, Test Acc: 92.47 %
(59/60) epoch end, loss: 0.017809, Test Acc: 92.54 %
(60/60) epoch end, loss: 0.017792, Test Acc: 92.57 %

Block 19 recasting is done (ResidualBlock -> ConvBlock).
Training start

(1/60) epoch end, loss: 0

(15/60) epoch end, loss: 0.047457, Test Acc: 91.60 %
(16/60) epoch end, loss: 0.047142, Test Acc: 91.57 %
(17/60) epoch end, loss: 0.046946, Test Acc: 90.95 %
(18/60) epoch end, loss: 0.046739, Test Acc: 91.36 %
(19/60) epoch end, loss: 0.046567, Test Acc: 91.72 %
(20/60) epoch end, loss: 0.046363, Test Acc: 91.45 %
(21/60) epoch end, loss: 0.044721, Test Acc: 91.96 %
(22/60) epoch end, loss: 0.044546, Test Acc: 92.09 %
(23/60) epoch end, loss: 0.044446, Test Acc: 92.11 %
(24/60) epoch end, loss: 0.044417, Test Acc: 92.09 %
(25/60) epoch end, loss: 0.044505, Test Acc: 92.10 %
(26/60) epoch end, loss: 0.044453, Test Acc: 92.05 %
(27/60) epoch end, loss: 0.044328, Test Acc: 91.91 %
(28/60) epoch end, loss: 0.044366, Test Acc: 92.02 %
(29/60) epoch end, loss: 0.044288, Test Acc: 92.07 %
(30/60) epoch end, loss: 0.044362, Test Acc: 92.04 %
(31/60) epoch end, loss: 0.044328, Test Acc: 92.08 %
(32/60) epoch end, loss: 0.044385, Test Acc: 92.19 %
(33/60) epoch end, loss: 0.044289, Test Acc: 9

(48/60) epoch end, loss: 0.055997, Test Acc: 92.11 %
(49/60) epoch end, loss: 0.056039, Test Acc: 92.08 %
(50/60) epoch end, loss: 0.056095, Test Acc: 92.06 %
(51/60) epoch end, loss: 0.055971, Test Acc: 92.02 %
(52/60) epoch end, loss: 0.056013, Test Acc: 92.02 %
(53/60) epoch end, loss: 0.056084, Test Acc: 92.08 %
(54/60) epoch end, loss: 0.056061, Test Acc: 92.03 %
(55/60) epoch end, loss: 0.055919, Test Acc: 92.11 %
(56/60) epoch end, loss: 0.055981, Test Acc: 92.02 %
(57/60) epoch end, loss: 0.056106, Test Acc: 92.16 %
(58/60) epoch end, loss: 0.056051, Test Acc: 92.07 %
(59/60) epoch end, loss: 0.056081, Test Acc: 92.01 %
(60/60) epoch end, loss: 0.055995, Test Acc: 92.04 %

Block 24 recasting is done (ResidualBlock -> ConvBlock).
Training start

(1/60) epoch end, loss: 0.134737, Test Acc: 88.82 %
(2/60) epoch end, loss: 0.090692, Test Acc: 90.72 %
(3/60) epoch end, loss: 0.084855, Test Acc: 88.80 %
(4/60) epoch end, loss: 0.081935, Test Acc: 90.00 %
(5/60) epoch end, loss: 0.080

(19/60) epoch end, loss: 0.235692, Test Acc: 91.30 %
(20/60) epoch end, loss: 0.233471, Test Acc: 89.32 %
(21/60) epoch end, loss: 0.216670, Test Acc: 92.47 %
(22/60) epoch end, loss: 0.212494, Test Acc: 92.42 %
(23/60) epoch end, loss: 0.211749, Test Acc: 92.42 %
(24/60) epoch end, loss: 0.211226, Test Acc: 92.39 %
(25/60) epoch end, loss: 0.210134, Test Acc: 92.33 %
(26/60) epoch end, loss: 0.209672, Test Acc: 92.47 %
(27/60) epoch end, loss: 0.209278, Test Acc: 92.51 %
(28/60) epoch end, loss: 0.210028, Test Acc: 92.36 %
(29/60) epoch end, loss: 0.210021, Test Acc: 92.43 %
(30/60) epoch end, loss: 0.209843, Test Acc: 92.37 %
(31/60) epoch end, loss: 0.208901, Test Acc: 92.39 %
(32/60) epoch end, loss: 0.209646, Test Acc: 92.50 %
(33/60) epoch end, loss: 0.209315, Test Acc: 92.41 %
(34/60) epoch end, loss: 0.209009, Test Acc: 92.46 %
(35/60) epoch end, loss: 0.208670, Test Acc: 92.59 %
(36/60) epoch end, loss: 0.208222, Test Acc: 92.57 %
(37/60) epoch end, loss: 0.208464, Test Acc: 9

## Fine-tuning (KD + Cross-entropy)

In [11]:
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

# Loss functions: MSE against the teacher's outputs (knowledge-distillation
# term) and cross-entropy against the ground-truth labels.
MSE = nn.MSELoss()
criterion = nn.CrossEntropyLoss()

# Fine-tuning optimises the parameters returned by student.GetTotalParams().
optimizer = optim.Adam(student.GetTotalParams(), lr = lr_fine_tune)
scheduler = lr_scheduler.StepLR(optimizer, step_size = scheduler_step_size)
teacher.TestMode()
student.Gpu()

print('Fine tuning start')

for epoch in range(num_epoch_fine_tune):  # loop over the dataset multiple times

    running_loss = 0.0
    # NOTE(review): stepping the scheduler at the start of the epoch is the
    # convention of older PyTorch releases; on PyTorch >= 1.1 the documented
    # order is optimizer.step() before scheduler.step(). Confirm against the
    # installed version before moving this call.
    scheduler.step()
    student.TrainMode()
    for i, (inputs, labels) in enumerate(trainloader):
        # wrap the batch in Variables on the GPU
        inputs = Variable(inputs.cuda())
        labels = Variable(labels.cuda())

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward both networks on the same batch
        corrects = teacher(inputs)
        outputs = student(inputs)

        # Detach so no gradient flows into the teacher.
        targets = corrects.detach()
        loss_KD = MSE(outputs, targets)
        loss_CE = criterion(outputs, labels)

        loss = loss_KD + loss_CE

        loss.backward()
        optimizer.step()

        # Incremental mean of the batch losses, kept as a plain Python float.
        running_loss = (running_loss * i + float(loss.data.cpu().numpy())) / (i + 1)

    # Evaluate the student on the test set after every epoch.
    correct = 0
    total = 0
    student.TestMode()
    for images, labels in testloader:
        outputs = student(Variable(images.cuda()))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        # On PyTorch >= 0.4 `.sum()` yields a 0-dim tensor; accumulate as a
        # Python int so the accuracy is not truncated by tensor integer division.
        correct += int((predicted == labels.cuda()).sum())

    print('(%d/%d) epoch end, loss: %3.6f, Test Acc: %4.2f %%' %(epoch + 1, num_epoch_fine_tune, running_loss, 100.0 * correct / total))

print('\nFine tuning is finished')

Fine tuning start
(1/100) epoch end, loss: 2.508985, Test Acc: 91.09 %
(2/100) epoch end, loss: 2.544088, Test Acc: 90.55 %
(3/100) epoch end, loss: 2.559813, Test Acc: 90.02 %
(4/100) epoch end, loss: 2.541172, Test Acc: 90.77 %
(5/100) epoch end, loss: 2.568682, Test Acc: 90.29 %
(6/100) epoch end, loss: 2.521923, Test Acc: 91.02 %
(7/100) epoch end, loss: 2.540660, Test Acc: 91.55 %
(8/100) epoch end, loss: 2.490369, Test Acc: 91.05 %
(9/100) epoch end, loss: 2.494807, Test Acc: 90.98 %
(10/100) epoch end, loss: 2.469059, Test Acc: 90.36 %
(11/100) epoch end, loss: 2.498348, Test Acc: 89.54 %
(12/100) epoch end, loss: 2.508371, Test Acc: 91.50 %
(13/100) epoch end, loss: 2.466558, Test Acc: 90.61 %
(14/100) epoch end, loss: 2.440403, Test Acc: 91.07 %
(15/100) epoch end, loss: 2.497175, Test Acc: 91.63 %
(16/100) epoch end, loss: 2.425900, Test Acc: 90.95 %
(17/100) epoch end, loss: 2.409329, Test Acc: 91.62 %
(18/100) epoch end, loss: 2.424791, Test Acc: 91.51 %
(19/100) epoch end,

In [12]:
# Show the per-block layer listing of the student network (see the output below).
student.PrintBlocksDetail()

[[Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
  BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True),
  ReLU(inplace),
  'ConvBlock'],
 [Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1