In [None]:
import argparse
import os
import time
import shutil

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

    
import torchvision
import torchvision.transforms as transforms

from models import *   # bring everything in the folder models

# A module-level `global best_prec` statement used to open this cell. `global` has
# no effect outside a function body, so it was removed; the training cell assigns
# `best_prec` itself before reading it.
use_gpu = torch.cuda.is_available()  # NOTE(review): not read anywhere in the visible cells
print('=> Building model...')


batch_size = 128

model_name = "vggnet_gamma_x"  # the training cell stores checkpoints under result/<model_name>
model = VGG16()  # presumably provided by `from models import *` -- confirm

#print(model)

# Per-channel mean/std applied to every image after ToTensor().
normalize = transforms.Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])

# Training split: random crop (with 4 px padding) and horizontal flip as
# augmentation, followed by tensor conversion and normalisation.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)


# Test split: no augmentation, fixed order (shuffle=False).
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ]))

testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)


print_freq = 100 # train()/validate() print a status line every `print_freq` batches of `batch_size` samples
# CIFAR10 has 50,000 training data, and 10,000 validation data.

def train(trainloader, model, criterion, optimizer, epoch, gamma=1):
    """Train ``model`` for one pass over ``trainloader``.

    The optimised objective is ``loss1 + gamma * loss2`` where ``loss1`` is
    ``criterion(output, target)`` (the accuracy loss) and ``loss2`` is the sum
    of absolute values of ``model.features[0].weight`` (the "energy" term).

    Args:
        trainloader: iterable yielding ``(input, target)`` batches.
        model: network being trained; must expose ``features[0].weight`` and
            have at least one parameter.
        criterion: callable returning a scalar loss tensor for
            ``(output, target)``.
        optimizer: optimizer that steps the parameters of ``model``.
        epoch: epoch index; only used in the progress log.
        gamma: weight of the energy term. Defaults to 1, the value that used
            to be hard-coded inside the loop.

    Returns:
        None. Progress is printed every ``print_freq`` batches.
    """
    # Fresh meters on every call, so the running averages are per-epoch.
    batch_time = AverageMeter()   # wall-clock time per batch
    data_time = AverageMeter()    # time spent waiting on the data loader
    losses = AverageMeter()       # combined loss, weighted by batch size
    top1 = AverageMeter()         # top-1 precision, weighted by batch size

    model.train()
    # Follow the model's device instead of hard-coding .cuda(), so the same
    # function also runs when the model lives on the CPU.
    device = next(model.parameters()).device

    end = time.time()

    for i, (images, target) in enumerate(trainloader):
        # Time elapsed since the previous batch finished = data loading time.
        data_time.update(time.time() - end)

        images, target = images.to(device), target.to(device)

        # compute output
        output = model(images)
        loss1 = criterion(output, target)
        loss2 = model.features[0].weight.abs().sum()

        # 1:gamma accuracy:energy loss
        loss = loss1 + gamma * loss2

        # measure accuracy and record loss
        prec = accuracy(output, target)[0]
        losses.update(loss.item(), images.size(0))
        top1.update(prec.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            # Loss1 previously used the spec `4f` (minimum width 4), which
            # printed six decimals; `.4f` matches the other loss columns.
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Loss1 {loss1:.4f}\t'
                  'Loss2 {loss2:.4f}\t'
                  'Prec {top1.val:.3f}% ({top1.avg:.3f}%)'.format(
                   epoch, i, len(trainloader), batch_time=batch_time,
                   data_time=data_time, loss=losses, loss1=loss1.item(),
                   loss2=loss2.item(), top1=top1))

            

def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return the average top-1 precision.

    Only the accuracy loss ``criterion(output, target)`` is accumulated in the
    loss meter. The energy term (sum of absolute values of
    ``model.features[0].weight``) is computed for the progress log only.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: network to evaluate; must expose ``features[0].weight`` and
            have at least one parameter.
        criterion: callable returning a scalar loss tensor for
            ``(output, target)``.

    Returns:
        The batch-size-weighted average top-1 precision in percent
        (``top1.avg``); 0 if ``val_loader`` yields nothing.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    # Follow the model's device instead of hard-coding .cuda().
    device = next(model.parameters()).device

    end = time.time()
    with torch.no_grad():
        # No parameter is updated in this function, so the energy term is
        # loop-invariant: compute it once instead of once per batch.
        loss2 = model.features[0].weight.abs().sum().item()

        for i, (images, target) in enumerate(val_loader):
            images, target = images.to(device), target.to(device)

            # compute output
            output = model(images)
            loss1 = criterion(output, target)

            loss = loss1  # only accuracy loss

            # measure accuracy and record loss
            prec = accuracy(output, target)[0]
            losses.update(loss.item(), images.size(0))
            top1.update(prec.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:  # print a status line every `print_freq` batches
                print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Loss1 {loss1:.4f}\t'
                  'Loss2 {loss2:.4f}\t'
                  'Prec {top1.val:.3f}% ({top1.avg:.3f}%)'.format(
                   i, len(val_loader), batch_time=batch_time, loss=losses,
                   loss1=loss1.item(), loss2=loss2, top1=top1))

    print(' * Prec {top1.avg:.3f}% '.format(top1=top1))
    return top1.avg


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        target: 1-D tensor holding the correct class index of each sample.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one 0-dim tensor per entry of ``topk``: the percentage of
        samples whose target is among the ``k`` highest-scoring classes.
    """
    # Nothing here needs gradients; avoid recording the top-k in autograd.
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk largest scores per row, best first.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()  # -> one row per rank, one column per sample
        correct = pred.eq(target.reshape(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `pred.t()` is a non-contiguous view and `correct` can inherit
            # that layout, so `.view(-1)` raises for k > 1; `.reshape(-1)`
            # copies when it has to.
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes:
        val: the value passed to the most recent ``update`` call.
        sum: running total of ``val * n`` over all updates.
        count: running total of ``n`` over all updates.
        avg: ``sum / count`` (0 while ``count`` is 0).
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Forget every recorded value."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` (e.g. the batch size it was averaged over)."""
        self.val = val
        self.sum += val * n    ## n is impact factor
        self.count += n
        # Guard the division: a zero-weight update on an empty meter used to
        # raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0

        
def save_checkpoint(state, is_best, fdir, customised_cost_gamma=0):
    """Write ``state`` to ``<fdir>/checkpoint.pth`` and optionally keep a "best" copy.

    The file is additionally copied to ``<fdir>/model_best.pth.tar`` only when
    ``is_best`` is truthy and ``customised_cost_gamma`` equals 0. Any other
    ``customised_cost_gamma`` value produces no extra copy.
    """
    latest_path = os.path.join(fdir, 'checkpoint.pth')
    torch.save(state, latest_path)

    # Guard clause: nothing more to do unless this is the best model so far
    # and the default gamma selector is in use.
    if not is_best or customised_cost_gamma != 0:
        return

    best_path = os.path.join(fdir, 'model_best.pth.tar')
    shutil.copyfile(latest_path, best_path)


def adjust_learning_rate(optimizer, epoch, milestones=(25, 65), factor=0.1):
    """Multiply every param group's learning rate by ``factor`` at milestone epochs.

    The previous docstring described a different schedule (start at 0.1, decay
    at epochs 80 and 120); the code has always decayed at epochs 25 and 65,
    which remain the defaults.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts with an
            ``'lr'`` entry.
        epoch: index of the epoch that is about to start.
        milestones: epochs at which the learning rate is scaled.
        factor: multiplier applied to the current learning rate.
    """
    if epoch in milestones:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * factor

#model = nn.DataParallel(model).cuda()
#all_params = checkpoint['state_dict']
#model.load_state_dict(all_params, strict=False)
#criterion = nn.CrossEntropyLoss().cuda()
#validate(testloader, model, criterion)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image

def imshow(img, mean=(0.491, 0.482, 0.447), std=(0.247, 0.243, 0.262)):
    """Display a normalised channel-first image tensor with matplotlib.

    The old body undid a ``Normalize(0.5, 0.5)`` transform (``img / 2 + 0.5``),
    but this notebook normalises with the per-channel statistics used as
    defaults here, so colours were shown shifted. Pass
    ``mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)`` to get the old behaviour.

    Args:
        img: tensor laid out as (channels, height, width).
        mean: per-channel mean that was subtracted during normalisation.
        std: per-channel standard deviation that was divided by.
    """
    mean_t = torch.as_tensor(mean, dtype=img.dtype, device=img.device).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=img.dtype, device=img.device).view(-1, 1, 1)
    img = img * std_t + mean_t     # unnormalize: invert (x - mean) / std
    # Clamp to the displayable range so rounding error does not trigger
    # matplotlib's clipping warning.
    npimg = img.clamp(0, 1).detach().cpu().numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))  # CHW -> HWC for matplotlib
    plt.show()

# Pull one batch from the test loader. The loader was built with
# shuffle=False, so a fresh iterator starts at the beginning of the test set.
dataiter = iter(testloader)
# Each further next(dataiter) call advances to the following batch.
images, labels = next(dataiter)

# Tile the whole batch into a single image grid and display it.
imshow(
    torchvision.utils.make_grid(images)
)


In [None]:
# This cell is from the website

lr = 4e-3
weight_decay = 1e-4
epochs = 80
best_prec = 0  # best validation precision seen so far

model = model.cuda()
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
# weight decay: for regularization to prevent overfitting


# Checkpoints go to result/<model_name>. makedirs creates the intermediate
# `result` directory as well, and exist_ok=True replaces the separate
# "check, then create" steps.
fdir = 'result/'+str(model_name)
os.makedirs(fdir, exist_ok=True)


for epoch in range(0, epochs):
    adjust_learning_rate(optimizer, epoch)

    train(trainloader, model, criterion, optimizer, epoch)

    # evaluate on test set
    print("Validation starts")
    prec = validate(testloader, model, criterion)

    # remember best precision and save checkpoint
    is_best = prec > best_prec
    best_prec = max(prec,best_prec)
    # The spec used to be `{:1f}` (minimum width 1, six decimals); `.3f`
    # matches the precision printed by validate().
    print('best acc: {:.3f}'.format(best_prec))
    save_checkpoint({
        'epoch': epoch + 1,  # number of completed epochs
        'state_dict': model.state_dict(),
        'best_prec': best_prec,
        'optimizer': optimizer.state_dict(),
    }, is_best, fdir)
    

Epoch: [0][0/391]	Time 1.970 (1.970)	Data 0.332 (0.332)	Loss 35.8755 (35.8755)	Loss1 2.539712	Loss2 166.6788	Prec 12.500% (12.500%)
Epoch: [0][100/391]	Time 0.024 (0.044)	Data 0.007 (0.008)	Loss 3.8670 (8.7214)	Loss1 1.791746	Loss2 10.3765	Prec 32.812% (30.128%)
Epoch: [0][200/391]	Time 0.023 (0.034)	Data 0.005 (0.006)	Loss 3.4716 (6.2210)	Loss1 1.748529	Loss2 8.6152	Prec 34.375% (33.714%)
Epoch: [0][300/391]	Time 0.026 (0.031)	Data 0.008 (0.006)	Loss 3.3102 (5.3077)	Loss1 1.692913	Loss2 8.0865	Prec 43.750% (36.145%)
Validation starts
Test: [0/79]	Time 0.355 (0.355)	Loss 4.4639 (4.4639)	Loss1 1.7515	Loss2 13.5620	Prec 38.281% (38.281%)
 * Prec 40.800% 
best acc: 40.800000
Epoch: [1][0/391]	Time 0.467 (0.467)	Data 0.442 (0.442)	Loss 4.2277 (4.2277)	Loss1 1.515310	Loss2 13.5620	Prec 35.938% (35.938%)
Epoch: [1][100/391]	Time 0.023 (0.028)	Data 0.002 (0.007)	Loss 3.3544 (3.5980)	Loss1 1.569272	Loss2 8.9257	Prec 36.719% (46.078%)
Epoch: [1][200/391]	Time 0.028 (0.026)	Data 0.010 (0.006)	Lo

Epoch: [12][100/391]	Time 0.026 (0.029)	Data 0.009 (0.009)	Loss 2.7734 (2.5930)	Loss1 0.561801	Loss2 11.0582	Prec 81.250% (80.159%)
Epoch: [12][200/391]	Time 0.024 (0.027)	Data 0.002 (0.006)	Loss 2.1488 (2.4269)	Loss1 0.634251	Loss2 7.5730	Prec 79.688% (80.675%)
Epoch: [12][300/391]	Time 0.025 (0.026)	Data 0.002 (0.005)	Loss 3.0816 (2.5189)	Loss1 0.721417	Loss2 11.8009	Prec 71.094% (80.552%)
Validation starts
Test: [0/79]	Time 0.199 (0.199)	Loss 2.5586 (2.5586)	Loss1 0.9082	Loss2 8.2520	Prec 71.094% (71.094%)
 * Prec 69.840% 
best acc: 71.430000
Epoch: [13][0/391]	Time 0.537 (0.537)	Data 0.514 (0.514)	Loss 2.1887 (2.1887)	Loss1 0.538275	Loss2 8.2520	Prec 81.250% (81.250%)
Epoch: [13][100/391]	Time 0.025 (0.029)	Data 0.002 (0.008)	Loss 1.9351 (2.1359)	Loss1 0.483884	Loss2 7.2560	Prec 81.250% (81.799%)
Epoch: [13][200/391]	Time 0.025 (0.027)	Data 0.002 (0.005)	Loss 2.1469 (2.3577)	Loss1 0.480329	Loss2 8.3329	Prec 85.156% (81.456%)
Epoch: [13][300/391]	Time 0.023 (0.026)	Data 0.002 (0.004

Epoch: [24][200/391]	Time 0.025 (0.027)	Data 0.002 (0.005)	Loss 1.7586 (1.8283)	Loss1 0.318010	Loss2 7.2028	Prec 89.844% (88.293%)
Epoch: [24][300/391]	Time 0.024 (0.026)	Data 0.002 (0.004)	Loss 1.8596 (2.0129)	Loss1 0.397657	Loss2 7.3099	Prec 89.844% (87.985%)
Validation starts
Test: [0/79]	Time 0.406 (0.406)	Loss 9.5642 (9.5642)	Loss1 0.6360	Loss2 44.6412	Prec 79.688% (79.688%)
 * Prec 79.280% 
best acc: 79.280000
Epoch: [25][0/391]	Time 0.628 (0.628)	Data 0.603 (0.603)	Loss 9.3369 (9.3369)	Loss1 0.408697	Loss2 44.6412	Prec 84.375% (84.375%)
Epoch: [25][100/391]	Time 0.031 (0.031)	Data 0.009 (0.011)	Loss 7.6841 (8.0569)	Loss1 0.219823	Loss2 37.3214	Prec 90.625% (90.787%)
Epoch: [25][200/391]	Time 0.024 (0.028)	Data 0.003 (0.007)	Loss 7.2841 (7.7811)	Loss1 0.214545	Loss2 35.3476	Prec 92.188% (91.192%)
Epoch: [25][300/391]	Time 0.024 (0.027)	Data 0.002 (0.006)	Loss 6.8758 (7.5412)	Loss1 0.284605	Loss2 32.9557	Prec 89.062% (91.383%)
Validation starts
Test: [0/79]	Time 0.325 (0.325)	Loss

Epoch: [36][300/391]	Time 0.025 (0.026)	Data 0.004 (0.004)	Loss 0.6134 (0.5956)	Loss1 0.149792	Loss2 2.3183	Prec 96.094% (94.635%)
Validation starts
Test: [0/79]	Time 0.459 (0.459)	Loss 0.9601 (0.9601)	Loss1 0.5394	Loss2 2.1033	Prec 83.594% (83.594%)
 * Prec 82.180% 
best acc: 84.410000
Epoch: [37][0/391]	Time 0.439 (0.439)	Data 0.416 (0.416)	Loss 0.5444 (0.5444)	Loss1 0.123734	Loss2 2.1033	Prec 95.312% (95.312%)
Epoch: [37][100/391]	Time 0.025 (0.029)	Data 0.002 (0.007)	Loss 0.5207 (0.5874)	Loss1 0.116944	Loss2 2.0187	Prec 96.875% (94.802%)
Epoch: [37][200/391]	Time 0.027 (0.027)	Data 0.000 (0.005)	Loss 0.6424 (0.5778)	Loss1 0.172227	Loss2 2.3510	Prec 92.969% (94.978%)
Epoch: [37][300/391]	Time 0.025 (0.026)	Data 0.006 (0.004)	Loss 0.7405 (0.5944)	Loss1 0.197247	Loss2 2.7164	Prec 93.750% (94.900%)
Validation starts
Test: [0/79]	Time 0.366 (0.366)	Loss 0.9495 (0.9495)	Loss1 0.5404	Loss2 2.0459	Prec 82.812% (82.812%)
 * Prec 84.040% 
best acc: 84.410000
Epoch: [38][0/391]	Time 0.589 (0.

Validation starts
Test: [0/79]	Time 0.348 (0.348)	Loss 1.0681 (1.0681)	Loss1 0.6253	Loss2 2.2140	Prec 84.375% (84.375%)
 * Prec 83.940% 
best acc: 84.410000
Epoch: [49][0/391]	Time 0.583 (0.583)	Data 0.564 (0.564)	Loss 0.5858 (0.5858)	Loss1 0.142948	Loss2 2.2140	Prec 95.312% (95.312%)
Epoch: [49][100/391]	Time 0.024 (0.030)	Data 0.002 (0.008)	Loss 0.4474 (0.5435)	Loss1 0.054607	Loss2 1.9638	Prec 98.438% (96.279%)
Epoch: [49][200/391]	Time 0.025 (0.027)	Data 0.000 (0.005)	Loss 0.4864 (0.5252)	Loss1 0.086178	Loss2 2.0013	Prec 96.094% (96.362%)
Epoch: [49][300/391]	Time 0.025 (0.027)	Data 0.002 (0.004)	Loss 0.5944 (0.5327)	Loss1 0.096726	Loss2 2.4884	Prec 97.656% (96.278%)
Validation starts
Test: [0/79]	Time 0.346 (0.346)	Loss 0.9540 (0.9540)	Loss1 0.4702	Loss2 2.4190	Prec 86.719% (86.719%)
 * Prec 83.320% 
best acc: 84.410000
Epoch: [50][0/391]	Time 0.601 (0.601)	Data 0.576 (0.576)	Loss 0.6000 (0.6000)	Loss1 0.116173	Loss2 2.4190	Prec 96.875% (96.875%)
Epoch: [50][100/391]	Time 0.024 (0.

In [10]:
## Accuracy with customised cost function
## (this cell evaluates the checkpoint stored under result/vggnet)
fdir = 'result/vggnet/model_best.pth.tar'

# Restore the weights saved under the checkpoint's 'state_dict' key.
checkpoint = torch.load(fdir)
model.load_state_dict(checkpoint['state_dict'])

# Put the model on the GPU and in evaluation mode before measuring.
model.cuda().eval()
criterion = nn.CrossEntropyLoss().cuda()

# validate() prints its progress and returns the average top-1 precision.
prec = validate(testloader, model, criterion)

# "Energy" term: sum of absolute values of the first layer's weights.
energy_loss_initial = torch.sum(torch.abs(model.features[0].weight))
print("Absolute sum of conv1 weights: ", energy_loss_initial)

Test: [0/79]	Time 0.231 (0.231)	Loss 0.2937 (0.2937)	Loss1 0.2937	Loss2 194.8504	Prec 95.312% (95.312%)
 * Prec 91.260% 
Absolute sum of conv1 weights:  tensor(194.8504, device='cuda:0', grad_fn=<SumBackward0>)


In [None]:
## Accuracy with customised cost function, gamma=1
fdir = 'result/vggnet_gamma_1/model_best.pth.tar'

# Restore the weights saved under the checkpoint's 'state_dict' key.
checkpoint = torch.load(fdir)
model.load_state_dict(checkpoint['state_dict'])

# Put the model on the GPU and in evaluation mode before measuring.
model.cuda().eval()
criterion = nn.CrossEntropyLoss().cuda()

# validate() prints its progress and returns the average top-1 precision.
prec = validate(testloader, model, criterion)

# "Energy" term: sum of absolute values of the first layer's weights.
energy_loss_gamma_1 = torch.sum(torch.abs(model.features[0].weight))
print("Absolute sum of conv1 weights: ", energy_loss_gamma_1)


In [None]:
# HW

#  1. train resnet20 and vgg16 to achieve >90% accuracy 
#  2. save your trained model in the result folder 
#  3. Restart your jupyter notebook by "Kernel - Restart & Clear Output"
#  4. Load your saved model for vgg16 and validate to see the accuracy
#  5. As in the last part of "[W2S2_example2]_CNN_for_MNIST.ipynb", register forward pre-hooks to capture the inputs of all the conv layers.
#  6. Starting from the first pre-hooked input, compute the second pre-hooked input yourself.
#  7. Compare your computed second input vs. the prehooked second input.


In [18]:
## Accuracy with customised cost function, gamma=x
fdir = 'result/vggnet_gamma_x/model_best.pth.tar'

# Restore the weights saved under the checkpoint's 'state_dict' key.
checkpoint = torch.load(fdir)
model.load_state_dict(checkpoint['state_dict'])

# Put the model on the GPU and in evaluation mode before measuring.
model.cuda().eval()
criterion = nn.CrossEntropyLoss().cuda()

# validate() prints its progress and returns the average top-1 precision.
prec = validate(testloader, model, criterion)

# "Energy" term: sum of absolute values of the first layer's weights.
energy_loss_gamma_x = torch.sum(torch.abs(model.features[0].weight))
print("Absolute sum of conv1 weights: ", energy_loss_gamma_x)


Test: [0/79]	Time 0.228 (0.228)	Loss 0.5841 (0.5841)	Loss1 0.5841	Loss2 0.5295	Prec 82.031% (82.031%)
 * Prec 84.190% 
Absolute sum of conv1 weights:  tensor(0.5295, device='cuda:0', grad_fn=<SumBackward0>)


In [51]:
class SaveOutput:
    """Callable recorder: every call stores its second argument in ``outputs``.

    The ``(module, module_in)`` call signature lets an instance be registered
    as a forward pre-hook, in which case ``outputs`` collects the inputs each
    hooked module was called with, in call order. Despite the class name, it
    is the *inputs* that are stored.
    """

    def __init__(self):
        # Start from an empty record.
        self.clear()

    def __call__(self, module, module_in):
        # Returns None on purpose, so a hooked module's input is left untouched.
        self.outputs += [module_in]

    def clear(self):
        """Drop everything recorded so far by rebinding ``outputs`` to a new list."""
        self.outputs = list()
        
######### Save inputs from selected layer ##########
save_output = SaveOutput()

# Keep the handles returned by register_forward_pre_hook so the hooks can be
# removed again. Without that, every re-run of this cell stacks another set
# of hooks on the model, and each of them keeps storing activations on every
# later forward pass.
hook_handles = [
    layer.register_forward_pre_hook(save_output)   ## Input for the module will be grabbed
    for layer in model.modules()
    if isinstance(layer, torch.nn.Conv2d)
]

try:
    # A single batch is enough to populate save_output.outputs.
    for i, (input, target) in enumerate(trainloader):
        input, target = input.cuda(), target.cuda() ## transfer to gpu
        # Only the captured values are needed, so skip building the autograd graph.
        with torch.no_grad():
            output = model(input)
        break
finally:
    # Detach the hooks even if the forward pass fails.
    for handle in hook_handles:
        handle.remove()

In [34]:
# outputs[0] is what the recorder received on its first call; element 0 of it
# is taken as the input tensor (presumably the hooked module's first
# positional argument -- confirm against the hook API).
my_input = save_output.outputs[0][0]
my_input.shape

torch.Size([128, 3, 32, 32])

In [46]:
# Recompute by hand what the second hooked module should have received:
# push the first captured input through features[0], [1] and [2] in turn.
# NOTE(review): assumes features[0..2] are exactly the layers between the
# first and second hooked modules -- confirm against the model definition.
out1 = model.features[0](my_input)
out2 = model.features[1](out1)
my_output = model.features[2](out2)
# Compare with the largest absolute deviation. The previous signed .sum()
# could come out as 0 when positive and negative errors cancel.
(my_output - save_output.outputs[1][0]).abs().max()

tensor(0., device='cuda:0', grad_fn=<SumBackward0>)