In [12]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR
from advertorch.attacks import GradientSignAttack, CarliniWagnerL2Attack, PGDAttack
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import time
import sys
sys.path.append("..")
import utils.adv_ex_utils as aus
import utils.interp_generators as igs
import utils.utils as utils
from utils.models import LeNet, DDNet, SimpleCNN
from utils.data_loaders import DataLoader

# Select the compute device: GPU index 2 when CUDA is available, otherwise the CPU.
device = torch.device('cuda', 2) if torch.cuda.is_available() else torch.device('cpu')

# Only touch CUDA state when a GPU was actually selected. Note that the check
# must use `device.type`: a torch.device never compares equal to a plain string,
# so `device != 'cpu'` is true even for the CPU device.
if device.type == 'cuda':
    torch.cuda.set_device(device)
    # makes default tensor a CUDA tensor so newly created tensors live on the GPU
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

In [13]:
def train():
    """Run one epoch of adversarial training on `train_loader`.

    Relies on notebook-level state: `net`, `train_loader`, `device`,
    `adversary`, `optimizer`, `log_interval`, `tr` and `dataset`.

    Every batch is moved to `device`, passed through
    `aus.generate_adv_exs` with the configured adversary, and the network is
    then updated with a cross-entropy step on the returned batch. Every
    `log_interval` batches the loss and two diagnostics (average Jacobian norm
    and norm of the difference in interpretations) are printed. The wall-clock
    duration of the epoch is printed at the end.
    """
    start = time.time()
    net.train()

    for step, batch in enumerate(train_loader):
        clean_samples, clean_labels = batch
        # move the clean batch onto the training device
        clean_samples = clean_samples.to(device)
        clean_labels = clean_labels.to(device)
        # train on the adversary's version of the batch instead of the clean one
        samples, labels = aus.generate_adv_exs(clean_samples, clean_labels, adversary)

        # gradients are cleared only after the attack has run, so anything the
        # attack may have accumulated on the parameters is discarded here
        optimizer.zero_grad()
        output = net(samples)
        loss = F.cross_entropy(output, labels)
        loss.backward()
        optimizer.step()

        if batch_is_silent := (step % log_interval != 0):
            continue

        # periodic diagnostics; output.shape[1] is the width of the network output
        n_outputs = output.shape[1]
        j = utils.avg_norm_jacobian(net, samples, n_outputs, tr, for_loss=False)
        i = utils.norm_diff_interp(net, samples, labels, dataset, scale=.15, for_loss=False)[0]
        print(f'\tLoss: {loss.item():.6f}, Average norm of Jacobian: {j:.6f}, Norm of difference in interpretations: {i:.6f}')

    finish = time.time()
    print(f'Time to complete epoch: {finish - start:.1f} seconds')
    
    

In [14]:
def test():
    """Evaluate `net` on the adversarially perturbed test set.

    Relies on notebook-level state: `net`, `test_loader`, `device` and
    `adversary`. Each test batch is moved to `device`, perturbed through
    `aus.generate_adv_exs`, and classified by the network in eval mode.

    Prints the accuracy on the perturbed test set.

    Returns:
        tuple[float, float]: `(adv_test_loss, adv_test_accuracy)` where the
        loss is the cross-entropy averaged per test sample and the accuracy is
        a percentage in [0, 100]. Both are normalised by
        `len(test_loader.dataset)`.
    """
    net.eval()
    adv_test_loss = 0.
    correct = 0

    for samples, labels in test_loader:
        samples, labels = samples.to(device), labels.to(device)
        # the attack is run outside of no_grad: it is given the model and may
        # need gradients to craft the perturbation
        samples, labels = aus.generate_adv_exs(samples, labels, adversary)

        # scoring needs no autograd graph; building one per batch and keeping it
        # alive through the running loss would only waste memory
        with torch.no_grad():
            output = net(samples)
            # sum (not mean) over the batch so that dividing by the dataset size
            # below yields a true per-sample average; .item() keeps a plain float
            adv_test_loss += F.cross_entropy(output, labels, reduction='sum').item()

            # max over dim 1 returns (values, indices); the indices are the predicted classes
            preds = output.max(1, keepdim=True)[1]
            correct += preds.eq(labels.view_as(preds)).sum().item()

    n_test_samples = len(test_loader.dataset)
    adv_test_loss /= n_test_samples
    adv_test_accuracy = 100. * correct / n_test_samples

    print(f'PGD-perturbed test set accuracy: ({adv_test_accuracy:.2f}%)')
    return adv_test_loss, adv_test_accuracy

In [19]:
dataset = 'MNIST'

# project data loader wrapper; exposes the train/test loaders and their batch sizes
dl = DataLoader(dataset=dataset, augment=False, path='../data')
train_loader = dl.train_loader
test_loader = dl.test_loader
tr_batch_size = dl.tr_batch_size
te_batch_size = dl.te_batch_size

# matrices used to calculate norm of jacobian and
# norm of difference between interpretations
# (second argument is presumably the number of classes -- TODO confirm against utils.bp_matrix)
tr = utils.bp_matrix(tr_batch_size, 10)
te = utils.bp_matrix(te_batch_size, 10)

# training details
n_epochs = 15
log_interval = 200  # print diagnostics every `log_interval` training batches
torch.manual_seed(7)

# dictionary to record each model's training/testing stats
performance = {}

# instantiate model and resume from a previously saved checkpoint
net = SimpleCNN()
net_name = '../trained_models/MNIST/interp_generators/simplecnn_pgd_eps1.5_niter40_epsiter.05_ord2_epochs30'
# map_location makes the checkpoint loadable regardless of the device it was saved from
net.load_state_dict(torch.load(net_name, map_location=device))

# place the model on the selected device (instead of an unconditional .cuda(),
# which ignores `device` and fails on CPU-only machines) and do so before the
# optimizer is built, so the optimizer references the parameters' final location
net.to(device)

optimizer = optim.SGD(net.parameters(), lr=.01, momentum=.9)
# multiplies the learning rate by 0.1 every 10 scheduler steps
lr_decayer = StepLR(optimizer, step_size=10, gamma=0.1)

# adversary to train/evaluate against: untargeted L2 PGD with radius 1.5,
# 40 iterations of step size 0.05, random start, inputs clipped to [0, 1]
adversary = PGDAttack(predict=net, loss_fn=F.cross_entropy, eps=1.5,
                      nb_iter=40, eps_iter=0.05, rand_init=True,
                      clip_min=0., clip_max=1., ord=2, targeted=False)

In [20]:
# Training schedule: each epoch trains on adversarial batches, reports accuracy
# on the perturbed test set, and only then advances the learning-rate decay.
for epoch in range(n_epochs):
    epoch += 1  # epochs are reported 1-based
    print(f'Epoch #{epoch}')

    train()
    test()

    lr_decayer.step()

Epoch #1
	Loss: 1.652884, Average norm of Jacobian: 0.216562, Norm of difference in interpretations: 31.000843
	Loss: 1.789160, Average norm of Jacobian: 0.280221, Norm of difference in interpretations: 28.133446
	Loss: 1.713718, Average norm of Jacobian: 0.012917, Norm of difference in interpretations: 31.478661
	Loss: 1.763574, Average norm of Jacobian: 0.117077, Norm of difference in interpretations: 30.162430
	Loss: 1.656541, Average norm of Jacobian: 0.233692, Norm of difference in interpretations: 31.451622
Time to complete epoch: 143.8 seconds
PGD-perturbed test set accuracy: (75.46%)
Epoch #2
	Loss: 1.732963, Average norm of Jacobian: 0.396023, Norm of difference in interpretations: 30.866264
	Loss: 1.695391, Average norm of Jacobian: 0.288273, Norm of difference in interpretations: 30.926460
	Loss: 1.663254, Average norm of Jacobian: 0.314258, Norm of difference in interpretations: 32.695797
	Loss: 1.645886, Average norm of Jacobian: 0.253915, Norm of difference in interpretat

In [21]:
# Persist the trained weights (state dict only) as a new checkpoint file.
torch.save(
    net.state_dict(),
    f'../trained_models/MNIST/interp_generators/simplecnn_pgd_eps1.5_niter40_epsiter.05_ord2_epochs45',
)