In [2]:
"""Code assumes the ability to train using a GPU with CUDA.
"""
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from advertorch.attacks import GradientSignAttack, CarliniWagnerL2Attack, PGDAttack
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR

import utils.adv_ex_utils as aus
import utils.interp_generators as igs
import utils.utils as utils
from utils.models import LeNet, DDNet
from utils.data_loaders import DataLoader

# Pick the compute device. GPU ordinal 1 is preferred (as before); fall back to
# ordinal 0 when only one GPU is visible, and to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f'cuda:{gpu_index}')
else:
    device = torch.device('cpu')

# A torch.device never compares equal to a plain string, so `device != 'cpu'`
# is always True; check `.type` instead. The CUDA-only calls are guarded because
# torch.cuda.set_device() rejects a CPU device.
if device.type == 'cuda':
    torch.cuda.set_device(device)
    # makes default tensor a CUDA tensor so GPU can be used
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

### Define train and test functions 

In [3]:
def train():
    """Run one pass over ``train_loader``, updating ``net`` with ``optimizer``.

    The function takes no arguments; it reads these notebook-level names:
    ``net``, ``optimizer``, ``train_loader``, ``device``, ``adversary``, ``tr``,
    ``dataset``, ``log_interval`` and the loss hyperparameters ``alpha_wd``,
    ``alpha_jr``, ``alpha_ir1``, ``alpha_ir2`` and ``scale``.

    When ``adversary`` is not None, every batch is extended with the examples
    returned by ``aus.generate_adv_exs`` before the forward pass.

    Every ``log_interval`` batches it prints the batch loss together with the
    values returned by ``utils.avg_norm_jacobian`` and ``utils.norm_diff_interp``.
    """
    net.train()

    for batch_idx, (samples, labels) in enumerate(train_loader):
        # sends to GPU, i.e. essentially converts from torch.FloatTensor to torch.cuda.FloatTensor
        samples, labels = samples.to(device), labels.to(device)

        # expand dataset with adversarial examples if adversary specified
        # (identity check: `is not None` is the idiomatic singleton comparison)
        if adversary is not None:
            adv_samples, adv_labels = aus.generate_adv_exs(samples, labels, adversary)
            samples = torch.cat([samples, adv_samples], 0)
            labels = torch.cat([labels, adv_labels], 0)
            # NOTE(review): `tr` is built from tr_batch_size, but the batch has
            # just grown; confirm utils.my_loss tolerates the larger batch.

        optimizer.zero_grad()

        output = net(samples)

        loss = utils.my_loss(output, labels, net=net, optimizer=optimizer,
                             alpha_wd=alpha_wd, alpha_jr=alpha_jr, x=samples,
                             bp_mat=tr, alpha_ir1=alpha_ir1, alpha_ir2=alpha_ir2, scale=scale, dataset=dataset)
        loss.backward()

        optimizer.step()

        if batch_idx % log_interval == 0:
            # Diagnostics only (for_loss=False); they are not part of the update above.
            j = utils.avg_norm_jacobian(net, samples, output.shape[1], tr, for_loss=False)
            i, _ = utils.norm_diff_interp(net, samples, labels, dataset, for_loss=False)
            print(f'\tLoss: {loss.item():.6f}, Average norm of Jacobian: {j:6f}, Norm of difference in interpretations: {i:6f}')

In [4]:
def test():
    """Evaluate ``net`` on ``test_loader`` and print the test-set accuracy.

    The function takes no arguments; it reads these notebook-level names:
    ``net``, ``optimizer``, ``test_loader``, ``device``, ``te``, ``dataset`` and
    the loss hyperparameters ``alpha_wd``, ``alpha_jr``, ``alpha_ir1``,
    ``alpha_ir2`` and ``scale``.

    The loss is accumulated over all test batches but is not printed or
    returned; only the accuracy line is emitted.
    """
    net.eval()
    test_loss = 0
    correct = 0

    # NOTE(review): no torch.no_grad() here on purpose. utils.my_loss receives
    # the inputs and the network and may need gradients for its regularisation
    # terms; confirm before disabling autograd.
    for samples, labels in test_loader:
        samples, labels = samples.to(device), labels.to(device)
        output = net(samples)
        # Use `te`, the matrix the sweep cell builds from te_batch_size. The
        # original passed `tr` (built from tr_batch_size) and left `te` unused.
        test_loss += utils.my_loss(output, labels, net=net, optimizer=optimizer,
                             alpha_wd=alpha_wd, alpha_jr=alpha_jr, x=samples,
                             bp_mat=te, alpha_ir1=alpha_ir1, alpha_ir2=alpha_ir2, scale=scale, dataset=dataset).item()
        # index of the highest-scoring class per sample; argmax replaces the
        # deprecated `.data` access and returns the same indices
        preds = output.argmax(dim=1, keepdim=True)
        correct += preds.eq(labels.view_as(preds)).sum().item()

    n_test = len(test_loader.dataset)
    # NOTE(review): this divides the summed per-batch losses by the number of
    # samples; verify that matches the reduction utils.my_loss applies.
    test_loss /= n_test
    test_accuracy = 100. * float(correct / n_test)

    print(f'\tTest set accuracy: ({test_accuracy:.2f}%)')

### Training with interpretation regularization

In [5]:
# Name of the dataset to train on. The sweep cell picks the network from this
# value ('MNIST' -> LeNet, 'CIFAR-10' -> DDNet).
dataset = 'MNIST'

# Project-level loader wrapper; expose its loaders and batch sizes as
# notebook-level names used by train(), test() and the sweep cell.
dl = DataLoader(dataset=dataset)
train_loader, test_loader = dl.train_loader, dl.test_loader
tr_batch_size, te_batch_size = dl.tr_batch_size, dl.te_batch_size

In [6]:
# Seed PyTorch's default random number generator once for the whole notebook.
torch.manual_seed(7)

# Training schedule: number of epochs per model, and how many batches pass
# between progress lines printed by train().
n_epochs, log_interval = 30, 200
training_round = 1

# Optional attack object consumed by train(); None disables adversarial
# augmentation of the training batches.
adversary = None

In [7]:
# Alternative grid, currently disabled:
# alpha_ir1s = [0, 1e-10, 2.5e-10, 5e-10, 7.5e-10, 1e-9, 2.5e-9, 5e-9]
alpha_ir1s = [0, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1]

# loss hyperparameters
alpha_wd = 0
alpha_jr = 0
alpha_ir2 = 0
scale = .15

# optimizer settings, identical for every model in the sweep
learning_rate = 0.01
momentum = 0.9

# Create the output directory before any training starts, so that a missing
# path cannot make torch.save() fail after a full training run.
save_dir = f'trained_models/{dataset}/interp_reg_tests2'
os.makedirs(save_dir, exist_ok=True)

# Train one model per alpha_ir1 value and save its weights.
# NOTE(review): the RNG is seeded once in the configuration cell, so each model
# in the sweep starts from a different RNG state; re-seed here if runs need to
# be comparable in isolation.
for alpha_ir1 in alpha_ir1s:
    model_name = f'{dataset}_ir1{alpha_ir1}_scale{scale}'

    # instantiate model
    if dataset == 'MNIST':
        net = LeNet()
    elif dataset == 'CIFAR-10':
        net = DDNet()
    else:
        # Fail loudly instead of silently reusing a `net` from an earlier run.
        raise ValueError(f'Unsupported dataset: {dataset!r}')

    # Move the model to the selected device *before* building the optimizer,
    # as recommended by the torch.optim documentation; `.to(device)` honours
    # the device chosen at the top of the notebook instead of assuming CUDA.
    net.to(device)

    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
    # multiply the learning rate by 0.1 every 10 epochs
    lr_decayer = StepLR(optimizer, step_size=10, gamma=0.1)

    # Matrices passed as `bp_mat` to the loss / Jacobian helpers, one per batch
    # size. The literal 10 is presumably the number of classes (TODO confirm).
    tr = utils.bp_matrix(tr_batch_size, 10)
    te = utils.bp_matrix(te_batch_size, 10)

    print(f'Beginning training for {model_name}:')
    for epoch in range(1, n_epochs + 1):
        print(f'Epoch #{epoch}')
        train()
        test()
        lr_decayer.step()

    torch.save(net.state_dict(), f'{save_dir}/{model_name}')

Beginning training for MNIST_ir10_scale0.15:
Epoch #1
	Loss: 2.314794, Average norm of Jacobian: 0.021167, Norm of difference in interpretations: 0.350569
	Loss: 0.468431, Average norm of Jacobian: 9.758289, Norm of difference in interpretations: 4.422338
	Loss: 0.260466, Average norm of Jacobian: 23.489870, Norm of difference in interpretations: 6.455780
	Loss: 0.225185, Average norm of Jacobian: 34.545021, Norm of difference in interpretations: 7.731114
	Loss: 0.193403, Average norm of Jacobian: 38.747131, Norm of difference in interpretations: 8.023989
	Test set accuracy: (97.49%)
Epoch #2
	Loss: 0.356123, Average norm of Jacobian: 41.147068, Norm of difference in interpretations: 9.860940
	Loss: 0.167601, Average norm of Jacobian: 66.832703, Norm of difference in interpretations: 11.303518
	Loss: 0.115067, Average norm of Jacobian: 55.849159, Norm of difference in interpretations: 10.520661
	Loss: 0.111011, Average norm of Jacobian: 76.293312, Norm of difference in interpretations: