In [1]:
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import os
import sys
sys.path.append("..")
import utils.utils as utils
import utils.adv_ex_utils as aus
from utils.models import LeNet, DDNet, SimpleCNN
from utils.data_loaders import DataLoader
from advertorch.attacks import GradientSignAttack, CarliniWagnerL2Attack, PGDAttack

# Pick the compute device: CUDA device index 2 when CUDA is available, otherwise the CPU.
device = torch.device(2 if torch.cuda.is_available() else 'cpu')
# Only touch CUDA state when a CUDA device was actually selected. The check uses
# device.type because a torch.device does not compare equal to the plain string 'cpu',
# so a `device != 'cpu'` test would also pass on CPU-only machines.
if device.type == 'cuda':
    torch.cuda.set_device(device)
    # makes default tensor a CUDA tensor so GPU can be used
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

### Define train and test functions 

In [2]:
def train():
    """Run one pass over `train_loader`, updating `net` with the interpretation-matching loss.

    Works entirely on notebook-level globals: `net`, `optimizer`, `train_loader`,
    `device`, `alpha`, `tr` and `log_interval` are read, and the lists
    `train_losses`, `jacobian_norms` and `interp_norm_diffs` are appended to.
    Returns nothing.
    """
    net.train()

    for step, (samples, labels, target_interps) in enumerate(train_loader):
        # move the whole batch onto the selected device
        samples = samples.to(device)
        labels = labels.to(device)
        target_interps = target_interps.to(device)

        # random input perturbation via the project helper
        # (presumably noise of scale .15 kept within [0, 1] -- TODO confirm in adv_ex_utils)
        samples = aus.perturb_randomly(samples, scale=.15, min=0., max=1.)

        # standard optimisation step on the interpretation-matching loss
        optimizer.zero_grad()
        output = net(samples)
        loss = utils.interp_match_loss(
            output,
            labels,
            x=samples,
            target_interps=target_interps,
            net=net,
            optimizer=optimizer,
            alpha=alpha,
        )
        loss.backward()
        optimizer.step()

        # diagnostics are only computed and recorded every `log_interval` batches
        if step % log_interval != 0:
            continue

        jac_norm = utils.avg_norm_jacobian(net, samples, output.shape[1], tr, for_loss=False)
        interp_diff, _ = utils.norm_diff_interp(net, samples, labels, scale=.15, for_loss=False)
        print(f'\tLoss: {loss.item():.6f}, Average norm of Jacobian: {jac_norm:6f}, Norm of difference in interpretations: {interp_diff:6f}')
        train_losses.append(loss.item())
        jacobian_norms.append(jac_norm)
        interp_norm_diffs.append(interp_diff)

In [3]:
def test():
    """Measure clean and adversarial accuracy of `net` over `test_loader`.

    Works on notebook-level globals: reads `net`, `test_loader`, `device` and
    `adversary`, prints both accuracies (as percentages of
    `len(test_loader.dataset)`) and appends them to `test_accuracies` and
    `adv_test_accuracies`. Returns nothing.
    """
    net.eval()
    correct = 0
    adv_correct = 0

    # each batch also carries target interpretations, which evaluation does not use
    for samples, labels, _target_interps in test_loader:
        samples, labels = samples.to(device), labels.to(device)

        # clean forward pass: only predictions are needed, so skip building the autograd graph
        with torch.no_grad():
            output = net(samples)

        # The attack is deliberately called outside no_grad (it presumably needs input
        # gradients). The labels it returns are used to score both clean and adversarial
        # predictions -- assumes they stay aligned one-to-one with the batch; TODO confirm.
        adv_samples, labels = aus.generate_adv_exs(samples, labels, adversary)
        with torch.no_grad():
            adv_output = net(adv_samples)

        # predicted class = index of the largest output along dim 1
        preds = output.argmax(dim=1, keepdim=True)
        correct += preds.eq(labels.view_as(preds)).sum().item()

        adv_preds = adv_output.argmax(dim=1, keepdim=True)
        adv_correct += adv_preds.eq(labels.view_as(adv_preds)).sum().item()

    n_samples = len(test_loader.dataset)
    test_accuracy = 100. * float(correct / n_samples)
    adv_test_accuracy = 100. * float(adv_correct / n_samples)

    print(f'\tTest set accuracy: ({test_accuracy:.2f}%)')
    print(f'\tPGD-perturbed test set accuracy: ({adv_test_accuracy:.2f}%)')

    test_accuracies.append(test_accuracy)
    adv_test_accuracies.append(adv_test_accuracy)

### Training

In [None]:
# name used to build the output paths for saved weights
dataset = 'MNIST'

# project data-loader wrapper; exposes the train/test loaders and their batch sizes
dl = DataLoader(dataset='MNIST_interps', augment=False, model='lenet', path='../data', thresh=1.)
train_loader = dl.train_loader
test_loader = dl.test_loader
tr_batch_size = dl.tr_batch_size
te_batch_size = dl.te_batch_size

# training details
n_epochs = 30
log_interval = 200
training_round = 3
torch.manual_seed(7)
# one model is trained per value; train() passes the current `alpha` to the loss
alphas = [0, 1e-3, 4e-3, 7e-3, 1e-2, 4e-2, 7e-2, 1e-1]

# optimizer hyperparameters, identical for every alpha
learning_rate = 0.01
momentum = 0.9

# Create the output directory up front so that a missing folder cannot make
# torch.save fail only after a full training run has finished.
save_dir = f'../trained_models/{dataset}/interp_match_reg2'
os.makedirs(save_dir, exist_ok=True)

# dictionary to record each model's training/testing stats
performance = {}

# create matrices for back propagation
# (second argument is presumably the number of output classes -- TODO confirm in utils.bp_matrix)
tr = utils.bp_matrix(tr_batch_size, 10)
te = utils.bp_matrix(te_batch_size, 10)

for alpha in alphas:
    # Instantiate the model and move it to the device chosen at the top of the
    # notebook (rather than an unconditional .cuda()), before the optimizer is built.
    net = SimpleCNN()
    net.to(device)
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
    # multiplies the learning rate by 0.1 every 10 scheduler steps (one step per epoch below)
    lr_decayer = StepLR(optimizer, step_size=10, gamma=0.1)

    # adversary used by test() to craft the perturbed test samples
    adversary = PGDAttack(predict=net, loss_fn=F.cross_entropy, eps=1.5,
                          nb_iter=40, eps_iter=0.05, rand_init=True,
                          clip_min=0., clip_max=1., ord=2, targeted=False)

    # for tracking training progress (filled in by train() and test())
    train_losses = []
    test_accuracies = []
    adv_test_accuracies = []
    jacobian_norms = []
    interp_norm_diffs = []

    for epoch in range(1, n_epochs + 1):
        print(f'Epoch #{epoch}')
        train()
        test()
        lr_decayer.step()

    # keep this model's stats and persist its weights before moving to the next alpha
    performance[f'simplecnn_alpha{alpha}'] = (train_losses, test_accuracies, adv_test_accuracies, jacobian_norms, interp_norm_diffs)
    torch.save(net.state_dict(), f'{save_dir}/simplecnn_alpha{alpha}')

Epoch #1
	Loss: 2.302015, Average norm of Jacobian: 0.002556, Norm of difference in interpretations: 1.145142
	Loss: 1.794961, Average norm of Jacobian: 0.332941, Norm of difference in interpretations: 6.133467
	Loss: 1.738459, Average norm of Jacobian: 0.443577, Norm of difference in interpretations: 9.629074
	Loss: 1.566728, Average norm of Jacobian: 0.287748, Norm of difference in interpretations: 11.642480
	Loss: 1.602690, Average norm of Jacobian: 0.263026, Norm of difference in interpretations: 13.430804
	Ttest set accuracy: (85.96%)
	PGD-perturbed test set accuracy: (58.13%)
Epoch #2
	Loss: 1.650705, Average norm of Jacobian: 0.488221, Norm of difference in interpretations: 13.006514
	Loss: 1.580172, Average norm of Jacobian: 0.261305, Norm of difference in interpretations: 16.123606
	Loss: 1.522096, Average norm of Jacobian: 0.244330, Norm of difference in interpretations: 18.252075
	Loss: 1.536858, Average norm of Jacobian: 0.352778, Norm of difference in interpretations: 20.2

### Write performance dictionary to text file

In [5]:
# Dump the stats dictionary as its str() representation; the `with` block
# guarantees the file is closed even if the write raises.
with open(f'../trained_models/{dataset}/interp_match_reg2/performance.txt', 'w') as f:
    f.write(str(performance))

# to read dictionary from file:
# NOTE: eval() executes whatever the file contains -- only use it on files written by this notebook.
# with open(f'../trained_models/{dataset}/interp_match_reg2/performance.txt', 'r') as f:
#     d = eval(f.read())