In [5]:
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import sys
sys.path.append("..")
import utils.utils as utils
import utils.adv_ex_utils as aus
from utils.models import LeNet, DDNet, SimpleCNN
from utils.data_loaders import DataLoader
from advertorch.attacks import GradientSignAttack, CarliniWagnerL2Attack, PGDAttack

# Select the compute device and, when a GPU is used, make CUDA float tensors the default
# so newly created tensors live on that GPU.
# GPU index 1 is preferred (the original target); hosts with a single GPU fall back to
# index 0, and hosts without CUDA fall back to the CPU without touching any CUDA state.
if torch.cuda.is_available():
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device('cuda', gpu_index)
    # make `device` the current CUDA device so the default-tensor-type setting below
    # allocates on it
    torch.cuda.set_device(device)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    device = torch.device('cpu')

### Define train and test functions 

In [6]:
def train():
    """Run one training pass over ``train_loader`` with the interpretation-matching loss.

    Uses notebook-level globals: ``net``, ``optimizer``, ``train_loader``, ``device``,
    ``alpha``, ``log_interval`` and ``tr``. Every ``log_interval`` batches the current loss,
    the average Jacobian norm and the interpretation-difference norm are printed and
    appended to ``train_losses``, ``jacobian_norms`` and ``interp_norm_diffs``.
    """
    net.train()

    for batch_idx, batch in enumerate(train_loader):
        inputs, targets, interp_targets = batch

        # move the batch to the selected device; squeeze(dim=1) drops dimension 1 when it has size 1
        inputs = inputs.to(device).squeeze(dim=1)
        targets = targets.to(device)
        interp_targets = interp_targets.to(device).squeeze()

        # presumably adds random noise (scale .15) and clamps to [0, 1] -- confirm in utils.adv_ex_utils
        inputs = aus.perturb_randomly(inputs, scale=.15, min=0., max=1.)

        optimizer.zero_grad()
        output = net(inputs)
        loss = utils.interp_match_loss(
            output,
            targets,
            x=inputs,
            target_interps=interp_targets,
            net=net,
            optimizer=optimizer,
            alpha=alpha,
        )
        loss.backward()
        optimizer.step()

        # only report and record statistics on every `log_interval`-th batch
        if batch_idx % log_interval != 0:
            continue

        jac_norm = utils.avg_norm_jacobian(net, inputs, output.shape[1], tr, for_loss=False)
        interp_diff, _ = utils.norm_diff_interp(net, inputs, targets, scale=.15, for_loss=False)
        loss_value = loss.item()
        print(f'\tLoss: {loss_value:.6f}, Average norm of Jacobian: {jac_norm:6f}, Norm of difference in interpretations: {interp_diff:6f}')
        train_losses.append(loss_value)
        jacobian_norms.append(jac_norm)
        interp_norm_diffs.append(interp_diff)

In [7]:
def test():
    """Evaluate ``net`` on adversarially perturbed test data and record the results.

    Uses notebook-level globals: ``net``, ``optimizer``, ``test_loader``, ``device``,
    ``adversary`` and ``alpha``. Appends the accuracy (in percent) to ``test_accuracies``
    and the accumulated loss divided by the test-set size to ``test_losses``; both are
    stored as plain Python floats.
    """
    net.eval()
    test_loss = 0.
    correct = 0

    for samples, labels, target_interps in test_loader:
        samples, labels = samples.to(device), labels.to(device)
        # keep the target interpretations on the same device as the model, as train() does
        # NOTE(review): train() also squeezes samples/target_interps; confirm whether the
        # test batches need the same reshaping.
        target_interps = target_interps.to(device)
        # presumably returns the perturbed samples together with their labels -- confirm in utils.adv_ex_utils
        samples, labels = aus.generate_adv_exs(samples, labels, adversary)
        output = net(samples)
        # .item() turns the loss tensor into a float so no autograd graph is kept alive
        # across batches and `test_losses` holds numbers, matching `train_losses`
        test_loss += utils.interp_match_loss(output, labels, x=samples, target_interps=target_interps,
                                             net=net, optimizer=optimizer, alpha=alpha).item()

        # max over dim 1 returns (values, indices); the indices are the predicted classes
        preds = output.data.max(1, keepdim=True)[1]
        correct += preds.eq(labels.data.view_as(preds)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * float(correct / len(test_loader.dataset))

    print(f'\tPGD-perturbed test set accuracy: ({test_accuracy:.2f}%)')

    test_accuracies.append(test_accuracy)
    test_losses.append(test_loss)

### Training

In [8]:
dataset = 'MNIST'

# data loaders for the MNIST variant that also yields target interpretations per sample
dl = DataLoader(dataset='MNIST_interps', augment=False, model='simplecnn', path='../data')
train_loader = dl.train_loader
test_loader = dl.test_loader
tr_batch_size = dl.tr_batch_size
te_batch_size = dl.te_batch_size

# training details
n_epochs = 30
log_interval = 200
training_round = 3
torch.manual_seed(7)
alpha = 1e-2

# dictionary to record each model's training/testing stats
performance = {}

# create matrices for back propagation
tr = utils.bp_matrix(tr_batch_size, 10)
te = utils.bp_matrix(te_batch_size, 10)

# instantiate model and optimizer
learning_rate = 0.01
momentum = 0.9
net = SimpleCNN()
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
# multiply the learning rate by 0.1 every 10 epochs
lr_decayer = StepLR(optimizer, step_size=10, gamma=0.1)

# move the model to the device selected at the top of the notebook; unlike net.cuda()
# this also works when `device` is the CPU
# NOTE(review): the recorded output of this cell shows a cuda:0 / cuda:1 mismatch during
# backward; presumably some tensor is created on a different GPU than `device` -- verify.
net.to(device)

# adversary used by test() to perturb the test samples (L2-bounded PGD)
adversary = PGDAttack(predict=net, loss_fn=F.cross_entropy, eps=1.5,
                      nb_iter=40, eps_iter=0.05, rand_init=True,
                      clip_min=0., clip_max=1., ord=2, targeted=False)

# for tracking training progress
train_losses = []
test_losses = []
test_accuracies = []
jacobian_norms = []
interp_norm_diffs = []

# one training pass and one evaluation pass per epoch, then advance the LR schedule
for epoch in range(1, n_epochs + 1):
    print(f'Epoch #{epoch}')
    train()
    test()
    lr_decayer.step()

Epoch #1
	Loss: 2.311933, Average norm of Jacobian: 0.002612, Norm of difference in interpretations: 1.136621


RuntimeError: Function StackBackward returned an invalid gradient at index 33 - expected device cuda:0 but got cuda:1

In [5]:
# Record this model's tracked statistics under its name, then checkpoint its weights.
# NOTE(review): `model_name` is not assigned in any cell visible here -- confirm it is
# defined before this cell runs.
model_stats = (train_losses, test_losses, test_accuracies, jacobian_norms, interp_norm_diffs)
performance[model_name] = model_stats
checkpoint_path = f'trained_models/{dataset}/interp_reg_tests3/{model_name}'
torch.save(net.state_dict(), checkpoint_path)


Beginning training for model: trained_models/MNIST/interp_reg_tests3/MNIST_ir10_scale0.15_3
Epoch #1
	Loss: 2.314794, Average norm of Jacobian: 0.021167, Norm of difference in interpretations: 0.352246
	Loss: 0.482052, Average norm of Jacobian: 9.570333, Norm of difference in interpretations: 4.400657
	Loss: 0.193064, Average norm of Jacobian: 23.038416, Norm of difference in interpretations: 6.305837
	Loss: 0.225012, Average norm of Jacobian: 34.761086, Norm of difference in interpretations: 7.924159
	Loss: 0.256693, Average norm of Jacobian: 38.476143, Norm of difference in interpretations: 8.102613
	Test set accuracy: (97.11%)
Epoch #2
	Loss: 0.395056, Average norm of Jacobian: 36.310696, Norm of difference in interpretations: 9.855186
	Loss: 0.171817, Average norm of Jacobian: 63.664017, Norm of difference in interpretations: 11.167590
	Loss: 0.063765, Average norm of Jacobian: 58.784264, Norm of difference in interpretations: 11.347184
	Loss: 0.119696, Average norm of Jacobian: 7

### Write performance dictionary to text file

In [6]:
# Dump the performance dictionary as its string representation; the `with` block closes
# the file even if the write raises.
with open(f'trained_models/{dataset}/interp_reg_tests3/performance.txt', 'w') as f:
    f.write(str(performance))

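# to read the dictionary back from the file written above:
# with open(f'trained_models/{dataset}/interp_reg_tests3/performance.txt', 'r') as f:
#     d = eval(f.read())  # NOTE: eval executes arbitrary code; only use it on files you wrote yourself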

In [None]:
# list the model names recorded so far (an empty subscript `performance[]` is a syntax error)
list(performance.keys())