In [1]:
"""Code assumes the ability to train using a GPU with CUDA.
"""
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR
from advertorch.attacks import GradientSignAttack, CarliniWagnerL2Attack, PGDAttack
import matplotlib.pyplot as plt
import utils.adv_ex_utils as aus
import utils.interp_generators as igs
import utils.utils as utils
from utils.models import LeNet, DDNet
from utils.data_loaders import DataLoader

# makes default tensor a CUDA tensor so GPU can be used
device = torch.device(1 if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)
if device != 'cpu':
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

### Define data loaders and data preprocessing steps

In [2]:
dataset = 'CIFAR-10'

dl = DataLoader(dataset=dataset, augment=False)
train_loader = dl.train_loader
test_loader = dl.test_loader
tr_batch_size = dl.tr_batch_size
te_batch_size = dl.te_batch_size

Files already downloaded and verified
Files already downloaded and verified


### Define train and test functions 

In [3]:
def train():
    net.train()
    
    for batch_idx, (samples, labels) in enumerate(train_loader):
        # sends to GPU, i.e. essentially converts from torch.FloatTensor to torch.cuda.FloatTensor
        samples, labels = samples.to(device), labels.to(device)
        
        # expand dataset with adversarial examples if adversary specified
        if adversary != None:
            adv_samples, adv_labels = aus.generate_adv_exs(samples, labels, adversary)
            samples, labels = torch.cat([samples, adv_samples], 0), torch.cat([labels, adv_labels], 0)
                
        optimizer.zero_grad()
        
        output = net(samples)
        
        loss = utils.my_loss(output, labels, net, optimizer,
                             alpha_wd=alpha_wd, alpha_jr=alpha_jr, 
                             x=samples, bp_mat=tr)
        loss.backward()
        
        optimizer.step()
        
        if batch_idx % log_interval == 0:
            j = utils.avg_norm_jacobian(net, samples, output.shape[1], tr, for_loss=False)
            print(f'\tLoss: {loss.item():.6f} Average norm of Jacobian: {j:6f}')
            train_losses.append(loss.item())
            jacobian_norms.append(j)

In [4]:
def test():
    net.eval()
    test_loss = 0
    correct = 0
    
    for samples, labels in test_loader:
        samples, labels = samples.to(device), labels.to(device)
        output = net(samples)
        test_loss += utils.my_loss(output, labels, net, optimizer,
                                   alpha_wd=alpha_wd, alpha_jr=alpha_jr, 
                                   x=samples, bp_mat=te).item()
        # output is a tensor, .data retrieves its data, max returns the index of the highest valued element
        preds = output.data.max(1, keepdim=True)[1]
        correct += preds.eq(labels.data.view_as(preds)).sum().item()
                
    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * float(correct / len(test_loader.dataset))
    
    print(f'\tTest set accuracy: ({test_accuracy:.2f}%)')
    
    test_accuracies.append(test_accuracy)
    test_losses.append(test_loss)

### Training

In [6]:
# training details
n_epochs = 30
log_interval = 200
training_round = 1
torch.manual_seed(training_round)

# varying values for certain hyperparameters to produce models with varying degrees of robustness
epsilons = [0, .1, .2, .3, .4, .5, .6]
alpha_wds = [0, .0000001, .000001, .00001, .0001, .001, .01]
alpha_jrs = [0.01, .0000001, .000001, .00001, .0001, .001, .01]

# dictionary to record each model's training/testing stats
performance = {}

for hyp_param_to_vary in [alpha_jrs, epsilons, alpha_wds]:
    epsilon = 0
    alpha_wd = 0
    alpha_jr = 0
    
    model_name = f'ep{epsilon}_wd{alpha_wd}_jr{alpha_jr}_{training_round}'
    
    for value in hyp_param_to_vary:        
        # change hyperparameter that is being varied
        if hyp_param_to_vary == epsilons:
            epsilon = value
            tr = utils.bp_matrix(tr_batch_size*2, 10)
            te = utils.bp_matrix(te_batch_size*2, 10)
        elif hyp_param_to_vary == alpha_wds:
            alpha_wd = value
            tr = utils.bp_matrix(tr_batch_size, 10)
            te = utils.bp_matrix(te_batch_size, 10)
        else:
            alpha_jr = value
            tr = utils.bp_matrix(tr_batch_size, 10)
            te = utils.bp_matrix(te_batch_size, 10)
        
        print(f'\nBeginning training for model: trained_models/{dataset}/training_round_{training_round}/{model_name}')

        # instantiate model and optimizer
        learning_rate = 0.01
        momentum = 0.9
        
        # net should be instance of LeNet if using MNIST, DDNet if using CIFAR-10
        if dataset == 'MNIST':
            net = LeNet()
        elif dataset == 'CIFAR-10':
            net = DDNet()
            
        optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
        lr_decayer = StepLR(optimizer, step_size=10, gamma=0.1)

        # make model CUDA enabled and define GPU/device to use
        net.cuda()
        
        # define adversary to train against if needed
        adversary = None
        if epsilon != 0:
            adversary = GradientSignAttack(predict=net, loss_fn=F.cross_entropy, 
                            eps=epsilon, clip_min=-3., clip_max=3., targeted=False)

        # for tracking training progress
        train_losses = []
        test_losses = []
        test_accuracies = []
        jacobian_norms = []

        for epoch in range(1, n_epochs + 1):
            print(f'Epoch #{epoch}')
            train()
            test()
            lr_decayer.step()
        
        performance[model_name] = (train_losses, test_losses, test_accuracies, jacobian_norms)
        torch.save(net.state_dict(), f'trained_models/{dataset}/training_round_{training_round}/{model_name}')
        


Beginning training for model: trained_models/CIFAR-10/training_round_1/ep0_wd0_jr0.01_1
Epoch #1
	Loss: 2.311766 Average norm of Jacobian: 0.001591
	Loss: 1.953117 Average norm of Jacobian: 0.141008
	Loss: 1.500202 Average norm of Jacobian: 1.335765
	Loss: 1.524923 Average norm of Jacobian: 2.332409
	Test set accuracy: (52.93%)
Epoch #2
	Loss: 1.543494 Average norm of Jacobian: 2.835848
	Loss: 1.166420 Average norm of Jacobian: 6.567211
	Loss: 1.173705 Average norm of Jacobian: 7.201236
	Loss: 1.287471 Average norm of Jacobian: 9.942127
	Test set accuracy: (63.21%)
Epoch #3
	Loss: 1.238295 Average norm of Jacobian: 7.961130
	Loss: 0.924732 Average norm of Jacobian: 14.440787
	Loss: 0.948359 Average norm of Jacobian: 9.668236
	Loss: 0.839496 Average norm of Jacobian: 13.218650
	Test set accuracy: (68.96%)
Epoch #4
	Loss: 1.048609 Average norm of Jacobian: 11.521887
	Loss: 0.987433 Average norm of Jacobian: 11.495178
	Loss: 1.091940 Average norm of Jacobian: 14.036659
	Loss: 0.694132 Av

FileNotFoundError: [Errno 2] No such file or directory: 'trained_models/CIFAR-10/training_round_1/ep0_wd0_jr0.01_1'

### Write performance dictionary to text file

In [None]:
f = open(f'trained_models/{dataset}/training_round_{training_round}/training_round_{training_round}_performance.txt','w')
f.write(str(performance))
f.close()

# to read dictionary from file:
# f = open(f'models/training_round_{training_round}_performance.txt','r')
# d = eval(f.read())