In [1]:
import sys

from google.colab import drive

# Name of the project folder inside "My Drive". Keep the trailing slash:
# later cells build file paths by appending file names to this sys.path entry.
FOLDERNAME = 'CVX_Robust_NN/'

# Make Google Drive visible to this Colab runtime.
drive.mount('/content/drive')

# Register the project folder as an import root so the project-local modules
# imported below (prepare_data, models, fgsm, cvx_scripts) can be resolved.
project_root = '/content/drive/My Drive/{}'.format(FOLDERNAME)
sys.path.append(project_root)

%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader, RandomSampler

import os

import torchvision.datasets as datasets
import torchvision.transforms as transforms

from prepare_data import *
from models.mobilenetv2 import *
from models.vgg import *
from models.spliced import *
from fgsm import *
#from models.vit_small import *
from models.praresnet import *
from cvx_scripts.losses import *
from cvx_scripts.cvx_nn import *
from cvx_scripts.cvx_training import *

Mounted at /content/drive


In [2]:
# Use the first CUDA GPU when one is visible to PyTorch, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [3]:
# CIFAR-10 input pipeline: datasets, loaders and the shared hyperparameters.
print('==> Preparing data..')

# Per-channel normalisation statistics. They are applied inside the transforms
# and also kept as device tensors (`mean`, `std`) that are passed to eval_fgsm.
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2023, 0.1994, 0.2010)

# Hyperparameters handed to sgd_solver_cvxproblem in the training cell.
batch_size = 128
beta = 1e-2
rho, solver_type = 1e-2, "adam"
trunc_d = 512

# Training images get random crop/flip augmentation; test images do not.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

trainset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

testset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=100, shuffle=False)

# Single-image test loader for the fast gradient sign method evaluation.
testloader_fgsm = DataLoader(testset, batch_size=1, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

mean = torch.tensor(cifar_mean).to(device)
std = torch.tensor(cifar_std).to(device)

==> Preparing data..
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:06<00:00, 28298856.75it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [4]:
# Load the pre-trained Pre-Activation ResNet-18 models (one trained with
# sharpness-aware minimization, one with standard training) and a pre-trained
# convex two-layer ReLU head, then splice the head onto the standard-trained
# backbone.

# The checkpoints live in the Drive project folder. Build that path from
# FOLDERNAME rather than sys.path[-1], which silently points somewhere else as
# soon as anything appends to sys.path after the first cell.
checkpoint_dir = os.path.join('/content/drive/My Drive', FOLDERNAME)


def _load_weights(model, filename):
    """Load `filename` from the checkpoint folder into `model`.

    The weights are deserialised on the CPU, then the model is moved to
    `device` and switched to evaluation mode. Returns the same `model`.
    """
    state = torch.load(os.path.join(checkpoint_dir, filename),
                       map_location=torch.device('cpu'))
    model.load_state_dict(state)
    model.to(device)
    # Freshly constructed modules are in training mode. These models are only
    # used for inference here, so make normalisation/dropout layers (if any)
    # behave deterministically. Harmless if eval_fgsm also sets eval mode.
    model.eval()
    return model


pr18_sam = _load_weights(PreActResNet18(10), 'praresnet.pth')
pr18 = _load_weights(PreActResNet18(10), 'praresnet_nonsam.pth')

# load in a pre-trained convex two-layer ReLU network
cvx = _load_weights(custom_cvx_layer(512, 500), 'praresnet_nonsam_500_inf_5.pth')

# NOTE(review): this file is passed to torch.from_numpy, so it holds a pickled
# NumPy array rather than tensors; PyTorch releases whose torch.load defaults
# to weights_only=True may need an explicit override here -- confirm.
uvec = torch.from_numpy(
    torch.load(os.path.join(checkpoint_dir, 'u_vec_praresnet_nonsam_500.pth'))
).to(device).float()

spliced = Spliced(pr18, cvx, uvec)

In [None]:
# FGSM sweep: evaluate the three models at attack sizes 0/255 ... 8/255 and
# plot the accuracy returned by eval_fgsm against the attack size.
attack_sizes = np.arange(9)

# Insertion order is the evaluation order within each attack size:
# SAM-trained, standard-trained, then the spliced convex model.
models_under_attack = {
    'PreActResNet18-SAM': pr18_sam,
    'PreActResNet18-ST': pr18,
    'PreActResNet18-ST-CVX': spliced,
}
acc_dict = {model_name: [] for model_name in models_under_attack}
for attack in attack_sizes:
  for model_name, net in models_under_attack.items():
    acc_dict[model_name].append(
        eval_fgsm(net, device, testloader_fgsm, attack/255, mean, std))

# (result key, legend label, line colour), listed in drawing/legend order.
curves = (
    ('PreActResNet18-ST-CVX', 'CVX', 'red'),
    ('PreActResNet18-SAM', 'SAM', 'blue'),
    ('PreActResNet18-ST', 'ST', 'green'),
)
for model_name, legend_label, line_color in curves:
  plt.plot(attack_sizes, acc_dict[model_name], label=legend_label, color=line_color)
plt.xlabel(r'$\epsilon$')
plt.ylabel(r'robust test accuracy')
plt.legend()
plt.savefig(sys.path[-1] + '/REPRODUCED_robust_relu_accs.png')

# Make Accuracy Plots for varying robustness parameter choices


In [5]:
# Push the whole CIFAR-10 training set through the truncated standard-trained
# backbone and collect the feature matrix `A` and the label vector `y_all`.
trunc_d = 512
# data extraction
print('Extracting the data')
dummy_loader = torch.utils.data.DataLoader(
    trainset, batch_size=1000, shuffle=False,
    pin_memory=True, sampler=None)

# Start from empty float tensors so the results keep the same dtype/shape as
# incremental concatenation would give (labels are promoted to float, and an
# empty loader still yields a (0, trunc_d) matrix).
feature_chunks = [torch.zeros(0, trunc_d).to(device)]
label_chunks = [torch.zeros(0).to(device)]

# The features are only used as data, so skip autograd bookkeeping for the
# forward passes instead of building a graph and detaching it afterwards.
with torch.no_grad():
    for img, y in dummy_loader:
        img, y = img.to(device), y.to(device)
        feature_chunks.append(pr18.truncated_forward(img).detach())
        label_chunks.append(y)

# Concatenate once at the end rather than re-copying the growing matrix on
# every batch.
A = torch.cat(feature_chunks)
y_all = torch.cat(label_chunks)

# Independent copy of the features as extracted, before flattening.
Apatch = A.detach().clone()
A = A.view(A.shape[0], -1)
# n = number of samples; trunc_d is rebound to the actual feature width.
n, trunc_d = A.size()

Extracting the data


In [None]:
# Train robust convex heads on the extracted features for a sweep of the
# robustness parameter, splice each head onto the standard-trained backbone,
# and plot its FGSM accuracy (attack size 1/255) against the parameter.

epochs = [5]
# Other grids tried: {np.inf: [7.5, 10, 12.5, 15]}, and
# {1: [.1, .15, .2], 2: [1, 2, 3], np.inf: [10, 20, 30, 40]}
epsilons = {np.inf: np.linspace(20,50,16)}
learning_rates = [1e-6]
norm_orders = [np.inf]  # alternative: [1, 2, np.inf]
num_neurons = [500]
LBFGS_param = [10, 4]
# best_models / clean_acc are never filled in this cell; they are kept only
# because cells outside this view may read them.
best_models = {}
clean_acc = []
rob_acc = []    # eval_fgsm result for every trained head
acc = []        # results_cvx_conv1[3] for every trained head
swept_eps = []  # robustness parameter of every trained head (x-axis values)

for hidden_size in num_neurons:

  # The first return value rebinds hidden_size (presumably the number of sign
  # patterns actually generated -- confirm against generate_sign_patterns).
  hidden_size, sign_pattern_list, u_vector_list = generate_sign_patterns(A.cpu(), hidden_size, False)
  sign_patterns = np.array([sign_pattern_list[i].int().data.numpy() for i in range(hidden_size)])
  u_vectors = np.asarray(u_vector_list).reshape((hidden_size, A.shape[1])).T
  ds_train = PrepareData3D(X=A, y=y_all, z=sign_patterns.T)
  ds_train = DataLoader(ds_train, batch_size=batch_size)

  for norm_order in norm_orders:
    for epoch in epochs:
      for eps in epsilons[norm_order]:
        for lr in learning_rates:
          print('training under the following hyperparameters:', hidden_size, norm_order, epoch, eps, lr)
          # train a robust convex NN; run it on the device type selected at
          # the top of the notebook ('cuda' on a GPU runtime, 'cpu' otherwise)
          # instead of unconditionally requesting CUDA
          results_cvx_conv1 = sgd_solver_cvxproblem(pr18, ds_train, testloader, epoch, hidden_size, beta,
                                  lr, batch_size, rho, u_vectors, solver_type, LBFGS_param, verbose=True,
                                                    n=n, d=trunc_d, device=device.type, robust=True, eps=eps, norm=norm_order)

          # merge convex and base model:
          splice_vecs = torch.tensor(u_vectors).to(device)
          cur_spliced = Spliced(pr18, results_cvx_conv1[-1], splice_vecs)
          cur_spliced.robust = True

          # record accuracy on the test set under an FGSM attack of size 1/255
          print('evaluating...')

          acc.append(results_cvx_conv1[3])
          rob_acc.append(eval_fgsm(cur_spliced, device, testloader_fgsm, 1/255, mean, std))
          # Keep the x-value next to its y-value so the plot below stays
          # consistent even when more than one norm/epoch/lr is swept.
          swept_eps.append(eps)

plt.plot(swept_eps, rob_acc, label = r'CVX with parameter $r$', color='red')
plt.xlabel(r'$r$')
plt.ylabel(r'robust test accuracy for $\epsilon=1$')
# 0.6132 is standard-trained robust accuracy at eps = 1/255
# 0.7321 is SAM-trained robust accuracy at eps = 1/255
if swept_eps:
  # Draw the baselines across exactly the swept range of r.
  r_lo, r_hi = min(swept_eps), max(swept_eps)
  plt.hlines(0.6132, r_lo, r_hi, color='green', linestyles='--', label='ST')
  plt.hlines(0.7321, r_lo, r_hi, color='blue', linestyles='--', label='SAM')
plt.legend()
plt.savefig(sys.path[-1] + '/REPRODUCED_relu_robust_acc_vs_r.png')