In [None]:
# imports
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
from joblib import dump, load
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms
from torchvision.datasets import MNIST # Training dataset
from torchvision.utils import make_grid
#####################
# my files
# target model
from net_ import target_net
#from net_conv import target_net
# gan architectures
import gans_archs
# advgan training class
from GAN_ import advGAN

# use the first CUDA device when one is visible, otherwise fall back to the CPU
dev = 'cuda:0' if torch.cuda.is_available() else 'cpu'

print('device: ', dev)

# functions
def transform_data(data_loader_obj):
    """Drain a loader and return all of its samples as two tensors.

    Parameters
    ----------
    data_loader_obj : iterable of (imgs, lbls)
        Each item is one batch. Both members are expected to be tensors whose
        first axis is the batch axis (what a default ``DataLoader`` yields).

    Returns
    -------
    (ims, lls) : tuple of torch.Tensor
        All image batches concatenated along axis 0, and all label batches
        concatenated along axis 0, in iteration order.

    Raises
    ------
    RuntimeError
        If the loader yields no batches at all.
    """
    img_batches = []
    lbl_batches = []
    for imgs, lbls in data_loader_obj:
        img_batches.append(imgs)
        lbl_batches.append(lbls)

    if not img_batches:
        # fail with a readable message instead of an opaque torch error
        raise RuntimeError('transform_data: data_loader_obj yielded no batches')

    # one concatenation per output replaces splitting every batch into single
    # samples and re-stacking them, which gives the same tensors far more slowly
    return torch.cat(img_batches), torch.cat(lbl_batches)

def show_tensor_images(image_tensor, num_images=25, size=(1, 28, 28), nrow=5):
    """Plot the first ``num_images`` images of a batch as one tiled figure.

    Parameters
    ----------
    image_tensor : torch.Tensor
        Batch of images; its first axis is the batch axis and the remaining
        elements of each sample must reshape to ``size``.
    num_images : int
        Maximum number of images taken from the front of the batch.
    size : tuple
        Per-image shape ``(channels, height, width)``; defaults to the
        ``(1, 28, 28)`` that was previously hard-coded.
    nrow : int
        Passed to ``make_grid`` as ``nrow``; defaults to the previous value 5.

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    """
    # detach before moving to the CPU so no autograd history is carried along
    images = image_tensor.detach().cpu().reshape(image_tensor.shape[0], *size)
    image_grid = make_grid(images[:num_images], nrow=nrow)
    # channels-first -> channels-last for imshow; squeeze drops any size-1 axis
    plt.imshow(image_grid.permute(1, 2, 0).squeeze())
    plt.show()

In [None]:
# obtain mnist data and process
# how many digit classes are kept (labels 0 .. num_of_classes-1 are selected below)
num_of_classes = 2
# mini-batch size used by the DataLoaders built in this notebook
batch_size = 128

def get_indices(dataset, ind_array):
    """Return the positions of all samples whose target is in ``ind_array``.

    Parameters
    ----------
    dataset : object with a ``targets`` attribute
        ``targets`` is any 1-D sequence convertible with ``np.asarray``
        (a list, an ndarray or a CPU tensor).
    ind_array : array-like
        Label values to keep.

    Returns
    -------
    list of int
        Indices into ``dataset.targets`` in ascending order. Each matching
        index appears once, even if ``ind_array`` repeats a label.
    """
    targets = np.asarray(dataset.targets)
    # one vectorized membership test instead of a Python loop over
    # every (sample, label) pair
    keep = np.isin(targets, ind_array)
    return np.flatnonzero(keep).tolist()

# labels kept for the experiment: 0 .. num_of_classes-1
classes = np.arange(num_of_classes)

# MNIST training split read from '.', not downloaded (download=False)
dataset = MNIST('.', train=True, download=False, transform=transforms.ToTensor())

# positions of every sample whose label is one of `classes`
idx = get_indices(dataset, classes)

# Four independent loaders, each with its own random sampler over `idx`.
# NOTE(review): all four (including data_loader_test) draw from the same
# training subset, so accuracies reported from them are not held-out numbers.
data_loader_rftarget, data_loader_target, data_loader_gan, data_loader_test = (
    DataLoader(dataset, batch_size=batch_size, sampler=SubsetRandomSampler(idx))
    for _ in range(4)
)

In [None]:
# target model
n_estimators = 500
# where the fitted forest is cached on disk
PATH = './target_models/rf_model'+str(num_of_classes)+'classes'+'_nest'+str(n_estimators)+'.joblib'

if os.path.exists(PATH):
    # reuse the cached model
    # NOTE: joblib.load unpickles the file, so only load files you trust
    rf = load(PATH)
else:
    # no cached model yet: fit on the flattened images, then cache the result
    ims_train, lls_train = transform_data(data_loader_rftarget)
    rf = RandomForestClassifier(n_estimators=n_estimators)
    rf.fit(ims_train.reshape(len(ims_train), 28*28).detach().numpy(), lls_train.numpy())
    os.makedirs(os.path.dirname(PATH), exist_ok=True)
    dump(rf, PATH)

# random forest accuracy
# NOTE(review): data_loader_gan samples the same subset used for fitting,
# so this is not a held-out score
ims_test, lls_test = transform_data(data_loader_gan)
rf_pred = rf.predict(ims_test.reshape(len(ims_test), 28*28).detach().numpy())
# accuracy_score takes (y_true, y_pred)
print('accuracy: ', accuracy_score(lls_test.numpy(), rf_pred))

In [None]:
# train distilled neural network on output from random forest
# A single network instance is used throughout, so `optimizer` stays bound to
# the parameters of the `net` that is actually kept (previously `net` was
# re-created after the optimizer had been built).
net = target_net(num_of_classes).to(dev)
criterion_tar = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

path_disttar = './target_models/ff_net_distilled_'+str(num_of_classes)+'classes_device-'+dev+'.pth'
if os.path.exists(path_disttar):
    # load the cached weights directly onto the selected device
    net.load_state_dict(torch.load(path_disttar, map_location=dev))
else:
    # train and save the model; rf.predict supplies the labels to distil
    # NOTE(review): this uses the train(...) defined on target_net; a later
    # cell deletes that attribute, so this branch must run before that cell
    net.train(data_loader_target, criterion_tar, optimizer, dev, master_model=rf.predict)
    os.makedirs(os.path.dirname(path_disttar), exist_ok=True)
    torch.save(net.state_dict(), path_disttar)

print('model accuracy: ', net.accuracy(data_loader_gan,dev))

In [None]:
# import gen/disc
gen = gans_archs.Generator1()
disc = gans_archs.Discriminator1()

# arguments for GAN training (all forwarded unchanged to advGAN below)
# -- loss functions
tar_criterion = nn.CrossEntropyLoss()
criterion = nn.BCEWithLogitsLoss()

# -- training schedule
n_epochs = 200
batch_size = 128
lr = 1e-5
display_step = 500
device = dev

# -- generator architecture / checkpoint naming
gen_arch = 'cov'
gen_arch_num = 1
gen_path_extra = 'distilledrf_genarch_' + str(gen_arch_num)
shape = (1, 28, 28)

# -- loss-term coefficients
disc_coeff = 2150.
hinge_coeff = 150.
adv_coeff = 700.
c = 0.2

# things to investigate:
#   - add noise vector in generator
#   - change to 0-1 loss ?

# initiate advgan
advgan = advGAN(
    net, gen, disc,
    tar_criterion=tar_criterion,
    criterion=criterion,
    n_epochs=n_epochs,
    batch_size=batch_size,
    num_of_classes=num_of_classes,
    lr=lr,
    disc_coeff=disc_coeff,
    hinge_coeff=hinge_coeff,
    adv_coeff=adv_coeff,
    c=c,
    gen_path_extra=gen_path_extra,
    device=device,
    display_step=display_step,
    shape=shape,
    gen_arch=gen_arch,
)

In [None]:
%%time 
# train the gan
# Runs advGAN training on data_loader_gan and rebinds `gen` and `disc` to the
# two objects that advgan.train returns.
gen,disc = advgan.train(data_loader_gan)

In [None]:
# load the generator
path = advgan.gen_path
# insert '/models_keep' into the checkpoint path after its first 15 characters
full_path = '/models_keep'.join((path[:15], path[15:]))
gen = advgan.load_gen(full_path)

In [None]:
# test transferability from adv examples for the nn to the rf
# Correct predictions and samples are counted over the whole pass. Averaging
# per-batch accuracies would over-weight a shorter final batch.
counter = 0            # number of samples seen
total_rf_clean = 0     # correct rf predictions on clean inputs
total_nn_clean = 0     # correct nn predictions on clean inputs
total_rf_adv = 0       # correct rf predictions on adversarial inputs
total_nn_adv = 0       # correct nn predictions on adversarial inputs

# evaluation only, so no autograd graph is needed
with torch.no_grad():
    for data,label in DataLoader(dataset, batch_size=batch_size, sampler = SubsetRandomSampler(idx)):
        flat = data.reshape(len(data),28*28)
        flat_dev = flat.to(dev)
        y_true = label.cpu().numpy()

        rf_output_clean = rf.predict(flat.cpu().numpy())
        nn_output_clean = torch.argmax(net(flat_dev),dim=1)
        # make adv example: generator output is added to the flattened image
        pert = gen(flat_dev)
        adv_img = flat_dev + pert
        rf_output_adv = rf.predict(adv_img.cpu().numpy())
        nn_output_adv = torch.argmax(net(adv_img),dim=1)

        # normalize=False returns the number of correct predictions
        total_rf_clean += accuracy_score(y_true, rf_output_clean, normalize=False)
        total_nn_clean += accuracy_score(y_true, nn_output_clean.cpu().numpy(), normalize=False)
        total_rf_adv += accuracy_score(y_true, rf_output_adv, normalize=False)
        total_nn_adv += accuracy_score(y_true, nn_output_adv.cpu().numpy(), normalize=False)
        counter += len(y_true)

print('rf accuracy (clean): ', total_rf_clean/counter)
print('nn accuracy (clean): ', total_nn_clean/counter)
print('% adv rf: ', 1.-total_rf_adv/counter)
print('% adv nn: ', 1.-total_nn_adv/counter)

# adversarial images of the last batch
show_tensor_images(adv_img)

In [None]:
# load a net w/o the train method (messes up art if it is there)
from net_ import target_net
# Remove the attribute only if the class still defines it, so re-running this
# cell does not raise AttributeError. The deletion affects the class itself,
# i.e. every target_net instance, not just net_art.
if 'train' in vars(target_net):
    del target_net.train
net_art = target_net(num_of_classes)
# net_art is created on the CPU, so map the checkpoint there while loading
net_art.load_state_dict(torch.load(path_disttar, map_location='cpu'))

# data to attack
# NOTE: `actual_lables` (sic) is referenced under this name by later cells
ims_attack,actual_lables = transform_data(data_loader_test)
# flatten each image to a 784-vector
ims_attack = ims_attack.reshape(ims_attack.shape[0],28*28)

In [None]:
# ART torch classifier model
from art.estimators.classification import PyTorchClassifier
# transferability of FGM attack
from art.attacks.evasion import FastGradientMethod

classifier = PyTorchClassifier(
    model=net_art,
    clip_values=(0, 1),
    loss=criterion_tar,
    # bind the optimizer to the wrapped model's own parameters
    optimizer=optim.SGD(net_art.parameters(), lr=0.001, momentum=0.9),
    input_shape=(28*28,),
    nb_classes=num_of_classes)

# ART and scikit-learn work on numpy arrays
x_clean = ims_attack.detach().cpu().numpy()
y_true = actual_lables.detach().cpu().numpy()

attack = FastGradientMethod(estimator=classifier, eps=0.2)
x_fgm_adv = attack.generate(x=x_clean)

# model output
# run the network on whatever device its parameters currently live on
art_device = next(net_art.parameters()).device
rf_output_clean = rf.predict(x_clean)
rf_output_adv = rf.predict(x_fgm_adv)
with torch.no_grad():
    nn_output_clean = torch.argmax(net_art(ims_attack.to(art_device)),dim=1)
    nn_output_adv = torch.argmax(net_art(torch.from_numpy(x_fgm_adv).to(art_device)),dim=1)

# calculate and return accuracy (accuracy_score takes y_true first)
acc_rf_clean = accuracy_score(y_true, rf_output_clean)
acc_nn_clean = accuracy_score(y_true, nn_output_clean.cpu().numpy())
acc_rf_adv = accuracy_score(y_true, rf_output_adv)
acc_nn_adv = accuracy_score(y_true, nn_output_adv.cpu().numpy())

print('rf accuracy (clean): ', acc_rf_clean)
print('nn accuracy (clean): ', acc_nn_clean)
print('% adv rf: ', 1.-acc_rf_adv)
print('% adv nn: ', 1.-acc_nn_adv)
show_tensor_images(torch.from_numpy(x_fgm_adv))

In [None]:
# ART torch classifier model
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import CarliniL2Method,CarliniLInfMethod
classifier = PyTorchClassifier(
    model=net_art,
    clip_values=(0, 1),
    loss=criterion_tar,
    # bind the optimizer to the wrapped model's own parameters
    optimizer=optim.SGD(net_art.parameters(), lr=0.001, momentum=0.9),
    input_shape=(28*28,),
    nb_classes=num_of_classes)

# transferability of CW attack
n_attack = 100  # only the first n_attack samples are attacked
x_clean = ims_attack[:n_attack].detach().cpu().numpy()
y_true = actual_lables[:n_attack].detach().cpu().numpy()
# target for each sample: the next class, cyclically (never the true class)
y_target = (y_true + 1) % num_of_classes

# The labels passed as `y` are targets, so the attack must be created in
# targeted mode; in untargeted mode ART treats `y` as the original labels.
attack = CarliniL2Method(classifier, targeted=True)
CW_adv_img = attack.generate(x=x_clean, y=y_target)
np.save('./target_models/CW_adv_img.npy',CW_adv_img)
#CW_adv_img = np.load('./target_models/CW_adv_img.npy')

# model output
# run the network on whatever device its parameters currently live on
art_device = next(net_art.parameters()).device
rf_output_clean = rf.predict(x_clean)
rf_output_adv = rf.predict(CW_adv_img)
with torch.no_grad():
    nn_output_clean = torch.argmax(net_art(ims_attack[:n_attack].to(art_device)),dim=1)
    nn_output_adv = torch.argmax(net_art(torch.from_numpy(CW_adv_img).to(art_device)),dim=1)

# calculate and return accuracy (accuracy_score takes y_true first)
acc_rf_clean = accuracy_score(y_true, rf_output_clean)
acc_nn_clean = accuracy_score(y_true, nn_output_clean.cpu().numpy())
acc_rf_adv = accuracy_score(y_true, rf_output_adv)
acc_nn_adv = accuracy_score(y_true, nn_output_adv.cpu().numpy())

print('rf accuracy (clean): ', acc_rf_clean)
print('nn accuracy (clean): ', acc_nn_clean)
print('% adv rf: ', 1.-acc_rf_adv)
print('% adv nn: ', 1.-acc_nn_adv)
# show the CW examples produced above (the name used here before was undefined)
show_tensor_images(torch.from_numpy(CW_adv_img))

In [None]:
# comparing to noisy data
# uniform noise in [0, 0.5) added to every pixel
# NOTE(review): the result is not clipped, so pixel values can exceed 1
imgs_dirty = ims_attack + 0.5*torch.rand(ims_attack.shape)
y_true = actual_lables.detach().cpu().numpy()

# model output
# run the network on whatever device its parameters currently live on
art_device = next(net_art.parameters()).device
rf_output_clean = rf.predict(ims_attack.detach().cpu().numpy())
rf_output_adv = rf.predict(imgs_dirty.detach().cpu().numpy())
with torch.no_grad():
    nn_output_clean = torch.argmax(net_art(ims_attack.to(art_device)),dim=1)
    nn_output_adv = torch.argmax(net_art(imgs_dirty.to(art_device)),dim=1)

# calculate and return accuracy (accuracy_score takes y_true first)
acc_rf_clean = accuracy_score(y_true, rf_output_clean)
acc_nn_clean = accuracy_score(y_true, nn_output_clean.cpu().numpy())
acc_rf_adv = accuracy_score(y_true, rf_output_adv)
acc_nn_adv = accuracy_score(y_true, nn_output_adv.cpu().numpy())

print('rf accuracy (clean): ', acc_rf_clean)
print('nn accuracy (clean): ', acc_nn_clean)
print('% adv rf: ', 1.-acc_rf_adv)
print('% adv nn: ', 1.-acc_nn_adv)
# display the noisy images evaluated in this cell
show_tensor_images(imgs_dirty)