In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import matplotlib
import matplotlib.pyplot as plt

import gc
from captum.attr import *
import quantus
from torch.utils.data import DataLoader
import gc
import torchvision.transforms as transforms

import torch.optim as optim
import os
import torch.optim as optim
import torchvision.models as models

In [2]:
# Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides warnings emitted by the libraries
# used below, which can mask misconfiguration — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [4]:

# Layer layouts for the supported VGG variants, keyed by variant name.
# An integer is the output-channel count of one convolution block; 'M' marks a
# max-pooling step. Each row below is one resolution stage.
cfg = {
    'VGG11': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'VGG13': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'VGG16': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'VGG19': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}


class VGG(nn.Module):
    """VGG-style CNN: a conv/batch-norm/ReLU feature stack plus one linear layer.

    Args:
        vgg_name: Key into the module-level ``cfg`` dict selecting the layout.
        num_classes: Number of output logits. Defaults to 10, which matches
            the original hard-coded classifier size.

    The classifier expects exactly 512 flattened features per sample, so the
    feature stack must reduce the input to a 512x1x1 map (for example a
    3x32x32 image with five 'M' stages).
    """

    def __init__(self, vgg_name, num_classes=10):
        super().__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the logits for the image batch ``x``."""
        out = self.features(x)
        # Flatten everything except the batch dimension before the linear layer.
        out = out.view(out.size(0), -1)
        return self.classifier(out)

    def _make_layers(self, cfg):
        """Build the feature extractor described by the layout list ``cfg``.

        ``'M'`` entries become 2x2/stride-2 max-pooling layers; every integer
        entry becomes a 3x3 convolution (padding 1) with that many output
        channels, followed by batch normalisation and an in-place ReLU.
        The input is assumed to have 3 channels.
        """
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.extend([
                    nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                    nn.BatchNorm2d(x),
                    nn.ReLU(inplace=True),
                ])
                in_channels = x
        # 1x1/stride-1 average pooling leaves the tensor shape unchanged; it is
        # kept so the Sequential has the same layer count as before.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*layers)


def test():
    """Smoke test: run a random 2x3x32x32 batch through VGG11 and print the output size."""
    model = VGG('VGG11')
    logits = model(torch.randn(2, 3, 32, 32))
    print(logits.size())

In [5]:
def load_cifar_model(path):
    """Build a VGG16, load the state dict stored at ``path`` and return it in eval mode.

    The checkpoint is always deserialised onto the CPU; moving the model to
    another device is left to the caller.
    """
    net = VGG('VGG16')
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    state = torch.load(path, map_location='cpu')
    net.load_state_dict(state)
    return net.eval()

In [6]:
# Checkpoint file handed to load_cifar_model; relative to the working directory.
path = "cifar10_vgg16_model_final.pt"

In [7]:
# Load the checkpoint and move the network to the selected device.
normal_model = load_cifar_model(path)
normal_model.to(device)
# load_cifar_model already calls train(False); eval() is the cell's last
# expression, so the notebook displays the module repr.
normal_model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [8]:
# CIFAR-10 training split stored under ./data (download requested), with every
# image converted by ToTensor.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# Shuffled mini-batches of 10 samples.
train_loader_cifar = DataLoader(trainset, batch_size=10, shuffle=True)

Files already downloaded and verified


In [9]:
def make_noise(x_batch, y_batch, spread):
    """Return ``x_batch`` with per-image Gaussian noise added, clipped to [0, 1].

    For each image the noise standard deviation is ``spread`` times that
    image's own value range (max - min). Noise is drawn with ``np.random``.
    ``y_batch`` is accepted but not used. The stacked result is moved to the
    module-level ``device``.
    """
    def _perturb(image):
        pixels = image.data.cpu().numpy()
        sigma = spread * (np.max(pixels) - np.min(pixels))
        jitter = np.random.normal(0, sigma, pixels.shape).astype(np.float32)
        clipped = np.clip(pixels + jitter, 0, 1)
        return torch.from_numpy(clipped).cpu()

    noisy_images = [_perturb(image) for image in x_batch]
    return torch.stack(noisy_images).to(device)

# Evaluate

In [31]:
def compute_metrics_benign(adv_path, normal_model, max_samples=1000, batch_size=2,
                           out_path="adaptive_Benign.csv"):
    """Measure how far logits and attributions of benign images move under Gaussian noise.

    Reads ``b_images`` / ``b_labels`` from the ``.npz`` archive at ``adv_path``.
    For each sample and each noise spread (0.05, 0.10, 0.15) it records the L1
    distance between clean and noisy logits and the L1 distance between clean
    and noisy attribution maps. The six rows (three attribution rows followed
    by three logit rows) are written to ``out_path`` as CSV, one column per
    sample.

    Args:
        adv_path: Path of the ``.npz`` archive.
        normal_model: Model used for both the logits and the explanations.
        max_samples: Upper bound on the number of samples processed.
        batch_size: Number of samples processed per iteration.
        out_path: Destination CSV file.
    """
    print("Computing metrics for {} for benign".format(adv_path))

    npobj = np.load(adv_path)
    images = torch.from_numpy(npobj['b_images'])
    labels = torch.from_numpy(npobj['b_labels'])

    spreads = (0.05, 0.10, 0.15)
    # One list per spread, in the same order as ``spreads``.
    attribution_diffs = [[] for _ in spreads]   # attribution robustness
    logit_diffs = [[] for _ in spreads]         # logit robustness

    # NOTE(review): the key is spelled "method:" (trailing colon), so
    # quantus.explain receives no "method" entry and presumably falls back to
    # its default explainer instead of Integrated Gradients. Left unchanged on
    # purpose: the thresholds in return_auc presumably depend on the current
    # attribution scale — confirm before correcting.
    explain_kwargs = {"method:": "IntegratedGradient", "device": device}

    end = min(len(labels), max_samples)

    for i in range(0, end, batch_size):
        stop = min(i + batch_size, end)
        images_adv = images[i:stop].to(device)
        y_pred_adv = labels[i:stop].to(device)

        # Only the logit values are needed here, so skip autograd bookkeeping.
        with torch.no_grad():
            x_logits = normal_model(images_adv)
        gc.collect()
        torch.cuda.empty_cache()

        #approach: attribution and logit robustness
        a_batch = quantus.explain(
            model=normal_model, inputs=images_adv, targets=y_pred_adv, **explain_kwargs)

        for slot, spread in enumerate(spreads):
            noisy_images = make_noise(images_adv, y_pred_adv, spread=spread)
            with torch.no_grad():
                noisy_logits = normal_model(noisy_images)

            diff = torch.norm(x_logits - noisy_logits, p=1, dim=1)
            logit_diffs[slot].extend(diff.detach().cpu().numpy())

            a_batch_noisy = quantus.explain(
                model=normal_model, inputs=noisy_images, targets=y_pred_adv, **explain_kwargs)
            for a, b in zip(a_batch, a_batch_noisy):
                attribution_diffs[slot].append(
                    np.linalg.norm(a.flatten() - b.flatten(), ord=1))

    df = pd.DataFrame(
        attribution_diffs + logit_diffs,
        index=[
            "Gaussian1 attribution",
            "Gaussian2 attribution",
            "Gaussian3 attribution",
            "Gaussian1 logit robusntess",
            "Gaussian2 logit robusntess",
            "Gaussian3 logit robusntess",
        ])

    df.to_csv(out_path)

In [32]:
def compute_metrics_adv(adv_path, normal_model, max_samples=1000, batch_size=2,
                        out_path="adaptive_Adv.csv"):
    """Measure how far logits and attributions of adversarial images move under Gaussian noise.

    Reads ``a_images`` / ``a_labels`` from the ``.npz`` archive at ``adv_path``.
    For each sample and each noise spread (0.05, 0.10, 0.15) it records the L1
    distance between clean and noisy logits and the L1 distance between clean
    and noisy attribution maps. The six rows (three attribution rows followed
    by three logit rows) are written to ``out_path`` as CSV, one column per
    sample.

    Args:
        adv_path: Path of the ``.npz`` archive.
        normal_model: Model used for both the logits and the explanations.
        max_samples: Upper bound on the number of samples processed.
        batch_size: Number of samples processed per iteration.
        out_path: Destination CSV file.
    """
    print("Computing metrics for {} for adv".format(adv_path))

    npobj = np.load(adv_path)
    images = torch.from_numpy(npobj['a_images'])
    labels = torch.from_numpy(npobj['a_labels'])

    spreads = (0.05, 0.10, 0.15)
    # One list per spread, in the same order as ``spreads``.
    attribution_diffs = [[] for _ in spreads]   # attribution robustness
    logit_diffs = [[] for _ in spreads]         # logit robustness

    # NOTE(review): the key is spelled "method:" (trailing colon), so
    # quantus.explain receives no "method" entry and presumably falls back to
    # its default explainer instead of Integrated Gradients. Left unchanged on
    # purpose: the thresholds in return_auc presumably depend on the current
    # attribution scale — confirm before correcting.
    explain_kwargs = {"method:": "IntegratedGradient", "device": device}

    end = min(len(labels), max_samples)

    for i in range(0, end, batch_size):
        stop = min(i + batch_size, end)
        images_adv = images[i:stop].to(device)
        y_pred_adv = labels[i:stop].to(device)

        # Only the logit values are needed here, so skip autograd bookkeeping.
        with torch.no_grad():
            x_logits = normal_model(images_adv)
        gc.collect()
        torch.cuda.empty_cache()

        #approach: attribution and logit robustness
        a_batch = quantus.explain(
            model=normal_model, inputs=images_adv, targets=y_pred_adv, **explain_kwargs)

        for slot, spread in enumerate(spreads):
            noisy_images = make_noise(images_adv, y_pred_adv, spread=spread)
            with torch.no_grad():
                noisy_logits = normal_model(noisy_images)

            diff = torch.norm(x_logits - noisy_logits, p=1, dim=1)
            logit_diffs[slot].extend(diff.detach().cpu().numpy())

            a_batch_noisy = quantus.explain(
                model=normal_model, inputs=noisy_images, targets=y_pred_adv, **explain_kwargs)
            for a, b in zip(a_batch, a_batch_noisy):
                attribution_diffs[slot].append(
                    np.linalg.norm(a.flatten() - b.flatten(), ord=1))

    df = pd.DataFrame(
        attribution_diffs + logit_diffs,
        index=[
            "Gaussian1 attribution",
            "Gaussian2 attribution",
            "Gaussian3 attribution",
            "Gaussian1 logit robusntess",
            "Gaussian2 logit robusntess",
            "Gaussian3 logit robusntess",
        ])

    df.to_csv(out_path)

In [33]:
def compute_TPR(adv1, a, b, adv2, c, d):
    """Return the percentage of paired samples flagged by either range check.

    The two sequences are walked in lockstep (stopping at the shorter one).
    A pair ``(value1, value2)`` counts as detected when ``value1`` lies outside
    the closed interval ``[a, b]`` or ``value2`` lies outside ``[c, d]``.

    Args:
        adv1: First score per sample.
        a, b: Lower and upper bound accepted for ``adv1`` values.
        adv2: Second score per sample.
        c, d: Lower and upper bound accepted for ``adv2`` values.

    Returns:
        ``100 * detected / pairs`` as a float, or ``0.0`` when there are no
        pairs (previously this raised ZeroDivisionError).
    """
    detected = 0
    pairs = 0
    for value1, value2 in zip(adv1, adv2):
        pairs += 1
        if value1 < a or value1 > b or value2 < c or value2 > d:
            detected += 1

    if pairs == 0:
        return 0.0
    return (detected / pairs) * 100

In [34]:
def compute_FPR(ap2a, k, l, ap2b, m, n):
    """Return the percentage of samples flagged by either range check.

    The two sequences are walked in lockstep. A pair ``(value6, value7)`` is
    flagged when ``value6`` lies outside the closed interval ``[k, l]`` or
    ``value7`` lies outside ``[m, n]``.

    Args:
        ap2a: First score per sample; its length is the denominator.
        k, l: Lower and upper bound accepted for ``ap2a`` values.
        ap2b: Second score per sample.
        m, n: Lower and upper bound accepted for ``ap2b`` values.

    Returns:
        ``100 * flagged / len(ap2a)`` as a float, or ``0.0`` when ``ap2a`` is
        empty (previously this raised ZeroDivisionError).
    """
    total = len(ap2a)
    if total == 0:
        return 0.0

    flagged = sum(
        1
        for value6, value7 in zip(ap2a, ap2b)
        if value6 < k or value6 > l or value7 < m or value7 > n
    )
    return (flagged / total) * 100

In [35]:
import sklearn
from sklearn.metrics import roc_auc_score

In [48]:
def return_auc(adv_path, model):
    """Evaluate the noise-robustness detector on one archive and return its AUC.

    Runs compute_metrics_benign and compute_metrics_adv on ``adv_path``, reads
    back the two CSV files they write, and sweeps eleven threshold settings
    over the spread-0.15 rows (row 2: attribution, row 5: logit).

    Returns:
        ``(auc, fpr_results, tpr_results)`` where both lists hold fractions in
        [0, 1], one entry per threshold setting.
    """
    # Accepted interval per operating point: logit score, then attribution score.
    logit_low = [3, 7, 11, 15, 20, 22, 25, 28, 30, 35, 37]
    logit_high = [38] * 11
    attr_low = [500, 520, 530, 600, 750, 700, 800, 900, 1000, 1300, 1600]
    attr_high = [1950] * 11
    operating_points = list(zip(logit_low, logit_high, attr_low, attr_high))

    compute_metrics_benign(adv_path, model)
    compute_metrics_adv(adv_path, model)

    def _row_values(frame, position):
        # The first cell of each CSV row is the row label; drop it.
        return frame.iloc[position].values.flatten().tolist()[1:]

    benign_frame = pd.read_csv("adaptive_Benign.csv")
    benign_attr = _row_values(benign_frame, 2)
    benign_logit = _row_values(benign_frame, 5)
    fpr_results = [
        compute_FPR(benign_logit, lo1, hi1, benign_attr, lo2, hi2) / 100
        for lo1, hi1, lo2, hi2 in operating_points
    ]

    adv_frame = pd.read_csv("adaptive_Adv.csv")
    adv_attr = _row_values(adv_frame, 2)
    adv_logit = _row_values(adv_frame, 5)
    tpr_results = [
        compute_TPR(adv_logit, lo1, hi1, adv_attr, lo2, hi2) / 100
        for lo1, hi1, lo2, hi2 in operating_points
    ]

    return (sklearn.metrics.auc(fpr_results, tpr_results), fpr_results, tpr_results)

In [49]:
# FGSM
# One .npz archive per perturbation budget; the eps values in the file names
# equal 8/255, 16/255, 32/255 and 64/255.
adv_path1 = 'adv samples/FGSM/0.03137254901960784eps.npz'
adv_path2 = 'adv samples/FGSM/0.06274509803921569eps.npz'
adv_path3 = 'adv samples/FGSM/0.12549019607843137eps.npz'
adv_path4 = 'adv samples/FGSM/0.25098039215686274eps.npz'

In [50]:
# return_auc returns (auc, fpr_results, tpr_results); unpack in that order so
# the names match their contents. The displayed tuple is unchanged.
auc, fpr, tpr = return_auc(adv_path1, normal_model)
auc, fpr, tpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.6166820000000002,
 [0.044,
  0.063,
  0.109,
  0.17,
  0.39,
  0.431,
  0.637,
  0.81,
  0.895,
  0.9940000000000001,
  1.0],
 [0.135,
  0.16,
  0.207,
  0.325,
  0.568,
  0.605,
  0.7760000000000001,
  0.894,
  0.944,
  0.998,
  1.0])

In [51]:
# Evaluate the remaining three archives, printing a separator between results.
for _idx, _adv_path in enumerate((adv_path2, adv_path3, adv_path4)):
    if _idx:
        print('----')
    print(return_auc(_adv_path, normal_model))

Computing metrics for {} for benign
Computing metrics for {} for adv
(0.7018224999999999, [0.033, 0.05800000000000001, 0.113, 0.193, 0.38299999999999995, 0.417, 0.611, 0.791, 0.8809999999999999, 0.988, 1.0], [0.12, 0.153, 0.21600000000000003, 0.387, 0.703, 0.7559999999999999, 0.903, 0.968, 0.9890000000000001, 1.0, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.865714, [0.035, 0.063, 0.10100000000000002, 0.185, 0.405, 0.435, 0.637, 0.813, 0.91, 0.996, 1.0], [0.152, 0.271, 0.559, 0.81, 0.964, 0.98, 0.996, 1.0, 1.0, 1.0, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.9360025000000001, [0.046, 0.066, 0.114, 0.187, 0.3990000000000001, 0.42699999999999994, 0.636, 0.8169999999999998, 0.889, 0.995, 1.0], [0.614, 0.7559999999999999, 0.927, 0.99, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])


In [52]:
#PGD
# One .npz archive per perturbation budget; the eps values in the file names
# equal 8/255, 16/255, 32/255 and 64/255. These names are reassigned from the
# values set in an earlier cell.
adv_path1 = 'adv samples/PGD/0.03137254901960784eps.npz'
adv_path2 = 'adv samples/PGD/0.06274509803921569eps.npz'
adv_path3 = 'adv samples/PGD/0.12549019607843137eps.npz'
adv_path4 = 'adv samples/PGD/0.25098039215686274eps.npz'

In [53]:
# return_auc returns (auc, fpr_results, tpr_results); unpack in that order so
# the names match their contents. The displayed tuple is unchanged.
auc, fpr, tpr = return_auc(adv_path1, normal_model)
auc, fpr, tpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.6020519999999999,
 [0.031,
  0.043,
  0.088,
  0.169,
  0.377,
  0.418,
  0.636,
  0.823,
  0.9060000000000001,
  0.998,
  1.0],
 [0.15,
  0.184,
  0.226,
  0.3390000000000001,
  0.549,
  0.557,
  0.727,
  0.862,
  0.924,
  0.995,
  1.0])

In [54]:
# Evaluate the remaining three archives, printing a separator between results.
for _idx, _adv_path in enumerate((adv_path2, adv_path3, adv_path4)):
    if _idx:
        print('----')
    print(return_auc(_adv_path, normal_model))

Computing metrics for {} for benign
Computing metrics for {} for adv
(0.6014479999999999, [0.043, 0.069, 0.114, 0.192, 0.387, 0.425, 0.612, 0.799, 0.895, 0.991, 1.0], [0.117, 0.145, 0.195, 0.303, 0.529, 0.573, 0.7559999999999999, 0.888, 0.955, 0.993, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.7253850000000001, [0.045, 0.078, 0.12300000000000001, 0.19699999999999998, 0.413, 0.458, 0.638, 0.82, 0.905, 0.993, 0.9990000000000001], [0.08900000000000001, 0.137, 0.269, 0.465, 0.759, 0.841, 0.956, 0.9890000000000001, 0.9990000000000001, 1.0, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.898431, [0.038, 0.059, 0.102, 0.174, 0.379, 0.43200000000000005, 0.615, 0.782, 0.897, 0.995, 1.0], [0.234, 0.41, 0.679, 0.887, 0.9890000000000001, 0.992, 0.9990000000000001, 1.0, 1.0, 1.0, 1.0])


In [55]:
#BIM
# One .npz archive per perturbation budget; the eps values in the file names
# equal 8/255, 16/255, 32/255 and 64/255. These names are reassigned from the
# values set in an earlier cell.
adv_path1 = 'adv samples/BIM/0.03137254901960784eps.npz'
adv_path2 = 'adv samples/BIM/0.06274509803921569eps.npz'
adv_path3 = 'adv samples/BIM/0.12549019607843137eps.npz'
adv_path4 = 'adv samples/BIM/0.25098039215686274eps.npz'

In [56]:
# return_auc returns (auc, fpr_results, tpr_results); unpack in that order so
# the names match their contents. The displayed tuple is unchanged.
auc, fpr, tpr = return_auc(adv_path1, normal_model)
auc, fpr, tpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.5776094999999999,
 [0.035,
  0.049,
  0.094,
  0.165,
  0.374,
  0.41600000000000004,
  0.609,
  0.813,
  0.9010000000000001,
  0.998,
  1.0],
 [0.134,
  0.157,
  0.196,
  0.289,
  0.512,
  0.534,
  0.696,
  0.825,
  0.9129999999999999,
  0.993,
  0.9990000000000001])

In [57]:
# Evaluate the remaining three archives, printing a separator between results.
for _idx, _adv_path in enumerate((adv_path2, adv_path3, adv_path4)):
    if _idx:
        print('----')
    print(return_auc(_adv_path, normal_model))

Computing metrics for {} for benign
Computing metrics for {} for adv
(0.581305, [0.043, 0.068, 0.11200000000000002, 0.175, 0.369, 0.43, 0.626, 0.8030000000000002, 0.889, 0.993, 1.0], [0.117, 0.15, 0.18899999999999997, 0.293, 0.509, 0.54, 0.716, 0.858, 0.922, 0.9940000000000001, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.5981124999999999, [0.04100000000000001, 0.067, 0.113, 0.204, 0.403, 0.441, 0.631, 0.805, 0.909, 0.993, 1.0], [0.117, 0.151, 0.204, 0.307, 0.538, 0.61, 0.7659999999999999, 0.8809999999999999, 0.9419999999999998, 0.9990000000000001, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.683261, [0.035, 0.05600000000000001, 0.111, 0.169, 0.395, 0.424, 0.604, 0.7929999999999999, 0.8909999999999999, 0.991, 1.0], [0.066, 0.096, 0.163, 0.332, 0.661, 0.758, 0.893, 0.961, 0.9840000000000001, 1.0, 1.0])


In [58]:
#CW
# Single .npz archive for this attack; adv_path1 is reassigned from the value
# set in an earlier cell.
adv_path1 = 'adv samples/CW/0.15eps.npz'

In [59]:
# return_auc returns (auc, fpr_results, tpr_results); unpack in that order so
# the names match their contents. The displayed tuple is unchanged.
auc, fpr, tpr = return_auc(adv_path1, normal_model)
auc, fpr, tpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.7991775000000001,
 [0.037,
  0.073,
  0.12300000000000001,
  0.18600000000000003,
  0.407,
  0.44299999999999995,
  0.619,
  0.805,
  0.8909999999999999,
  0.99,
  0.9990000000000001],
 [0.18600000000000003,
  0.262,
  0.395,
  0.612,
  0.878,
  0.922,
  0.983,
  0.9990000000000001,
  1.0,
  1.0,
  1.0])