In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import matplotlib
import matplotlib.pyplot as plt
import gc
from captum.attr import *
import quantus
from torch.utils.data import DataLoader
import gc
import torchvision.transforms as transforms
import torchvision.models as models

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Device configuration: use the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
# Validation-time preprocessing: fixed 224x224 resize, tensor conversion, then
# per-channel normalisation with mean 0.5 and std 0.5.
# NOTE(review): torchvision documents ImageNet statistics
# (mean 0.485/0.456/0.406, std 0.229/0.224/0.225) for its pretrained weights --
# confirm that 0.5/0.5 is intentional for this experiment.
_channel_mean = [0.5, 0.5, 0.5]
_channel_std = [0.5, 0.5, 0.5]
valid_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
])

In [5]:
# Root folder passed to torchvision's ImageFolder when the dataset is built.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
images = '/home/db1702/Downloads/imagenet-mini/train/'

In [6]:
def load_imagenet_model():
    """Build a pretrained ResNet-50 on the configured ``device`` in inference mode.

    Returns:
        The torchvision ResNet-50 module, moved to the module-level ``device``
        and switched to evaluation mode.
    """
    # NOTE(review): recent torchvision releases deprecate ``pretrained=True`` in
    # favour of ``weights=...``; kept so the installed version keeps working.
    model = models.resnet50(pretrained=True).to(device)
    # No unconditional .to('cuda') here: `device` already falls back to the CPU
    # when CUDA is unavailable, and forcing 'cuda' would crash on such machines.
    model.eval()
    return model

In [7]:
# Instantiate the classifier shared by every experiment below. Module.to() and
# Module.eval() both return the module, so the cell still ends by displaying it.
normal_model = load_imagenet_model().to(device)
normal_model.eval()

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 11.90 GiB total capacity; 34.74 MiB already allocated; 34.38 MiB free; 38.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [8]:
# Build the dataset: an ImageFolder rooted at `images`, served in shuffled
# batches of two.
test = torchvision.datasets.ImageFolder(root=images, transform=valid_transform)
test_loader = DataLoader(dataset=test, batch_size=2, shuffle=True)

# For the given adversarial and benign images, collect feature-attribution sensitivity and model-prediction sensitivity metrics, and save them to CSV files that are later used to inspect detection performance.

In [9]:
import pandas as pd

In [10]:
def make_noise(x_batch, y_batch, spread):
    """Return a Gaussian-perturbed copy of every item in ``x_batch``.

    For each item the noise standard deviation is ``spread`` times that item's
    value range (max - min). The noisy values are clipped to [0, 1], and the
    results are stacked and moved to the module-level ``device``.

    Args:
        x_batch: iterable of tensors (one per image).
        y_batch: accepted for signature symmetry; not used.
        spread: multiplier applied to each item's value range to get the noise std.

    Returns:
        Tensor stacking the perturbed items, placed on ``device``.
    """
    def _perturb(sample):
        pixels = sample.data.cpu().numpy()
        sigma = spread * (np.max(pixels) - np.min(pixels))
        jitter = np.random.normal(0, sigma, pixels.shape).astype(np.float32)
        return torch.from_numpy(np.clip(pixels + jitter, 0, 1)).cpu()

    return torch.stack([_perturb(sample) for sample in x_batch]).to(device)

In [11]:
# Define uniform noise function
def add_uniform_noise(image):
    """Add zero-mean uniform noise to one image and clip the result to [0, 1].

    Args:
        image: numpy array of values; the noise is drawn with the same shape.

    Returns:
        float32 numpy array with the shape of ``image`` and values in [0, 1].
    """
    # Uniform noise on [-0.5, 0.5), drawn independently for every element.
    noise = np.random.uniform(low=-0.5, high=0.5, size=image.shape).astype(np.float32)
    # Stay in floating point: casting the clipped [0, 1] values to uint8 would
    # truncate everything below 1.0 to 0 and wipe out the image content.
    noisy_image = np.clip(image + noise, 0, 1).astype(np.float32)
    return noisy_image

def uniform_noise(x_batch, y_batch): 
    """Apply ``add_uniform_noise`` to every item of a batch.

    Args:
        x_batch: 4-D tensor; axis 1 is moved to the end before the per-item call
            (channels-last if the input is NCHW -- assumed, not checked).
        y_batch: accepted for signature symmetry; not used.

    Returns:
        float32 tensor in the original axis order, placed on the module-level ``device``.
    """
    # Axis 1 goes last so each slice along axis 0 is handed over on its own.
    channels_last = x_batch.detach().cpu().numpy().transpose(0, 2, 3, 1) * 1.0
    perturbed = np.array([add_uniform_noise(single) for single in channels_last])
    # Restore the original axis order and convert back to a float tensor.
    channels_first = perturbed.transpose(0, 3, 1, 2) / 1.0
    return torch.from_numpy(channels_first).float().to(device)


In [12]:
def compute_metrics_benign(adv_path, normal_model): 
    """Measure attribution and logit sensitivity to input noise on the benign samples.

    Loads ``b_images`` / ``b_labels`` from the ``.npz`` archive at ``adv_path``
    and processes at most the first 1000 samples in batches of two. For every
    sample it records the L1 distance between the clean and the noisy result
    under four perturbations (Gaussian noise with spread 0.15 / 0.25 / 0.35 and
    uniform noise), once for the model logits and once for the
    ``quantus.explain`` attributions. One row per metric is written to
    ``adaptive_Benign.csv``.

    Args:
        adv_path: path of the ``.npz`` archive to read.
        normal_model: model called on the image batches and passed to ``quantus.explain``.

    Returns:
        None. The metrics are written to ``adaptive_Benign.csv`` in the working directory.
    """
    # The original message never filled its "{}" placeholder; report the archive.
    print("Computing metrics for {} for benign".format(adv_path))

    npobj = np.load(adv_path)
    images = torch.from_numpy(npobj['b_images'])
    labels = torch.from_numpy(npobj['b_labels'])

    # NOTE(review): the first key is spelled "method:" (trailing colon), so it is
    # not a "method" keyword for quantus.explain. It is left byte-identical so
    # the recorded results stay reproducible -- confirm which explainer is
    # actually used before relying on the name "IntegratedGradient".
    explain_kwargs = {"method:": "IntegratedGradient", "device": device}

    variants = ("gaussian1", "gaussian2", "gaussian3", "uniform")
    attribution = {name: [] for name in variants}  # attribution robustness
    logit = {name: [] for name in variants}        # logit robustness

    batch_size = 2
    end = min(len(labels), 1000)

    for i in range(0, end, batch_size):
        images_adv = images[i:i + batch_size].to(device)
        y_pred_adv = labels[i:i + batch_size].to(device)

        # Logits are only compared numerically, so skip autograd bookkeeping
        # instead of keeping an unused graph alive for each forward pass.
        with torch.no_grad():
            x_logits = normal_model(images_adv)
        gc.collect()
        torch.cuda.empty_cache()

        # approach: attribution and logit robustness
        a_batch = quantus.explain(
            model=normal_model, inputs=images_adv, targets=y_pred_adv, **explain_kwargs)

        # Draw every perturbation first, in the original order, so numpy's
        # random stream is consumed exactly as before.
        noisy = {
            "gaussian1": make_noise(images_adv, y_pred_adv, spread=0.15),
            "gaussian2": make_noise(images_adv, y_pred_adv, spread=0.25),
            "gaussian3": make_noise(images_adv, y_pred_adv, spread=0.35),
            "uniform": uniform_noise(images_adv, y_pred_adv),
        }

        for name in variants:
            with torch.no_grad():
                noisy_logits = normal_model(noisy[name])
            # Per-sample L1 distance between clean and noisy logits.
            diff = torch.norm(x_logits - noisy_logits, p=1, dim=1)
            logit[name].extend(diff.detach().cpu().numpy())

        for name in variants:
            a_batch_noisy = quantus.explain(
                model=normal_model, inputs=noisy[name], targets=y_pred_adv, **explain_kwargs)
            # Per-sample L1 distance between clean and noisy attributions.
            for a, b in zip(a_batch, a_batch_noisy):
                attribution[name].append(np.linalg.norm(a.flatten() - b.flatten(), ord=1))

    # Row order and labels are part of the CSV contract read back by position
    # elsewhere, so they are kept exactly as they were (typos included).
    df = pd.DataFrame(
        [
            attribution["gaussian1"],
            attribution["gaussian2"],
            attribution["gaussian3"],
            attribution["uniform"],
            logit["gaussian1"],
            logit["gaussian2"],
            logit["gaussian3"],
            logit["uniform"],
        ],
        index=[
            "Gaussian1 attribution",
            "Gaussian2 attribution",
            "Gaussian3 attribution",
            "uniform attr",
            "Gaussian1 logit robusntess",
            "Gaussian2 logit robusntess",
            "Gaussian3 logit robusntess",
            "uniform logit",
        ],
    )

    path = "adaptive_Benign.csv"
    df.to_csv(path)

In [13]:
def compute_metrics_adv(adv_path, normal_model): 
    """Measure attribution and logit sensitivity to input noise on the adversarial samples.

    Loads ``a_images`` / ``a_labels`` from the ``.npz`` archive at ``adv_path``
    and processes at most the first 1000 samples in batches of two. For every
    sample it records the L1 distance between the clean and the noisy result
    under four perturbations (Gaussian noise with spread 0.15 / 0.25 / 0.35 and
    uniform noise), once for the model logits and once for the
    ``quantus.explain`` attributions. One row per metric is written to
    ``adaptive_Adv.csv``.

    Args:
        adv_path: path of the ``.npz`` archive to read.
        normal_model: model called on the image batches and passed to ``quantus.explain``.

    Returns:
        None. The metrics are written to ``adaptive_Adv.csv`` in the working directory.
    """
    # The original message never filled its "{}" placeholder; report the archive.
    print("Computing metrics for {} for adv".format(adv_path))

    npobj = np.load(adv_path)
    images = torch.from_numpy(npobj['a_images'])
    labels = torch.from_numpy(npobj['a_labels'])

    # NOTE(review): the first key is spelled "method:" (trailing colon), so it is
    # not a "method" keyword for quantus.explain. It is left byte-identical so
    # the recorded results stay reproducible -- confirm which explainer is
    # actually used before relying on the name "IntegratedGradient".
    explain_kwargs = {"method:": "IntegratedGradient", "device": device}

    variants = ("gaussian1", "gaussian2", "gaussian3", "uniform")
    attribution = {name: [] for name in variants}  # attribution robustness
    logit = {name: [] for name in variants}        # logit robustness

    batch_size = 2
    end = min(len(labels), 1000)

    for i in range(0, end, batch_size):
        images_adv = images[i:i + batch_size].to(device)
        y_pred_adv = labels[i:i + batch_size].to(device)

        # Logits are only compared numerically, so skip autograd bookkeeping
        # instead of keeping an unused graph alive for each forward pass.
        with torch.no_grad():
            x_logits = normal_model(images_adv)
        gc.collect()
        torch.cuda.empty_cache()

        # approach: attribution and logit robustness
        a_batch = quantus.explain(
            model=normal_model, inputs=images_adv, targets=y_pred_adv, **explain_kwargs)

        # Draw every perturbation first, in the original order, so numpy's
        # random stream is consumed exactly as before.
        noisy = {
            "gaussian1": make_noise(images_adv, y_pred_adv, spread=0.15),
            "gaussian2": make_noise(images_adv, y_pred_adv, spread=0.25),
            "gaussian3": make_noise(images_adv, y_pred_adv, spread=0.35),
            "uniform": uniform_noise(images_adv, y_pred_adv),
        }

        for name in variants:
            with torch.no_grad():
                noisy_logits = normal_model(noisy[name])
            # Per-sample L1 distance between clean and noisy logits.
            diff = torch.norm(x_logits - noisy_logits, p=1, dim=1)
            logit[name].extend(diff.detach().cpu().numpy())

        for name in variants:
            a_batch_noisy = quantus.explain(
                model=normal_model, inputs=noisy[name], targets=y_pred_adv, **explain_kwargs)
            # Per-sample L1 distance between clean and noisy attributions.
            for a, b in zip(a_batch, a_batch_noisy):
                attribution[name].append(np.linalg.norm(a.flatten() - b.flatten(), ord=1))

    # Row order and labels are part of the CSV contract read back by position
    # elsewhere, so they are kept exactly as they were (typos included).
    df = pd.DataFrame(
        [
            attribution["gaussian1"],
            attribution["gaussian2"],
            attribution["gaussian3"],
            attribution["uniform"],
            logit["gaussian1"],
            logit["gaussian2"],
            logit["gaussian3"],
            logit["uniform"],
        ],
        index=[
            "Gaussian1 attribution",
            "Gaussian2 attribution",
            "Gaussian3 attribution",
            "uniform attr",
            "Gaussian1 logit robusntess",
            "Gaussian2 logit robusntess",
            "Gaussian3 logit robusntess",
            "uniform logit",
        ],
    )

    path = "adaptive_Adv.csv"
    df.to_csv(path)

In [14]:
def compute_TPR(adv1, a, b, adv2, c, d): 
    """Percentage of sample pairs flagged by either of two range tests.

    A pair ``(value1, value2)`` taken from ``zip(adv1, adv2)`` counts as a
    detection when ``value1`` lies outside ``[a, b]`` or ``value2`` lies outside
    ``[c, d]``.

    Args:
        adv1: first metric, one value per sample.
        a, b: lower / upper bound of the accepted range for ``adv1``.
        adv2: second metric, one value per sample.
        c, d: lower / upper bound of the accepted range for ``adv2``.

    Returns:
        Detection rate in percent (0-100); 0.0 when there are no pairs to score.
    """
    TP = 0
    FN = 0

    for value1, value2 in zip(adv1, adv2):
        if value1 < a or value1 > b or value2 < c or value2 > d:
            TP += 1
        else:
            FN += 1

    total = TP + FN
    if total == 0:
        # Nothing to score: report 0 instead of raising ZeroDivisionError.
        return 0.0
    return (TP / total) * 100

In [15]:
def compute_FPR(ap2a, k, l, ap2b, m, n): 
    """Percentage of sample pairs flagged by either of two range tests.

    A pair ``(value6, value7)`` taken from ``zip(ap2a, ap2b)`` counts as a false
    positive when ``value6`` lies outside ``[k, l]`` or ``value7`` lies outside
    ``[m, n]``.

    Args:
        ap2a: first metric, one value per sample.
        k, l: lower / upper bound of the accepted range for ``ap2a``.
        ap2b: second metric, one value per sample.
        m, n: lower / upper bound of the accepted range for ``ap2b``.

    Returns:
        False-positive rate in percent (0-100); 0.0 when there are no pairs to score.
    """
    FP = 0
    total = 0

    for value6, value7 in zip(ap2a, ap2b):
        # Count the pairs that are really compared: zip stops at the shorter
        # input, so len(ap2a) could overstate the denominator.
        total += 1
        if value6 < k or value6 > l or value7 < m or value7 > n:
            FP += 1

    if total == 0:
        # Nothing to score: report 0 instead of raising ZeroDivisionError.
        return 0.0
    return (FP / total) * 100

In [16]:
import sklearn
from sklearn.metrics import roc_auc_score

In [20]:
def return_auc(adv_path, model, anchor_endpoints=False):
    """Recompute the robustness metrics for one archive and score the detector.

    Runs ``compute_metrics_benign`` and ``compute_metrics_adv`` on ``adv_path``,
    reads back the two CSV files they write, and evaluates FPR (benign samples)
    and TPR (adversarial samples) at nine fixed threshold settings using the
    "Gaussian3" logit and attribution rows.

    Args:
        adv_path: path of the ``.npz`` archive holding benign and adversarial samples.
        model: model forwarded to the metric functions.
        anchor_endpoints: when True, the points (0, 0) and (1, 1) are added to the
            curve before integrating. The default (False) integrates only over
            the sampled FPR range, so even a detector with TPR 1.0 everywhere
            scores below 1.

    Returns:
        Tuple ``(auc, tpr_results, fpr_results)``; both lists hold fractions in
        [0, 1], one entry per threshold setting, without any anchor points.
    """
    # Accepted range [k, l] for the logit metric and [m, n] for the attribution
    # metric; each column is one operating point.
    k = [1100, 1315, 1465, 1580, 1600, 1700, 1800, 1900, 2100]
    l = [2970, 2970, 2970, 2970, 2970, 2970, 2970, 2970, 2970]

    m = [1800, 2100, 2200, 2400, 2800, 3000, 3400, 5000, 7000]
    n = [10894, 10894, 10894, 10894, 10894, 10894, 10894, 10894, 10894]

    compute_metrics_benign(adv_path, model)
    compute_metrics_adv(adv_path, model)

    # Rows are addressed by position: 2 is "Gaussian3 attribution" and 6 is
    # "Gaussian3 logit robusntess"; [1:] drops the leading label column.
    benign_df = pd.read_csv("adaptive_Benign.csv")
    attr_gaussian3 = benign_df.iloc[2].values.flatten().tolist()[1:]
    logit_gaussian3 = benign_df.iloc[6].values.flatten().tolist()[1:]

    fpr_results = [
        compute_FPR(logit_gaussian3, t1, t2, attr_gaussian3, t3, t4) / 100
        for t1, t2, t3, t4 in zip(k, l, m, n)
    ]

    adv_df = pd.read_csv("adaptive_Adv.csv")
    attr_gaussian3_adv = adv_df.iloc[2].values.flatten().tolist()[1:]
    logit_gaussian3_adv = adv_df.iloc[6].values.flatten().tolist()[1:]

    tpr_results = [
        compute_TPR(logit_gaussian3_adv, t1, t2, attr_gaussian3_adv, t3, t4) / 100
        for t1, t2, t3, t4 in zip(k, l, m, n)
    ]

    fpr_curve, tpr_curve = fpr_results, tpr_results
    if anchor_endpoints:
        fpr_curve = [0.0] + fpr_results + [1.0]
        tpr_curve = [0.0] + tpr_results + [1.0]
    return (sklearn.metrics.auc(fpr_curve, tpr_curve), tpr_results, fpr_results)

# FGSM

In [21]:

# FGSM archives; the four file names differ only in the number before "eps".
_fgsm_dir = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/ResNet50/FGSM/'
adv_path1 = _fgsm_dir + '0.03137254901960784eps.npz'
adv_path2 = _fgsm_dir + '0.06274509803921569eps.npz'
adv_path3 = _fgsm_dir + '0.12549019607843137eps.npz'
adv_path4 = _fgsm_dir + '0.25098039215686274eps.npz'

In [22]:
# Score the archive at adv_path1; the bare tuple on the last line is what the
# cell displays.
auc, tpr, fpr = return_auc(adv_path1, normal_model)
auc, tpr, fpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.647362,
 [0.046, 0.142, 0.261, 0.42199999999999993, 0.54, 0.695, 0.848, 0.981, 1.0],
 [0.022, 0.069, 0.122, 0.222, 0.331, 0.45, 0.68, 0.92, 0.996])

In [23]:
# Score the remaining three archives, printing a separator between results.
for position, archive in enumerate((adv_path2, adv_path3, adv_path4)):
    if position:
        print('----')
    print(return_auc(archive, normal_model))

Computing metrics for {} for benign
Computing metrics for {} for adv
(0.7504, [0.054000000000000006, 0.20200000000000004, 0.37, 0.572, 0.7120000000000001, 0.846, 0.946, 0.998, 1.0], [0.014000000000000002, 0.057, 0.12300000000000001, 0.22699999999999998, 0.32800000000000007, 0.447, 0.644, 0.9280000000000002, 0.995])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.863546, [0.133, 0.424, 0.595, 0.7879999999999999, 0.922, 0.973, 0.998, 1.0, 1.0], [0.017, 0.057, 0.11600000000000002, 0.226, 0.33399999999999996, 0.466, 0.6629999999999999, 0.936, 0.998])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.9465954999999999, [0.568, 0.847, 0.914, 0.978, 0.995, 0.9990000000000001, 1.0, 1.0, 1.0], [0.02, 0.064, 0.117, 0.215, 0.316, 0.467, 0.652, 0.932, 0.993])


# PGD

In [24]:
# PGD archives; the four file names differ only in the number before "eps".
_pgd_dir = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/ResNet50/PGD/'
adv_path1 = _pgd_dir + '0.03137254901960784eps.npz'
adv_path2 = _pgd_dir + '0.06274509803921569eps.npz'
adv_path3 = _pgd_dir + '0.12549019607843137eps.npz'
adv_path4 = _pgd_dir + '0.25098039215686274eps.npz'

In [25]:
# Score the archive at adv_path1; the bare tuple on the last line is what the
# cell displays.
auc, tpr, fpr = return_auc(adv_path1, normal_model)
auc, tpr, fpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.969001,
 [0.987, 0.987, 0.988, 0.988, 0.992, 0.992, 0.992, 0.995, 0.998],
 [0.02, 0.062, 0.117, 0.212, 0.295, 0.447, 0.626, 0.909, 0.997])

In [26]:
# Score the remaining three archives, printing a separator between results.
for position, archive in enumerate((adv_path2, adv_path3, adv_path4)):
    if position:
        print('----')
    print(return_auc(archive, normal_model))

Computing metrics for {} for benign
Computing metrics for {} for adv
(0.975, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [0.018, 0.053, 0.113, 0.22699999999999998, 0.341, 0.475, 0.645, 0.92, 0.993])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.9800000000000001, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [0.018, 0.057, 0.122, 0.21899999999999997, 0.321, 0.462, 0.67, 0.9230000000000002, 0.998])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.9750000000000001, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [0.019, 0.055, 0.115, 0.23, 0.353, 0.479, 0.6679999999999999, 0.915, 0.9940000000000001])


# BIM

In [27]:
# BIM archives; the four file names differ only in the number before "eps".
_bim_dir = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/ResNet50/BIM/'
adv_path1 = _bim_dir + '0.03137254901960784eps.npz'
adv_path2 = _bim_dir + '0.06274509803921569eps.npz'
adv_path3 = _bim_dir + '0.12549019607843137eps.npz'
adv_path4 = _bim_dir + '0.25098039215686274eps.npz'

In [28]:
# Score the archive at adv_path1; the bare tuple on the last line is what the
# cell displays.
auc, tpr, fpr = return_auc(adv_path1, normal_model)
auc, tpr, fpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.3455155,
 [0.133,
  0.162,
  0.18899999999999997,
  0.217,
  0.231,
  0.278,
  0.347,
  0.602,
  0.8740000000000001],
 [0.02, 0.059, 0.10800000000000001, 0.198, 0.294, 0.425, 0.622, 0.921, 0.995])

In [29]:
# Score the remaining three archives, printing a separator between results.
for position, archive in enumerate((adv_path2, adv_path3, adv_path4)):
    if position:
        print('----')
    print(return_auc(archive, normal_model))

Computing metrics for {} for benign
Computing metrics for {} for adv
(0.4606790000000001, [0.37, 0.377, 0.386, 0.3990000000000001, 0.41, 0.436, 0.468, 0.61, 0.867], [0.016, 0.065, 0.11899999999999998, 0.23799999999999996, 0.3390000000000001, 0.488, 0.664, 0.925, 0.9940000000000001])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.592698, [0.549, 0.554, 0.558, 0.561, 0.563, 0.575, 0.583, 0.699, 0.871], [0.017, 0.053, 0.099, 0.207, 0.308, 0.4640000000000001, 0.647, 0.926, 0.998])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.616636, [0.594, 0.595, 0.595, 0.598, 0.598, 0.598, 0.611, 0.725, 0.889], [0.016, 0.063, 0.12, 0.214, 0.303, 0.463, 0.6679999999999999, 0.924, 0.992])


# CW

In [30]:
# CW archive (a single file in this section).
_cw_dir = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/ResNet50/CW/'
adv_path1 = _cw_dir + '0.15eps.npz'

In [31]:
# Score the archive at adv_path1; the bare tuple on the last line is what the
# cell displays.
auc, tpr, fpr = return_auc(adv_path1, normal_model)
auc, tpr, fpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.943917759838547,
 [0.763874873864783,
  0.875882946518668,
  0.9364278506559032,
  0.9677093844601413,
  0.970736629667003,
  0.9818365287588294,
  0.987891019172553,
  0.9959636730575177,
  0.9989909182643796],
 [0.018, 0.071, 0.125, 0.23, 0.335, 0.47, 0.6409999999999999, 0.916, 0.993])