In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import matplotlib
import matplotlib.pyplot as plt
import gc
from captum.attr import *
import quantus
from torch.utils.data import DataLoader
import gc
import torchvision.transforms as transforms
import torchvision.models as models

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# the validation transforms
valid_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]
    )
])

In [5]:
images = '/home/db1702/Downloads/imagenet-mini/train/'

In [6]:
#get dataset
test = torchvision.datasets.ImageFolder(images, transform=valid_transform)
test_loader = DataLoader(test, shuffle=True, batch_size = 2)

# For given adversarial images and benign images, collect metrics of feature attribution sensitivity and model prediction sensitivity. Save in csv that will be used for inspecting detection performance. 

In [7]:
import pandas as pd

In [8]:
def make_noise(x_batch, y_batch, spread):
    new_x_batch = []
    for x in x_batch:
        x = x.data.cpu().numpy()
        stdev = spread * (np.max(x)-np.min(x))
        noise = np.random.normal(0, stdev, x.shape).astype(np.float32)
        x_plus_noise = x + noise
        x_plus_noise = np.clip(x_plus_noise, 0, 1)
        x_plus_noise = torch.from_numpy(x_plus_noise).cpu()
        new_x_batch.append(x_plus_noise)
    new_batch = torch.stack(new_x_batch).to(device)
    return new_batch

In [9]:
# Define uniform noise function
def add_uniform_noise(image):
    # Generate uniform noise with mean 0 and standard deviation 25
    noise = np.random.uniform(low=-0.5, high=0.5, size=image.shape).astype(np.float32)
    noisy_image = np.clip(image + noise, 0, 1).astype(np.uint8)
    return noisy_image

def uniform_noise(x_batch, y_batch): 
    # Convert batch of images to numpy array
    images = x_batch.detach().cpu().numpy().transpose(0, 2, 3, 1) * 1.0
    # Add Poisson noise to each image in the batch
    noisy_images = [add_uniform_noise(image) for image in images]
    # Convert noisy images back to Tensor format
    noisy_inputs = torch.from_numpy(np.array(noisy_images).transpose(0, 3, 1, 2) / 1.0).float()
    return noisy_inputs.to(device)


In [10]:
def compute_metrics_benign(adv_path, normal_model): 
    
    print("Computing metrics for {} for benign")
    
    npobj = np.load(adv_path)
    adaptive_image = npobj['b_images']
    adaptive_label = npobj['b_labels']
    
    
    #attribution robustness
    attribution_gaussian1 = []
    attribution_gaussian2 = []
    attribution_gaussian3 = []
    attribution_uniform = []
    
    #logit robustness
    logit_gaussian1 = []
    logit_gaussian2 = []
    logit_gaussian3 = []
    logit_uniform = []
    
    images, labels = torch.from_numpy(adaptive_image), torch.from_numpy(adaptive_label)
    #images, labels = images.to(device), labels.to(device)
    
    end = len(adaptive_label)
    if end > 1000:
        end = 1000
    
    for i in range(0, end, 2):
        
        images_adv, y_pred_adv = images[i:i+2], labels[i:i+2]
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        x_logits = normal_model(images_adv)
        gc.collect()
        torch.cuda.empty_cache()
        
        #approach: attribution and logit robustness
        a_batch = quantus.explain(
            model=normal_model, inputs=images_adv, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        gaussian_noisy_images_1 = make_noise(images_adv, y_pred_adv, spread = 0.15)
        gaussian_logits_1 = normal_model(gaussian_noisy_images_1)
        gaussian_noisy_images_2 = make_noise(images_adv, y_pred_adv, spread = 0.25)
        gaussian_logits_2 = normal_model(gaussian_noisy_images_2)
        gaussian_noisy_images_3 = make_noise(images_adv, y_pred_adv, spread = 0.35)
        gaussian_logits_3 = normal_model(gaussian_noisy_images_3)
        uniform_noisy_images = uniform_noise(images_adv, y_pred_adv)
        uniform_logits = normal_model(uniform_noisy_images)
        
        
        diff1 = torch.norm(x_logits-gaussian_logits_1,p=1, dim=1) 
        diff2 = torch.norm(x_logits-gaussian_logits_2,p=1, dim=1) 
        diff3 = torch.norm(x_logits-gaussian_logits_3,p=1, dim=1) 
        diff4 = torch.norm(x_logits-uniform_logits,p=1, dim=1) 
        
        logit_gaussian1.extend(diff1.detach().cpu().numpy())
        logit_gaussian2.extend(diff2.detach().cpu().numpy())
        logit_gaussian3.extend(diff3.detach().cpu().numpy())
        logit_uniform.extend(diff4.detach().cpu().numpy())
        
        a_batch_gaussian1 = quantus.explain(
        model=normal_model, inputs=gaussian_noisy_images_1, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        a_batch_gaussian2 = quantus.explain(
        model=normal_model, inputs=gaussian_noisy_images_2, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        a_batch_gaussian3 = quantus.explain(
        model=normal_model, inputs=gaussian_noisy_images_3, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        a_batch_uniform = quantus.explain(
        model=normal_model, inputs=uniform_noisy_images, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        
        for a, b in zip(a_batch, a_batch_gaussian1):
            c = np.linalg.norm(a.flatten()-b.flatten(),ord=1 )
            attribution_gaussian1.append(c)
            
        for a, b in zip(a_batch, a_batch_gaussian2):
            c = np.linalg.norm(a.flatten()-b.flatten(),ord=1 )
            attribution_gaussian2.append(c)
        
        for a, b in zip(a_batch, a_batch_gaussian3):
            c = np.linalg.norm(a.flatten()-b.flatten(),ord=1 )
            attribution_gaussian3.append(c)
        
        for a, b in zip(a_batch, a_batch_uniform):
            c = np.linalg.norm(a.flatten()-b.flatten(),ord=1 )
            attribution_uniform.append(c)
        
        
    df = pd.DataFrame([
            
            attribution_gaussian1,
            attribution_gaussian2,
            attribution_gaussian3,
        attribution_uniform,
            logit_gaussian1,
            logit_gaussian2,
            logit_gaussian3,
        logit_uniform
    ], index = [
            "Gaussian1 attribution", 
            "Gaussian2 attribution", 
            "Gaussian3 attribution",
        "uniform attr",
            "Gaussian1 logit robusntess",
            "Gaussian2 logit robusntess",
            "Gaussian3 logit robusntess",
        "uniform logit"
                    ])
            
    path = "adaptive_Benign.csv"
    df.to_csv(path)

In [11]:
def compute_metrics_adv(adv_path, normal_model): 
    
    print("Computing metrics for {} for adv")
    npobj = np.load(adv_path)
    adaptive_image = npobj['a_images']
    adaptive_label = npobj['a_labels']
    
    #attribution robustness
    attribution_gaussian1 = []
    attribution_gaussian2 = []
    attribution_gaussian3 = []
    attribution_uniform = []
    
    #logit robustness
    logit_gaussian1 = []
    logit_gaussian2 = []
    logit_gaussian3 = []
    logit_uniform = []
    
    images, labels = torch.from_numpy(adaptive_image), torch.from_numpy(adaptive_label)
    #images, labels = images.to(device), labels.to(device)
    
    end = len(adaptive_label)
    if end > 1000:
        end = 1000
    
    for i in range(0, end, 2):
        
        images_adv, y_pred_adv = images[i:i+2], labels[i:i+2]
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        x_logits = normal_model(images_adv)
        gc.collect()
        torch.cuda.empty_cache()
        
        #approach: attribution and logit robustness
        a_batch = quantus.explain(
            model=normal_model, inputs=images_adv, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        gaussian_noisy_images_1 = make_noise(images_adv, y_pred_adv, spread = 0.15)
        gaussian_logits_1 = normal_model(gaussian_noisy_images_1)
        gaussian_noisy_images_2 = make_noise(images_adv, y_pred_adv, spread = 0.25)
        gaussian_logits_2 = normal_model(gaussian_noisy_images_2)
        gaussian_noisy_images_3 = make_noise(images_adv, y_pred_adv, spread = 0.35)
        gaussian_logits_3 = normal_model(gaussian_noisy_images_3)
        uniform_noisy_images = uniform_noise(images_adv, y_pred_adv)
        uniform_logits = normal_model(uniform_noisy_images)
        
        
        diff1 = torch.norm(x_logits-gaussian_logits_1,p=1, dim=1) 
        diff2 = torch.norm(x_logits-gaussian_logits_2,p=1, dim=1) 
        diff3 = torch.norm(x_logits-gaussian_logits_3,p=1, dim=1) 
        diff4 = torch.norm(x_logits-uniform_logits,p=1, dim=1) 
        
        logit_gaussian1.extend(diff1.detach().cpu().numpy())
        logit_gaussian2.extend(diff2.detach().cpu().numpy())
        logit_gaussian3.extend(diff3.detach().cpu().numpy())
        logit_uniform.extend(diff4.detach().cpu().numpy())
        
        a_batch_gaussian1 = quantus.explain(
        model=normal_model, inputs=gaussian_noisy_images_1, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        a_batch_gaussian2 = quantus.explain(
        model=normal_model, inputs=gaussian_noisy_images_2, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        a_batch_gaussian3 = quantus.explain(
        model=normal_model, inputs=gaussian_noisy_images_3, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        a_batch_uniform = quantus.explain(
        model=normal_model, inputs=uniform_noisy_images, targets=y_pred_adv, **{"method:": "IntegratedGradient", "device": device})
        
        
        for a, b in zip(a_batch, a_batch_gaussian1):
            c = np.linalg.norm(a.flatten()-b.flatten(),ord=1 )
            attribution_gaussian1.append(c)
            
        for a, b in zip(a_batch, a_batch_gaussian2):
            c = np.linalg.norm(a.flatten()-b.flatten(),ord=1 )
            attribution_gaussian2.append(c)
        
        for a, b in zip(a_batch, a_batch_gaussian3):
            c = np.linalg.norm(a.flatten()-b.flatten(),ord=1 )
            attribution_gaussian3.append(c)
        
        for a, b in zip(a_batch, a_batch_uniform):
            c = np.linalg.norm(a.flatten()-b.flatten(),ord=1 )
            attribution_uniform.append(c)
        
        
    df = pd.DataFrame([
            
            attribution_gaussian1,
            attribution_gaussian2,
            attribution_gaussian3,
        attribution_uniform,
            logit_gaussian1,
            logit_gaussian2,
            logit_gaussian3,
    logit_uniform], index = [
            "Gaussian1 attribution", 
            "Gaussian2 attribution", 
            "Gaussian3 attribution",
        "uniform attr",
            "Gaussian1 logit robusntess",
            "Gaussian2 logit robusntess",
            "Gaussian3 logit robusntess",
        "uniform logit"
                    ])
            
    path = "adaptive_Adv.csv"
    df.to_csv(path)

In [12]:
def compute_TPR(adv1, a, b, adv2, c, d): 
    TN=0
    FN=0
    FP = 0 
    TP=0
    
    for value1, value2 in zip(adv1, adv2): 
        if value1<a or value1>b:
            TP += 1
        else:
            if value2<c or value2>d:
                TP+=1
            else: 
                FN+=1
    
    return (TP/(TP+FN))*100

In [13]:
def compute_FPR(ap2a, k, l, ap2b, m, n): 
    TN=0
    FN=0
    FP=0 
    TP=0
    
    for value6, value7 in zip(ap2a,ap2b):
        if value6<k or value6>l:
            FP +=1
        else:
            if value7<m or value7>n:
                FP +=1

    return (FP/(len(ap2a)))*100

In [14]:
import sklearn
from sklearn.metrics import roc_auc_score

In [15]:
# load mobilet model first

def load_imagenet_model():
    model=torchvision.models.mobilenet_v3_small(weights=True).to(device)
    model.to('cuda')
    model.train(False)
    return model

In [16]:
normal_model = load_imagenet_model()
normal_model.to(device)
normal_model.eval()

MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (avgpool): AdaptiveAvgPool2d(output_size=1)
          (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
          (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
          (activation): ReLU()
          (scale_activation): Hardsigmoid()
        )
        (2): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), 

In [17]:
def return_auc(adv_path, model):
    #logitgaussian3
    k=[1500, 1700,2000, 2510,2810,3000, 3200,3500, 6000]
    l=[6700, 6700,6700, 6700,6700,6700,6700,6700, 6700]
    
    #attrgaussian3
    m=[1000, 1400, 1580, 1750,1900, 2300, 3200, 3500, 6000]
    n=[6600, 6600,6600, 6600,6600,6600,6600,6600,6600]

    compute_metrics_benign(adv_path, model)
    compute_metrics_adv(adv_path, model)
    df_cifar = pd.read_csv("adaptive_Benign.csv")
    attr_gaussian3 = df_cifar.iloc[2].values.flatten().tolist()[1:]
    logit_gaussian3 = df_cifar.iloc[6].values.flatten().tolist()[1:]
        
    fpr_results =[]
    for t1,t2,t3,t4 in zip(k,l,m,n):
        FPR = compute_FPR(logit_gaussian3, t1,t2, attr_gaussian3,t3,t4)
        fpr_results.append(FPR/100)
        
    df_pgd_eps1 = pd.read_csv("adaptive_Adv.csv")
    attr_gaussian3_eps1 = df_pgd_eps1.iloc[2].values.flatten().tolist()[1:]
    logit_gaussian3_eps1 = df_pgd_eps1.iloc[6].values.flatten().tolist()[1:]
    
    tpr_results =[]
    for t1,t2,t3,t4 in zip(k,l,m,n):
        TPR = compute_TPR(logit_gaussian3_eps1, t1,t2, attr_gaussian3_eps1,t3,t4)
        tpr_results.append(TPR/100)
    return sklearn.metrics.auc(fpr_results, tpr_results), fpr_results, tpr_results

# FGSM

In [32]:

adv_path1 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/FGSM/0.03137254901960784eps.npz'
adv_path2 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/FGSM/0.06274509803921569eps.npz'
adv_path3 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/FGSM/0.12549019607843137eps.npz'
adv_path4 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/FGSM/0.25098039215686274eps.npz'

In [33]:
auc, tpr, fpr = return_auc(adv_path1, normal_model)
auc, tpr, fpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.8204594999999999,
 [0.025,
  0.052000000000000005,
  0.11200000000000002,
  0.198,
  0.294,
  0.544,
  0.884,
  0.936,
  1.0],
 [0.115, 0.274, 0.505, 0.706, 0.823, 0.9419999999999998, 0.998, 1.0, 1.0])

In [34]:
print(return_auc(adv_path2, normal_model))
print('----')
print(return_auc(adv_path3, normal_model))
print('----')
print(return_auc(adv_path4, normal_model))

Computing metrics for {} for benign
Computing metrics for {} for adv
(0.9136475, [0.026000000000000002, 0.061, 0.124, 0.201, 0.314, 0.559, 0.8960000000000001, 0.9469999999999998, 1.0], [0.279, 0.57, 0.813, 0.924, 0.971, 0.996, 1.0, 1.0, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.959767, [0.031, 0.061, 0.118, 0.204, 0.322, 0.559, 0.882, 0.93, 1.0], [0.7509999999999999, 0.919, 0.982, 0.995, 0.9990000000000001, 1.0, 1.0, 1.0, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.98014, [0.019, 0.059, 0.115, 0.18, 0.277, 0.52, 0.865, 0.915, 1.0], [0.9689999999999999, 0.995, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])


# PGD

In [35]:
adv_path1 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/PGD/0.03137254901960784eps.npz'
adv_path2 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/PGD/0.06274509803921569eps.npz'
adv_path3 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/PGD/0.12549019607843137eps.npz'
adv_path4 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/PGD/0.25098039215686274eps.npz'

In [36]:
auc, tpr, fpr = return_auc(adv_path1, normal_model)
auc, tpr, fpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.9552784999999999,
 [0.039, 0.077, 0.14, 0.225, 0.325, 0.569, 0.872, 0.924, 1.0],
 [0.986,
  0.987,
  0.9890000000000001,
  0.99,
  0.9940000000000001,
  0.995,
  0.997,
  0.998,
  1.0])

In [37]:
print(return_auc(adv_path2, normal_model))
print('----')
print(return_auc(adv_path3, normal_model))
print('----')
print(return_auc(adv_path4, normal_model))

Computing metrics for {} for benign
Computing metrics for {} for adv
(0.9607490000000001, [0.039, 0.075, 0.144, 0.241, 0.3390000000000001, 0.539, 0.8809999999999999, 0.931, 1.0], [0.9990000000000001, 0.9990000000000001, 0.9990000000000001, 0.9990000000000001, 1.0, 1.0, 1.0, 1.0, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.967, [0.033, 0.065, 0.121, 0.221, 0.338, 0.551, 0.8859999999999999, 0.941, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.9670000000000002, [0.032, 0.065, 0.132, 0.23, 0.333, 0.569, 0.875, 0.935, 0.9990000000000001], [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])


# CW

In [38]:
adv_path1 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/CW/0.15eps.npz'

auc, tpr, fpr = return_auc(adv_path1, normal_model)
auc, tpr, fpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.8666335000000001,
 [0.033, 0.07, 0.132, 0.229, 0.32899999999999996, 0.548, 0.878, 0.932, 1.0],
 [0.527, 0.569, 0.66, 0.856, 0.921, 0.956, 0.98, 0.991, 1.0])

In [20]:
adv_path1 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/BIM/0.03137254901960784eps.npz'
adv_path2 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/BIM/0.06274509803921569eps.npz'
adv_path3 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/BIM/0.12549019607843137eps.npz'
adv_path4 = '/home/db1702/Desktop/data/virtual environments/adv detection by robustness/adv_detection/AAAI /adv samples/IMAGENET/MobileNet/BIM/0.25098039215686274eps.npz'

In [21]:
auc, tpr, fpr = return_auc(adv_path1, normal_model)
auc, tpr, fpr

Computing metrics for {} for benign
Computing metrics for {} for adv


(0.48302700000000004,
 [0.018, 0.049, 0.107, 0.215, 0.317, 0.532, 0.8759999999999999, 0.936, 1.0],
 [0.261,
  0.271,
  0.284,
  0.322,
  0.36700000000000005,
  0.439,
  0.727,
  0.804,
  0.9990000000000001])

In [22]:
print(return_auc(adv_path2, normal_model))
print('----')
print(return_auc(adv_path3, normal_model))
print('----')
print(return_auc(adv_path4, normal_model))

Computing metrics for {} for benign
Computing metrics for {} for adv
(0.6353774999999999, [0.019, 0.067, 0.128, 0.22, 0.331, 0.535, 0.884, 0.936, 1.0], [0.515, 0.521, 0.526, 0.545, 0.564, 0.619, 0.79, 0.84, 0.9990000000000001])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.7671565, [0.026000000000000002, 0.074, 0.135, 0.212, 0.312, 0.527, 0.873, 0.93, 1.0], [0.727, 0.727, 0.728, 0.731, 0.737, 0.758, 0.8640000000000001, 0.904, 0.9990000000000001])
----
Computing metrics for {} for benign
Computing metrics for {} for adv
(0.8449150000000001, [0.012, 0.054000000000000006, 0.10400000000000001, 0.213, 0.32, 0.552, 0.871, 0.932, 1.0], [0.805, 0.806, 0.806, 0.813, 0.82, 0.84, 0.914, 0.946, 0.9990000000000001])
