In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import gc
from captum.attr import *
import quantus
import gc
import torchvision.transforms as transforms
import os
import torch.optim as optim
import torchvision.models as models
import sklearn
from sklearn.metrics import roc_auc_score

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
class Network(nn.Module):
    ''' A basic neural network model '''
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()         #python2 : super(MLP, self).__init__()
        #defining the network's operations
        self.fc1 = nn.Linear(input_size, hidden_size[0])
        self.fc2 = nn.Linear(hidden_size[0], hidden_size[1])
        self.fc3 = nn.Linear(hidden_size[1], output_size)

    def forward(self, x, softmax=False): 
        a = self.fc3(F.relu(self.fc2(F.relu(self.fc1(x.float())))))
        if softmax:
            y_pred = F.softmax(a, dim=1)
        else:
            y_pred = a

        return y_pred

In [5]:
input_size=121
hidden_size=[256,256]
output_size=2
model = Network(input_size, hidden_size, output_size)
model.load_state_dict(torch.load("./model.pytorch"))
model = model.to(device)
model.eval()

Network(
  (fc1): Linear(in_features=121, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=2, bias=True)
)

In [7]:
def compute_TPR(adv, th): 
    TN=0
    FN=0
    FP = 0 
    TP=0
    
    for value in adv: 
        if value>th:
            TP += 1
        else: 
            FN += 1
    
    
    return (TP/(TP+FN))*100

In [8]:
def compute_FPR(ben, th): 
    TN=0
    FN=0
    FP = 0 
    TP=0
    
    for value in ben: 
        if value<th:
            TN += 1
        else: 
            FP += 1
    
    
    return (FP/(FP+TN))*100


In [64]:
def compute_iqr(attr):
    scores = []
    for i in range(len(attr)):
        a = attr[i].flatten()
        score_75 = np.percentile(a, 75)
        #print(score_75)
        score_25 = np.percentile(a, 25)
        #print(score_25)
        score_qt = score_75 - score_25
       # print(score_qt)
        scores.append(score_qt)
    return scores

In [65]:
def train_fs(model, x_batch, y_batch, train_fpr):
    distances = get_attr(model, x_batch, y_batch)
    selected_distance_idx = int(np.ceil(len(x_batch) * (1-train_fpr)))
    threshold = sorted(distances)[selected_distance_idx-1]
    threshold = threshold
    return threshold

def get_attr(model, x_batch, y_batch):
    a_batch_intgrad = quantus.explain(
            model=model, inputs=x_batch, targets=y_batch, **{"method:": "IntegratedGradient", "device": device})
        
    iqr = compute_iqr(a_batch_intgrad)
    return iqr

In [66]:
#evaluate on benign samples and save metrics
def compute_benign_metrics(test_loader, model):
    fs = []
    for image, label in test_loader: 
        image, label = image.to(device), label.to(device)
        d = get_attr(model, image, label)    
        fs.extend(d)
    return fs


#evaluate on adv samples and save metrics
def compute_adv_metrics(test_loader, model):
    fs = []
    for image, label in test_loader: 
        image, label = image.to(device), label.to(device)
        d = get_attr(model, image, label)    
        fs.extend(d)
    return fs

In [67]:
class CustomDataset(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)
        self.data = self.data.drop(self.data.columns[0], axis=1)  # Remove unnecessary index column
        self.features = self.data.iloc[:, :-1].values  # Features (all columns except the last)
        self.labels = self.data.iloc[:, -1].values     # Labels (last column)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.features[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)

# Load CSV file and split into train and test sets
dataset = CustomDataset('clean_examples.csv')
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [68]:
FPR = [0.01,0.05,0.1]
final_th = []
for fpr in FPR:
    t=[]
    for step, (x_batch, y_batch) in enumerate(train_loader):
        
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        threshold = train_fs(model, x_batch, y_batch, fpr)
        t.append(threshold.item())
        if step==50:
            break
    th = sum(t)/len(t)
    print("Threshold for {} FPR is {}.".format(fpr, th))
    final_th.append(th)

Threshold for 0.01 FPR is 19.744899074236553.
Threshold for 0.05 FPR is 16.84036260025174.
Threshold for 0.1 FPR is 15.667794797934738.


In [96]:
threshold = [0.5,1,3,7,9,10,11,12,13,14,15,16,17,19,21]

In [97]:
sc = []

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_FGSM1.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [98]:
dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_FGSM2.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [99]:
dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_FGSM3.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [100]:
dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_FGSM4.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [101]:
fpr_results

[1.0,
 1.0,
 0.9996104402025711,
 0.8955979742890534,
 0.777561355668095,
 0.7234125438254773,
 0.6606934164394235,
 0.5652512660693416,
 0.4737047136735489,
 0.26373198285936894,
 0.14218932606155044,
 0.08804051421893261,
 0.06973120373977405,
 0.014803272302298403,
 0.0038955979742890533]

In [102]:
np.mean(sc), np.std(sc)

(0.44779995459435473, 0.055272568883801346)

In [103]:
sc = []

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_PGD1.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [104]:
dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_PGD2.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [105]:
dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_PGD3.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [106]:
dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_PGD4.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [107]:
np.mean(sc), np.std(sc)

(0.5655518204825898, 0.06317246132627682)

In [108]:
sc = []

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_BIM1.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [109]:

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_BIM2.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [110]:

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_BIM3.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [111]:

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_BIM4.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [112]:
np.mean(sc), np.std(sc)

(0.4748719134366868, 0.013358656362423123)

# CW

In [113]:
sc = []

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_CW21.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [114]:

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_CW22.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [115]:

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_CW23.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [116]:

dist_ben = compute_benign_metrics(test_loader, model)

advdataset = CustomDataset('adversarial_examples_CW24.csv')
advtrain_data, advtest_data = train_test_split(advdataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing
batch_size = 64
train_loader_adv = DataLoader(advtrain_data, batch_size=batch_size, shuffle=True)
test_loader_adv = DataLoader(advtest_data, batch_size=batch_size, shuffle=False)

dist_adv = compute_adv_metrics(test_loader_adv, model)

fpr_results = []
for th in threshold:
    FPR = compute_FPR(dist_ben, th)
    fpr_results.append(FPR/100)
tpr_results = []
for th in threshold:
    TPR = compute_TPR(dist_adv, th)
    tpr_results.append(TPR/100)
    
score = sklearn.metrics.auc(fpr_results, tpr_results)
sc.append(score)

In [117]:
np.mean(sc), np.std(sc)

(0.7726578267298118, 0.0014484560779117676)