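1. Take a dataset (MNIST).
2. Train a classifier on it.
3. Measure the clean accuracy on the test set.
4. Use one attack (BIM) to generate adversarial examples (AEs).
5. Measure the reduced accuracy on the AEs.
6. Pass the AEs to the MPD detector and check whether it flags all of them as adversarial.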


In [22]:
from importlib import reload

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from art.attacks.evasion import FastGradientMethod
from art.attacks.evasion import AutoProjectedGradientDescent,ProjectedGradientDescent,AdversarialPatch
from art.estimators.classification import PyTorchClassifier
from art.utils import load_mnist
from sklearn.tree import DecisionTreeClassifier

from multi_MPD import UncertaintyTrainer

In [23]:
# Step 0: Define the neural network model

class Net(nn.Module):
    """Small CNN that maps 1x28x28 images to 10 class logits.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max-pooling, feed two
    fully connected layers. ``forward`` returns raw logits (no softmax).
    """

    def __init__(self):
        super().__init__()
        self.conv_1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=5, stride=1)
        self.conv_2 = nn.Conv2d(in_channels=4, out_channels=10, kernel_size=5, stride=1)
        # 28x28 -> conv5 -> 24x24 -> pool -> 12x12 -> conv5 -> 8x8 -> pool -> 4x4, 10 channels
        self.fc_1 = nn.Linear(in_features=4 * 4 * 10, out_features=100)
        self.fc_2 = nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        """Return logits of shape (batch, 10) for a batch of (batch, 1, 28, 28) images.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce the
                160 features per sample that ``fc_1`` expects.
        """
        x = F.max_pool2d(F.relu(self.conv_1(x)), 2, 2)
        x = F.max_pool2d(F.relu(self.conv_2(x)), 2, 2)
        # Flatten per sample and keep the batch axis fixed. Reshaping with
        # view(-1, 160) would silently re-split a wrong-sized input across the
        # batch axis instead of failing at fc_1.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc_1(x))
        return self.fc_2(x)

In [24]:
# Step 1: Load the MNIST dataset (train/test splits plus the pixel value range)

(x_train, y_train), (x_test, y_test), min_pixel_value, max_pixel_value = load_mnist()

# Step 1a: Reorder the image axes from NHWC to PyTorch's NCHW and cast to float32

x_train = x_train.transpose(0, 3, 1, 2).astype(np.float32)
x_test = x_test.transpose(0, 3, 1, 2).astype(np.float32)

In [25]:
# Step 2: Create the model

# Seed the RNGs before the model is built so that weight initialisation is
# repeatable under "Restart Kernel -> Run All". NumPy is seeded as well because
# the training / attack libraries used later may draw from its global RNG
# (TODO confirm which RNG they use).
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

model = Net()

# Step 2a: Define the loss function and the optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [26]:
# Step 3: Wrap the PyTorch model, loss and optimizer in an ART classifier

classifier = PyTorchClassifier(
    model=model,
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),  # one channel, 28x28 pixels (NCHW without the batch axis)
    nb_classes=10,
    clip_values=(min_pixel_value, max_pixel_value),  # pixel range reported by load_mnist()
)

In [27]:
# Step 4: Train the ART classifier
# Fits the wrapped model on the training arrays using mini-batches of 64
# samples for 4 epochs.

classifier.fit(x_train, y_train, batch_size=64, nb_epochs=4)


In [28]:
# Step 5: Measure the classifier's accuracy on the clean (benign) test set

predictions = classifier.predict(x_test)
# y_test is one-hot encoded, so compare arg-max class indices on both sides
accuracy = (predictions.argmax(axis=1) == y_test.argmax(axis=1)).mean()
print("Accuracy on benign test examples: {}%".format(accuracy * 100))


Accuracy on benign test examples: 97.85000000000001%


In [29]:
from art.attacks.evasion import BasicIterativeMethod

In [30]:
# Build the Basic Iterative Method (BIM) attack against the trained classifier
bim = BasicIterativeMethod(classifier, eps=0.3, eps_step=0.1, max_iter=50)

# Craft an adversarial version of every test image
x_test_adv_bim = bim.generate(x_test)

# Score the classifier on the adversarial images against the true (one-hot) labels
adv_predictions = classifier.predict(x_test_adv_bim)
accuracy_adv = (adv_predictions.argmax(axis=1) == y_test.argmax(axis=1)).mean()
print("Accuracy on adversarial test examples: {}%".format(accuracy_adv * 100))

PGD - Batches:   0%|          | 0/313 [00:00<?, ?it/s]

Accuracy on adversarial test examples: 1.3599999999999999%


----------------------

# MPD

In [31]:
# Separate each image array into a sample count and a per-sample feature count
n_samples_train = len(x_train)
n_features_train = int(np.prod(x_train.shape[1:]))  # channels * height * width = 1*28*28 = 784

n_samples_test = len(x_test)
n_features_test = int(np.prod(x_test.shape[1:]))

# Flatten every image into one row: (number of samples, 784 features)
x_train_mpd = np.reshape(x_train, (n_samples_train, n_features_train))
x_test_mpd = np.reshape(x_test, (n_samples_test, n_features_test))

# Collapse the one-hot label rows to integer class indices
y_train_mpd = y_train.argmax(axis=1)
y_test_mpd = y_test.argmax(axis=1)

In [49]:
# Sanity check: report the shapes of the flattened features and integer labels
print(f"Shape of x train - {x_train_mpd.shape}")
print(f"Shape of y train - {y_train_mpd.shape}")
print(f"Shape of x test - {x_test_mpd.shape}")
print(f"Shape of y test - {y_test_mpd.shape}")

Shape of x train - (60000, 784)
Shape of y train - (60000,)
Shape of x test - (10000, 784)
Shape of y test - (10000,)


In [58]:
# Work on a subset: the first 10,000 training rows and the first 100 test rows
x_train_10000, y_train_10000 = x_train_mpd[:10000], y_train_mpd[:10000]
x_test_100, y_test_100 = x_test_mpd[:100], y_test_mpd[:100]

In [59]:
# Sanity check: report the shapes of the 10,000-row train and 100-row test subsets
print(f"Shape of x train - {x_train_10000.shape}")
print(f"Shape of y train - {y_train_10000.shape}")
print(f"Shape of x test - {x_test_100.shape}")
print(f"Shape of y test - {y_test_100.shape}")

Shape of x train - (10000, 784)
Shape of y train - (10000,)
Shape of x test - (100, 784)
Shape of y test - (100,)


In [60]:
# Initialize the base classifier used by the MPD detector.
# A fixed random_state makes the decision tree's internal randomness repeatable
# between runs. NOTE(review): any resampling done inside UncertaintyTrainer may
# still be unseeded - confirm in multi_MPD.
MPD_SEED = 0
base_clf = DecisionTreeClassifier(max_depth=5, random_state=MPD_SEED)
bootstrap_count = 3
mpd_detector = UncertaintyTrainer(x_train_10000, y_train_10000, base_clf, bootstrap_count)

# Display the detector's bootstrap classifiers
mpd_detector.bootstrap_clfs

In [62]:
# MPD scores for the 100 clean (unattacked) test samples
mpd_scores_normal=mpd_detector.get_mpd_score(x_test_100)

In [63]:
# Display the MPD scores of the clean samples
mpd_scores_normal

array([0.18592141, 0.50910494, 0.5202264 , 0.04171927, 0.45436079,
       0.10176714, 0.74091778, 0.52638948, 0.15256669, 0.63685856,
       0.04171927, 0.81201127, 0.3734317 , 0.04171927, 0.12178349,
       0.49309698, 0.6984556 , 0.72787372, 0.72821755, 0.24042333,
       0.49250819, 0.44788121, 0.60921849, 0.67146792, 0.24042333,
       0.7370921 , 0.10289668, 0.24042333, 0.36642877, 0.46425768,
       0.62648541, 0.62648541, 0.57007864, 0.57735027, 0.18592141,
       0.15256669, 0.18592141, 0.46425768, 0.6654674 , 0.12178349,
       0.58441106, 0.18592141, 0.71988935, 0.56201136, 0.62648541,
       0.66413023, 0.62648541, 0.54696111, 0.63845138, 0.24042333,
       0.33524184, 0.48586877, 0.60761597, 0.49309698, 0.78110072,
       0.50910494, 0.24042333, 0.12178349, 0.3734317 , 0.52578989,
       0.10289668, 0.17313601, 0.63423526, 0.58072538, 0.56220644,
       0.24042333, 0.83854137, 0.24042333, 0.12382492, 0.510219  ,
       0.72787372, 0.04171927, 0.50910494, 0.74743615, 0.12178

--------------------------


# MPD SCORES ON AE

--------

### AEs conversion for MPD

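The attack succeeds on every one of the first 100 test samples (the classifier's accuracy on them is 0.0% in the check below), so these 100 AEs are the ones passed to MPD.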

In [71]:
# First 100 adversarial images (xxx) and their one-hot true labels (xx)
xxx, xx = x_test_adv_bim[:100], y_test[:100]
# Score the classifier on this subset.
# NOTE: this reassigns adv_predictions / accuracy_adv from the full-test-set evaluation.
adv_predictions = classifier.predict(xxx)
accuracy_adv = (adv_predictions.argmax(axis=1) == xx.argmax(axis=1)).mean()
print("Accuracy on adversarial test examples: {}%".format(accuracy_adv * 100))

Accuracy on adversarial test examples: 0.0%


In [65]:
# Inspect the shape of the adversarial test images before flattening
x_test_adv_bim.shape

(10000, 1, 28, 28)

In [66]:
# Flatten the adversarial images to (n_samples, n_features) for the MPD detector.
# Dedicated *_adv names are used so that the training-set counts held in
# n_samples_train / n_features_train are not overwritten with test-set values.
n_samples_adv = x_test_adv_bim.shape[0]
n_features_adv = int(np.prod(x_test_adv_bim.shape[1:]))  # channels * height * width
x_adv_bim = x_test_adv_bim.reshape(n_samples_adv, n_features_adv)

In [67]:
# Inspect the shape of the flattened adversarial array
x_adv_bim.shape

(10000, 784)

In [68]:
# Keep the first 100 flattened adversarial samples (same count as x_test_100)
x_test_adv=x_adv_bim[:100]

In [72]:
# Check the container type of the adversarial subset
type(x_test_adv)

numpy.ndarray

In [73]:
# Check the element dtype of the adversarial subset
x_test_adv.dtype

dtype('float32')

In [69]:
# MPD scores for the 100 adversarial samples, using the same detector as for the clean samples
mpd_scores_adv=mpd_detector.get_mpd_score(x_test_adv)

In [70]:
# Display the MPD scores of the adversarial samples
mpd_scores_adv

array([0.73532921, 0.50848077, 0.59056722, 0.76265076, 0.68205099,
       0.64237948, 0.7752816 , 0.76362493, 0.50848077, 0.54668625,
       0.73532921, 0.7070479 , 0.65874261, 0.65874261, 0.16026901,
       0.50848077, 0.49804518, 0.72980177, 0.24669132, 0.68205099,
       0.67011884, 0.70588989, 0.69792805, 0.73532921, 0.68205099,
       0.65874261, 0.81656843, 0.73339577, 0.59103422, 0.66742641,
       0.57293284, 0.16026901, 0.62154208, 0.78384343, 0.74993352,
       0.59864146, 0.59864146, 0.70387121, 0.682465  , 0.56507616,
       0.73325267, 0.56191168, 0.73135879, 0.53563207, 0.68125407,
       0.2096331 , 0.58342458, 0.61459514, 0.50009556, 0.7799483 ,
       0.57337359, 0.77150027, 0.3734317 , 0.77124176, 0.66382188,
       0.50848077, 0.67146792, 0.58552182, 0.50848077, 0.65874261,
       0.56230085, 0.76265076, 0.50848077, 0.57337359, 0.7799483 ,
       0.63423526, 0.46394591, 0.72329257, 0.752716  , 0.68205099,
       0.50848077, 0.59893081, 0.72690814, 0.68066169, 0.68250

---------------


In [98]:
# Empty float32 array; presumably a placeholder to be filled later - TODO confirm (no use is visible here)
working_samples_100=np.empty(0, dtype=np.float32)

In [99]:
# Check the container type of the placeholder array
type(working_samples_100)

numpy.ndarray

In [100]:
# Check the number of elements in the placeholder array
len(working_samples_100)

0

In [101]:
# Check the element dtype of the placeholder array
working_samples_100.dtype

dtype('float32')