# Adversarial Robustness Toolkit (ART)

* Documentación: https://adversarial-robustness-toolbox.readthedocs.io/en/latest/
* Código: https://github.com/Trusted-AI/adversarial-robustness-toolbox
* Ejemplos: https://github.com/Trusted-AI/adversarial-robustness-toolbox/tree/main/examples

## Instalación

In [1]:
!pip install adversarial-robustness-toolbox==1.11.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
import numpy as np
import matplotlib.pyplot as plt
from art.utils import load_mnist

In [3]:
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

import warnings
warnings.filterwarnings('ignore')

In [4]:
%matplotlib inline

## Cargar datos

In [5]:
(x_train, y_train), (x_test, y_test), min_pixel_value, max_pixel_value = load_mnist()

## Entrenar modelo

In [6]:
from art.estimators.classification import KerasClassifier

In [7]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

victim = KerasClassifier(model=model, clip_values=(min_pixel_value, max_pixel_value), use_logits=False)

victim.fit(x_train, y_train, batch_size=128, nb_epochs=5)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
predictions_test = victim.predict(x_test)
accuracy = np.sum(np.argmax(predictions_test, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on test examples: {:.2f}%".format(accuracy * 100))

Accuracy on test examples: 99.02%


## Ataque de extracción

In [9]:
# Más ataques en
# https://adversarial-robustness-toolbox.readthedocs.io/en/latest/modules/attacks/extraction.html

from art.attacks.extraction import CopycatCNN

In [10]:
max_requests = 5000 
shuffle = np.random.permutation(len(x_test))
x_stolen = x_test[shuffle[:max_requests]]
y_stolen = y_test[shuffle[:max_requests]]

In [11]:
model_stolen = Sequential()
model_stolen.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model_stolen.add(MaxPooling2D(pool_size=(2, 2)))
model_stolen.add(Conv2D(64, (3, 3), activation='relu'))
model_stolen.add(MaxPooling2D(pool_size=(2, 2)))
model_stolen.add(Dropout(0.25))
model_stolen.add(Flatten())
model_stolen.add(Dense(128, activation='relu'))
model_stolen.add(Dense(10, activation='softmax'))

model_stolen.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [12]:
attack = CopycatCNN(classifier=victim, nb_epochs=5, nb_stolen=max_requests, use_probability=True)
classifier_stolen = KerasClassifier(model_stolen, clip_values=(0, 1), use_logits=False)
classifier_stolen = attack.extract(x_stolen, y_stolen, thieved_classifier=classifier_stolen)



Train on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
predictions_stolen = classifier_stolen.predict(x_test)
accuracy = np.sum(np.argmax(predictions_stolen, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on test examples (stolen model): {:.2f}%".format(accuracy * 100))

Accuracy on test examples (stolen model): 98.38%


# Defensas

In [14]:
# Más defensas en
# https://adversarial-robustness-toolbox.readthedocs.io/en/latest/modules/defences/postprocessor.html

from art.defences.postprocessor import Rounded, GaussianNoise, ReverseSigmoid

In [15]:
postprocessor_rounded = Rounded(decimals=1)

In [16]:
postprocessor_gaussian = GaussianNoise(scale=0.1)

In [17]:
postprocessor_reverse_sigmoid = ReverseSigmoid(beta=1.0, gamma=0.5)

In [18]:
victim_defense = KerasClassifier(model=model, clip_values=(min_pixel_value, max_pixel_value), use_logits=False, postprocessing_defences=postprocessor_reverse_sigmoid)
victim_defense.fit(x_train, y_train, batch_size=128, nb_epochs=5)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
predictions_victim_defense = victim_defense.predict(x_test)
accuracy = np.sum(np.argmax(predictions_victim_defense, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on test examples (protected): {:.2f}%".format(accuracy * 100))

Accuracy on test examples (protected): 99.09%


In [20]:
model_stolen_protected = Sequential()
model_stolen_protected.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model_stolen_protected.add(MaxPooling2D(pool_size=(2, 2)))
model_stolen_protected.add(Conv2D(64, (3, 3), activation='relu'))
model_stolen_protected.add(MaxPooling2D(pool_size=(2, 2)))
model_stolen_protected.add(Dropout(0.25))
model_stolen_protected.add(Flatten())
model_stolen_protected.add(Dense(128, activation='relu'))
model_stolen_protected.add(Dense(10, activation='softmax'))
model_stolen_protected.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [21]:
attack_protected = CopycatCNN(classifier=victim_defense, nb_epochs=5, nb_stolen=max_requests, use_probability=True)
classifier_stolen_protected = KerasClassifier(model_stolen_protected, clip_values=(0, 1), use_logits=False)
classifier_stolen_protected = attack_protected.extract(x_stolen, y_stolen, thieved_classifier=classifier_stolen_protected)



Train on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [22]:
predictions_stolen_protected = classifier_stolen_protected.predict(x_test)
accuracy = np.sum(np.argmax(predictions_stolen_protected, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on test examples against protected model (stolen model): {:.2f}%".format(accuracy * 100))

Accuracy on test examples against protected model (stolen model): 9.80%
