In [None]:
!pip install facenet-pytorch
!pip install Pillow
!pip install -q tensorflow==2.0.0
!pip install adversarial-robustness-toolbox[all]
!pip install matplotlib

In [3]:
# LIBRERIE UTILI
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
tf.compat.v1.disable_eager_execution()

import numpy as np
from matplotlib import pyplot as plt
import art

if tf.__version__[0] != '2':
    raise ImportError('This notebook requires TensorFlow v2.')

print("GPU Available: ", tf.test.is_gpu_available())

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU Available:  False


In [4]:
from facenet_pytorch import InceptionResnetV1

resnet = InceptionResnetV1(pretrained='vggface2').eval()
resnet.classify = True



fpath = tf.keras.utils.get_file('rcmalli_vggface_labels_v2.npy',
                             "https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_labels_v2.npy",
                             cache_subdir="./")
LABELS = np.load(fpath)

# **FSGM ATTACK**

In [8]:
# Import the attack
from art.attacks.evasion import FastGradientMethod
from torch.nn import CrossEntropyLoss
from art.estimators.classification import PyTorchClassifier
from torchvision import transforms
from PIL import Image

def load_image(filename):
    img = Image.open(filename)
    rsz = img.resize((160, 160))
    tns = transforms.ToTensor()(rsz)
    return tns

model = PyTorchClassifier(resnet,input_shape=[224,224], loss=CrossEntropyLoss(),nb_classes=8631) #This class implements a classifier with the PyTorch framework.

test_img = load_image("test_set_cropped/Antonio_Cassano_8_face_0.jpg")

print(test_img.shape)
print(test_img.size)
test_img = test_img.unsqueeze(0)
print(test_img.shape)
print(test_img.size)
test_img = test_img.numpy()
print(test_img.shape)
print(type(test_img))

torch.Size([3, 160, 160])
<built-in method size of Tensor object at 0x32887ea90>
torch.Size([1, 3, 160, 160])
<built-in method size of Tensor object at 0x3286c1680>
(1, 3, 160, 160)
<class 'numpy.ndarray'>


**NON TARGETED**

In [None]:
#FSGM generic Attack for single sample

epsilon = 0.0005
attack = FastGradientMethod(estimator=model, eps=epsilon, targeted=False)
batch_size = test_img.shape[0]
targeted_labels = np.array([2] * batch_size)   #Al posto di 2 ci va l'indice della label della persona
one_hot_targeted_labels = tf.keras.utils.to_categorical(targeted_labels, num_classes=8631)   #Creazione attacco
test_images_adv = attack.generate(test_img)                                 # Generazione campione avversario
model_predictions = model.predict(test_images_adv)
loss = model.compute_loss(test_images_adv,one_hot_targeted_labels)                          # Predizione
perturbation = np.mean(np.abs((test_images_adv - test_img)))
predicted_label = LABELS[np.array(model_predictions[0].argmax())]
print("{} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))


In [None]:
import os
import re

dataset_dir = "test_set_cropped"

#FSGM generic Attack for all samples

correct_predictions = 0
total_images = 0
eps_range = [0.001, 0.005, 0.007, 0.01, 0.05, 0.07, 0.1, 0.5, 0.7]
accuracy_plot = []
for epsilon in eps_range:
    correct_predictions = 0
    total_images = 0
    attack = FastGradientMethod(estimator=model, eps=epsilon, targeted=False)

    for filename in os.listdir(dataset_dir):
        if filename.endswith(".jpg") or filename.endswith(".jpeg"):
            person_path = os.path.join(dataset_dir, filename)
            test_img = load_image(person_path)
            test_img = test_img.unsqueeze(0)
            test_img = test_img.numpy()
            test_images_adv = attack.generate(test_img)
            model_predictions = model.predict(test_images_adv)
            correct_label = re.sub(r'_\d+_face_0\.jpg$', '', filename)
            print("Etichetta corretta:", correct_label)   
            perturbation = np.mean(np.abs((test_images_adv - test_img)))
            predicted_label = LABELS[np.array(model_predictions[0].argmax())]
            print("Predetto {} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))
            total_images+=1
            
            predicted_label = str(predicted_label)

            if correct_label in predicted_label:
                correct_predictions+=1

            accuracy = correct_predictions/total_images
            print("Accuracy sugli adversarial Sample: {}%".format((100-(accuracy*100))))
        

    if total_images != 0:
        final_accuracy = correct_predictions/total_images
        accuracy_plot.append(final_accuracy)
        print("----------- Accuracy FINALE sugli adversarial Sample: {}\% ----------------".format(final_accuracy))

In [None]:
from matplotlib import pyplot as plt

fig, ax = plt.subplots()
ax.plot(np.array(eps_range), np.array(accuracy_plot), 'b--', label='NN1')

legend = ax.legend(loc='upper center', shadow=True, fontsize='large')
legend.get_frame().set_facecolor('#00FFCC')

plt.xlabel('Attack strength (eps)')
plt.ylabel('Accuracy')
plt.show()

NOTA: da inserire --> FSGM specific Attack for all samples

**TARGETED ATTACK**

In [11]:
#FSGM specif Attack for single sample

target_class = 555
epsilon = 10000
attack = FastGradientMethod(estimator=model, eps=epsilon, targeted=True)

# Trasformazione etichetta categorica
targeted_labels = target_class*np.ones(LABELS.size)
one_hot_targeted_labels = tf.keras.utils.to_categorical(targeted_labels, num_classes = 8631)
test_images_adv = attack.generate(test_img, one_hot_targeted_labels)

#loss_test, accuracy_test = model.evaluate(test_images_adv, test_labels)
#print('Accuracy on adversarial test data: {:4.2f}%'.format(accuracy_test * 100))
model_predictions = model.predict(test_images_adv)
perturbation = np.mean(np.abs((test_images_adv - test_img)))
print('Average perturbation: {:4.2f}'.format(perturbation))
#targeted_attack_loss, targeted_attack_accuracy = model.evaluate(test_images_adv, targeted_labels)
#print('Targeted attack accuracy: {:4.2f}'.format(targeted_attack_accuracy))
print("Etichetta target:{}".format(LABELS[target_class]))
print(model_predictions)
predicted_label = LABELS[np.array(model_predictions.argmax())]
print("{} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))


Average perturbation: 9628.52
Etichetta target: Angelica_Celaya
[[-1693.0969  -3693.3835   1458.5857  ...  -293.34073   704.0545
    680.75305]]
 Daniele_Bonera con probabilità 10355.302734375


# **PGD ATTACK**

https://adversarial-robustness-toolbox.readthedocs.io/en/latest/modules/attacks/evasion.html#fast-gradient-method-fgm

In [5]:
# Import attack
from torch.nn import CrossEntropyLoss
from art.attacks.evasion import ProjectedGradientDescentPyTorch
from art.estimators.classification import PyTorchClassifier

#Impostare l'input shape
classifier = PyTorchClassifier(resnet,input_shape=[224,224], loss=CrossEntropyLoss(),nb_classes=8631) #This class implements a classifier with the PyTorch framework.


**NON-TARGETED ATTACK**

In [6]:
epsilon = 0.1
eps_step = 0.1
max_iter = 1 

attack = ProjectedGradientDescentPyTorch(estimator=classifier, eps = epsilon, eps_step=eps_step, targeted=False, max_iter = max_iter)

In [9]:
# PGD generic Attack for single sample

test_images_adv = attack.generate(test_img)  # Utilizzare generate per generare i campion
model_predictions = classifier.predict(test_images_adv) # Classifier
predicted_label = LABELS[np.array(model_predictions.argmax())] 
print("{} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))

                                                            

 Alfie_Allen con probabilità 8.514165878295898


In [None]:
import os
import re

dataset_dir = "test_set_cropped/"

# PGD generic Attack for all samples


correct_predictions = 0
total_images = 0
for filename in os.listdir(dataset_dir):
    if filename.endswith(".jpg") or filename.endswith(".jpeg"):
        person_path = os.path.join(dataset_dir, filename)
        print("Immagine:", filename)
        test_img = load_image(person_path)
        test_img = test_img.unsqueeze(0)
        test_img = test_img.numpy()
        test_images_adv = attack.generate(test_img)
        model_predictions = model.predict(test_images_adv)
        correct_label = re.sub(r'_\d+_face_0\.jpg$', '', filename)
        print("Etichetta corretta:", correct_label)   
        perturbation = np.mean(np.abs((test_images_adv - test_img)))
        predicted_label = LABELS[np.array(model_predictions[0].argmax())]
        print("Predetto {} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))
        total_images+=1

        if predicted_label == correct_label:
            correct_predictions+=1

        accuracy = correct_predictions/total_images
        print("Accuracy sugli adversarial Sample: {}%".format((100-(accuracy*100))))
        

if total_images != 0:
    final_accuracy = correct_predictions/total_images
    print("----------- Accuracy FINALE sugli adversarial Sample: {}\% ----------------".format(final_accuracy))

**TARGETED ATTACK**

In [14]:
epsilon = 1
eps_step = 0.1
max_iter = 1 

attack = ProjectedGradientDescentPyTorch(estimator=classifier, eps = epsilon, eps_step= eps_step, targeted=True, max_iter = max_iter)

In [15]:
#PGD specific Attack for single sample

target_class = 10 

batch_size = test_img.shape[0]
targeted_labels = np.array([target_class] * batch_size)
one_hot_targeted_labels = tf.keras.utils.to_categorical(targeted_labels, num_classes=8631)

print(one_hot_targeted_labels.shape)
test_images_adv = attack.generate(test_img, one_hot_targeted_labels)


model_predictions = model.predict(test_images_adv)
perturbation = np.mean(np.abs((test_images_adv - test_img)))
print('Average perturbation: {:4.2f}'.format(perturbation))
#targeted_attack_loss, targeted_attack_accuracy = model.evaluate(test_images_adv, targeted_labels)
#print('Targeted attack accuracy: {:4.2f}'.format(targeted_attack_accuracy))
print("Etichetta target:{}".format(LABELS[target_class]))
print(model_predictions)
predicted_label = LABELS[np.array(model_predictions.argmax())]
print("{} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))

(1, 8631)


                                                            

Average perturbation: 0.10
Etichetta target: Aaron_Hernandez
[[ 1.2787857   2.1339145   0.04435374 ... -3.9367952  -0.46481484
   0.7138224 ]]
 Hassan_Nasrallah con probabilità 10.409660339355469


In [None]:
#PGD specific Attack for all samples

correct_predictions = 0
total_images = 0
target_class = 10
print("ETICHETTA TARGET: ", LABELS[10])
batch_size = test_img.shape[0]
targeted_labels = np.array([target_class] * batch_size)
one_hot_targeted_labels = tf.keras.utils.to_categorical(targeted_labels, num_classes=8631)

for filename in os.listdir(dataset_dir):
    if filename.endswith(".jpg") or filename.endswith(".jpeg"):
        person_path = os.path.join(dataset_dir, filename)
        print("Immagine:", filename)
        test_img = load_image(person_path)
        test_img = test_img.unsqueeze(0)
        test_img = test_img.numpy()
        test_images_adv = attack.generate(test_img, one_hot_targeted_labels)
        model_predictions = model.predict(test_images_adv)
        correct_label = re.sub(r'_\d+_face_0\.jpg$', '', filename)
        print("Etichetta corretta:", correct_label)   
        perturbation = np.mean(np.abs((test_images_adv - test_img)))
        predicted_label = LABELS[np.array(model_predictions[0].argmax())]
        print("Predetto {} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))
        total_images+=1

        if predicted_label == correct_label:
            correct_predictions+=1

        accuracy = correct_predictions/total_images
        print("Accuracy sugli adversarial Sample: {}%".format((100-(accuracy*100))))
        

if total_images != 0:
    final_accuracy = correct_predictions/total_images
    print("----------- Accuracy FINALE sugli adversarial Sample: {}\% ----------------".format(final_accuracy))

#  **CARLINI WAGNER **ATTACK****

In [17]:
# Import all L-distance based attacks
from art.attacks.evasion import CarliniL2Method, CarliniL0Method, CarliniLInfMethod

**NON-TARGETED ATTACK**

In [18]:
binary_search_steps = 1
confidence = 0.5
max_iter = 10
learning_rate = 0.01
initial_const = 1000

attack = CarliniL2Method(classifier=classifier, binary_search_steps=binary_search_steps, confidence=confidence, max_iter=max_iter, learning_rate=learning_rate, initial_const=initial_const, targeted=False)


In [19]:
# Carlini Wagner generic attack on single sample

test_images_adv = attack.generate(test_img)

#loss_test, accuracy_test = model.evaluate(test_images_adv, test_labels)
model_predictions = model.predict(test_images_adv)
perturbation = np.mean(np.abs((test_images_adv - test_img)))
#print('Accuracy on adversarial test data: {:4.2f}%'.format(accuracy_test * 100))
print('Average perturbation: {:4.2f}'.format(perturbation))
print(LABELS[target_class])
print(model_predictions)
predicted_label = LABELS[np.array(model_predictions.argmax())]
print("{} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))

C&W L_2: 100%|██████████| 1/1 [00:03<00:00,  3.09s/it]

Average perturbation: 0.03
 Aaron_Hernandez
[[ 2.5345778  -0.17539069  4.2408724  ... -1.8134753   0.874722
  -2.5158045 ]]
 Antonio_Orozco con probabilità 12.907977104187012





In [None]:
# Carlini Wagner generic attack on all samples


correct_predictions = 0
total_images = 0
for filename in os.listdir(dataset_dir):
    if filename.endswith(".jpg") or filename.endswith(".jpeg"):
        person_path = os.path.join(dataset_dir, filename)
        print("Immagine:", filename)
        test_img = load_image(person_path)
        test_img = test_img.unsqueeze(0)
        test_img = test_img.numpy()
        test_images_adv = attack.generate(test_img)
        model_predictions = model.predict(test_images_adv)
        correct_label = re.sub(r'_\d+_face_0\.jpg$', '', filename)
        print("Etichetta corretta:", correct_label)   
        perturbation = np.mean(np.abs((test_images_adv - test_img)))
        predicted_label = LABELS[np.array(model_predictions[0].argmax())]
        print("Predetto {} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))
        total_images+=1

        if predicted_label == correct_label:
            correct_predictions+=1

        accuracy = correct_predictions/total_images
        print("Accuracy sugli adversarial Sample: {}%".format((100-(accuracy*100))))
        

if total_images != 0:
    final_accuracy = correct_predictions/total_images
    print("----------- Accuracy FINALE sugli adversarial Sample: {}\% ----------------".format(final_accuracy))

**TARGETED ATTACK**


In [21]:
binary_search_steps = 1
confidence = 0.5
max_iter = 10
learning_rate = 0.01
initial_const = 1000
target_class = 6

attack = CarliniL2Method(classifier=classifier, binary_search_steps=binary_search_steps, confidence=confidence, max_iter=max_iter, learning_rate=learning_rate, initial_const=initial_const, targeted=True)


In [22]:
# Carlini Wagner specific attack on single sample


# Trasformazione del dato categorico
targeted_labels = target_class*np.ones(LABELS.size)
one_hot_targeted_labels = tf.keras.utils.to_categorical(targeted_labels, num_classes = 8631)
test_images_adv = attack.generate(test_img, one_hot_targeted_labels)

model_predictions = model.predict(test_images_adv)
perturbation = np.mean(np.abs((test_images_adv - test_img)))
print('Average perturbation: {:4.2f}'.format(perturbation))
#targeted_attack_loss, targeted_attack_accuracy = model.evaluate(test_images_adv, targeted_labels)
#print('Targeted attack accuracy: {:4.2f}'.format(targeted_attack_accuracy))
print(LABELS[target_class])
print(model_predictions)
predicted_label = LABELS[np.array(model_predictions.argmax())]
print("{} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))

C&W L_2: 100%|██████████| 1/1 [00:02<00:00,  2.58s/it]


Average perturbation: 0.00
 AB_de_Villiers
[[ 2.4935617e+00  1.5412122e-03  3.4582591e+00 ... -2.8792291e+00
  -2.6755357e-01 -4.0426984e+00]]
 Paola_Barale con probabilità 13.70297908782959


In [23]:
# Carlini Wagner specific attack on all samples


correct_predictions = 0
total_images = 0
target_class = 10

targeted_labels = target_class*np.ones(LABELS.size)
one_hot_targeted_labels = tf.keras.utils.to_categorical(targeted_labels, num_classes = 8631)
test_images_adv = attack.generate(test_img, one_hot_targeted_labels)

for filename in os.listdir(dataset_dir):
    if filename.endswith(".jpg") or filename.endswith(".jpeg"):
        person_path = os.path.join(dataset_dir, filename)
        print("Immagine:", filename)
        test_img = load_image(person_path)
        test_img = test_img.unsqueeze(0)
        test_img = test_img.numpy()
        test_images_adv = attack.generate(test_img, one_hot_targeted_labels)
        model_predictions = model.predict(test_images_adv)
        correct_label = re.sub(r'_\d+_face_0\.jpg$', '', filename)
        print("Etichetta corretta:", correct_label)   
        perturbation = np.mean(np.abs((test_images_adv - test_img)))
        print('Average perturbation: {:4.2f}'.format(perturbation))
        predicted_label = LABELS[np.array(model_predictions[0].argmax())]
        print("Predetto {} con probabilità {}".format(predicted_label,model_predictions[0][model_predictions.argmax()]))
        
        total_images+=1

        if predicted_label == correct_label:
            correct_predictions+=1

        accuracy = correct_predictions/total_images
        print("Accuracy sugli adversarial Sample: {}%".format((100-(accuracy*100))))
        

if total_images != 0:
    final_accuracy = correct_predictions/total_images
    print("----------- Accuracy FINALE sugli adversarial Sample: {}\% ----------------".format(final_accuracy))

C&W L_2: 100%|██████████| 1/1 [00:02<00:00,  2.64s/it]


Immagine: Michael_Phelps_9_face_0.jpg


C&W L_2:   0%|          | 0/1 [00:01<?, ?it/s]


KeyboardInterrupt: 

# CODICE PLOT IMMAGINI

In [None]:
#Show one original example
plt.figure()
plt.matshow(test_images[0])
plt.title("Original Label: {}".format(test_labels[0]))
plt.show()

#Show the corresponding adversarial example
plt.figure()
plt.matshow(test_images_adv[0])
plt.title("Model Prediction: {}".format(np.argmax(model_predictions[0])))
plt.show()