In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from adv_attack_student import AdversialAttacker
from adv_attack_student import generate_experiment
from PIL import Image
import numpy as np

In [None]:
# Build the experiment bundle (method='FGSM') and unpack the entries that the
# remaining cells use into individually named variables.
x = generate_experiment(method='FGSM')

(
    input_img,
    input_tensor,
    attacker,
    model,
    un_norm,
    classnames,
) = (x[key] for key in ('img', 'inp', 'attacker', 'mdl', 'un_norm', 'classnames'))

In [None]:
# Run the classifier on the clean input, requesting both the predicted label
# and the scores so the following cell can report them.
out_pred, scores = attacker.get_pred_label(
    model,
    input_tensor,
    ret_out_scores=True,
    ret_out_pred=True,
)

In [None]:
# Report the classifier's decision on the clean input.
label_idx = int(out_pred)
print( "current prediction: %d (%s)\n" % ( label_idx, classnames[label_idx] ) )

# List the five highest scores together with their class index and name.
top_scores, top_indices = scores.topk(5)
print( "current top-5 scores:" )
for score, idx in zip( top_scores.numpy().ravel(), top_indices.numpy().ravel() ):
    class_idx = int(idx)
    print( ' - %d (%s): %.4f' % ( class_idx, classnames[class_idx], score ) )


In [None]:
# Show the original (not yet attacked) input image as this cell's output,
# for visual comparison with the adversarial image produced later.
input_img

In [None]:
# now let's attack

# untargeted setting: no target label is supplied; the result is stored in
# inp_adv and used by every following cell.
# (eps presumably bounds the perturbation size -- confirm in AdversialAttacker)
inp_adv = attacker.perturb_untargeted(model, input_tensor, eps=1e-1)

# # targeted setting (alternative): uncomment to overwrite inp_adv with an
# # attack towards target_label; note this example uses eps=0.03, not 1e-1.
# target_label = 7
# inp_adv = attacker.perturb_targeted(model, input_tensor, targ_label=[target_label], eps=0.03)


In [None]:
# check the image after attacking
# squeeze(0) drops the leading dimension when it has size 1 (presumably the
# batch axis -- confirm); un_norm presumably undoes the input normalization and
# returns a displayable image -- confirm against generate_experiment.
img_adv = un_norm(inp_adv.squeeze(0))
img_adv

In [None]:
# visualize the perturbation "directly"
def diff_img(img1, img2, scale=1):
    """Return the absolute per-pixel difference of two images as an 8-bit image.

    Args:
        img1: First image; anything ``np.array`` can convert to an array.
        img2: Second image, with the same shape as ``img1``.
        scale: Factor (int or float) applied to the difference so that a
            faint perturbation becomes visible.

    Returns:
        The result of ``Image.fromarray`` on the uint8 array
        ``min(scale * |img1 - img2|, 255)`` (negative products clip to 0).
    """
    first = np.array(img1).astype(float)
    second = np.array(img2).astype(float)
    # Amplify while still in floating point and clip to the 8-bit range, so
    # that large values saturate at 255 instead of wrapping around modulo 256
    # (which is what happens when an already-uint8 array is multiplied).
    amplified = np.clip(scale * np.abs(first - second), 0, 255)
    return Image.fromarray(amplified.astype(np.uint8))

# Absolute difference between the adversarial image and the un-normalized
# original input; the last line displays it as the cell output.
img_diff = diff_img(img_adv, un_norm(input_tensor.squeeze(0)), scale=1) # you can play with scale to amplify the signals
img_diff

In [None]:
# Visualize the perturbation as a heatmap.
import matplotlib.pyplot as plt

# Original and adversarial images as float arrays (pixel values in 0-255).
img_orig_np = np.asarray(un_norm(input_tensor.squeeze(0))).astype(float)
img_adv_np  = np.asarray(img_adv).astype(float)

# Absolute change summed over axis 2, giving one value per pixel position.
img_diff_np = np.sum(np.abs(img_adv_np - img_orig_np), axis=2)

# Colour scale runs from zero up to the largest observed change.
plt.imshow(img_diff_np, cmap='jet', vmin=0, vmax=img_diff_np.max())
plt.colorbar()
plt.show()


In [None]:
# Re-run the classifier on the adversarial input, again requesting both the
# predicted label and the scores.
attacked_pred, attacked_score = attacker.get_pred_label(
    model,
    inp_adv,
    ret_out_scores=True,
    ret_out_pred=True,
)

In [None]:
# Report the classifier's decision on the adversarial input.
attacked_idx = int(attacked_pred)
print( "current prediction: %d (%s)\n" % ( attacked_idx, classnames[attacked_idx] ) )

# List the five highest scores together with their class index and name.
top_attacked_scores, top_attacked_indices = attacked_score.topk(5)
print( "current top-5 scores:" )
for score, idx in zip( top_attacked_scores.numpy().ravel(), top_attacked_indices.numpy().ravel() ):
    class_idx = int(idx)
    print( ' - %d (%s): %.4f' % ( class_idx, classnames[class_idx], score ) )

# The attack counts as successful when the predicted label has changed.
print("\nDid we fooled the classifier?")
print(' - Yes!' if attacked_idx != int(out_pred) else ' - Nah.')