In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import os
import datetime
import inception
import math
import scipy
import scipy.misc
import glob

inception.data_dir = 'inception/'

inception.maybe_download()
model = inception.Inception()
resized_image = model.resized_image
y_pred = model.y_pred
y_logits = model.y_logits

# Set the graph for the Inception model as the default graph,
# so that all changes inside this with-block are done to that graph.
with model.graph.as_default():
    # Add a placeholder variable for the target class-number.
    # This will be set to e.g. 300 for the 'bookcase' class.
    pl_cls_target = tf.placeholder(dtype=tf.int32)

    # Add a new loss-function. This is the cross-entropy.
    # See Tutorial #01 for an explanation of cross-entropy.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_logits, labels=[pl_cls_target])

    # Get the gradient for the loss-function with regard to
    # the resized input image.
    gradient = tf.gradients(loss, resized_image)

session = tf.Session(graph=model.graph)
time = datetime.datetime.now().strftime('%m%d%H%M%S')

images = glob.glob("./images/*.JPEG")

# Parameter configs
cls_target=300
noise_limit=3.0
required_score=0.99
show_image=False

Downloading Inception v3 Model ...
Data has apparently already been downloaded and unpacked.


In [2]:
def normalize_image(x):
    # Get the min and max values for all pixels in the input.
    x_min = x.min()
    x_max = x.max()

    # Normalize so all values are between 0.0 and 1.0
    x_norm = (x - x_min) / (x_max - x_min)

    return x_norm

In [3]:
def test_precision(i, image):
    test_image = np.clip(a=image, a_min=0.0, a_max=255.0)

    # Create a feed-dict. This feeds the noisy image to the
    # tensor in the graph that holds the resized image, because
    # this is the final stage for inputting raw image data.
    # This also feeds the target class-number that we desire.
    feed_dict = {model.tensor_name_resized_image: [test_image],
                 pl_cls_target: cls_target}

    # Calculate the predicted class-scores as well as the gradient.
    pred, grad = session.run([y_pred, gradient],
                             feed_dict=feed_dict)

    '''
    final_class = np.argmax(pred)

    # Names for the source and target classes.
    name_source = model.name_lookup.cls_to_name(final_class,
                                                only_first_name=True)
    print('is classified as {} with score {}'. format(name_source, pred.max()))
    '''
    
    # Convert the predicted class-scores to a one-dim array.
    pred = np.squeeze(pred)

    # The scores (probabilities) for the source and target classes.
    score_source = pred[cls_source]
    score_target = pred[cls_target]

    return score_source > score_target

In [4]:
# Simple heuristic to implement total variance
def tv_compress(image):
    A = image
    index1 = np.random.randint(0,A.shape[0],k)
    index2 = np.random.randint(0,A.shape[1],k)
    for x in range(100):
        for i in range(A.shape[0]):
            for j in range(A.shape[1]):
                cnt_ = 0
                sum_ = 0
                if(i > 0):
                    cnt_ += 1
                    sum_ += A[i-1, j]
                if(j > 0):
                    cnt_ += 1
                    sum_ += A[i, j-1]
                if(i < A.shape[0]-1):
                    cnt_ += 1
                    sum_ += A[i+1, j]
                if(j < A.shape[1]-1):
                    cnt_ += 1
                    sum_ += A[i, j+1]
                A[i, j] = sum_ / cnt_
    return A

In [5]:
def bit_compress(image, compress_bit):
    compressed_image = image
    for i in range(compressed_image.shape[0]):
        for j in range(compressed_image.shape[1]):
            for k in range(compressed_image.shape[2]):
                compressed_image[i,j,k] -= compressed_image[i,j,k]% math.pow(2, compress_bit)
    return compressed_image

In [6]:
def median(lst):
    quotient, remainder = divmod(len(lst), 2)
    if remainder:
        return sorted(lst)[quotient]
    return sum(sorted(lst)[quotient - 1:quotient + 1]) / 2.

def checkindex(image, index_x, index_y):
    if(index_x < 0 or index_x >= image.shape[0]):
        return False;
    if(index_y < 0 or index_y >= image.shape[1]):
        return False;
    return True;

def find_median_in_sliding_windown(image, i, j, k, m, n):
    list = []
    for x in range(-m+1, m):
        for y in range(-n+1, n):
            if(checkindex(image, i+x, j+y)):
                list.append(image[i+x, j+y, k])
    return median(list)
            
# m = n = 1 is just using the pixel itself, i.e. no change
def spatial_smoothing(image, m, n):
    compressed_image = image
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            for k in range(image.shape[2]):
                compressed_image[i, j, k] = find_median_in_sliding_windown(image, i, j, k, m, n)
    return compressed_image

In [None]:
    total = len(images)
    success = [0., 0., 0., 0., 0.]
    threshold = [0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1]
    for image in images:
        feed_dict = model._create_feed_dict(image_path=image)

        #image = img[100]
        #image_path= 'cifar/'
        #feed_dict = model._create_feed_dict(image=image)



        pred, image = session.run([y_pred, resized_image],
                                      feed_dict=feed_dict)




        cls_source = np.argmax(pred)
        cls_target = 300

        # Score for the predicted class (aka. probability or confidence).
        score_source_org = pred.max()

        # Names for the source and target classes.
        name_source = model.name_lookup.cls_to_name(cls_source,
                                                    only_first_name=True)
        name_target = model.name_lookup.cls_to_name(cls_target,
                                                    only_first_name=True)

        # Initialize the noise to zero.
        noise = 0
        iterations = 0
        # Perform a number of optimization iterations to find
        # the noise that causes mis-classification of the input image.
        index = 0
        for i in range(10000):
            iterations = i
            
            # The noisy image is just the sum of the input image and noise.
            noisy_image = image + noise

            # Ensure the pixel-values of the noisy image are between
            # 0 and 255 like a real image. If we allowed pixel-values
            # outside this range then maybe the mis-classification would
            # be due to this 'illegal' input breaking the Inception model.
            noisy_image = np.clip(a=noisy_image, a_min=0.0, a_max=255.0)

            # Create a feed-dict. This feeds the noisy image to the
            # tensor in the graph that holds the resized image, because
            # this is the final stage for inputting raw image data.
            # This also feeds the target class-number that we desire.
            feed_dict = {model.tensor_name_resized_image: noisy_image,
                         pl_cls_target: cls_target}

            # Calculate the predicted class-scores as well as the gradient.
            pred, grad = session.run([y_pred, gradient],
                                     feed_dict=feed_dict)

            # Convert the predicted class-scores to a one-dim array.
            pred = np.squeeze(pred)

            # The scores (probabilities) for the source and target classes.
            score_source = pred[cls_source]
            score_target = pred[cls_target]

            # Squeeze the dimensionality for the gradient-array.
            grad = np.array(grad).squeeze()

            # The gradient now tells us how much we need to change the
            # noisy input image in order to move the predicted class
            # closer to the desired target-class.

            # Calculate the max of the absolute gradient values.
            # This is used to calculate the step-size.
            grad_absmax = np.abs(grad).max()

            # If the gradient is very small then use a lower limit,
            # because we will use it as a divisor.
            if grad_absmax < 1e-10:
                grad_absmax = 1e-10

            # Calculate the step-size for updating the image-noise.
            # This ensures that at least one pixel colour is changed by 7.
            # Recall that pixel colours can have 255 different values.
            # This step-size was found to give fast convergence.
            step_size = 1. / grad_absmax


            '''
            l2_disturb = np.linalg.norm(noise)/np.linalg.norm(image)

            step_size = 0.1 / max(0.00001, math.sqrt(l2_disturb))
            '''

            test_precision(iterations, (image + noise)[0])

            #l2_norm = np.linalg.norm(noise)/np.linalg.norm(image)
            l2_norm = math.sqrt(np.linalg.norm(noise)/np.linalg.norm(image))
            print ('l2 norm is {}'.format(math.sqrt(np.linalg.norm(noise)/np.linalg.norm(image))))

            # If the score for the target-class is not high enough.
            if index < len(threshold):
            #if score_target < required_score and index < len(threshold):
                # Update the image-noise by subtracting the gradient
                # scaled by the step-size.
                noise -= step_size * grad

                # Ensure the noise is within the desired range.
                # This avoids distorting the image too much.
                noise = np.clip(a=noise,
                                a_min=-noise_limit,
                                a_max=noise_limit)
                '''
                if (iterations % 10 == 0):
                    print("Print defense effect")
                    # Chose whatever defense method you want to use on the bottom.
                    test_precision(iterations, spatial_smoothing((image + noise)[0], 3, 3))
                '''
                
                if l2_norm >= threshold[index]:
                    #print("inside while loop")
                    # Abort the optimization because the score is high enough.
                    if(test_precision(iterations, bit_compress((image + noise)[0], 2))):
                        #print("index is ", index)
                        success[index] += 1.
                        index += 1
                    else:
                        index += 10
                    
            else:  
                print(success)
                break;
                
        print("finished image ")
    
                
    print("limit", l2_limit, "successful rate is ", success/total)


l2 norm is 0.0
l2 norm is 0.022531122452808736
l2 norm is 0.03014246426930797
l2 norm is 0.03085954189255408
l2 norm is 0.03619725852963769
l2 norm is 0.03818029093055728
l2 norm is 0.04185329800101976
l2 norm is 0.043962517655200685
l2 norm is 0.04679675824241529
l2 norm is 0.04801016913512481
l2 norm is 0.051686271221443494
l2 norm is 0.0520740197715652
[1.0, 1.0, 1.0, 0.0, 0.0]
finished image 
l2 norm is 0.0
l2 norm is 0.016823500015945646
l2 norm is 0.025743949070831308
l2 norm is 0.029909628168331346
l2 norm is 0.03534242883471932
l2 norm is 0.038353777501800836
[2.0, 1.0, 1.0, 0.0, 0.0]
finished image 
l2 norm is 0.0
l2 norm is 0.023579679070496424
l2 norm is 0.03274957571080144
l2 norm is 0.03930352180854958
