In [1]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds

def fast_gradient_signed(x, y, model, eps):
    with tf.GradientTape() as tape:
        tape.watch(x)
        y_pred = model(x)
        loss = model.loss(y, y_pred)
    gradient = tape.gradient(loss, x)
    sign = tf.sign(gradient)
    return x + eps * sign

def generate_adversaries(model, l, dataset, eps):
    true_advs = []
    false_advs = []
    max_true_advs = max_false_advs = l // 2
    for x, y in dataset:
        # generate adversaries
        x_advs = fast_gradient_signed(x, y, model, eps)
        
        y_preds = tf.argmax(model(x), axis=1)
        y_pred_advs = tf.argmax(model(x_advs), axis=1)
        for x_adv, y_pred_adv, y_pred, y_true in zip(x_advs, y_pred_advs, y_preds, y):
            # x_adv is a true adversary
            if y_pred == y_true and y_pred_adv != y_true and len(true_advs) < max_true_advs:
                true_advs.append((x_adv, y_true))
                
            # x_adv is a false adversary
            if y_pred == y_true and y_pred_adv == y_true and len(false_advs) < max_false_advs:
                false_advs.append((x_adv, y_true))
            
            if len(true_advs) == max_true_advs and len(false_advs) == max_false_advs:
                return true_advs, false_advs
            
    raise Exception(f"Only found {len(true_advs)}/{max_true_advs} true adversaries and {len(false_advs)}/{max_false_advs} false adversaries")

In [2]:
def to_float(x, y):
    return tf.cast(x, tf.float32) / 255.0, y

dataset = tfds.load("mnist", split="train", as_supervised=True)
val_set = tfds.load("mnist", split="test", as_supervised=True)

dataset = dataset.map(to_float).shuffle(2048).batch(128).prefetch(-1)
val_set = val_set.map(to_float).batch(128)

model = keras.Sequential([
            keras.layers.Conv2D(16, 3, padding="same", activation="relu"),
            keras.layers.Conv2D(32, 3, padding="same", strides=2, activation="relu"),
            keras.layers.Conv2D(64, 3, padding="same", strides=2, activation="relu"),
            keras.layers.Flatten(),
            keras.layers.Dense(10, activation=None)
        ])

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9), 
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
              metrics=["sparse_categorical_accuracy"])
model.build(input_shape=(None, 28, 28, 1))

# generate key set
true_advs, false_advs = generate_adversaries(model, 100, dataset, 1. / 255.)

key_set_x = tf.data.Dataset.from_tensor_slices([x for x, y in true_advs + false_advs])
key_set_y = tf.data.Dataset.from_tensor_slices([y for x, y in true_advs + false_advs])
key_set = tf.data.Dataset.zip((key_set_x, key_set_y)).batch(128)

_ = model.fit(dataset, epochs=3, validation_data=val_set)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [3]:
# reset the optimizer and embed the watermark
model.optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
_ = model.fit(key_set, epochs=2, validation_data=val_set)

Epoch 1/2


FailedPreconditionError:  Error while reading resource variable SGD/learning_rate_71639 from Container: localhost. This could mean that the variable was uninitialized. Not found: Resource localhost/SGD/learning_rate_71639/N10tensorflow3VarE does not exist.
	 [[node SGD/Cast/ReadVariableOp (defined at <ipython-input-2-5a3e2d7c1ba7>:30) ]] [Op:__inference_train_function_71942]

Function call stack:
train_function
