In [1]:
import os

import numpy as onp
import tensorflow as tf
import matplotlib.pyplot as plt

from utils import *

In [2]:
# Pin TensorFlow to the first physical GPU (if any) and report how many
# physical / logical GPU devices are visible afterwards.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only use the first GPU
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized;
        # the error is only printed, so execution continues with the
        # current device configuration.
        print(e)

1 Physical GPUs, 1 Logical GPU


# hyperparameters

In [3]:
# Data configuration.
DATASET = 'mnist'
class_num = 10       # number of label classes
test_size = None     # used as a slice bound on the test split; None keeps everything
train_size = 512     # number of shuffled training examples kept for training

# Per-dataset image shape (H, W, C); remains None for any other dataset name.
image_shape = {
    'mnist': (28, 28, 1),
    'cifar10': (32, 32, 3),
}.get(DATASET)

# Optimisation configuration.
batch_size = 256
epochs = 50

In [29]:
# Perturbation magnitude `eps` for the selected dataset, plus derived values
# 1.1 * eps / N for N = 10, 100, 1000.
# NOTE(review): the eps_iter_* names suggest per-step sizes for N-step
# iterative attacks; no consumer is visible in this part of the notebook.
if DATASET == 'mnist':
    eps = 0.3
    eps_iter_10 = (eps/10)*1.1
    eps_iter_100 = (eps/100)*1.1
    eps_iter_1000 = (eps/1000)*1.1

elif DATASET == 'cifar10':
    eps = 0.03
    eps_iter_10 = (eps/10)*1.1
    eps_iter_100 = (eps/100)*1.1
    # Defined for cifar10 too, so both datasets expose the same set of names.
    eps_iter_1000 = (eps/1000)*1.1

In [4]:
# Load the four splits returned by utils.get_dataset and materialise each one
# as a NumPy array. Flattening/normalisation is switched off via the keyword.
x_train_all, y_train_all, x_test_all, y_test_all = map(
    onp.array,
    get_dataset(DATASET, None, None, do_flatten_and_normalize=False),
)

In [5]:
# shuffle
# Fixed seed so the train/validation split taken below is reproducible.
seed = 0
# NOTE(review): `shaffle` comes from utils; presumably it permutes images and
# labels in unison using `seed` — confirm against its definition.
x_train_all, y_train_all = shaffle(x_train_all, y_train_all, seed)

In [6]:
# Cast both image arrays to float32 (labels are left untouched).
x_train_all, x_test_all = (
    images.astype(onp.float32) for images in (x_train_all, x_test_all)
)

In [7]:
# down sample
# The first `train_size` shuffled examples form the training set; everything
# after them is held out for validation.
x_train, x_valid = x_train_all[:train_size], x_train_all[train_size:]
y_train, y_valid = y_train_all[:train_size], y_train_all[train_size:]

# Optionally truncate the test split (a `test_size` of None keeps all of it).
x_test, y_test = x_test_all[:test_size], y_test_all[:test_size]

In [8]:
# Give every image split the layout (N, *image_shape) expected by the CNN input.
x_train, x_valid, x_test = (
    images.reshape((-1, *image_shape)) for images in (x_train, x_valid, x_test)
)

In [9]:
# Training pipeline: shuffle (buffer of 100000 elements), batch, then
# prefetch up to 10 batches.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(100000)
    .batch(batch_size)
    .prefetch(10)
)

In [10]:
# Validation pipeline: batched only, no shuffling.
valid_ds = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
valid_ds = valid_ds.batch(batch_size)

In [11]:
# Short alias for the Keras layers namespace used to build the model below.
layers = tf.keras.layers

In [12]:
# Small CNN: three 3x3 'same'-padded ReLU conv layers with 64 filters each,
# a 64-unit ReLU dense layer, and a linear output layer that emits logits.
# All kernels use Glorot-normal initialisation.
img_input = layers.Input(shape=image_shape)
x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', 
                  kernel_initializer=tf.keras.initializers.GlorotNormal())(img_input)
x = layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                 kernel_initializer=tf.keras.initializers.GlorotNormal())(x)
x = layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                 kernel_initializer=tf.keras.initializers.GlorotNormal())(x)
x = layers.Flatten()(x)
x = layers.Dense(64, activation='relu', kernel_initializer=tf.keras.initializers.GlorotNormal())(x)
# Output width follows the configured class count instead of a literal 10;
# no activation here, so the model returns raw logits.
out = layers.Dense(class_num, kernel_initializer=tf.keras.initializers.GlorotNormal())(x)

model = tf.keras.Model(inputs=img_input, outputs=out)

In [13]:
def scheduler(epoch, lr):
    """Return the learning rate for a given epoch index.

    Piecewise-constant schedule:
        epoch <  10 -> 1e-3
        epoch <  20 -> 1e-2
        epoch <  35 -> 1e-3
        otherwise   -> 1e-4

    Args:
        epoch: epoch index compared against the boundaries above.
        lr: current learning rate; accepted for the callback signature but
            not used by this schedule.

    Returns:
        The learning rate (float) for `epoch`.
    """
    # (exclusive upper bound, rate) pairs, checked in ascending order.
    stages = ((10, 1e-3), (20, 1e-2), (35, 1e-3))
    for upper_bound, rate in stages:
        if epoch < upper_bound:
            return rate
    return 1e-4

In [14]:
# SGD with momentum 0.9; the optimizer's default learning rate is replaced at
# the start of each epoch by the LearningRateScheduler callback passed to fit().
# from_logits=True because the model's final Dense layer has no activation.
# NOTE(review): CategoricalCrossentropy expects one-hot labels — confirm that
# get_dataset returns them in that form.
model.compile(optimizer=tf.keras.optimizers.SGD(momentum=0.9),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [15]:
# Keras callback that queries `scheduler(epoch, lr)` for each epoch's learning rate.
callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

In [16]:
# Train for `epochs` epochs on train_ds, evaluating on valid_ds (the remainder
# of the shuffled training set) after every epoch.
model.fit(x=train_ds, validation_data=valid_ds, epochs=epochs, callbacks=[callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7fe54008bf98>

In [17]:
# Checkpoint the trained weights. The file name is derived from DATASET and
# train_size so that changing the configuration cannot save weights under a
# misleading 'mnist' / '512' name; with the current settings the path is
# './model_weights/mnist-simple_cnn_thin_train=512-without-DA_ce.h5'.
weights_dir = './model_weights'
# Create the target directory up front; writing the HDF5 file fails if it is missing.
os.makedirs(weights_dir, exist_ok=True)
model.save_weights(
    '{}/{}-simple_cnn_thin_train={}-without-DA_ce.h5'.format(weights_dir, DATASET, train_size)
)

In [18]:
# Evaluate on the test split. Labels come from y_test (sliced with the same
# `test_size` as x_test) rather than y_test_all, so images and labels stay
# aligned even when test_size is not None.
model.evaluate(x=x_test, y=y_test, verbose=0)

[1.14113631169945, 0.8549]

In [19]:
def tangent_feature(model, xs):
    """Return the flattened Jacobian of `model(xs)` w.r.t. the trainable weights.

    The Jacobian of the model output is taken with respect to every entry of
    `model.trainable_weights`; each per-weight Jacobian is reshaped to
    `[len(xs), -1]` and the pieces are concatenated along axis 1.

    Args:
        model: callable with a `trainable_weights` attribute (a Keras model here).
        xs: batch of inputs; `len(xs)` is used as the leading dimension.
            Assumes the model output has a matching leading batch axis.

    Returns:
        A 2-D tensor with one row of concatenated gradients per input.
    """
    batch = len(xs)
    with tf.GradientTape() as tape:
        outputs = model(xs)
    per_weight_jacobians = tape.jacobian(outputs, model.trainable_weights)
    return tf.concat(
        [tf.reshape(jac, [batch, -1]) for jac in per_weight_jacobians],
        1,
    )

@tf.function
def kernel(model, x, y):
    """Return the sum of the element-wise product of the tangent features of x and y.

    Both inputs are mapped through `tangent_feature` and the result is reduced
    to a single scalar over all elements. The notebook calls this with
    single-example batches (`x[i][None]`).

    Args:
        model: model forwarded to `tangent_feature`.
        x: first input batch.
        y: second input batch; its tangent features must be shape-compatible
           with those of `x` for the element-wise product.

    Returns:
        Scalar tensor.
    """
    features_x = tangent_feature(model, x)
    features_y = tangent_feature(model, y)
    return tf.reduce_sum(features_x * features_y)

In [20]:
# Shared cross-entropy on raw logits, used by the FGSM helpers below.
ce_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

@tf.function
def targeted_fgsm(x, y_target, model, eps):
    """Single signed-gradient step that lowers the loss towards `y_target`.

    Computes the gradient of `ce_loss(y_target, model(x))` with respect to `x`,
    moves `x` by `eps` against the sign of that gradient, and clips the result
    to the range [0, 1].

    Args:
        x: input batch; must be something `GradientTape.watch` accepts
           (a tensor).
        y_target: labels passed to `ce_loss` as the target.
        model: callable producing logits for `x`.
        eps: step size applied to the gradient sign.

    Returns:
        The perturbed inputs, clipped to [0, 1].
    """
    with tf.GradientTape() as tape:
        tape.watch(x)
        logits = model(x)
        target_loss = ce_loss(y_target, logits)
    input_grad = tape.gradient(target_loss, x)
    stepped = x - eps * tf.sign(input_grad)
    return tf.clip_by_value(stepped, 0, 1)

@tf.function
def untargeted_fgsm(x, y_true, model, eps):
    """Single signed-gradient step that raises the loss on `y_true`.

    Computes the gradient of `ce_loss(y_true, model(x))` with respect to `x`,
    moves `x` by `eps` along the sign of that gradient, and clips the result
    to the range [0, 1].

    Args:
        x: input batch; must be something `GradientTape.watch` accepts
           (a tensor).
        y_true: labels passed to `ce_loss` as the ground truth.
        model: callable producing logits for `x`.
        eps: step size applied to the gradient sign.

    Returns:
        The perturbed inputs, clipped to [0, 1].
    """
    with tf.GradientTape() as tape:
        tape.watch(x)
        logits = model(x)
        true_loss = ce_loss(y_true, logits)
    input_grad = tape.gradient(true_loss, x)
    stepped = x + eps * tf.sign(input_grad)
    return tf.clip_by_value(stepped, 0, 1)

In [21]:
from tqdm import tqdm

In [22]:
# Preallocate the (train_size x train_size) train/train kernel matrix;
# its entries are filled pairwise by the loop over kernel(model, ., .).
k_train_train = onp.zeros((train_size, train_size), dtype=onp.float32)

In [65]:
# Promote the kernel matrix to float64.
# NOTE(review): the execution counts (In [65], just before the inversion in
# In [66]) indicate this was originally run after the fill loop (In [23]),
# i.e. as a precision bump ahead of onp.linalg.inv. In file order it runs
# before the fill, which also works because the loop assigns in place.
k_train_train = k_train_train.astype(onp.float64)

In [74]:
# onp.save('k_train_train.npy', k_train_train)

In [76]:
# onp.save('k_test_train.npy', k_test_train)

In [23]:
# Fill the train/train kernel. Only the upper triangle (col >= row) is
# evaluated; each value is mirrored into the lower triangle.
for row in tqdm(range(train_size)):
    anchor = x_train[row][None]  # add a leading batch axis of size 1
    for col in range(row, train_size):
        k_train_train[row, col] = kernel(model, anchor, x_train[col][None])
        k_train_train[col, row] = k_train_train[row, col]

100%|██████████| 512/512 [05:31<00:00,  1.54it/s]


In [24]:
def inv(k, diag_reg=1e-5):
    """Invert a square matrix after adding a diagonal regulariser.

    Computes ``inverse(k + diag_reg * I)``. The small diagonal term keeps the
    inversion well-defined when `k` is singular or badly conditioned.

    Args:
        k: square 2-D array-like.
        diag_reg: value added to every diagonal entry before inversion.
            Defaults to 1e-5, the constant previously hard-coded here.

    Returns:
        NumPy array holding the inverse of the regularised matrix.

    Raises:
        numpy.linalg.LinAlgError: if the regularised matrix is singular or
            `k` is not square.
    """
    k = onp.asarray(k)
    return onp.linalg.inv(k + diag_reg * onp.eye(k.shape[0]))

In [66]:
# Regularised inverse of the train/train kernel (see inv: adds 1e-5 * I).
k_train_train_inv = inv(k_train_train)

In [32]:
# downsample
# Keep only the first `train_size` test images so the test/train kernel
# allocated below as (train_size, train_size) has one row per test image.
# NOTE(review): y_test is not truncated here; slice it the same way before
# comparing predictions against labels.
x_test = x_test[:train_size]

In [33]:
# Preallocate the test/train kernel matrix: rows index the (downsampled) test
# images, columns index the training images.
k_test_train = onp.zeros((train_size, train_size), dtype=onp.float32)

In [56]:
# Fill the test/train kernel entry by entry (no symmetry to exploit here).
for row in tqdm(range(train_size)):
    query = x_test[row][None]  # add a leading batch axis of size 1
    for col in range(train_size):
        k_test_train[row, col] = kernel(model, query, x_train[col][None])

100%|██████████| 512/512 [10:53<00:00,  1.28s/it]


In [70]:
# Kernel-regression prediction: K_test,train @ (K_train,train^-1 @ Y_train).
ans = k_test_train @ (k_train_train_inv @ y_train[:train_size])

In [79]:
# onp.argmax(ans, axis=1) == onp.argmax(y_test[:512], axis=1)