# Train mnist models

#### import packages

In [10]:
from keras.datasets import mnist
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from keras.layers import Convolution2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.layers import BatchNormalization

import argparse
import numpy as np
from tensorflow.python.ops import init_ops

from tensorflow.python.platform import flags

import keras.backend as K


import tensorflow._api.v2.compat.v1 as tf

import time
import sys

#### define tf util functions

In [5]:
# Number of training steps between progress printouts in tf_train
# (a report is printed whenever step % EVAL_FREQUENCY == 0).
EVAL_FREQUENCY = 100

def error_rate(predictions, labels):
    """
    Compute the classification error as a percentage.

    The class of each row is taken to be the index of its largest entry
    (argmax along axis 1), for both `predictions` and `labels`. The result
    is 100 minus the percentage of rows whose classes agree.
    """

    assert len(predictions) == len(labels)

    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)

    # Same arithmetic order as before: (100 * hits) / number of rows.
    num_correct = np.sum(predicted_classes == true_classes)
    percent_correct = 100.0 * num_correct / predictions.shape[0]

    return 100.0 - percent_correct

def batch_eval(tf_inputs, tf_outputs, numpy_inputs, batch_size=10):
    """
    A helper function that computes a tensor on numpy inputs by batches.
    From: https://github.com/openai/cleverhans/blob/master/cleverhans/utils_tf.py

    Args:
        tf_inputs: list of placeholders to feed.
        tf_outputs: list of tensors to evaluate.
        numpy_inputs: list of arrays, one per placeholder; all must have
            the same size along axis 0.
        batch_size: number of rows fed per session run. Defaults to 10,
            the value that used to be hard-coded.

    Returns:
        A list with one array per entry of `tf_outputs`, obtained by
        concatenating the per-batch results along axis 0.
    """

    n = len(numpy_inputs)
    assert n > 0
    assert n == len(tf_inputs)
    assert batch_size > 0
    m = numpy_inputs[0].shape[0]
    for i in range(1, n):
        assert numpy_inputs[i].shape[0] == m

    # One list of per-batch results for every requested output tensor.
    out = [[] for _ in tf_outputs]

    sess = K.get_session()
    for start in range(0, m, batch_size):
        end = start + batch_size

        # Slicing past the end of an array is safe; the last batch is
        # simply shorter.
        numpy_input_batches = [numpy_input[start:end]
                               for numpy_input in numpy_inputs]
        cur_batch_size = numpy_input_batches[0].shape[0]
        assert cur_batch_size <= batch_size
        for e in numpy_input_batches:
            assert e.shape[0] == cur_batch_size

        feed_dict = dict(zip(tf_inputs, numpy_input_batches))
        # Learning phase 0 = inference mode.
        feed_dict[K.learning_phase()] = 0
        numpy_output_batches = sess.run(tf_outputs, feed_dict=feed_dict)
        for e in numpy_output_batches:
            assert e.shape[0] == cur_batch_size, e.shape
        for out_elem, numpy_output_batch in zip(out, numpy_output_batches):
            out_elem.append(numpy_output_batch)

    out = [np.concatenate(x, axis=0) for x in out]
    for e in out:
        assert e.shape[0] == m, e.shape
    return out

def gen_adv_loss(logits, y, loss='logloss', mean=False):
    """
    Generate the loss function.

    Args:
        logits: tensor of unnormalised class scores (it is passed to the
            cross-entropy with from_logits=True).
        y: tensor of target class distributions; ignored when
            loss == 'training'.
        loss: 'logloss' uses `y` as the target; 'training' replaces `y`
            by the model's own arg-max prediction.
        mean: if True return the mean of the per-sample losses, otherwise
            their sum.

    Returns:
        A scalar loss tensor.

    Raises:
        ValueError: if `loss` is neither 'training' nor 'logloss'.
    """

    if loss == 'training':
        # use the model's output instead of the true labels to avoid
        # label leaking at training time
        y = K.cast(K.equal(logits, K.max(logits, 1, keepdims=True)), "float32")
        # ties for the maximum share the probability mass equally
        y = y / K.sum(y, 1, keepdims=True)
    elif loss != 'logloss':
        raise ValueError("Unknown loss: {}".format(loss))

    # Keras 2 signature is categorical_crossentropy(target, output, ...):
    # the target distribution comes first, the logits second.
    out = K.categorical_crossentropy(y, logits, from_logits=True)

    if mean:
        out = K.mean(out)
    else:
        out = K.sum(out)
    return out

def tf_train(x, y, model, X_train, Y_train, generator, x_advs=None,
             num_of_epochs=0, batch_size=10):
    """
    Train `model` with Adam on mini-batches drawn from `generator`.

    Args:
        x: input placeholder fed with image batches.
        y: label placeholder fed with label batches.
        model: callable Keras model; its output is treated as logits.
        X_train: training inputs (also used to pad a short final batch).
        Y_train: training labels; Y_train.shape[0] is the training size.
        generator: object whose flow(X, Y, batch_size=...) yields
            (data, labels) batches.
        x_advs: optional list of tensors producing adversarial inputs.
            When given, one of them is picked at random at every step and
            the loss becomes the average of the clean and adversarial
            losses.
        num_of_epochs: number of passes over the training data. With a
            value that yields zero steps the function returns without
            training.
        batch_size: mini-batch size. Defaults to 10, the value that used
            to be hard-coded.

    Returns:
        None. Progress is printed every EVAL_FREQUENCY steps.
    """
    old_vars = set(tf.global_variables())
    train_size = Y_train.shape[0]

    # Generate cross-entropy loss for training
    logits = model(x)
    preds = K.softmax(logits)
    l1 = gen_adv_loss(logits, y, mean=True)

    # add adversarial training loss
    if x_advs is not None:
        idx = tf.placeholder(dtype=np.int32)
        logits_adv = model(tf.stack(x_advs)[idx])
        l2 = gen_adv_loss(logits_adv, y, mean=True)
        loss = 0.5*(l1+l2)
    else:
        l2 = tf.constant(0)
        loss = l1

    optimizer = tf.train.AdamOptimizer().minimize(loss)

    # Initialise only the variables created above (the optimizer's), so
    # variables that existed before this call keep their current values.
    sess = K.get_session()
    new_vars = set(tf.global_variables()) - old_vars
    sess.run(tf.variables_initializer(list(new_vars)))
    start_time = time.time()
    print('Initialized!')

    # Number of training steps: ceil(num_of_epochs * train_size / batch_size)
    num_steps = int(num_of_epochs * train_size + batch_size - 1) // batch_size
    if num_steps <= 0:
        # Keras' ImageDataGenerator.flow keeps yielding batches forever, so
        # the loop below only stops once the step counter reaches
        # num_steps. With zero steps requested it would never stop.
        return

    step = 0
    for (batch_data, batch_labels) \
            in generator.flow(X_train, Y_train, batch_size=batch_size):

        # Pad a short batch with the first training samples so every step
        # sees exactly batch_size rows.
        if len(batch_data) < batch_size:
            k = batch_size - len(batch_data)
            batch_data = np.concatenate([batch_data, X_train[0:k]])
            batch_labels = np.concatenate([batch_labels, Y_train[0:k]])

        feed_dict = {x: batch_data,
                     y: batch_labels,
                     K.learning_phase(): 1}

        # choose source of adversarial examples at random
        # (for ensemble adversarial training)
        if x_advs is not None:
            feed_dict[idx] = np.random.randint(len(x_advs))

        # Run the graph; model.updates is fetched as one nested entry so
        # the layer update ops run together with the optimizer step.
        _, curr_loss, curr_l1, curr_l2, curr_preds, _ = \
            sess.run([optimizer, loss, l1, l2, preds]
                     + [model.updates],
                     feed_dict=feed_dict)

        if step % EVAL_FREQUENCY == 0:
            elapsed_time = time.time() - start_time
            start_time = time.time()
            print('Step %d (epoch %.2f), %.2f s' %
                (step, float(step) * batch_size / train_size,
                 elapsed_time))
            print('Minibatch loss: %.3f (%.3f, %.3f)' % (curr_loss, curr_l1, curr_l2))

            print('Minibatch error: %.1f%%' % error_rate(curr_preds, batch_labels))

            sys.stdout.flush()

        step += 1
        if step >= num_steps:
            break
def tf_test_error_rate(model, x, X_test, y_test):
    """
    Evaluate `model` on a test set and return its error rate.

    The model is applied to placeholder `x`, its output is passed through
    a softmax, and the result is evaluated on `X_test` in batches. The
    returned value is whatever error_rate reports for those predictions
    against `y_test`.
    """
    assert len(X_test) == len(y_test)

    # Symbolic predictions for whatever is fed into x.
    probabilities = K.softmax(model(x))

    # batch_eval returns one array per requested tensor; only one is asked for.
    (test_predictions,) = batch_eval([x], [probabilities], [X_test])

    return error_rate(test_predictions, y_test)

#### get mnist data

In [12]:
def data_mnist(one_hot=True):
    """
    Load the MNIST dataset and prepare it for the models in this file.

    Images are reshaped to (num_samples, 28, 28, 1), converted to float32
    and divided by 255. When `one_hot` is true the labels are converted
    to 10-column float32 class matrices; otherwise they are returned as
    loaded.

    Returns:
        The tuple (X_train, y_train, X_test, y_test).
    """
    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # Add a trailing channel axis to every image.
    image_shape = (28, 28, 1)
    X_train = X_train.reshape((X_train.shape[0],) + image_shape)
    X_test = X_test.reshape((X_test.shape[0],) + image_shape)

    # Convert to float32, then rescale in place.
    X_train = X_train.astype('float32')
    X_train /= 255
    X_test = X_test.astype('float32')
    X_test /= 255

    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    print("Loaded MNIST test data.")

    if one_hot:
        # convert class vectors to binary class matrices
        y_train, y_test = [
            np_utils.to_categorical(class_ids, 10).astype(np.float32)
            for class_ids in (y_train, y_test)
        ]

    return X_train, y_train, X_test, y_test

def data_gen_mnist(X_train):
    """
    Create an ImageDataGenerator with default settings, fit it on
    `X_train`, and return it.
    """
    generator = ImageDataGenerator()
    generator.fit(X_train)

    return generator

#### configure tensorflow and load mnist data

In [13]:


# configure tensorflow v1 and keras backend
tf.disable_v2_behavior()

# fixed seed so numpy-based randomness is repeatable between runs
np.random.seed(0)
# The backend is queried through K: the imports in this file bind
# keras.backend as K but never bind the name `keras` itself.
assert K.backend() == "tensorflow"


# get mnist data here
X_train, Y_train, X_test, Y_test = data_mnist()

# batch generator fitted on the training images
data_gen = data_gen_mnist(X_train)

X_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Loaded MNIST test data.


#### define placeholders

In [None]:
# Image batch placeholder: any number of 28x28 images with one channel.
x = K.placeholder(shape=(None, 28, 28, 1))

# Label batch placeholder: one row of 10 class entries per image.
y = K.placeholder(shape=(None, 10))

#### define the models (only model A is defined here)

In [14]:
def modelA():
    """
    Build the convolutional MNIST classifier "model A".

    Architecture: two 64-filter and two 128-filter 5x5 convolutions, one
    256-filter 5x5 convolution, each group followed by 2x2 max pooling
    (with batch normalisation after the first two groups and after
    flattening), then a 512-unit ReLU layer and a 10-unit output layer.

    Returns:
        An uncompiled Sequential model taking (28, 28, 1) inputs and
        producing 10 unnormalised class scores (logits). The softmax is
        applied by the callers in this file (tf_train and
        tf_test_error_rate call K.softmax on the model output, and
        gen_adv_loss uses from_logits=True).
    """
    model = Sequential()

    # padding="same" keeps the spatial size through every convolution.
    # With the default "valid" padding the feature map would shrink
    # 28 -> 24 -> 20 -> 10 -> 6 -> 2 -> 1 and the last 5x5 convolution
    # could not be applied.
    model.add(Convolution2D(filters=64, kernel_size=(5, 5), padding="same",
                            activation="relu", input_shape=(28, 28, 1)))
    model.add(Convolution2D(filters=64, kernel_size=(5, 5), padding="same",
                            activation="relu"))

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Convolution2D(filters=128, kernel_size=(5, 5), padding="same",
                            activation="relu"))
    model.add(Convolution2D(filters=128, kernel_size=(5, 5), padding="same",
                            activation="relu"))

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Convolution2D(filters=256, kernel_size=(5, 5), padding="same",
                            activation="relu"))

    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(BatchNormalization())
    model.add(Dense(512, activation="relu"))

    # No softmax here: the output layer emits logits.
    model.add(Dense(10))
    return model


def model_mnist(type=1):
    """
    Defines MNIST model using Keras sequential model

    Args:
        type: integer index into the list of available model builders
            (0 selects modelA). NOTE: only one builder is registered
            here, so the default value 1 is out of range.

    Returns:
        The model returned by the selected builder.

    Raises:
        IndexError: if `type` is an integer outside the list of builders.
        TypeError: if `type` is not a valid list index (e.g. a string).
    """

    models = [modelA]

    try:
        build_model = models[type]
    except IndexError:
        raise IndexError(
            "Unknown model type {!r}: expected an index below {}".format(
                type, len(models))) from None
    except TypeError:
        raise TypeError(
            "Model type must be an integer index, got {!r}".format(
                type)) from None

    return build_model()

#### define model mnist using keras

In [None]:
# model_mnist selects a builder by integer index; 0 is modelA.
model = model_mnist(type=0)