### Imports and helper functions

In [None]:
from __future__ import print_function
from keras.layers import Input, Dense, Dropout, Flatten, Lambda
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Model
from keras.utils import np_utils
from keras.datasets import mnist
from keras.layers import GradientReversalLayer
from keras.engine.training import make_batches
from keras.datasets import mnist_m
import keras.backend as K
import numpy as np
import warnings
warnings.filterwarnings("ignore")


''' This class will build the necessary architecture of a domain adaptation model as 
proposed by Ganin (2016). Please note that to run this model, you have to update the 
keras module with the files provided in the keras/ folder.
'''
class DANNBuilder(object):
    """Build the Keras graphs used for domain-adversarial training (DANN).

    The source-only model and the DANN model are both compiled with the
    single SGD optimizer stored in ``self.opt``. Layer hyper-parameters
    (``nb_filters``, ``nb_conv``, ``nb_pool``, ``nb_classes``) and
    ``batch_size`` are read from module-level globals at the time a model is
    built, so they must be defined before any ``build_*`` method is called.
    """

    def __init__(self):
        self.model = None
        self.net = None
        # Output tensor of the most recently built feature extractor.
        self.domain_invariant_features = None
        # GradientReversalLayer instance of the most recently built DANN model.
        self.grl = None
        self.opt = SGD()

    @staticmethod
    def _plot_model(model):
        '''Render the model graph with Keras' plotting utility.'''
        # Imported lazily so that the optional plotting dependencies are only
        # needed when a plot is actually requested.
        from keras.utils.visualize_util import plot
        plot(model, show_shapes=True)

    def _build_feature_extractor(self, model_input):
        '''Build segment of net for feature extraction.

        Stacks two convolutions, max pooling, dropout and a flatten on top of
        ``model_input``. New layers are created on every call. The flattened
        output tensor is stored in ``self.domain_invariant_features`` and
        returned.
        '''
        net = Convolution2D(nb_filters, nb_conv, nb_conv,
                            border_mode='valid',
                            activation='relu')(model_input)
        net = Convolution2D(nb_filters, nb_conv, nb_conv,
                            activation='relu')(net)
        net = MaxPooling2D(pool_size=(nb_pool, nb_pool))(net)
        net = Dropout(0.5)(net)
        net = Flatten()(net)
        self.domain_invariant_features = net
        return net

    def _build_classifier(self, model_input):
        '''Build the label classifier head on top of ``model_input``.

        Returns the softmax output tensor of the layer named
        'classifier_output', which has ``nb_classes`` units.
        '''
        net = Dense(128, activation='relu')(model_input)
        net = Dropout(0.5)(net)
        net = Dense(nb_classes, activation='softmax',
                    name='classifier_output')(net)
        return net

    def build_source_model(self, main_input, plot_model=False):
        '''Build and compile the source-only model.

        The model is the feature extractor followed by the label classifier,
        with no domain branch. If ``plot_model`` is true the graph is plotted
        before compilation. Returns the compiled model.
        '''
        net = self._build_feature_extractor(main_input)
        net = self._build_classifier(net)
        model = Model(input=main_input, output=net)
        if plot_model:
            self._plot_model(model)
        model.compile(loss={'classifier_output': 'categorical_crossentropy'},
                      optimizer=self.opt, metrics=['accuracy'])
        return model

    def build_dann_model(self, main_input, plot_model=False):
        '''Build and compile the two-output DANN model.

        The outputs are, in order, the two-way 'domain_output' softmax (fed
        through a gradient reversal layer) and the 'classifier_output'
        softmax. If ``plot_model`` is true the graph is plotted before
        compilation. Returns the compiled model.
        '''
        net = self._build_feature_extractor(main_input)
        self.grl = GradientReversalLayer()
        branch = self.grl(net)
        branch = Dense(128, activation='relu')(branch)
        branch = Dropout(0.1)(branch)
        branch = Dense(2, activation='softmax', name='domain_output')(branch)

        # In the training phase, route only the first half of the batch (the
        # source examples) to the label classifier; the domain classifier
        # above always sees the full batch (half source, half target).
        # Outside the training phase the whole batch goes to the label
        # classifier.
        # Floor division keeps the slice bound and the declared output shape
        # integral under true division as well.
        half_batch = batch_size // 2
        net = Lambda(lambda x: K.switch(K.learning_phase(), x[:half_batch, :], x, lazy=True),
                     output_shape=lambda x: ((half_batch,) + x[1:]))(net)

        net = self._build_classifier(net)
        model = Model(input=main_input, output=[branch, net])
        if plot_model:
            self._plot_model(model)
        model.compile(loss={'classifier_output': 'categorical_crossentropy',
                      'domain_output': 'categorical_crossentropy'},
                      optimizer=self.opt, metrics=['accuracy'])
        return model

    def build_tsne_model(self, main_input):
        '''Create model to output intermediate layer
        activations to visualize domain invariant features.

        The output is the tensor recorded by the most recent
        ``_build_feature_extractor`` call, so a source or DANN model must
        have been built from ``main_input`` first.
        '''
        tsne_model = Model(input=main_input,
                           output=self.domain_invariant_features)
        return tsne_model


def batch_gen(batches, id_array, data, labels):
    """Yield mini-batches for one pass over ``batches``.

    ``id_array`` is shuffled in place once, when the pass starts, and is then
    sliced by each ``(start, end)`` pair. Every id covered by ``batches`` is
    therefore visited exactly once per pass. Reshuffling between batches of
    the same pass would repeat some samples and skip others.

    Args:
        batches: iterable of ``(start, end)`` index pairs into ``id_array``.
        id_array: 1-D NumPy array of sample indices; shuffled in place.
        data: array indexable by an array of ids.
        labels: array indexable like ``data``, or None to yield data only.

    Yields:
        ``(data[ids], labels[ids])`` when ``labels`` is given, otherwise
        ``data[ids]``.
    """
    np.random.shuffle(id_array)
    for batch_start, batch_end in batches:
        batch_ids = id_array[batch_start:batch_end]
        if labels is None:
            yield data[batch_ids]
        else:
            yield data[batch_ids], labels[batch_ids]


def evaluate_dann(X_test, batch_size, labels=None, model=None):
    """Return the mean label accuracy of a DANN model, predicted batch by batch.

    Predictions are made on consecutive batches of ``batch_size // 2``
    samples. Samples left over after the last full batch are not evaluated.

    Args:
        X_test: array of input samples, indexed along axis 0.
        batch_size: full training batch size; half of it is used per
            prediction batch.
        labels: one-hot labels aligned with ``X_test``. Defaults to the
            module-level ``y_test``.
        model: object whose ``predict_on_batch`` returns a
            ``(domain_output, class_probabilities)`` pair. Defaults to the
            module-level ``dann_model``.

    Returns:
        The average of the per-batch accuracies as a float.

    Raises:
        ValueError: if ``batch_size`` is below 2 or ``X_test`` holds fewer
            samples than one prediction batch.
    """
    if labels is None:
        labels = y_test
    if model is None:
        model = dann_model

    # Floor division keeps the batch size and batch count usable as slice
    # bounds and as a range() argument under true division.
    size = batch_size // 2
    if size < 1:
        raise ValueError('batch_size must be at least 2, got %r' % (batch_size,))
    num_batches = X_test.shape[0] // size
    if num_batches == 0:
        raise ValueError('X_test has fewer than %d samples; nothing to evaluate'
                         % size)

    acc = 0.0
    for i in range(num_batches):
        start = i * size
        stop = start + size
        # The first model output is the domain prediction and is not scored.
        _, prob = model.predict_on_batch(X_test[start:stop])
        predictions = np.argmax(prob, axis=1)
        actual = np.argmax(labels[start:stop], axis=1)
        acc += float(np.sum(predictions == actual)) / size
    return acc / num_batches

### Processing

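Before images can be fed to a neural network, some basic preprocessing is required: the data must be 32-bit floats, normalized, and have three channels (RGB). The MNIST data used below does not need to be rescaled because it is already low-dimensional, but ordinary images usually need to be downscaled to around 250×250 px. Note that the cell below uses `nb_classes`, `img_rows`, `img_cols` and `batch_size`, which are defined in the *Model parameters* cell, so run that cell first.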

In [None]:
# NOTE: this cell reads nb_classes, img_rows, img_cols and batch_size, which
# must already be defined when it runs.

# Source domain (MNIST): load the images and one-hot encode the digit labels.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
y_train = np_utils.to_categorical(y_train, nb_classes)
y_test = np_utils.to_categorical(y_test, nb_classes)

# Target domain (MNIST-M): move axis 3 to position 1, then scale to [0, 1]
# as float32.
mnistm = mnist_m.load_data()

XT_test = np.moveaxis(mnistm[b'test'], 3, 1).astype('float32') / 255
XT_train = np.moveaxis(mnistm[b'train'], 3, 1).astype('float32') / 255

# Give the source images an explicit single-channel axis, then replicate it
# three times so source and target inputs share the same channel count.
X_train = X_train.reshape((-1, 1, img_rows, img_cols))
X_test = X_test.reshape((-1, 1, img_rows, img_cols))
X_train = np.concatenate([X_train] * 3, axis=1).astype('float32') / 255
X_test = np.concatenate([X_test] * 3, axis=1).astype('float32') / 255

# Domain labels for one combined training batch: the first half of the rows
# is [0, 1] and the second half is [1, 0].
domain_labels = np.concatenate([np.tile([0, 1], [int(batch_size / 2), 1]),
                                np.tile([1, 0], [int(batch_size / 2), 1])],
                               axis=0)

### Model parameters

In [None]:
# Training schedule.
batch_size = 128  # samples per combined DANN batch
nb_epoch = 15     # passes of the DANN training loop

# Data description.
nb_classes = 10           # number of label classes
img_rows = img_cols = 28  # image height and width in pixels

# Convolutional feature extractor.
nb_filters = 32  # filters per convolution layer
nb_conv = 3      # convolution kernel size (rows and columns)
nb_pool = 2      # max-pooling window size (rows and columns)

### Training

In [None]:
main_input = Input(shape=(3, img_rows, img_cols), name='main_input')

# The build_* methods are instance methods, and the loop below adjusts the
# builder's optimizer, so an actual DANNBuilder instance is needed.
builder = DANNBuilder()
src_model = builder.build_source_model(main_input)
dann_model = builder.build_dann_model(main_input)

print('Training source only model')
src_model.fit(X_train, y_train, batch_size=64, nb_epoch=10, verbose=1,
              validation_data=(X_test, y_test))
print('Evaluating target samples on source-only model')
print('Accuracy: ', src_model.evaluate(XT_test, y_test)[1])

# Broken out training loop for a DANN model.
src_index_arr = np.arange(X_train.shape[0])
target_index_arr = np.arange(XT_train.shape[0])

# Every training step combines half a batch of source samples with half a
# batch of target samples.
half_batch = batch_size // 2

# Number of full source half-batches, i.e. training steps, per epoch. Using
# this as the step count keeps the progress value p below within [0, 1].
batches_per_epoch = X_train.shape[0] // half_batch
num_steps = nb_epoch * batches_per_epoch
j = 0

print('Training DANN model')

for i in range(nb_epoch):

    batches = make_batches(X_train.shape[0], half_batch)
    target_batches = make_batches(XT_train.shape[0], half_batch)

    src_gen = batch_gen(batches, src_index_arr, X_train, y_train)
    target_gen = batch_gen(target_batches, target_index_arr, XT_train, None)

    print('Epoch ', i)

    for (xb, yb) in src_gen:

        # Skip a trailing partial source batch: domain_labels is built for
        # exactly half_batch source rows plus half_batch target rows.
        if xb.shape[0] != half_batch:
            continue

        # Update learning rate as described in the paper.
        p = float(j) / num_steps
        lr = 0.01 / (1. + 10 * p)**0.75
        # Write into the optimizer's backend variable; rebinding the
        # attribute to a Python float would not update the variable the
        # compiled training function reads.
        K.set_value(builder.opt.lr, lr)

        # Fetch the next target half-batch. When the target pass is
        # exhausted, or only a partial batch is left, start a new pass and
        # take its first batch.
        xt = next(target_gen, None)
        if xt is None or xt.shape[0] != half_batch:
            target_gen = batch_gen(target_batches, target_index_arr, XT_train,
                                   None)
            xt = next(target_gen)

        # Concatenate source and target batch
        xb = np.vstack([xb, xt])

        metrics = dann_model.train_on_batch({'main_input': xb},
                                            {'classifier_output': yb,
                                            'domain_output': domain_labels},
                                            check_batch_dim=False)
        j += 1

### Evaluation

In [None]:
# Score the DANN model on the target-domain test images and report the
# resulting accuracy.
print('Evaluating target samples on DANN model')
acc = evaluate_dann(X_test=XT_test, batch_size=batch_size)
print('Accuracy:', acc)