# [Domain Adversarial Neural Network in Tensorflow](https://github.com/sghoshjr/Domain-Adversarial-Neural-Network/blob/master/DANN.py)

# 预处理
## 装载数据

In [1]:
from others import load_all_dataset, rename_dataset
X_train, y_train, X_test, y_test = load_all_dataset(show=False)
import numpy as np
np.set_printoptions(edgeitems=5,
                    linewidth=1000,
                    formatter={"float":lambda x: "{:.3f}".format(x)})

Train data
Test data


## NaN值处理

In [2]:
from numpy import newaxis
import warnings
warnings.filterwarnings("ignore")
class FeatureExtractor:

    def __init__(self):
        pass

    def transform(self, X):
        ''' Deal with NaN and flatten the matrix to size (sample, 6720).
        Executed on every input data (i.e., source, bkg, target) and passed
        the resulting arrays to `fit`and `predict` methods in :class: Classifier

        Parameters
        ----------
        `X`: ndarray of (sample, 672, 10)
            3D input dataset(sample, time, features)
        
        Returns
        -------
        `X`: ndarray of (sample, 6720)
            The filtered dataset
        '''
        #! ATTENTION
        # The idea is supposed to eliminate the common columns filled entirely 
        # by NaN. But in this competition, since we don't have access to
        # `OpticalDataset` object, it's impossible to communicate informations
        # between datasets. So, here it deletes columns that are found on public
        # dataset.
        X = np.delete(X, [3,], axis=2)
        X = X.astype(np.float64)
        
        ## 1st round
        X1, nanmean = [], []
        for i in range(X.shape[0]):
            x = X[i]
            indice = ~np.isfinite(x)
            nanmean.append(np.nanmean(x, axis=0))

            # Columns with full Nan
            col_is_nan = np.all(indice, axis=0)
            if (col_is_nan == True).any():
                X1.append(x) # deal later
                continue
            
            # Rows with full Nan
            # Unachievable. Cause we don't have access to manipulate on labels
            # row_is_nan = np.all(indice, axis=1)
            # if (row_is_nan == True).any():
            #     row = np.where(row_is_nan == True)[0]
            #     if len(row) >= x.shape[0]/4: # drop sample, /2=85%+, /4=75%+
            #         continue
            
            # Columns with partial NaN
            part_is_nan = np.any(indice, axis=0)
            if (part_is_nan == True).any():
                col = np.where(part_is_nan == True)[0]
                # part_nan[i] = col[0]
                for c in col:
                    this = x[:,c]
                    finite = this[np.isfinite(this)]
                    fill = np.repeat(finite, np.ceil(len(this)/len(finite)))[:len(this)]
                    x[:,c] = np.where(np.isfinite(this), this, fill)
            
            # Construct new array
            X1.append(x)
        X1, nanmean = np.array(X1), np.array(nanmean)

        ## 2nd round
        candidate_mean = []
        for i in range(nanmean.shape[1]):
            col = nanmean[i]
            finite = col[np.isfinite(col)]
            candidate_mean.append(finite)

        X2 = []
        for i in range(X1.shape[0]):
            x = X[i]
            indice = ~np.isfinite(x)
            # Columns with full Nan
            col_is_nan = np.all(indice, axis=0)
            if (col_is_nan == True).any():
                col = np.where(col_is_nan == True)[0]
                for c in col:
                    value = np.random.choice(candidate_mean[c])
                    x = np.nan_to_num(x, nan=value)
            X2.append(x)
        
        X = np.array(X2)

        ## Final
        X = X[:,:,:,newaxis] # For CNN, ResNet, ...
        # X = X.reshape(X.shape[0], -1) # For DNN
        # print("Expected True:", np.all(np.isfinite(X))) # expected True
        return X

fe = FeatureExtractor()

[X_source, X_source_bkg, X_target, X_target_unlabeled, X_target_bkg,
    y_source, y_target, X_test] = rename_dataset(
    fe, X_train, y_train, X_test, y_test, show_imbalance=0)

==== TRAIN SET ====
  | X_source: (46110, 672, 9, 1) ; y_source: (46110,)
A | X_source_bkg: (50862, 672, 9, 1)
----
  | X_target: (438, 672, 9, 1) ; y_target: (438,)
B | X_target_bkg: (29592, 672, 9, 1)
  | X_target_unlabeled: (8202, 672, 9, 1)
==== TEST SET ====
  | X_test.target: (17758, 672, 9, 1) ; y_test.target: (17758,)
B | X_test.target_bkg: (47275, 672, 9, 1)
  | X_test.target_unlabeled: None


# 神经网络

In [3]:
import tensorflow as tf

print(tf.__version__)

import numpy as np

from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPool2D, BatchNormalization, Dropout

import os
import shutil
import sys


#CONSTANTS
BATCH_SIZE = 32
CHANNELS = 1
EPOCH = 5


#Prepare Datasets
y_source = y_source.reshape(-1, 1)
y_target = y_target.reshape(-1, 1)
y_test.target = y_test.target.reshape(-1, 1)

print(X_target.shape)
source_dataset = tf.data.Dataset.from_tensor_slices((X_source, y_source)).shuffle(100).batch(BATCH_SIZE, drop_remainder=True)
da_dataset = tf.data.Dataset.from_tensor_slices((X_source[:438], y_source[:438], X_target, y_target)).shuffle(100).batch(BATCH_SIZE, drop_remainder=True)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test.target, y_test.target)).batch(len(y_test.target)) #Test Dataset over Target Domain
test_dataset_used = tf.data.Dataset.from_tensor_slices((X_target, y_target)).shuffle(100).batch(BATCH_SIZE, drop_remainder=True) #Test Dataset over Target (used for training)

# source_dataset = source_dataset.repeat()
# da_dataset = da_dataset.repeat()
# test_dataset = test_dataset.repeat()
# test_dataset_used = test_dataset_used.repeat()

print("source_dataset:", source_dataset)
print("da_dataset", da_dataset)
print("test_dataset", test_dataset)
print("test_dataset_used", test_dataset_used)

2.5.0
(438, 672, 9, 1)
source_dataset: <BatchDataset shapes: ((32, 672, 9, 1), (32, 1)), types: (tf.float64, tf.float32)>
da_dataset <BatchDataset shapes: ((32, 672, 9, 1), (32, 1), (32, 672, 9, 1), (32, 1)), types: (tf.float64, tf.float32, tf.float64, tf.int64)>
test_dataset <BatchDataset shapes: ((None, 672, 9, 1), (None, 1)), types: (tf.float64, tf.float32)>
test_dataset2 <BatchDataset shapes: ((32, 672, 9, 1), (32, 1)), types: (tf.float64, tf.int64)>


In [13]:
a = list(test_dataset.as_numpy_iterator())
print(len(a))
print(a[0][0].shape)
print(a[0][1].shape)

1
(17758, 672, 9, 1)
(17758, 1)


# 搭网络

In [4]:
#Gradient Reversal Layer
@tf.custom_gradient
def gradient_reverse(x, lamda=1.0):
    y = tf.identity(x)
    
    def grad(dy):
        return lamda * -dy, None
    
    return y, grad


class GradientReversalLayer(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()
    
    def call(self, x, lamda=1.0):
        return gradient_reverse(x, lamda)


class DANN(Model):
    def __init__(self):
        super().__init__()
        
        #Feature Extractor
        self.feature_extractor_layer0 = Conv2D(32, 2, activation='relu')
        self.feature_extractor_layer1 = BatchNormalization()
        self.feature_extractor_layer2 = MaxPool2D(pool_size=(2, 2),)
        
        self.feature_extractor_layer3 = Conv2D(64, 2, activation='relu')
        self.feature_extractor_layer4 = Dropout(0.5)
        self.feature_extractor_layer5 = BatchNormalization()
        self.feature_extractor_layer6 = MaxPool2D(pool_size=(2, 2),)
        
        #Label Predictor
        self.label_predictor_layer0 = Dense(100, activation='relu')
        self.label_predictor_layer1 = Dense(100, activation='relu')
        self.label_predictor_layer2 = Dense(1, activation=None)
        
        #Domain Predictor
        self.domain_predictor_layer0 = GradientReversalLayer()
        self.domain_predictor_layer1 = Dense(100, activation='relu')
        self.domain_predictor_layer2 = Dense(2, activation=None)
        
    def call(self, x, train=False, source_train=False, lamda=1.0):
        # print("x:", x)

        #Feature Extractor
        # x = tf.keras.Input(shape=(672, 9, 1), name="Input_Layer")
        x = self.feature_extractor_layer0(x)
        # x = self.feature_extractor_layer1(x, training=train)
        x = self.feature_extractor_layer2(x)
        
        x = self.feature_extractor_layer3(x)
        # x = self.feature_extractor_layer4(x, training=train)
        # x = self.feature_extractor_layer5(x, training=train)
        x = self.feature_extractor_layer6(x)
        
        print("x before feature:", x)
        feature = tf.reshape(x, [-1, 167 * 1 * 64])
        print("feature:", feature)
        
        #Label Predictor
        if source_train is True:
            feature_slice = feature
        else:
            feature_slice = tf.slice(feature, [0, 0], [feature.shape[0] // 2, -1])
        print("feature_slice", feature_slice)
        
        lp_x = self.label_predictor_layer0(feature_slice)
        lp_x = self.label_predictor_layer1(lp_x)
        l_logits = self.label_predictor_layer2(lp_x)
        print("l_logits", l_logits)
        
        #Domain Predictor
        if source_train is True:
            return l_logits
        else:
            dp_x = self.domain_predictor_layer0(feature, lamda)    #GradientReversalLayer
            dp_x = self.domain_predictor_layer1(dp_x)
            d_logits = self.domain_predictor_layer2(dp_x)
            print("d_logits", d_logits)
            
            return l_logits, d_logits


model = DANN()


def loss_func(input_logits, target_labels):
    # print("\tinput_logits:", type(input_logits), input_logits.shape)
    # print("\ttarget_labels:", type(target_labels), target_labels.shape)
    # return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=input_logits, labels=target_labels))
    # return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=input_logits, labels=target_labels))
    return tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_pred=input_logits, y_true=target_labels))

def get_loss(l_logits, labels, d_logits=None, domain=None):
    if d_logits is None:
        return loss_func(l_logits, labels)
    else:
        return loss_func(l_logits, labels) + loss_func(d_logits, domain)


model_optimizer = tf.optimizers.Adam() # tf.optimizers.SGD()

# 运行

In [5]:
domain_labels = np.vstack([np.tile([1., 0.], [BATCH_SIZE, 1]),
                           np.tile([0., 1.], [BATCH_SIZE, 1])])
domain_labels = domain_labels.astype('float32')


epoch_accuracy = tf.keras.metrics.Precision()
# epoch_accuracy = tf.keras.metrics.BinaryCrossentropy()
source_acc = []  # Source Domain Accuracy while Source-only Training
da_acc = []      # Source Domain Accuracy while DA-training
test_acc = []    # Testing Dataset (Target Domain) Accuracy 
test2_acc = []   # Target Domain (used for Training) Accuracy


@tf.function
def train_step_source(s_images, s_labels, lamda=1.0):
    pass


@tf.function
def train_step_da(s_images, s_labels, t_images=None, t_labels=None, lamda=1.0):
    print("\n---- train_step_da")
    images = tf.concat([s_images, t_images], 0)
    labels = s_labels
    # print("\timages", images, images[:5])
    # print("\tlabels", labels, labels[:5])
    
    with tf.GradientTape() as tape:
        output = model(images, train=True, source_train=False, lamda=lamda)
        
        model_loss = get_loss(output[0], labels, output[1], domain_labels)
        epoch_accuracy(output[0], labels)
        
    gradients_mdan = tape.gradient(model_loss, model.trainable_variables)
    model_optimizer.apply_gradients(zip(gradients_mdan, model.trainable_variables))


@tf.function
def test_step(t_images, t_labels):
    print("\n---- test_step")
    images = t_images
    labels = t_labels
    
    output = model(images, train=False, source_train=True)
    epoch_accuracy(output, labels)


def train(train_mode, epochs=EPOCH):
    
    if train_mode == 'source':
        dataset = source_dataset
        train_func = train_step_source
        acc_list = source_acc
    elif train_mode == 'domain-adaptation':
        dataset = da_dataset
        train_func = train_step_da
        acc_list = da_acc
    else:
        raise ValueError("Unknown training Mode")
    
    for epoch in range(epochs):
        print("\n============ EPOCH {} ============".format(epoch))
        p = float(epoch) / epochs
        lamda = 2 / (1 + np.exp(-100 * p, dtype=np.float32)) - 1
        lamda = lamda.astype('float32')

        # print("dataset", dataset)
        for batch in dataset:
            # print("\tbatch length:", len(batch), "batch[0]:", batch[0].shape)
            train_func(*batch, lamda=lamda)
        
        print("Training: Epoch {} :\t Source Accuracy : {:.3%}".format(epoch, epoch_accuracy.result()), end='  |  ')
        acc_list.append(epoch_accuracy.result())
        test()
        epoch_accuracy.reset_states()
        print("============ END EPOCH ============", end="\n")


def test():
    epoch_accuracy.reset_states()
    
    #Testing Dataset (Target Domain)
    for batch in test_dataset:
        test_step(*batch)
        
    print("[Target] Metric: {:.3%}".format(epoch_accuracy.result()), end='  |  ')
    test_acc.append(epoch_accuracy.result())
    epoch_accuracy.reset_states()
    
    #Target Domain (used for Training)
    for batch in test_dataset_used:
        test_step(*batch)
    
    print("[Target] Metric (used for training): {:.3%}".format(epoch_accuracy.result()))
    test2_acc.append(epoch_accuracy.result())
    epoch_accuracy.reset_states()


## 训练
#train('source', 5)

train('domain-adaptation', EPOCH)



---- train_step_da
x before feature: Tensor("dann/max_pooling2d_1/MaxPool:0", shape=(64, 167, 1, 64), dtype=float32)
feature: Tensor("dann/Reshape:0", shape=(64, 10688), dtype=float32)
feature_slice Tensor("dann/Slice:0", shape=(32, 10688), dtype=float32)
l_logits Tensor("dann/dense_2/BiasAdd:0", shape=(32, 1), dtype=float32)
d_logits Tensor("dann/dense_4/BiasAdd:0", shape=(64, 2), dtype=float32)

---- train_step_da
x before feature: Tensor("dann/max_pooling2d_1/MaxPool:0", shape=(64, 167, 1, 64), dtype=float32)
feature: Tensor("dann/Reshape:0", shape=(64, 10688), dtype=float32)
feature_slice Tensor("dann/Slice:0", shape=(32, 10688), dtype=float32)
l_logits Tensor("dann/dense_2/BiasAdd:0", shape=(32, 1), dtype=float32)
d_logits Tensor("dann/dense_4/BiasAdd:0", shape=(64, 2), dtype=float32)
Training: Epoch 0 :	 Source Accuracy : 100.000%  |  
---- test_step
x before feature: Tensor("dann/max_pooling2d_1/MaxPool:0", shape=(17758, 167, 1, 64), dtype=float32)
feature: Tensor("dann/Reshap

# 效果评价

In [14]:
print("X_test.target.shape:", X_test.target.shape)
# X = X_test.target.reshape(X_test.target.shape[0], -1)
# print(X.shape)


#Testing Dataset (Target Domain)
for batch in test_dataset:
    test_step(*batch)
    
print("[Target] Metric: {:.3%}".format(epoch_accuracy.result()), end='  |  ')
test_acc.append(epoch_accuracy.result())
epoch_accuracy.reset_states()



X_test.target.shape: (17758, 672, 9, 1)
[Target] Metric: 100.000%  |  

In [7]:
%tensorboard

UsageError: Line magic function `%tensorboard` not found.
