# Fully Connected Deep Neural Network
## 预处理

In [1]:
import numpy as np
from others import load_all_dataset

# Load the train/test containers; the call itself prints the dataset summary
# shown in this cell's output.
X_train, y_train, X_test, y_test = load_all_dataset()

# Array display: three decimals per float, five edge items, very wide lines
# so that long rows are not wrapped.
np.set_printoptions(
    edgeitems=5,
    linewidth=1000,
    formatter={"float": "{:.3f}".format},
)

Train data
Optical Dataset composed of
46110 source samples
50862 source background samples
438 target labeled samples
8202 target unlabeled samples
29592 target background samples
 Optical Dataset labels composed of
46110 labels of source samples
438 labels of target samples

Test data
Optical Dataset composed of
0 source samples
0 source background samples
17758 target labeled samples
0 target unlabeled samples
47275 target background samples
 Optical Dataset labels composed of
0 labels of source samples
17758 labels of target samples



In [2]:
import tensorflow as tf
from tensorflow.keras import layers

In [3]:
# Remove NaNs and flatten every sample into one feature row
class FeatureExtractor:
    """Stateless preprocessing step: clean non-finite values, then flatten samples."""

    def transform(self, X):
        '''
        Replace non-finite values and flatten each sample to a single row.

        Parameters
        ----------
        `X`: array-like of (sample, ...), e.g. ndarray of (sample, 672, 10)
            Input dataset; in the 3D case the axes are (sample, time, features).
            Zero-sample inputs such as (0, 672, 10) are accepted.

        Returns
        -------
        `X`: ndarray of (sample, product of the remaining axes), e.g. (sample, 6720)
            The cleaned dataset. NaN becomes 0.0 and +/-inf become the largest /
            smallest finite value of the dtype (`np.nan_to_num` defaults).

        Notes
        -----
        A floating-point ndarray input is cleaned IN PLACE (`copy=False`) to
        avoid duplicating large arrays, and the result may be a view of it.
        Copy the result if the caller needs independent data.
        '''
        # Use the returned array: for array-likes that are not ndarrays the
        # cleaning cannot happen in place, so the result must not be discarded.
        X = np.nan_to_num(np.asanyarray(X), copy=False)
        # Compute the row width explicitly; reshape(n, -1) cannot infer the
        # second axis when there are zero samples and raises ValueError.
        n_features = int(np.prod(X.shape[1:]))
        return X.reshape(X.shape[0], n_features)

fe = FeatureExtractor()

# Bind the cleaned, flattened splits to short module-level names
from copy import deepcopy


def _flat_copy(raw):
    """Run `raw` through the feature extractor and return an independent copy."""
    return deepcopy(fe.transform(raw))


# Training set: features go through the extractor, labels are only copied
X_source = _flat_copy(X_train.source)
y_source = deepcopy(y_train.source)
X_source_bkg = _flat_copy(X_train.source_bkg)
X_target = _flat_copy(X_train.target)
y_target = deepcopy(y_train.target)
X_target_bkg = _flat_copy(X_train.target_bkg)
X_target_unlabeled = _flat_copy(X_train.target_unlabeled)

# Test set: the flattened arrays are stored back on the X_test container
X_test.target = fe.transform(X_test.target)
X_test.target_bkg = fe.transform(X_test.target_bkg)

# Shape report
print("==== TRAIN SET ====")
print("  | X_source:", X_source.shape, end=" ; ")
print("y_source:", y_source.shape)
print("A | X_source_bkg:", X_source_bkg.shape)
print("----")
print("  | X_target:", X_target.shape, end=" ; ")
print("y_target:", y_target.shape)
print("B | X_target_bkg:", X_target_bkg.shape)
print("  | X_target_unlabeled:", X_target_unlabeled.shape)
print("==== TEST SET ====")
print("  | X_test.target:", X_test.target.shape, end=" ; ")
print("y_test.target:", y_test.target.shape)
print("B | X_test.target_bkg:", X_test.target_bkg.shape)
# Printed as an object, not a shape: the recorded output shows it is None here
print("  | X_test.target_unlabeled:", X_test.target_unlabeled)

==== TRAIN SET ====
  | X_source: (46110, 6720) ; y_source: (46110,)
A | X_source_bkg: (50862, 6720)
----
  | X_target: (438, 6720) ; y_target: (438,)
B | X_target_bkg: (29592, 6720)
  | X_target_unlabeled: (8202, 6720)
==== TEST SET ====
  | X_test.target: (17758, 6720) ; y_test.target: (17758,)
B | X_test.target_bkg: (47275, 6720)
  | X_test.target_unlabeled: None


笔记:
- batch size 越大，同样多epoch下，acc 越小

In [4]:
# Wrap the NumPy arrays as tf.data pipelines
_from_slices = tf.data.Dataset.from_tensor_slices

# Train/validation: batches of 32, repeated indefinitely, so model.fit must be
# given steps_per_epoch / validation_steps to know where an epoch ends.
# NOTE(review): no shuffle is applied, so batches follow the stored sample
# order -- confirm this is intended.
train_dataset = _from_slices((X_source, y_source)).batch(32).repeat()
valid_dataset = _from_slices((X_target, y_target)).batch(32).repeat()

# Test: left unbatched and unrepeated (one sample per element)
test_dataset = _from_slices((X_test.target, y_test.target))

# Optional augmentation, kept disabled:
# train_dataset = train_dataset.map( lambda x, y: (tf.image.random_flip_left_right(x), y) ) # array must be 3D

## 搭建网络模型
参考资料

1. [Build your first Neural Network in TensorFlow 2](https://towardsdatascience.com/building-your-first-neural-network-in-tensorflow-2-tensorflow-for-hackers-part-i-e1e2f1dfe7a0)

In [5]:
# Build the network as one list of layers handed to tf.keras.Sequential
# (a linear stack of layers grouped into a tf.keras.Model).
# https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
num_fully_connected_layers = 10

# Hidden stack: identical 256-unit ReLU layers named Layer1 .. LayerN
hidden_stack = [
    layers.Dense(256, activation="relu", name="Layer{}".format(idx))
    for idx in range(1, num_fully_connected_layers + 1)
]

model = tf.keras.Sequential(
    [layers.Flatten(input_shape=(6720,), name="Input_Layer")]
    + hidden_stack
    + [
        # Dropout in front of the output unit (its layer name is "Layer-1")
        layers.Dropout(0.5, name="Layer-1"),
        # Single sigmoid unit: one probability per sample
        layers.Dense(1, activation='sigmoid', name="Output_Layer"),
    ]
)

# Tracked metrics: precision and accuracy
# (alternative kept for reference: tf.keras.metrics.PrecisionAtRecall(recall=0.1))
tracked_metrics = [tf.keras.metrics.Precision(), "acc"]
model.compile(optimizer="adam", loss='binary_crossentropy', metrics=tracked_metrics)
# model.summary()

## 训练模型
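- With `'softmax'` in the last layer, the output is a hard 0/1-style value rather than a probability (softmax over the single output unit used here always yields exactly 1.0, so the model cannot separate the classes)
    - `'categorical_crossentropy'` returns NaN; `'binary_crossentropy'` gives acc ~ 0.1
- With `'sigmoid'` in the last layer, the output is the predicted probability of class 1
    - `'categorical_crossentropy'` returns NaN; `'binary_crossentropy'` gives acc ~ 0.8 to 0.9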

In [6]:
import datetime as dt

# One TensorBoard run directory per launch, created under ./log and named
# after the start time (minute resolution) plus the " DNN" suffix.
run_tag = dt.datetime.now().strftime("%Y-%m-%d-%H-%M DNN")
callbacks = [
    tf.keras.callbacks.TensorBoard(log_dir='./log/{}'.format(run_tag), write_images=True),
]

# Both datasets repeat forever, so the step counts define the epoch length:
# 20 training batches and 3 validation batches per epoch.
# NOTE(review): with batches of 32 this covers only part of each split per
# epoch -- confirm this is intended.
fit_options = dict(
    epochs=10,
    steps_per_epoch=20,
    validation_data=valid_dataset,
    validation_steps=3,
    # callbacks=callbacks  (disabled: no TensorBoard logs are written)
)
model.fit(train_dataset, **fit_options)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f00ad9acfd0>

## 预测概率

In [7]:
print("X_test.target.shape:", X_test.target.shape)

# predict() yields one probability per sample as a column; transposing gives
# a single row so it prints side by side with the label vector.
probabilities = model.predict(X_test.target)
y_pred = probabilities.T

print("Predicted:", y_pred, y_pred.shape)
print("True:      ", y_test.target, y_test.target.shape)

X_test.target.shape: (17758, 6720)
Predicted: [[0.000 0.000 0.000 0.000 0.000 ... 0.000 0.002 0.000 0.000 0.001]] (1, 17758)
True:       [0.000 0.000 0.000 0.000 0.000 ... 0.000 1.000 0.000 0.000 1.000] (17758,)


## 查看Tensorboard

In [8]:
%tensorboard

UsageError: Line magic function `%tensorboard` not found.
