In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tqdm import trange

In [2]:
ds = np.load('dataset/8_1_0_1024.npz', allow_pickle=True)
db_train, db_test = ds['pahdb']
fp_train, fp_test = ds['fingerprint']
spec_train, spec_test = ds['spectrum']

spec_train = spec_train.values
spec_test = spec_test.values

fp_train = tf.sparse.to_dense(fp_train)
fp_test = tf.sparse.to_dense(fp_test)

train_size = db_train.shape[0]
test_size = db_test.shape[0]

train_ds = tf.data.Dataset.from_tensor_slices((spec_train, fp_train)).shuffle(train_size)
test_ds = tf.data.Dataset.from_tensor_slices((spec_test, fp_test)).batch(test_size)

In [3]:
class DiceLoss:
    def __init__(self, smooth=1e-5):
        self.smooth = smooth

    def __call__(self, y_true, y_pred):
        y_true = tf.cast(y_true, dtype=tf.float32)
        return 1 - (2 * tf.reduce_sum(y_true * y_pred, axis=-1) + self.smooth) / (
            tf.reduce_sum(y_true ** 2, axis=-1) + tf.reduce_sum(y_pred ** 2, axis=-1) + self.smooth)

In [4]:
INPUT = tf.keras.layers.Input(shape=(spec_train.shape[1],))

X = tf.keras.layers.Reshape((spec_train.shape[1], 1))(INPUT)
X = tf.keras.layers.Conv1D(32, 9, activation='relu')(X)
X = tf.keras.layers.MaxPooling1D(pool_size=2)(X)
X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Conv1D(64, 5, activation='relu')(X)
X = tf.keras.layers.MaxPooling1D(pool_size=2)(X)
X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Conv1D(128, 5, activation='relu')(X)
X = tf.keras.layers.MaxPooling1D(pool_size=2)(X)
X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Conv1D(256, 5, activation='relu')(X)
X = tf.keras.layers.MaxPooling1D(pool_size=2)(X)
X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Flatten()(X)
X = tf.keras.layers.Dropout(0.5)(X)
X = tf.keras.layers.Dense(1024, activation='relu')(X)
X = tf.keras.layers.Dropout(0.25)(X)

OUT = tf.keras.layers.Dense(fp_train.shape[1], activation='sigmoid')(X)

model = tf.keras.models.Model(inputs=INPUT, outputs=OUT)

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1024)]            0         
_________________________________________________________________
reshape (Reshape)            (None, 1024, 1)           0         
_________________________________________________________________
conv1d (Conv1D)              (None, 1016, 32)          320       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 508, 32)           0         
_________________________________________________________________
batch_normalization (BatchNo (None, 508, 32)           128       
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 504, 64)           10304     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 252, 64)           0     

In [5]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), metrics=[DiceLoss()])
model.fit(train_ds.batch(128, drop_remainder=True), epochs=20, validation_data=test_ds, verbose=2)

Epoch 1/20
25/25 - 5s - loss: 0.2119 - dice_loss: 0.8832 - val_loss: 0.4861 - val_dice_loss: 0.9885
Epoch 2/20
25/25 - 2s - loss: 0.0181 - dice_loss: 0.7593 - val_loss: 0.3162 - val_dice_loss: 0.9788
Epoch 3/20
25/25 - 2s - loss: 0.0137 - dice_loss: 0.7227 - val_loss: 0.2763 - val_dice_loss: 0.9748
Epoch 4/20
25/25 - 2s - loss: 0.0111 - dice_loss: 0.6991 - val_loss: 0.2471 - val_dice_loss: 0.9717
Epoch 5/20
25/25 - 2s - loss: 0.0104 - dice_loss: 0.6740 - val_loss: 0.2281 - val_dice_loss: 0.9686
Epoch 6/20
25/25 - 2s - loss: 0.0098 - dice_loss: 0.6543 - val_loss: 0.2133 - val_dice_loss: 0.9659
Epoch 7/20
25/25 - 2s - loss: 0.0095 - dice_loss: 0.6351 - val_loss: 0.2099 - val_dice_loss: 0.9655
Epoch 8/20
25/25 - 2s - loss: 0.0091 - dice_loss: 0.6176 - val_loss: 0.2110 - val_dice_loss: 0.9660
Epoch 9/20
25/25 - 2s - loss: 0.0089 - dice_loss: 0.6016 - val_loss: 0.2191 - val_dice_loss: 0.9680
Epoch 10/20
25/25 - 2s - loss: 0.0086 - dice_loss: 0.5917 - val_loss: 0.2307 - val_dice_loss: 0.9703

<keras.callbacks.History at 0x26628504220>

In [6]:
%%time

model.compile(loss=DiceLoss(), optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=[])
model.fit(train_ds.batch(64, drop_remainder=True), epochs=1000, validation_data=test_ds, verbose=2, callbacks=[
    tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=10, verbose=1, min_delta=1e-5, min_lr=1e-6),
    tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=1e-5, patience=50)])

# loss: 0.0204 - val_loss: 0.2536  272  655

Epoch 1/1000
50/50 - 3s - loss: 0.5846 - val_loss: 0.8209
Epoch 2/1000
50/50 - 2s - loss: 0.4645 - val_loss: 0.7545
Epoch 3/1000
50/50 - 2s - loss: 0.4065 - val_loss: 0.7556
Epoch 4/1000
50/50 - 2s - loss: 0.3560 - val_loss: 0.7549
Epoch 5/1000
50/50 - 2s - loss: 0.3100 - val_loss: 0.6581
Epoch 6/1000
50/50 - 2s - loss: 0.2642 - val_loss: 0.6184
Epoch 7/1000
50/50 - 2s - loss: 0.2189 - val_loss: 0.5882
Epoch 8/1000
50/50 - 2s - loss: 0.1803 - val_loss: 0.6015
Epoch 9/1000
50/50 - 2s - loss: 0.1496 - val_loss: 0.6411
Epoch 10/1000
50/50 - 2s - loss: 0.1270 - val_loss: 0.6041
Epoch 11/1000
50/50 - 2s - loss: 0.1097 - val_loss: 0.5858
Epoch 12/1000
50/50 - 2s - loss: 0.0970 - val_loss: 0.6550
Epoch 13/1000
50/50 - 2s - loss: 0.0862 - val_loss: 0.5341
Epoch 14/1000
50/50 - 2s - loss: 0.0783 - val_loss: 0.4925
Epoch 15/1000
50/50 - 2s - loss: 0.0712 - val_loss: 0.5795
Epoch 16/1000
50/50 - 2s - loss: 0.0659 - val_loss: 0.5489
Epoch 17/1000
50/50 - 2s - loss: 0.0614 - val_loss: 0.6147
Epoch 

Epoch 138/1000
50/50 - 2s - loss: 0.0229 - val_loss: 0.7673
Epoch 139/1000
50/50 - 2s - loss: 0.0230 - val_loss: 0.7323
Epoch 140/1000
50/50 - 2s - loss: 0.0228 - val_loss: 0.6174
Epoch 141/1000
50/50 - 2s - loss: 0.0228 - val_loss: 0.7077
Epoch 142/1000
50/50 - 2s - loss: 0.0229 - val_loss: 0.5365
Epoch 143/1000
50/50 - 2s - loss: 0.0229 - val_loss: 0.4299
Epoch 144/1000
50/50 - 2s - loss: 0.0229 - val_loss: 0.5799

Epoch 00144: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 145/1000
50/50 - 2s - loss: 0.0226 - val_loss: 0.5579
Epoch 146/1000
50/50 - 2s - loss: 0.0224 - val_loss: 0.3765
Epoch 147/1000
50/50 - 2s - loss: 0.0222 - val_loss: 0.3686
Epoch 148/1000
50/50 - 2s - loss: 0.0223 - val_loss: 0.2733
Epoch 149/1000
50/50 - 2s - loss: 0.0221 - val_loss: 0.3008
Epoch 150/1000
50/50 - 2s - loss: 0.0220 - val_loss: 0.3543
Epoch 151/1000
50/50 - 2s - loss: 0.0220 - val_loss: 0.3577
Epoch 152/1000
50/50 - 2s - loss: 0.0218 - val_loss: 0.3660
Epoch 153/1000
50/5

Epoch 271/1000
50/50 - 2s - loss: 0.0207 - val_loss: 0.2613
Epoch 272/1000
50/50 - 2s - loss: 0.0206 - val_loss: 0.3228
Epoch 273/1000
50/50 - 2s - loss: 0.0206 - val_loss: 0.3295
Epoch 274/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2665
Epoch 275/1000
50/50 - 2s - loss: 0.0206 - val_loss: 0.2759
Epoch 276/1000
50/50 - 2s - loss: 0.0206 - val_loss: 0.2993
Epoch 277/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.3225

Epoch 00277: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 278/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2759
Epoch 279/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2939
Epoch 280/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2657
Epoch 281/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2586
Epoch 282/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2549
Epoch 283/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2688
Epoch 284/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2729
Epoch 285/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2598
Epoch 286/1000
50/5

Epoch 400/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2532
Epoch 401/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2534
Epoch 402/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2533
Epoch 403/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2530
Epoch 404/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2534
Epoch 405/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2534
Epoch 406/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2532
Epoch 407/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2535
Epoch 408/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2539
Epoch 409/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2539
Epoch 410/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2536
Epoch 411/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2537
Epoch 412/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2542
Epoch 413/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2542
Epoch 414/1000
50/50 - 2s - loss: 0.0204 - val_loss: 0.2539
Epoch 415/1000
50/50 - 2s - loss: 0.0205 - val_loss: 0.2537
Epoch 416/1000
50/50 - 2s - loss: 0.0205

<keras.callbacks.History at 0x2667f7530a0>

In [7]:
def dice_coef(y_true, y_pred, smooth=1e-5):
    return (2 * np.sum(y_true * y_pred, axis=-1) + smooth) / (np.sum(y_true ** 2, axis=-1) + np.sum(y_pred ** 2, axis=-1) + smooth)

In [8]:
fp = np.r_[fp_train, fp_test]
test_pred = model.predict(spec_test, batch_size=test_size)

mat = np.zeros((test_size, train_size + test_size))

for i in trange(test_size):
    mat[i] = dice_coef(fp, test_pred[[i]])

mat.shape

100%|████████████████████████████████████████████████████████████████████████████████| 802/802 [11:37<00:00,  1.15it/s]


(802, 4007)

In [9]:
(mat.argmax(axis=1) - train_size == range(test_size)).sum()

272

In [10]:
m = mat.argsort()[:, -10:]
np.array([i + train_size in m[i] for i in range(test_size)]).sum()

655

In [11]:
# model.save_weights('weights/fp_pred_base__8_1_0_1024__655')