In [1]:
import datetime
from pathlib import Path
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras

from model import conv_block
from data import example_to_tensor, normalize, add_channel_axis, train_test_split
from utils import plot_slice, plot_animated_volume
from config import data_root_dir

%matplotlib inline
plt.rcParams["figure.figsize"] = [15, 7]

In [2]:
# from tensorflow.keras.mixed_precision import experimental as mixed_precision
#
# policy = mixed_precision.Policy("mixed_float16")
##policy = mixed_precision.Policy("float32")
# mixed_precision.set_policy(policy)
# print("Compute dtype: %s" % policy.compute_dtype)
# print("Variable dtype: %s" % policy.variable_dtype)

In [3]:
downscaling = 2

# For every supported downscaling factor:
# (model input shape, glob of negative TFRecords, glob of positive TFRecords).
_downscaling_presets = {
    4: ((24, 128, 128, 1), "CT-0-0.25/*.tfrecord", "CT-[1-4]-0.25/*.tfrecord"),
    2: ((48, 256, 256, 1), "CT-0-0.5/*.tfrecord", "CT-[1-4]-0.5/*.tfrecord"),
    1: ((96, 512, 512, 1), "CT-0/*.tfrecord", "CT-[1-4]/*.tfrecord"),
}
if downscaling not in _downscaling_presets:
    raise RuntimeError("Downscaling not supported")
input_shape, neg_tfrecord_glob, pos_tfrecord_glob = _downscaling_presets[downscaling]

# Training hyperparameters.
epochs = 1000
patience = 20
batch_size = 4
learning_rate = 1e-4
dropout_rate = 0.0
seed = 5
# Fraction held out for validation, taken from what remains after the test split.
val_perc = 0.12
# Fraction of the whole dataset held out for testing.
test_perc = 0.1

In [4]:
# Sorted on purpose: glob yields paths in filesystem order, which is not
# guaranteed to be stable. A fixed file order keeps the seeded dataset split
# reproducible across machines and runs.
neg_tfrecord_fnames = sorted(
    str(p) for p in Path(data_root_dir).glob(neg_tfrecord_glob)
)
# Per-record preprocessing with the project helpers, applied in this order.
neg_x = (
    tf.data.TFRecordDataset(neg_tfrecord_fnames)
    .map(example_to_tensor, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .map(normalize, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .map(add_channel_axis, num_parallel_calls=tf.data.experimental.AUTOTUNE)
)
# num_neg = sum(1 for _ in neg_x)
# NOTE(review): hard-coded count, presumably the result of the full pass
# commented out above; it must be kept in sync with the data on disk.
num_neg = 254
print(f"Number of negative samples: {num_neg}")
neg_x

Number of negative samples: 254


<ParallelMapDataset shapes: (None, None, None, 1), types: tf.float32>

In [5]:
# Sorted on purpose: glob yields paths in filesystem order, which is not
# guaranteed to be stable. A fixed file order keeps the seeded dataset split
# reproducible across machines and runs.
pos_tfrecord_fnames = sorted(
    str(p) for p in Path(data_root_dir).glob(pos_tfrecord_glob)
)
# Per-record preprocessing with the project helpers, applied in this order.
pos_x = (
    tf.data.TFRecordDataset(pos_tfrecord_fnames)
    .map(example_to_tensor, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .map(normalize, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .map(add_channel_axis, num_parallel_calls=tf.data.experimental.AUTOTUNE)
)
# num_pos = sum(1 for _ in pos_x)
# NOTE(review): hard-coded count, presumably the result of the full pass
# commented out above; it must be kept in sync with the data on disk.
num_pos = 856
print(f"Number of positive samples: {num_pos}")
pos_x

Number of positive samples: 856


<ParallelMapDataset shapes: (None, None, None, 1), types: tf.float32>

In [6]:
# One int8 label of shape (1,) with value 0 per negative sample, paired with
# the negative volumes element by element.
neg_y = tf.data.Dataset.from_tensor_slices(tf.zeros([num_neg, 1], dtype=tf.int8))
neg_dataset = tf.data.Dataset.zip((neg_x, neg_y))
neg_dataset

<ZipDataset shapes: ((None, None, None, 1), (1,)), types: (tf.float32, tf.int8)>

In [7]:
# One int8 label of shape (1,) with value 1 per positive sample, paired with
# the positive volumes element by element.
pos_y = tf.data.Dataset.from_tensor_slices(tf.ones([num_pos, 1], dtype=tf.int8))
pos_dataset = tf.data.Dataset.zip((pos_x, pos_y))
pos_dataset

<ZipDataset shapes: ((None, None, None, 1), (1,)), types: (tf.float32, tf.int8)>

In [8]:
# Full dataset: all negatives followed by all positives.
dataset = neg_dataset.concatenate(pos_dataset)

# First split: carve the test set out of the full dataset.
dataset, test_dataset = train_test_split(
    dataset, test_perc=test_perc, cardinality=(num_pos + num_neg), seed=seed
)
test_dataset = test_dataset.padded_batch(1, (input_shape, (1,)))

# Second split: carve the validation set out of what is left.
train_dataset, val_dataset = train_test_split(
    dataset, test_perc=val_perc, cardinality=None, seed=seed
)

val_dataset = val_dataset.padded_batch(
    batch_size, (input_shape, (1,)), drop_remainder=True
)
val_dataset = val_dataset.cache()
val_dataset = val_dataset.prefetch(tf.data.experimental.AUTOTUNE)

train_dataset = train_dataset.padded_batch(
    batch_size, (input_shape, (1,)), drop_remainder=True
)
# Cache first, shuffle afterwards: caching after the shuffle would freeze a
# single ordering instead of reshuffling every epoch.
# NOTE(review): batches are assembled and cached before shuffling, so the
# shuffle reorders whole batches; the content of each batch is the same in
# every epoch.
train_dataset = train_dataset.cache()
train_dataset = train_dataset.shuffle(buffer_size=64, reshuffle_each_iteration=True)
train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
train_dataset

<PrefetchDataset shapes: ((4, 48, 256, 256, 1), (4, 1)), types: (tf.float32, tf.int8)>

In [9]:
def count_labels(dataset):
    """Return a dictionary mapping each label value to its number of occurrences.

    `dataset` is a batched dataset of (sample, label) pairs. It is unbatched
    and the first component of every label is counted.

    Keys are converted to built-in `int` so the dictionary prints as
    `{0: ..., 1: ...}` regardless of how the installed NumPy renders its
    scalar types.
    """
    return dict(Counter(int(label.numpy()[0]) for _, label in dataset.unbatch()))


# Label distribution of each split, one header line and one count line per split.
print(
    f"Train labels:\n\t{count_labels(train_dataset)}",
    f"Validation labels:\n\t{count_labels(val_dataset)}",
    f"Test labels:\n\t{count_labels(test_dataset)}",
    sep="\n",
)

Train labels:
	{0: 203, 1: 677}
Validation labels:
	{1: 95, 0: 21}
Test labels:
	{0: 30, 1: 81}


In [10]:
inputs = keras.layers.Input(input_shape)

# Three convolutional stages, doubling the number of filters at each stage.
x = inputs
for n_filters in (32, 64, 128):
    x = conv_block(x, filters=n_filters, dropout_rate=dropout_rate)

# Collapse the feature volume to a vector and feed a SELU dense head.
x = keras.layers.GlobalAveragePooling3D()(x)
x = keras.layers.Dense(
    512,
    activation="selu",
    kernel_initializer="lecun_normal",
    bias_initializer="lecun_normal",
)(x)
x = keras.layers.AlphaDropout(dropout_rate)(x)

# Single sigmoid unit for the binary output.
outputs = keras.layers.Dense(1, activation="sigmoid")(x)

cnn = keras.Model(inputs, outputs, name="baseline-3dcnn")
cnn.summary()

Model: "baseline-3dcnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 48, 256, 256, 1)] 0         
_________________________________________________________________
conv3d (Conv3D)              (None, 48, 256, 256, 32)  896       
_________________________________________________________________
alpha_dropout (AlphaDropout) (None, 48, 256, 256, 32)  0         
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 24, 128, 128, 32)  0         
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 24, 128, 128, 64)  55360     
_________________________________________________________________
alpha_dropout_1 (AlphaDropou (None, 24, 128, 128, 64)  0         
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 12, 64, 64, 64) 

In [11]:
# Binary cross-entropy with Adam. The metrics cover the confusion-matrix
# counts plus the usual derived scores.
cnn.compile(
    optimizer=keras.optimizers.Adam(learning_rate),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[
        metric_cls(name=metric_name)
        for metric_cls, metric_name in (
            (keras.metrics.TruePositives, "tp"),
            (keras.metrics.FalsePositives, "fp"),
            (keras.metrics.TrueNegatives, "tn"),
            (keras.metrics.FalseNegatives, "fn"),
            (keras.metrics.BinaryAccuracy, "accuracy"),
            (keras.metrics.Precision, "precision"),
            (keras.metrics.Recall, "recall"),
            (keras.metrics.AUC, "auc"),
        )
    ],
)

In [12]:
# Metric that drives both checkpointing and early stopping (higher is better).
monitor_metric = "val_auc"

# A single timestamp ties together the checkpoint file and the log directory.
start_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
best_checkpoint = f"models/baseline-{start_time}.h5"
log_dir = f"logs/baseline-{start_time}"

# Make sure the checkpoint directory exists before the first save.
Path(best_checkpoint).parent.mkdir(parents=True, exist_ok=True)

# Record the hyperparameters BEFORE training starts and flush them to disk,
# so that a run which is interrupted or crashes can still be identified in
# TensorBoard.
file_writer = tf.summary.create_file_writer(log_dir)
with file_writer.as_default():
    tf.summary.text(
        "Hyperparameters",
        f"{seed=}; "
        f"{downscaling=}; "
        f"{epochs=}; "
        f"{patience=}; "
        f"{batch_size=}; "
        f"{learning_rate=}; "
        f"{dropout_rate=}; "
        f"{val_perc=}; "
        f"{test_perc=}",
        step=0,
    )
file_writer.flush()

# Keep only the best model on disk.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    best_checkpoint, monitor=monitor_metric, mode="max", verbose=1, save_best_only=True
)
# Stop once the monitored metric has not improved for `patience` epochs.
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor=monitor_metric, patience=patience, mode="max"
)
tensorboard_cb = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    write_graph=False,
    profile_batch=0,
)
cnn.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=epochs,
    callbacks=[checkpoint_cb, early_stopping_cb, tensorboard_cb],
)
# Training ends `patience` epochs after the best one, so continue with the
# best saved checkpoint instead of the final weights.
cnn = keras.models.load_model(best_checkpoint)

Epoch 1/1000
    220/Unknown - 47s 213ms/step - loss: 0.5907 - tp: 654.0000 - fp: 196.0000 - tn: 7.0000 - fn: 23.0000 - accuracy: 0.7511 - precision: 0.7694 - recall: 0.9660 - auc: 0.5086
Epoch 00001: val_auc improved from -inf to 0.50175, saving model to models/baseline-20201025-120509.h5
Epoch 2/1000
Epoch 00002: val_auc improved from 0.50175 to 0.52682, saving model to models/baseline-20201025-120509.h5
Epoch 3/1000
Epoch 00003: val_auc did not improve from 0.52682
Epoch 4/1000
Epoch 00004: val_auc did not improve from 0.52682
Epoch 5/1000
Epoch 00005: val_auc improved from 0.52682 to 0.55013, saving model to models/baseline-20201025-120509.h5
Epoch 6/1000
Epoch 00006: val_auc did not improve from 0.55013
Epoch 7/1000
Epoch 00007: val_auc did not improve from 0.55013
Epoch 8/1000
Epoch 00008: val_auc did not improve from 0.55013
Epoch 9/1000
Epoch 00009: val_auc did not improve from 0.55013
Epoch 10/1000
Epoch 00010: val_auc did not improve from 0.55013
Epoch 11/1000
Epoch 00011: va

Epoch 14/1000
Epoch 00014: val_auc did not improve from 0.55013
Epoch 15/1000
Epoch 00015: val_auc did not improve from 0.55013
Epoch 16/1000
Epoch 00016: val_auc did not improve from 0.55013
Epoch 17/1000
Epoch 00017: val_auc did not improve from 0.55013
Epoch 18/1000
Epoch 00018: val_auc did not improve from 0.55013
Epoch 19/1000
Epoch 00019: val_auc did not improve from 0.55013
Epoch 20/1000
Epoch 00020: val_auc did not improve from 0.55013
Epoch 21/1000
Epoch 00021: val_auc did not improve from 0.55013
Epoch 22/1000
Epoch 00022: val_auc improved from 0.55013 to 0.55739, saving model to models/baseline-20201025-120509.h5
Epoch 23/1000
Epoch 00023: val_auc did not improve from 0.55739
Epoch 24/1000
Epoch 00024: val_auc did not improve from 0.55739
Epoch 25/1000
Epoch 00025: val_auc did not improve from 0.55739
Epoch 26/1000
Epoch 00026: val_auc did not improve from 0.55739


Epoch 27/1000
Epoch 00027: val_auc did not improve from 0.55739
Epoch 28/1000
Epoch 00028: val_auc did not improve from 0.55739
Epoch 29/1000
Epoch 00029: val_auc did not improve from 0.55739
Epoch 30/1000
Epoch 00030: val_auc did not improve from 0.55739
Epoch 31/1000
Epoch 00031: val_auc improved from 0.55739 to 0.57093, saving model to models/baseline-20201025-120509.h5
Epoch 32/1000
Epoch 00032: val_auc did not improve from 0.57093
Epoch 33/1000
Epoch 00033: val_auc did not improve from 0.57093
Epoch 34/1000
Epoch 00034: val_auc did not improve from 0.57093
Epoch 35/1000
Epoch 00035: val_auc did not improve from 0.57093
Epoch 36/1000
Epoch 00036: val_auc did not improve from 0.57093
Epoch 37/1000
Epoch 00037: val_auc did not improve from 0.57093
Epoch 38/1000
Epoch 00038: val_auc did not improve from 0.57093
Epoch 39/1000
Epoch 00039: val_auc did not improve from 0.57093


Epoch 40/1000
Epoch 00040: val_auc improved from 0.57093 to 0.58120, saving model to models/baseline-20201025-120509.h5
Epoch 41/1000
Epoch 00041: val_auc did not improve from 0.58120
Epoch 42/1000
Epoch 00042: val_auc did not improve from 0.58120
Epoch 43/1000
Epoch 00043: val_auc did not improve from 0.58120
Epoch 44/1000
Epoch 00044: val_auc did not improve from 0.58120
Epoch 45/1000
Epoch 00045: val_auc did not improve from 0.58120
Epoch 46/1000
Epoch 00046: val_auc did not improve from 0.58120
Epoch 47/1000
Epoch 00047: val_auc did not improve from 0.58120
Epoch 48/1000
Epoch 00048: val_auc did not improve from 0.58120
Epoch 49/1000
Epoch 00049: val_auc did not improve from 0.58120
Epoch 50/1000
Epoch 00050: val_auc did not improve from 0.58120
Epoch 51/1000
Epoch 00051: val_auc improved from 0.58120 to 0.59799, saving model to models/baseline-20201025-120509.h5
Epoch 52/1000
Epoch 00052: val_auc did not improve from 0.59799


Epoch 53/1000
Epoch 00053: val_auc did not improve from 0.59799
Epoch 54/1000
Epoch 00054: val_auc did not improve from 0.59799
Epoch 55/1000
Epoch 00055: val_auc did not improve from 0.59799
Epoch 56/1000
Epoch 00056: val_auc did not improve from 0.59799
Epoch 57/1000
Epoch 00057: val_auc did not improve from 0.59799
Epoch 58/1000
Epoch 00058: val_auc did not improve from 0.59799
Epoch 59/1000
Epoch 00059: val_auc did not improve from 0.59799
Epoch 60/1000
Epoch 00060: val_auc did not improve from 0.59799
Epoch 61/1000
Epoch 00061: val_auc did not improve from 0.59799
Epoch 62/1000
Epoch 00062: val_auc did not improve from 0.59799
Epoch 63/1000
Epoch 00063: val_auc did not improve from 0.59799
Epoch 64/1000
Epoch 00064: val_auc did not improve from 0.59799
Epoch 65/1000
Epoch 00065: val_auc did not improve from 0.59799


Epoch 66/1000
Epoch 00066: val_auc did not improve from 0.59799
Epoch 67/1000
Epoch 00067: val_auc did not improve from 0.59799
Epoch 68/1000
Epoch 00068: val_auc did not improve from 0.59799
Epoch 69/1000
Epoch 00069: val_auc did not improve from 0.59799
Epoch 70/1000
Epoch 00070: val_auc did not improve from 0.59799
Epoch 71/1000
Epoch 00071: val_auc improved from 0.59799 to 0.59950, saving model to models/baseline-20201025-120509.h5
Epoch 72/1000
Epoch 00072: val_auc did not improve from 0.59950
Epoch 73/1000
Epoch 00073: val_auc did not improve from 0.59950
Epoch 74/1000
Epoch 00074: val_auc did not improve from 0.59950
Epoch 75/1000
Epoch 00075: val_auc improved from 0.59950 to 0.60652, saving model to models/baseline-20201025-120509.h5
Epoch 76/1000
Epoch 00076: val_auc did not improve from 0.60652
Epoch 77/1000
Epoch 00077: val_auc did not improve from 0.60652
Epoch 78/1000
Epoch 00078: val_auc did not improve from 0.60652


Epoch 79/1000
Epoch 00079: val_auc improved from 0.60652 to 0.60852, saving model to models/baseline-20201025-120509.h5
Epoch 80/1000
Epoch 00080: val_auc did not improve from 0.60852
Epoch 81/1000
Epoch 00081: val_auc did not improve from 0.60852
Epoch 82/1000
Epoch 00082: val_auc did not improve from 0.60852
Epoch 83/1000
Epoch 00083: val_auc did not improve from 0.60852
Epoch 84/1000
Epoch 00084: val_auc improved from 0.60852 to 0.61353, saving model to models/baseline-20201025-120509.h5
Epoch 85/1000
Epoch 00085: val_auc did not improve from 0.61353
Epoch 86/1000
Epoch 00086: val_auc improved from 0.61353 to 0.62030, saving model to models/baseline-20201025-120509.h5
Epoch 87/1000
Epoch 00087: val_auc did not improve from 0.62030
Epoch 88/1000
Epoch 00088: val_auc did not improve from 0.62030
Epoch 89/1000
Epoch 00089: val_auc did not improve from 0.62030
Epoch 90/1000
Epoch 00090: val_auc did not improve from 0.62030
Epoch 91/1000
Epoch 00091: val_auc did not improve from 0.62030


Epoch 92/1000
Epoch 00092: val_auc did not improve from 0.62030
Epoch 93/1000
Epoch 00093: val_auc improved from 0.62030 to 0.62757, saving model to models/baseline-20201025-120509.h5
Epoch 94/1000
Epoch 00094: val_auc did not improve from 0.62757
Epoch 95/1000
Epoch 00095: val_auc did not improve from 0.62757
Epoch 96/1000
Epoch 00096: val_auc did not improve from 0.62757
Epoch 97/1000
Epoch 00097: val_auc improved from 0.62757 to 0.64737, saving model to models/baseline-20201025-120509.h5
Epoch 98/1000
Epoch 00098: val_auc did not improve from 0.64737
Epoch 99/1000
Epoch 00099: val_auc did not improve from 0.64737
Epoch 100/1000
Epoch 00100: val_auc did not improve from 0.64737
Epoch 101/1000
Epoch 00101: val_auc did not improve from 0.64737
Epoch 102/1000
Epoch 00102: val_auc did not improve from 0.64737
Epoch 103/1000
Epoch 00103: val_auc did not improve from 0.64737
Epoch 104/1000
Epoch 00104: val_auc improved from 0.64737 to 0.65263, saving model to models/baseline-20201025-12050

Epoch 105/1000
Epoch 00105: val_auc did not improve from 0.65263
Epoch 106/1000
Epoch 00106: val_auc did not improve from 0.65263
Epoch 107/1000
Epoch 00107: val_auc did not improve from 0.65263
Epoch 108/1000
Epoch 00108: val_auc did not improve from 0.65263
Epoch 109/1000
Epoch 00109: val_auc did not improve from 0.65263
Epoch 110/1000
Epoch 00110: val_auc did not improve from 0.65263
Epoch 111/1000
Epoch 00111: val_auc did not improve from 0.65263
Epoch 112/1000
Epoch 00112: val_auc did not improve from 0.65263
Epoch 113/1000
Epoch 00113: val_auc did not improve from 0.65263
Epoch 114/1000
Epoch 00114: val_auc did not improve from 0.65263
Epoch 115/1000
Epoch 00115: val_auc improved from 0.65263 to 0.65815, saving model to models/baseline-20201025-120509.h5
Epoch 116/1000
Epoch 00116: val_auc did not improve from 0.65815
Epoch 117/1000
Epoch 00117: val_auc improved from 0.65815 to 0.66266, saving model to models/baseline-20201025-120509.h5


Epoch 118/1000
Epoch 00118: val_auc did not improve from 0.66266
Epoch 119/1000
Epoch 00119: val_auc did not improve from 0.66266
Epoch 120/1000
Epoch 00120: val_auc did not improve from 0.66266
Epoch 121/1000
Epoch 00121: val_auc did not improve from 0.66266
Epoch 122/1000
Epoch 00122: val_auc did not improve from 0.66266
Epoch 123/1000
Epoch 00123: val_auc did not improve from 0.66266
Epoch 124/1000
Epoch 00124: val_auc did not improve from 0.66266
Epoch 125/1000
Epoch 00125: val_auc improved from 0.66266 to 0.66541, saving model to models/baseline-20201025-120509.h5
Epoch 126/1000
Epoch 00126: val_auc improved from 0.66541 to 0.66566, saving model to models/baseline-20201025-120509.h5
Epoch 127/1000
Epoch 00127: val_auc did not improve from 0.66566
Epoch 128/1000
Epoch 00128: val_auc did not improve from 0.66566
Epoch 129/1000
Epoch 00129: val_auc did not improve from 0.66566
Epoch 130/1000
Epoch 00130: val_auc did not improve from 0.66566


Epoch 131/1000
Epoch 00131: val_auc improved from 0.66566 to 0.68822, saving model to models/baseline-20201025-120509.h5
Epoch 132/1000
Epoch 00132: val_auc did not improve from 0.68822
Epoch 133/1000
Epoch 00133: val_auc did not improve from 0.68822
Epoch 134/1000
Epoch 00134: val_auc did not improve from 0.68822
Epoch 135/1000
Epoch 00135: val_auc improved from 0.68822 to 0.69073, saving model to models/baseline-20201025-120509.h5
Epoch 136/1000
Epoch 00136: val_auc did not improve from 0.69073
Epoch 137/1000
Epoch 00137: val_auc did not improve from 0.69073
Epoch 138/1000
Epoch 00138: val_auc did not improve from 0.69073
Epoch 139/1000
Epoch 00139: val_auc did not improve from 0.69073
Epoch 140/1000
Epoch 00140: val_auc did not improve from 0.69073
Epoch 141/1000
Epoch 00141: val_auc did not improve from 0.69073
Epoch 142/1000
Epoch 00142: val_auc improved from 0.69073 to 0.69348, saving model to models/baseline-20201025-120509.h5
Epoch 143/1000
Epoch 00143: val_auc did not improve 

Epoch 144/1000
Epoch 00144: val_auc did not improve from 0.69348
Epoch 145/1000
Epoch 00145: val_auc did not improve from 0.69348
Epoch 146/1000
Epoch 00146: val_auc did not improve from 0.69348
Epoch 147/1000
Epoch 00147: val_auc did not improve from 0.69348
Epoch 148/1000
Epoch 00148: val_auc did not improve from 0.69348
Epoch 149/1000
Epoch 00149: val_auc did not improve from 0.69348
Epoch 150/1000
Epoch 00150: val_auc did not improve from 0.69348
Epoch 151/1000
Epoch 00151: val_auc did not improve from 0.69348
Epoch 152/1000
Epoch 00152: val_auc did not improve from 0.69348
Epoch 153/1000
Epoch 00153: val_auc did not improve from 0.69348
Epoch 154/1000
Epoch 00154: val_auc did not improve from 0.69348
Epoch 155/1000
Epoch 00155: val_auc improved from 0.69348 to 0.69900, saving model to models/baseline-20201025-120509.h5
Epoch 156/1000
Epoch 00156: val_auc did not improve from 0.69900


Epoch 157/1000
Epoch 00157: val_auc did not improve from 0.69900
Epoch 158/1000
Epoch 00158: val_auc did not improve from 0.69900
Epoch 159/1000
Epoch 00159: val_auc improved from 0.69900 to 0.71178, saving model to models/baseline-20201025-120509.h5
Epoch 160/1000
Epoch 00160: val_auc did not improve from 0.71178
Epoch 161/1000
Epoch 00161: val_auc did not improve from 0.71178
Epoch 162/1000
Epoch 00162: val_auc improved from 0.71178 to 0.72682, saving model to models/baseline-20201025-120509.h5
Epoch 163/1000
Epoch 00163: val_auc did not improve from 0.72682
Epoch 164/1000
Epoch 00164: val_auc did not improve from 0.72682
Epoch 165/1000
Epoch 00165: val_auc did not improve from 0.72682
Epoch 166/1000
Epoch 00166: val_auc did not improve from 0.72682
Epoch 167/1000
Epoch 00167: val_auc did not improve from 0.72682
Epoch 168/1000
Epoch 00168: val_auc did not improve from 0.72682
Epoch 169/1000
Epoch 00169: val_auc did not improve from 0.72682


Epoch 170/1000
Epoch 00170: val_auc did not improve from 0.72682
Epoch 171/1000
Epoch 00171: val_auc did not improve from 0.72682
Epoch 172/1000
Epoch 00172: val_auc did not improve from 0.72682
Epoch 173/1000
Epoch 00173: val_auc did not improve from 0.72682
Epoch 174/1000
Epoch 00174: val_auc did not improve from 0.72682
Epoch 175/1000
Epoch 00175: val_auc did not improve from 0.72682
Epoch 176/1000
Epoch 00176: val_auc did not improve from 0.72682
Epoch 177/1000
Epoch 00177: val_auc did not improve from 0.72682
Epoch 178/1000
Epoch 00178: val_auc did not improve from 0.72682
Epoch 179/1000
Epoch 00179: val_auc did not improve from 0.72682
Epoch 180/1000
Epoch 00180: val_auc did not improve from 0.72682
Epoch 181/1000
Epoch 00181: val_auc did not improve from 0.72682
Epoch 182/1000
Epoch 00182: val_auc did not improve from 0.72682


In [None]:
# cnn = keras.models.load_model("models/baseline-20201025-005657.h5")

In [15]:
# Evaluate `cnn` on the held-out test split and return the metrics as a dict.
# NOTE(review): the stored output of this cell has tp + fp + tn + fn = 880,
# which is the size of the training split, not of the 111-sample test split.
# The output looks stale; re-run this cell.
cnn.evaluate(x=test_dataset, return_dict=True, verbose=1)



{'loss': 0.3771935999393463,
 'tp': 663.0,
 'fp': 129.0,
 'tn': 74.0,
 'fn': 14.0,
 'accuracy': 0.8374999761581421,
 'precision': 0.8371211886405945,
 'recall': 0.9793205261230469,
 'auc': 0.8713499903678894}

In [None]:
# Take one (volume, label) batch from the test split; raise the skip count
# to look at a different sample.
x, y = next(iter(test_dataset.skip(0)))
prediction = cnn(x, training=False)
print(f"real: {y.numpy()}, prediction: {prediction.numpy()}")
# Drop the batch axis before animating the volume.
plot_animated_volume(x[0], fps=2)

In [17]:
def prediction_bias(dataset, model=None):
    """Compare the average label with the average prediction on `dataset`.

    The prediction bias is reported as the absolute difference
        |average_labels - average_predictions|

    It should be near zero.

    Args:
        dataset: batched dataset of (volume, label) pairs. Only the first
            component of each label is read.
        model: callable invoked as `model(x, training=False)`. Defaults to
            the notebook-level `cnn`.

    Returns:
        The tuple (label_avg, prediction_avg, prediction_bias).
    """
    if model is None:
        model = cnn

    samples = dataset.unbatch()
    label_avg = np.mean([label.numpy()[0] for _, label in samples])

    # Score one volume at a time, padded to the notebook-level `input_shape`.
    x_dataset = samples.map(lambda x, _: x).padded_batch(1, input_shape)
    prediction_avg = np.mean(
        [model(x, training=False).numpy()[0][0] for x in x_dataset]
    )
    return label_avg, prediction_avg, np.abs(label_avg - prediction_avg)

In [20]:
# Label mean, prediction mean and their absolute gap on the test split.
l, p, b = prediction_bias(test_dataset)
print(
    f"Labels average: {l}",
    f"Predictions average: {p}",
    f"Prediction bias: {b}",
    sep="\n",
)

Labels average: 0.7297297297297297
Predictions average: 0.8031964898109436
Prediction bias: 0.07346676008121389
