In [None]:
from pathlib import Path
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt

## Configuration

In [None]:
# Experiment directory name (joined onto EXPERIMENTS_PATH to form TARGET).
EXPERIMENT_NAME = 'densenet169-f192-s96-m16-rgb'
# Shard held out of the train/test split and used only for the final validation.
SHARD = 'Amy'

# Fraction of the non-held-out slide files assigned to training.
TRAIN_TEST_SPLIT = 0.8
# Examples per batch for every dataset pipeline in this notebook.
BATCH_SIZE = 128
# Number of epochs passed to model.fit.
EPOCHS = 50
# Learning rate handed to the Adam optimizer.
LR = 1e-3

In [None]:
# Root directory holding one sub-directory of TFRecord examples per slide.
SLIDES_PATH = Path('./data/results')
# Root directory holding one sub-directory per experiment.
EXPERIMENTS_PATH = Path('./data/experiments')
# Directory of the current experiment, kept as a plain string.
TARGET = str(EXPERIMENTS_PATH.joinpath(EXPERIMENT_NAME))

## Prepare dataset sources

In [None]:
def get_sources(seed=0):
    """Split the experiment's slide TFRecord files into train/test/validation.

    Reads ``config.json`` from the experiment directory (``TARGET``). The
    slides of every shard other than ``SHARD`` are pooled and divided at
    random into training and testing files according to ``TRAIN_TEST_SPLIT``;
    the slides of ``SHARD`` itself are held out as the validation files.

    Args:
        seed: Seed for the random train/test split. The fixed default keeps
            the split identical across kernel restarts, so a checkpoint is
            never evaluated on files it was trained on in an earlier run.
            Pass ``None`` to draw a different split on every call.

    Returns:
        Tuple ``(training, testing, validating)``: two disjoint sets and one
        list of TFRecord file paths, all as plain ``str``.

    Raises:
        ValueError: If ``SHARD`` is not one of the shards in ``config.json``.
    """
    with open(Path(TARGET) / 'config.json') as fp:
        data = json.load(fp)

    shards = data['shards']
    if SHARD not in shards:
        raise ValueError(
            f'Invalid SHARD {SHARD!r}; available shards: {sorted(shards)}'
        )

    def record_path(slide):
        # Every slide directory stores its examples under the same file name.
        return str(SLIDES_PATH / slide / 'examples-f192-s96-m16.tfrecords')

    sources = [
        record_path(slide)
        for name, shard in shards.items()
        if name != SHARD
        for slide in shard
    ]

    size = int(len(sources) * TRAIN_TEST_SPLIT)
    # Sample indices rather than the strings themselves so the resulting sets
    # contain plain ``str`` objects instead of ``numpy.str_``.
    rng = np.random.default_rng(seed)
    picked = rng.choice(len(sources), size=size, replace=False)
    training = {sources[index] for index in picked}
    testing = set(sources).difference(training)

    validating = [record_path(slide) for slide in shards[SHARD]]

    return training, testing, validating

# Compute the split once and keep the three file collections as notebook globals.
source_split = get_sources()
train_sources, test_sources, validating = source_split
# Trailing expression: echo the (training, testing, validating) tuple as cell output.
source_split

In [None]:
def _count_records(sources):
    """Return the number of records stored in the given TFRecord files.

    Args:
        sources: Iterable of TFRecord file paths.

    Returns:
        The record count as a Python ``int``.
    """
    # Counting explicitly (rather than keeping the last ``enumerate`` index)
    # avoids reporting one record too few, and still yields 0 instead of
    # leaving the variable undefined when the dataset has no records.
    return sum(1 for _ in tf.data.TFRecordDataset(filenames=list(sources)))


# NOTE: despite their names these hold *record* counts; the training cell
# divides them by BATCH_SIZE to obtain the number of steps per epoch.
training_steps = _count_records(train_sources)
print(f'Training records: {training_steps}')

testing_steps = _count_records(test_sources)
print(f'Testing records: {testing_steps}')

## Dataset

In [None]:
# Schema of one serialized example: a scalar string feature ('data') and a
# scalar int64 feature ('label').
feature_description = dict(
    data=tf.io.FixedLenFeature(shape=[], dtype=tf.string),
    label=tf.io.FixedLenFeature(shape=[], dtype=tf.int64),
)

def _parse_function(proto):
    """Decode one serialized example into an ``(image, label)`` pair.

    Args:
        proto: A serialized example carrying the ``data`` and ``label``
            features declared in ``feature_description``.

    Returns:
        Tuple ``(data, label)``: ``data`` is a float32 tensor of shape
        ``(192, 192, 3)`` clipped to ``[0, 1]``; ``label`` is a boolean
        tensor of shape ``(1,)``.
    """
    parsed = tf.io.parse_single_example(proto, feature_description)

    # PNG bytes -> pixel values, rescaled by 1/255 into floats.
    pixels = tf.io.decode_png(parsed['data'])
    image = tf.cast(pixels, dtype=tf.float32) * (1.0 / 255.0)
    # Pin the static shape first, then clamp to the unit interval.
    image = tf.clip_by_value(tf.ensure_shape(image, (192, 192, 3)), 0.0, 1.0)

    # int64 label -> bool, reshaped to a one-element vector.
    target = tf.reshape(tf.cast(parsed['label'], dtype=tf.bool), (1,))
    return image, target

def augment(inputs, target):
    """Apply random flips and colour jitter to an image; the target is untouched.

    Args:
        inputs: Image tensor to augment.
        target: Label associated with the image; returned as-is.

    Returns:
        Tuple ``(augmented_image, target)`` where the image has been clipped
        back to ``[0, 1]`` after the random adjustments.
    """
    # Geometric augmentation: independent horizontal and vertical flips.
    flipped = tf.image.random_flip_up_down(
        tf.image.random_flip_left_right(inputs)
    )

    # Photometric augmentation, applied in a fixed order.
    brightened = tf.image.random_brightness(flipped, 0.2)
    contrasted = tf.image.random_contrast(brightened, 0.7, 1.4)
    hue_shifted = tf.image.random_hue(contrasted, 0.05)
    saturated = tf.image.random_saturation(hue_shifted, 0.7, 1.2)

    # The adjustments above may push values outside the unit interval.
    return tf.clip_by_value(saturated, 0.0, 1.0), target

In [None]:
# Training pipeline: shuffle -> decode -> repeat forever -> augment -> batch.
training = (
    tf.data.TFRecordDataset(filenames=list(train_sources))
    # Shuffle the *serialized* records before decoding them. Shuffling after
    # `_parse_function` would make the buffer hold up to 10 000 decoded
    # 192x192x3 float32 images (about 4.4 GB); the ordering semantics are the
    # same either way, only the buffer's memory footprint changes.
    .shuffle(10000)
    .map(_parse_function, tf.data.AUTOTUNE)
    # Repeat indefinitely; the epoch length is set via `steps_per_epoch`.
    .repeat()
    # Augment after `repeat` so every pass draws fresh random transforms.
    .map(augment, tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Testing pipeline: same decoding, but no shuffling and no augmentation.
testing = (
    tf.data.TFRecordDataset(filenames=list(test_sources))
    .map(_parse_function, tf.data.AUTOTUNE)
    # Repeat indefinitely; the pass length is set via `validation_steps`.
    .repeat()
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

training, testing

## Set up DenseNet model

In [None]:
from ki67.modules.cnn.utils.model import DenseNet

# Build and compile the model under a MirroredStrategy scope.
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

with strategy.scope():
    # The model, its optimizer and its metrics are all created inside the scope.
    model = DenseNet.create()
    training_optimizer = tf.keras.optimizers.Adam(LR)
    training_metrics = [
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ]
    model.compile(
        optimizer=training_optimizer,
        loss='binary_crossentropy',
        metrics=training_metrics,
    )

## Training

In [None]:
# Everything this run produces (logs, CSV history, weights) lives in <TARGET>/<SHARD>.
model_dir = Path(TARGET, SHARD)
model_dir.mkdir(parents=True, exist_ok=True)

# Callbacks: TensorBoard logs, a per-epoch CSV log, and a best-only weights checkpoint.
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=str(model_dir / 'logs'),
    histogram_freq=1,
)
logger = tf.keras.callbacks.CSVLogger(filename=str(model_dir / 'training.csv'))
checkpointer = tf.keras.callbacks.ModelCheckpoint(
    filepath=str(model_dir / 'weights.hdf5'),
    save_weights_only=True,
    save_best_only=True,
    verbose=1,
)

# Both datasets repeat indefinitely, so the length of an epoch and of a
# validation pass must be given explicitly as whole batches.
steps_per_epoch = training_steps // BATCH_SIZE
validation_steps = testing_steps // BATCH_SIZE

history = model.fit(
    training,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=testing,
    validation_steps=validation_steps,
    callbacks=[checkpointer, logger, tensorboard],
)

# Also keep the weights as they are at the end of training, separately from
# the checkpoint file written by the callback.
model.save_weights(str(model_dir / 'training-weights.hdf5'))


### Validation

In [None]:
# Held-out shard: decode and batch only -- no shuffle, repeat or augmentation.
dataset = (
    tf.data.TFRecordDataset(filenames=list(validating))
    .map(_parse_function, tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
dataset

In [None]:
# Start from a fresh model and restore the weights written by the checkpoint callback.
checkpoint_path = Path(TARGET, SHARD, 'weights.hdf5')
model = DenseNet.create()
model.load_weights(str(checkpoint_path))

In [None]:
# Compile the reloaded model with the same loss, optimizer and metrics that
# were used for training, so that `evaluate` reports the same quantities.
evaluation_metrics = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(LR),
    loss='binary_crossentropy',
    metrics=evaluation_metrics,
)

In [None]:
# Score the held-out shard; the result unpacks as the loss followed by the
# three compiled metrics (accuracy, precision, recall).
evaluation = model.evaluate(dataset, verbose=1)
loss, acc, precision, recall = evaluation
print(f'Accuracy: {(acc*100):5.2f}%')