In [1]:
import numpy as np
import os
import wget
from sklearn.model_selection import train_test_split
import tensorflow as tf
from training_utils import download_file, get_batches, read_and_decode_single_example, load_validation_data, \
    download_data, evaluate_model, get_training_data, load_weights, flatten, _conv2d_batch_norm
from inception_utils import _stem, _block_a, _block_b, _block_c, _reduce_a, _reduce_b
import argparse
from tensorboard import summary as summary_lib

  from ._conv import register_converters as _register_converters


In [2]:
epochs = 30
dataset = 9
init_model = None
how = "normal"
action = "train"
threshold = 0.5

In [3]:
# precalculated pixel mean of images
mu = 104.1353

# download the data
download_data(what=dataset)

batch_size = 32

train_files, total_records = get_training_data(what=dataset)

## Hyperparameters
# Small epsilon value for the BN transform
epsilon = 1e-8

# learning rate
epochs_per_decay = 10
starting_rate = 0.001
decay_factor = 0.80
staircase = True

# learning rate decay variables
steps_per_epoch = int(total_records / batch_size)
print("Steps per epoch:", steps_per_epoch)

# lambdas
lamC = 0.00000
lamF = 0.00250

# use dropout
dropout = False
fcdropout_rate = 0.5
convdropout_rate = 0.0
pooldropout_rate = 0.0

if how == "label":
    num_classes = 5
elif how == "normal":
    num_classes = 2
elif how == "mass":
    num_classes = 3
elif how == "benign":
    num_classes = 3

print("Number of classes:", num_classes)

Steps per epoch: 1366
Number of classes: 2


In [4]:
## Build the graph
graph = tf.Graph()

# whether to retrain model from scratch or use saved model
init = True
model_name = "vgg_16.3.01l.6"
# vgg_19.01 - attempting to recreate vgg 19 architecture
# vgg_16.02 - went to vgg 16 architecture, reducing units in fc layers
# vgg_16.2.01 - changing first conv layers to stride 2 to get dimensions down to reasonable size
# vgg_16.2.02 - using normal x-entropy instead of weighted
# vgg_16.3.01 - average pooling image before first conv, changing conv1 to stride 1

with graph.as_default():
    training = tf.placeholder(dtype=tf.bool, name="is_training")
    is_testing = tf.placeholder(dtype=bool, shape=(), name="is_testing")

    # create global step for decaying learning rate
    global_step = tf.Variable(0, trainable=False)

    learning_rate = tf.train.exponential_decay(starting_rate,
                                               global_step,
                                               steps_per_epoch * epochs_per_decay,
                                               decay_factor,
                                               staircase=staircase)

    with tf.name_scope('inputs') as scope:
        image, label = read_and_decode_single_example(train_files, label_type=how, normalize=False)

        X_def, y_def = tf.train.shuffle_batch([image, label], batch_size=batch_size, capacity=2000,
                                              min_after_dequeue=1000)

        # Placeholders
        X = tf.placeholder_with_default(X_def, shape=[None, 299, 299, 1])
        y = tf.placeholder_with_default(y_def, shape=[None])

        X = tf.cast(X, dtype=tf.float32)

        # center the pixel data
        mu = tf.constant(mu, name="pixel_mean", dtype=tf.float32)
        X = tf.subtract(X, mu, name="centered_input")

    # input stem
    stem = _stem(X, lamC, training)

    # 4 Block As
    blocka = _block_a(stem, name="a_1.1", lamC=0.0, training = training)
    blocka = _block_a(blocka, name="a_1.2", lamC=0.0, training=training)
    blocka = _block_a(blocka, name="a_1.3", lamC=0.0, training=training)
    blocka = _block_a(blocka, name="a_1.4", lamC=0.0, training=training)

    # Reduction A
    reducea = _reduce_a(blocka, "a_reduce_1", k=192, l=224, m=256, n=384, training = training)

    # 7 Block Bs
    blockb = _block_b(reducea, "b_1.1", lamC=0.0, training = training)
    blockb = _block_b(blockb, "b_1.2", lamC=0.0, training=training)
    blockb = _block_b(blockb, "b_1.3", lamC=0.0, training=training)
    blockb = _block_b(blockb, "b_1.4", lamC=0.0, training=training)
    blockb = _block_b(blockb, "b_1.5", lamC=0.0, training=training)
    blockb = _block_b(blockb, "b_1.6", lamC=0.0, training=training)
    blockb = _block_b(blockb, "b_1.7", lamC=0.0, training=training)

    # Reduction B
    reduceb = _reduce_b(blockb, name="b_reduce_1", training = training)

    # 3 Block Cs
    blockc = _block_c(reduceb, name="c_1.1", lamC=0.0, training = training)
    blockc = _block_c(blockc, name="c_1.2", lamC=0.0, training=training)
    blockc = _block_c(blockc, name="c_1.3", lamC=0.0, training=training)

    # Global Average Pooling
    global_pool = tf.layers.average_pooling2d(
            blockc,  # Input
            pool_size=(8, 8),  # Pool size: 2x2
            strides=(8, 8),  # Stride: 2
            padding='SAME',  # "same" padding
            name='global_pool'
        )

    global_pool = tf.layers.dropout(global_pool, rate=0.2, seed=103, training=training)
    
    # flatten the output
    flat_output = tf.contrib.layers.flatten(global_pool)
    
    # Output layer
    logits = tf.layers.dense(
        flat_output,
        num_classes,  # One output unit per category
        activation=None,  # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=121),
        bias_initializer=tf.zeros_initializer(),
        name="fc_logits"
    )

    # get the fully connected variables so we can only train them when retraining the network
    fc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "fc")

    # with tf.variable_scope('conv_1.1', reuse=True):
    #     conv_kernels1 = tf.get_variable('kernel')
    #     kernel_transposed = tf.transpose(conv_kernels1, [3, 0, 1, 2])
    #
    # with tf.variable_scope('visualization'):
    #     tf.summary.image('conv_1.1/filters', kernel_transposed, max_outputs=32, collections=["kernels"])

    # get the probabilites for the classes
    probabilities = tf.nn.softmax(logits, name="probabilities")

    # the probability that the scan is abnormal is 1 - probability it is normal
    abnormal_probability = (1 - probabilities[:, 0])

    if num_classes > 2:
        # the scan is abnormal if the probability is greater than the threshold
        #is_abnormal = tf.cast(tf.greater(abnormal_probability, threshold), tf.int64)

        # Compute predictions from the probabilities - if the scan is normal we ignore the other probabilities
        #predictions = is_abnormal * tf.argmax(probabilities[:,1:], axis=1, output_type=tf.int64)
        predictions = tf.argmax(logits, axis=1, output_type=tf.int64)
    else:
        predictions = tf.cast(tf.greater(abnormal_probability, threshold), tf.int32)
    
    print("Predictions:", predictions.shape)
    
    # get the accuracy
    accuracy, acc_op = tf.metrics.accuracy(
        labels=y,
        predictions=predictions,
        updates_collections=tf.GraphKeys.UPDATE_OPS,
        # metrics_collections=["summaries"],
        name="accuracy",
    )

    #########################################################
    ## Loss function options
    # Regular mean cross entropy
    mean_ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))

    #########################################################
    ## Weight the positive examples higher
    # This will weight the positive examples higher so as to improve recall
    #weights = tf.multiply(1, tf.cast(tf.greater(y, 0), tf.int32)) + 1
    #mean_ce = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits, weights=weights))

    # Add in l2 loss
    loss = mean_ce + tf.losses.get_regularization_loss()

    # Adam optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

    # Minimize cross-entropy
    train_op = optimizer.minimize(loss, global_step=global_step)

    # calculate recall
    if num_classes > 2:
        # collapse the predictions down to normal or not for our pr metrics
        zero = tf.constant(0, dtype=tf.int64)
        collapsed_predictions = tf.cast(tf.greater(predictions, zero), tf.int64)
        collapsed_labels = tf.greater(y, zero)

        recall, rec_op = tf.metrics.recall(labels=collapsed_labels, predictions=collapsed_predictions, updates_collections=tf.GraphKeys.UPDATE_OPS, name="recall")
        precision, prec_op = tf.metrics.precision(labels=collapsed_labels, predictions=collapsed_predictions, updates_collections=tf.GraphKeys.UPDATE_OPS, name="precision")

    else:
        recall, rec_op = tf.metrics.recall(labels=y, predictions=predictions, updates_collections=tf.GraphKeys.UPDATE_OPS, name="recall")
        precision, prec_op = tf.metrics.precision(labels=y, predictions=predictions, updates_collections=tf.GraphKeys.UPDATE_OPS, name="precision")

    f1_score = 2 * ((precision * recall) / (precision + recall))
    _, update_op = summary_lib.pr_curve_streaming_op(name='pr_curve',
                                                     predictions=(1 - probabilities[:, 0]),
                                                     labels=y,
                                                     updates_collections=tf.GraphKeys.UPDATE_OPS,
                                                     num_thresholds=20)

    tf.summary.scalar('recall_1', recall, collections=["summaries"])
    tf.summary.scalar('precision_1', precision, collections=["summaries"])
    tf.summary.scalar('f1_score', f1_score, collections=["summaries"])

    # Create summary hooks
    tf.summary.scalar('accuracy', accuracy, collections=["summaries"])
    tf.summary.scalar('cross_entropy', mean_ce, collections=["summaries"])
    tf.summary.scalar('learning_rate', learning_rate, collections=["summaries"])

    # add this so that the batch norm gets run
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Merge all the summaries
    merged = tf.summary.merge_all("summaries")
    kernel_summaries = tf.summary.merge_all("kernels")
    per_epoch_summaries = [[]]

    print("Graph created...")

Instructions for updating:
Use the retry module or similar alternatives.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regulariz

ValueError: Shapes (?, 1, 2) and (?,) are incompatible