In [21]:
import tensorflow as tf, re, math
import time
from tensorflow.keras.layers import Conv2D, BatchNormalization, Dense, MaxPooling2D
from tensorflow.keras import Input
from tensorflow.keras.layers import AvgPool2D, GlobalAveragePooling2D, MaxPool2D
from tensorflow.keras.models import Model
from tensorflow.keras.layers import ReLU, concatenate
import tensorflow.keras.backend as K
import numpy as np                                    
import pandas as pd 
import os
import random
import cv2
from keras_tqdm import TQDMNotebookCallback
from tensorflow.keras.preprocessing.image import ImageDataGenerator,img_to_array
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import warnings
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
warnings.filterwarnings("ignore")
import tensorflow.keras.backend as K
from functools import partial

In [22]:
DEVICE = "GPU"
if DEVICE == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        print("Could not connect to TPU")
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except _:
            print("failed to initialize TPU")
    else:
        DEVICE = "GPU"

if DEVICE != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()

if DEVICE == "GPU":
    physical_devices = tf.config.list_physical_devices('GPU')
    print(physical_devices)
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
    

AUTO     = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

Using default strategy for CPU and single GPU
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Num GPUs Available:  1
REPLICAS: 1


## Different Network Architectures:
### Generalized Sequental Network

In [23]:
class SequentialNet:
    def __init__(self, input_shape,n_classes, filters=32):
        self.input_shape = input_shape
        self.n_classes = n_classes
        self.filters = filters
        self.model = tf.keras.Sequential()
        
        # self.topology_dict = topology_dict # contains the layers = which function is called when.
    def build_self(self, topology_list):
        self.model.add(tf.keras.Input(shape=self.input_shape))
        for layer in topology_list:
            self.model.add(layer)
        self.model.add(Dense(self.n_classes))
        opt = tf.keras.optimizers.Adam(learning_rate=0.001)
        loss = tf.keras.losses.SparseCategoricalCrossentropy() 
        self.model.compile(optimizer=opt,loss=loss,metrics=['accuracy'])
        return self.model
        
        # self.model.add()


### Dense Network

In [36]:
def densenet(input_shape, n_classes, activation = "softmax", filters = 32):
  # Same sequence for each convolutional block after the input
    def bn_rl_conv(x, filters, kernel=3, strides=2):
        x = BatchNormalization()(x)
        x = ReLU()(x)
        x = Conv2D(filters, kernel, strides=strides, padding = "same")(x)
        return x
  
    def dense_block(x, repetition):
    # Each dense block has 2 convolutions with 1x1 and 3x3 kernels
    # Each block is run for the 6,12,24,16
        for _ in range(repetition):
            y = bn_rl_conv(x, 4*filters)    # Every 1x1 convolutions has 4-times the number of filters
            y = bn_rl_conv(y, filters, 3)   # But 3x3 filters are oly present once
        return x
  
    def transition_layer(x):
        # Remove channels to half of the existing channels 
        x = bn_rl_conv(x, K.int_shape(x)[-1]//2)        # 1x1 convolution layer
        x = AvgPool2D(2, strides=2, padding='same')(x)  # 2x2 average poolling layer with strid of 2
        return x

    input = Input(input_shape)
    # 1st convolution block with 64 filters of size 7x7 & a stride of 2:
    x = Conv2D(64, 7, strides = 2, padding = "same")(input)
    # Max pooling laxer with 3x3 max pooling & stride of 2
    x = MaxPool2D(3, strides=2, padding="same")(x)

    # Run 4-times trough the 6, 12, 24, 16 repetitions
    for repetition in [6,12,24,16]:
        d = dense_block(x, repetition)
        x = transition_layer(d)    

    x = GlobalAveragePooling2D()(d)
    # Final dense output layer
    output = Dense(n_classes, activation=activation)(x)
    model = Model(input, output)
    return model

## Compare to DenseNet121 from keras

## Prepare the Data
### Functions to prepare/load the data

In [37]:
np.set_printoptions(threshold=15, linewidth=80)
CLASSES = [0,1]
IMAGE_SIZE= [512,512]

def count_data_items(filenames):
    """ Count number of data items in TFRecord file
    INPUT: filenames:   names of tfrecord files
    OUTPUT:             how many files are stored in the tfrecord file 
    """
    n = [int(re.compile(r"-([0-9]*)\.").search(filename).group(1)) 
         for filename in filenames]
    return np.sum(n)

def decode_image(image_data, augment=False):
    """ Decode image, convert to float and resize
    INPUT:  image_data: tensor of image stored in TFRecords
            augment:    (boolean) for possible augmentation
    OUTPUT:             decoded, resized image
    """
    img = tf.image.decode_jpeg(image_data, channels=3)
    img = tf.cast(img, tf.float32) / 255.0  # convert image to floats in [0, 1] range
    if augment:
        img = transform(img,DIM=dim)
        img = tf.image.random_flip_left_right(img)
        #img = tf.image.random_hue(img, 0.01)
        img = tf.image.random_saturation(img, 0.7, 1.3)
        img = tf.image.random_contrast(img, 0.8, 1.2)
        img = tf.image.random_brightness(img, 0.1)
    img = tf.reshape(img, [*IMAGE_SIZE, 3]) # explicit size needed for TPU
    return img

def read_labeled_tfrecord(example):
    """ Read labeled data from tfrecord and extract image and label
    INPUT:  flexible message type read from TFREcords
    OUTPUT: dataset of (image, label) pairs
    """
    LABELED_TFREC_FORMAT = {
        'image'                        : tf.io.FixedLenFeature([], tf.string),
        'image_name'                   : tf.io.FixedLenFeature([], tf.string),
        'patient_id'                   : tf.io.FixedLenFeature([], tf.int64),
        'sex'                          : tf.io.FixedLenFeature([], tf.int64),
        'age_approx'                   : tf.io.FixedLenFeature([], tf.int64),
        'anatom_site_general'          : tf.io.FixedLenFeature([], tf.int64),
        'diagnosis'                    : tf.io.FixedLenFeature([], tf.int64),
        'target'                       : tf.io.FixedLenFeature([], tf.int64)
    } 
    example = tf.io.parse_single_example(example, LABELED_TFREC_FORMAT)
    image = decode_image(example['image'])
    print("Image: ", image)
    label = example['target']
    return image, label

def load_dataset(filenames, labeled=True, repeat=False, shuffle=False):
    """ Read from TFRecords. For optimal performance, reading from multiple files at once and disregarding data order. 
    INPUT:  filenames:  (array<string>) paths to the tfrecord files
            labeled:    (boolean) if the data is labeled or not (currently not implemented)
            repeat      (boolean) if the data should be repeated
            shuffle:    (boolean) if the data should be shuffled
    OUTPUT: labelled dataset containing all features from TFRecord file
    """
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO) # automatically interleaves reads from multiple files
    dataset = dataset.cache()
    if repeat:
        dataset = dataset.repeat()
    if shuffle:
        dataset = dataset.shuffle(2048)
        ignore_order = tf.data.Options()
        ignore_order.experimental_deterministic = False # disable order, increase speed
        dataset = dataset.with_options(ignore_order) # uses data as soon as it streams in, rather than in its original order
    dataset = dataset.map(read_labeled_tfrecord, num_parallel_calls=AUTO)
    # returns a dataset of (image, label) pairs if labeled=True or (image, id) pairs if labeled=False
    return dataset

def get_dataset(FILENAMES, repeat=False, shuffle=False):
    dataset = load_dataset(FILENAMES, True, repeat, shuffle)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(AUTO) # prefetch next batch while training (autotune prefetch buffer size)
    return dataset

### Learning Rate

In [38]:
# LEARNING RATE
def get_lr_callback(batch_size=32):
    lr_start   = 0.000005
    lr_max     = 0.00000125 * batch_size
    lr_min     = 0.000001
    lr_ramp_ep = 5
    lr_sus_ep  = 0
    lr_decay   = 0.8
   
    def lrfn(epoch):
        if epoch < lr_ramp_ep:
            lr = (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start
            
        elif epoch < lr_ramp_ep + lr_sus_ep:
            lr = lr_max
            
        else:
            lr = (lr_max - lr_min) * lr_decay**(epoch - lr_ramp_ep - lr_sus_ep) + lr_min
            
        return lr

    lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)
    return lr_callback


## Generate Layer list of Sequential Networks
### Generate a list of Layers:

In [40]:
seq_list = [
    Conv2D(32, (3, 3), activation="relu"),
    Conv2D(16, (3, 3), activation="relu"),
    Conv2D(8, (3, 3), activation="relu"),
    Dense(32, activation="sigmoid")
    ]

### Generate VGG list of Layers

In [41]:
vgg_list = [
  Conv2D(64, (3, 3), activation="relu"),
  Conv2D(64, (3, 3), activation="relu"),
  MaxPooling2D((2,2), strides=(2,2)),
  Conv2D(128, (3, 3), activation="relu"),
  Conv2D(128, (3, 3), activation="relu"),
  MaxPooling2D((2,2), strides=(2,2)),
  Conv2D(256, (3, 3), activation="relu"),
  Conv2D(256, (3, 3), activation="relu"),
  Conv2D(256, (3, 3), activation="relu"),
  Conv2D(256, (3, 3), activation="relu"),
  MaxPooling2D((2,2), strides=(2,2)),
]

## Training
### Initialization

In [42]:
EPOCHS = 5
BATCH_SIZE = 32
VERBOSE = 2
AUTO = tf.data.experimental.AUTOTUNE
#TRAINING_FILENAMES = tf.io.gfile.glob(os.getcwd()+'/train*.tfrec')
TRAINING_FILENAMES = tf.io.gfile.glob("./tfrecs/" + "train*.tfrec")
VALIDATION_FILENAMES = [TRAINING_FILENAMES.pop()]
print('There are %i train images'%count_data_items(TRAINING_FILENAMES))
#tqdm_callback = tf.callbacks.TQDMProgressBar()

There are 4142 train images


### Training DenseNet

In [43]:
K.clear_session()
# Save best model
sv = tf.keras.callbacks.ModelCheckpoint('dense_model_cath.h5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='min', save_freq='epoch')

with strategy.scope():
    dense_model = densenet((512,512,3), 2, "softmax")
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    loss = tf.keras.losses.SparseCategoricalCrossentropy() 
    dense_model.compile(optimizer=opt, loss=loss, metrics=["accuracy"])
    dense_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 512, 512, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 256, 256, 64)      9472      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 64)      0         
_________________________________________________________________
batch_normalization_12 (Batc (None, 128, 128, 64)      256       
_________________________________________________________________
re_lu_12 (ReLU)              (None, 128, 128, 64)      0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 64, 64, 32)        18464     
_________________________________________________________________
average_pooling2d (AveragePo (None, 32, 32, 32)        0     

In [66]:
print('Training...')
history = dense_model.fit(
    get_dataset(TRAINING_FILENAMES, True, True), 
    epochs=10, 
    callbacks = [sv, get_lr_callback(BATCH_SIZE)], 
    steps_per_epoch=count_data_items(TRAINING_FILENAMES)/BATCH_SIZE, 
    validation_data=get_dataset(VALIDATION_FILENAMES, False, False), 
    verbose=VERBOSE
)

Training...
Image:  Tensor("Reshape:0", shape=(512, 512, 3), dtype=float32)
Image:  Tensor("Reshape:0", shape=(512, 512, 3), dtype=float32)
Epoch 1/10
129/129 - 41s - loss: 0.2043 - accuracy: 0.9195 - val_loss: 0.2172 - val_accuracy: 0.9070
Epoch 2/10
129/129 - 40s - loss: 0.2018 - accuracy: 0.9221 - val_loss: 0.2168 - val_accuracy: 0.9081
Epoch 3/10
129/129 - 41s - loss: 0.2088 - accuracy: 0.9166 - val_loss: 0.2132 - val_accuracy: 0.9086
Epoch 4/10
129/129 - 41s - loss: 0.2004 - accuracy: 0.9243 - val_loss: 0.2616 - val_accuracy: 0.9065
Epoch 5/10
129/129 - 45s - loss: 0.2044 - accuracy: 0.9168 - val_loss: 0.2211 - val_accuracy: 0.9145
Epoch 6/10
129/129 - 46s - loss: 0.1890 - accuracy: 0.9291 - val_loss: 0.2697 - val_accuracy: 0.9065
Epoch 7/10
129/129 - 41s - loss: 0.1974 - accuracy: 0.9204 - val_loss: 0.2134 - val_accuracy: 0.9118
Epoch 8/10
129/129 - 41s - loss: 0.1969 - accuracy: 0.9202 - val_loss: 0.2283 - val_accuracy: 0.9140
Epoch 9/10
129/129 - 41s - loss: 0.1786 - accuracy: 

In [70]:
print('Loading best model...')
dense_model.load_weights('dense_model_cath.h5')
print("Done")

Loading best model...
Done


### Training VGG Net

In [72]:
K.clear_session()
# Save best model
sv = tf.keras.callbacks.ModelCheckpoint('vgg_model.h5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='min', save_freq='epoch')
with strategy.scope():
    # Generate the model:
    vgg_model = SequentialNet((512,512,3),2)
    vgg_model = vgg_model.build_self(vgg_list)    # Build the model (includes compilation)
vgg_model.summary()
print('Training...')
history = vgg_model.fit(
    get_dataset(TRAINING_FILENAMES, True, True), epochs=EPOCHS, callbacks = [sv,get_lr_callback(BATCH_SIZE)], steps_per_epoch=count_data_items(TRAINING_FILENAMES)/BATCH_SIZE, validation_data=get_dataset(VALIDATION_FILENAMES, False, False), verbose=VERBOSE)
    
print('Loading best model...')
vgg_model.load_weights('vgg_model.h5')

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_124 (Conv2D)          (None, 510, 510, 64)      1792      
_________________________________________________________________
conv2d_125 (Conv2D)          (None, 508, 508, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 254, 254, 64)      0         
_________________________________________________________________
conv2d_126 (Conv2D)          (None, 252, 252, 128)     73856     
_________________________________________________________________
conv2d_127 (Conv2D)          (None, 250, 250, 128)     147584    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 125, 125, 128)     0         
_________________________________________________________________
conv2d_128 (Conv2D)          (None, 123, 123, 256)     2

ResourceExhaustedError:  OOM when allocating tensor with shape[32,64,508,508] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node sequential/conv2d_125/Relu (defined at <ipython-input-72-93bf7a920543>:11) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_50706]

Function call stack:
train_function


### Training Sequential Network

In [73]:
K.clear_session()
# Save the best model
sv = tf.keras.callbacks.ModelCheckpoint('seq_model.h5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='min', save_freq='epoch')
with strategy.scope():
    # Generate the model:
    seq_model = SequentialNet((512,512,3),2)
    seq_model = seq_model.build_self(seq_list)
seq_model.summary()  
print('Training...')
history = seq_model.fit(
    get_dataset(TRAINING_FILENAMES, True, True), epochs=EPOCHS, callbacks = [sv,get_lr_callback(BATCH_SIZE)], steps_per_epoch=count_data_items(TRAINING_FILENAMES)/BATCH_SIZE, validation_data=get_dataset(VALIDATION_FILENAMES, False, False), verbose=VERBOSE)
    
print('Loading best model...')
model.load_weights('fold-%i.h5'%fold)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_121 (Conv2D)          (None, 510, 510, 32)      896       
_________________________________________________________________
conv2d_122 (Conv2D)          (None, 508, 508, 16)      4624      
_________________________________________________________________
conv2d_123 (Conv2D)          (None, 506, 506, 8)       1160      
_________________________________________________________________
dense_1 (Dense)              (None, 506, 506, 32)      288       
_________________________________________________________________
dense (Dense)                (None, 506, 506, 2)       66        
Total params: 7,034
Trainable params: 7,034
Non-trainable params: 0
_________________________________________________________________
Training...
Image:  Tensor("Reshape:0", shape=(512, 512, 3), dtype=float32)
Image:  Tensor("Reshape:0", shape=(512, 512,

ResourceExhaustedError:  OOM when allocating tensor with shape[8193152,32] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node sequential/dense_1/Tensordot/MatMul (defined at <ipython-input-73-b83c9c9cfb2f>:11) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_51749]

Function call stack:
train_function


## Possible Visualization (test tfrecord)

In [None]:
import matplotlib.pyplot as plt
np.set_printoptions(threshold=15, linewidth=80)
CLASSES = [0,1]

def batch_to_numpy_images_and_labels(data):
    images, labels = data
    numpy_images = images.numpy()
    numpy_labels = labels.numpy()
    #if numpy_labels.dtype == object: # binary string in this case, these are image ID strings
    #    numpy_labels = [None for _ in enumerate(numpy_images)]
    # If no labels, only image IDs, return None for labels (this is the case for test data)
    return numpy_images, numpy_labels


def display_one_flower(image, title, subplot, red=False, titlesize=16):
    plt.subplot(*subplot)
    plt.axis('off')
    plt.imshow(image)
    if len(title) > 0:
        plt.title(title, fontsize=int(titlesize) if not red else int(titlesize/1.2), color='red' if red else 'black', fontdict={'verticalalignment':'center'}, pad=int(titlesize/1.5))
    return (subplot[0], subplot[1], subplot[2]+1)
    
def display_batch_of_images(databatch, predictions=None):
    """This will work with:
    display_batch_of_images(images)
    display_batch_of_images(images, predictions)
    display_batch_of_images((images, labels))
    display_batch_of_images((images, labels), predictions)
    """

    # data
    images, labels = batch_to_numpy_images_and_labels(databatch)
    if labels is None:
        labels = [None for _ in enumerate(images)]
        
    # auto-squaring: this will drop data that does not fit into square or square-ish rectangle
    rows = int(math.sqrt(len(images)))
    cols = len(images)//rows
        
    # size and spacing
    FIGSIZE = 13.0
    SPACING = 0.1
    subplot=(rows,cols,1)
    if rows < cols:
        plt.figure(figsize=(FIGSIZE,FIGSIZE/cols*rows))
    else:
        plt.figure(figsize=(FIGSIZE/rows*cols,FIGSIZE))
    
    # display
    for i, (image, label) in enumerate(zip(images[:rows*cols], labels[:rows*cols])):
        title = label
        print("SHAPE: ", image.shape)
        dynamic_titlesize = FIGSIZE*SPACING/max(rows,cols)*40+3 # magic formula tested to work from 1x1 to 10x10 images
        subplot = display_one_flower(image, "img", subplot, not label, titlesize=dynamic_titlesize)
    
    #layout
    plt.tight_layout()
    if label is None and predictions is None:
        plt.subplots_adjust(wspace=0, hspace=0)
    else:
        plt.subplots_adjust(wspace=SPACING, hspace=SPACING)
    plt.show()