In [None]:
# This notebook is meant to be run on kaggle as part of kaggle competitions.
import os

# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session



# Imports and Loading Dataset

Import the required modules and look up the dataset's Google Cloud Storage (GCS) bucket so that the TPU can read the data.


In [None]:
import math, re
import numpy as np
import pandas as pd
import tensorflow as tf

# Record which TensorFlow version this run is using.
print('tensorflow version  ' + tf.__version__)

In [None]:
# load competition data
from kaggle_datasets import KaggleDatasets

# Add your data into your input directory in Kaggle.
# get_gcs_path returns the Google Cloud Storage location of the dataset, which is
# where the TPU reads the TFRecords from.
GCS_DS_PATH = KaggleDatasets().get_gcs_path('tpu-getting-started')
print(GCS_DS_PATH)

# TPUs and Distribution Strategy

Getting a distribution strategy for calculation on TPUs. TPUs have 8 cores, each of which works sort of like a GPU. We will make TensorFlow use all of these cores at once using a distribution strategy.

In [None]:
# Try to detect a TPU. No arguments are needed on Kaggle because Kaggle defines
# TPU_NAME in the environment variables.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # The resolver could not find a TPU for this session.
    tpu = None

# With a TPU, build a TPU distribution strategy; otherwise fall back to the
# default (single-device) strategy so the rest of the notebook still runs.
if tpu:
    tf.config.experimental_connect_to_cluster(tpu)  # connect to the TPU cluster
    tf.tpu.experimental.initialize_tpu_system(tpu)  # initialize the cluster
    # Prefer the stable tf.distribute.TPUStrategy; older TensorFlow releases only
    # expose it under tf.distribute.experimental (now deprecated).
    tpu_strategy_cls = (getattr(tf.distribute, 'TPUStrategy', None)
                        or tf.distribute.experimental.TPUStrategy)
    strategy = tpu_strategy_cls(tpu)
else:
    strategy = tf.distribute.get_strategy()

print('REPLICAS: {0}'.format(str(strategy.num_replicas_in_sync)))


# Load Data for Processing

We need to set up functions to get images from tfrecords and load them into our dataset.





In [None]:
# Image resolution to train on. The dataset ships one TFRecord folder per
# resolution, so switching the size also switches the input folder below.
# IMAGE_SIZE = [192, 192]
IMAGE_SIZE = [224, 224]
# IMAGE_SIZE = [331, 331]
# IMAGE_SIZE = [512, 512]
GCS_PATH = GCS_DS_PATH + '/tfrecords-jpeg-{0}x{1}'.format(*IMAGE_SIZE)
print('GCS_PATH = ' + GCS_PATH)

# AUTOTUNE asks tf.data to pick the degree of parallelism / buffer size itself
# at runtime instead of us hard-coding a number.
AUTO = tf.data.experimental.AUTOTUNE

# One list of TFRecord files per split.
TRAINING_FILENAMES = tf.io.gfile.glob(GCS_PATH + '/train/*.tfrec')
VALIDATION_FILENAMES = tf.io.gfile.glob(GCS_PATH + '/val/*.tfrec')
TEST_FILENAMES = tf.io.gfile.glob(GCS_PATH + '/test/*.tfrec')

# Flower names, indexed by the integer class label (this notebook looks names up
# as CLASSES[label]); the trailing comments give the index range of each row.
# NOTE(review): 'cyclamen ' and 'hippeastrum ' carry a trailing space, which
# shows up in plot titles and axis labels -- confirm whether that is intentional.
CLASSES = ['pink primrose',    'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea',     'wild geranium',     'tiger lily',           'moon orchid',              'bird of paradise', 'monkshood',        'globe thistle',         # 00 - 09
           'snapdragon',       "colt's foot",               'king protea',      'spear thistle', 'yellow iris',       'globe-flower',         'purple coneflower',        'peruvian lily',    'balloon flower',   'giant white arum lily', # 10 - 19
           'fire lily',        'pincushion flower',         'fritillary',       'red ginger',    'grape hyacinth',    'corn poppy',           'prince of wales feathers', 'stemless gentian', 'artichoke',        'sweet william',         # 20 - 29
           'carnation',        'garden phlox',              'love in the mist', 'cosmos',        'alpine sea holly',  'ruby-lipped cattleya', 'cape flower',              'great masterwort', 'siam tulip',       'lenten rose',           # 30 - 39
           'barberton daisy',  'daffodil',                  'sword lily',       'poinsettia',    'bolero deep blue',  'wallflower',           'marigold',                 'buttercup',        'daisy',            'common dandelion',      # 40 - 49
           'petunia',          'wild pansy',                'primula',          'sunflower',     'lilac hibiscus',    'bishop of llandaff',   'gaura',                    'geranium',         'orange dahlia',    'pink-yellow dahlia',    # 50 - 59
           'cautleya spicata', 'japanese anemone',          'black-eyed susan', 'silverbush',    'californian poppy', 'osteospermum',         'spring crocus',            'iris',             'windflower',       'tree poppy',            # 60 - 69
           'gazania',          'azalea',                    'water lily',       'rose',          'thorn apple',       'morning glory',        'passion flower',           'lotus',            'toad lily',        'anthurium',             # 70 - 79
           'frangipani',       'clematis',                  'hibiscus',         'columbine',     'desert-rose',       'tree mallow',          'magnolia',                 'cyclamen ',        'watercress',       'canna lily',            # 80 - 89
           'hippeastrum ',     'bee balm',                  'pink quill',       'foxglove',      'bougainvillea',     'camellia',             'mallow',                   'mexican petunia',  'bromelia',         'blanket flower',        # 90 - 99
           'trumpet creeper',  'blackberry lily',           'common tulip',     'wild rose']

def decode_image(image_data):
    """Decode JPEG bytes into a float32 image tensor.

    The pixel values are divided by 255 (so 8-bit RGB lands in [0, 1]) and the
    tensor is reshaped to [IMAGE_SIZE[0], IMAGE_SIZE[1], 3].
    """
    rgb = tf.image.decode_jpeg(image_data, channels=3)
    scaled = tf.cast(rgb, tf.float32)/255.0
    return tf.reshape(scaled, [*IMAGE_SIZE, 3])

def read_labeled_tfrecord(example):
    """Parse one serialized labeled example into an (image, label) pair.

    The record is expected to hold a scalar bytestring under 'image' and a
    scalar int64 under 'class'; the label is returned as int32.
    """
    feature_spec = {
        'image': tf.io.FixedLenFeature([], tf.string),  # shape [] = single element
        'class': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), tf.cast(parsed['class'], tf.int32)

def read_unlabeled_tfrecord(example):
    """Parse one serialized unlabeled example into an (image, id) pair.

    The record is expected to hold scalar bytestrings under 'image' and 'id';
    the id is passed through unchanged as a string tensor.
    """
    feature_spec = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'id': tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), parsed['id']

def load_dataset(filenames, labeled = True, ordered = False):
    """Build a tf.data pipeline that reads and decodes TFRecord files.

    Args:
        filenames: TFRecord file paths to read.
        labeled: parse records as (image, label) when True, (image, id) otherwise.
        ordered: when False (default), deterministic ordering is switched off,
            which reads faster; the training data gets shuffled anyway.

    Returns:
        A dataset of decoded, unbatched examples.
    """
    read_options = tf.data.Options()
    if not ordered:
        read_options.experimental_deterministic = False

    parse_fn = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord
    # AUTO lets tf.data tune both the parallel file reads and the parallel parsing.
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO).with_options(read_options)
    return records.map(parse_fn, num_parallel_calls = AUTO)


# Creating Data Pipelines
The goal is to create efficient pipelines for train/test/val splits.


In [None]:
# Global batch size: 16 examples per replica. With a single replica (TPU off)
# that is 16; with the 8 replicas of a TPU it is 128 (=16*8).
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

def data_augment(image, label):
    """Randomly mirror the image left/right; the label is returned untouched.

    Per the original author's note: because the training pipeline ends in
    dataset.prefetch(AUTO), this runs on the CPU side of the TPU host while the
    TPU itself is computing gradients, so it is essentially free.
    """
    flipped = tf.image.random_flip_left_right(image)
    #flipped = tf.image.random_saturation(flipped, 0, 2)
    return flipped, label

def get_training_dataset():
    """Return the augmented, endlessly repeating, shuffled and batched training stream."""
    return (
        load_dataset(TRAINING_FILENAMES, labeled=True)
        .map(data_augment, num_parallel_calls=AUTO)
        .repeat()           # training iterates over the data for several epochs
        .shuffle(2048)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)     # fetch the next batch while the current one trains
    )

def get_validation_dataset(ordered=False):
    """Return the batched, cached validation stream of (image, label) pairs."""
    validation = load_dataset(VALIDATION_FILENAMES, labeled=True, ordered=ordered)
    return validation.batch(BATCH_SIZE).cache().prefetch(AUTO)

def get_test_dataset(ordered=False):
    """Return the batched test stream of (image, id) pairs."""
    test = load_dataset(TEST_FILENAMES, labeled=False, ordered=ordered)
    return test.batch(BATCH_SIZE).prefetch(AUTO)

def count_data_items(filenames):
    """Sum the item counts encoded in TFRecord file names.

    The number of data items is written in the name of each .tfrec file,
    i.e. flowers00-230.tfrec = 230 data items.

    Args:
        filenames: iterable of .tfrec paths.

    Returns:
        The total as a NumPy integer; 0 (still an integer) for an empty list.

    Raises:
        AttributeError: if a file name does not contain a "-<digits>." part.
    """
    # Compile once instead of once per file name.
    count_pattern = re.compile(r"-([0-9]*)\.")
    n = [int(count_pattern.search(filename).group(1)) for filename in filenames]
    # An explicit integer dtype keeps the empty case from turning into float 0.0.
    return np.sum(n, dtype=np.int64)

# Image counts per split, read from the TFRecord file names.
NUM_TRAINING_IMAGES, NUM_VALIDATION_IMAGES, NUM_TEST_IMAGES = (
    count_data_items(split_files)
    for split_files in (TRAINING_FILENAMES, VALIDATION_FILENAMES, TEST_FILENAMES)
)
print('Dataset: {} training images, {} validation images, {} unlabeled test images'.format(
    NUM_TRAINING_IMAGES, NUM_VALIDATION_IMAGES, NUM_TEST_IMAGES))


We will call these functions to create the datasets that we will be using for training. 

In [None]:
# Build the three streams; validation is read in order so that predictions can
# later be lined up with the true labels.
ds_train = get_training_dataset()
ds_valid = get_validation_dataset(ordered=True)
ds_test = get_test_dataset()

for split_title, split_ds in (("Training:", ds_train), ("Validation:", ds_valid), ("Test:", ds_test)):
    print(split_title, split_ds)

These datasets are `tf.data.Dataset` objects. You can think about a dataset in TensorFlow as a *stream* of data records. The training and validation sets are streams of `(image, label)` pairs.

In [None]:
# Abbreviate long arrays when printing so label vectors stay readable.
np.set_printoptions(threshold=15, linewidth=80)

# Peek at the first three training batches and print their shapes.
print("Training data shapes:")
for image, label in ds_train.take(3):
    print(image.numpy().shape, label.numpy().shape)
# `label` still holds the labels of the last batch seen in the loop above.
print("Training data label examples:", label.numpy())

The test set is a stream of `(image, idnum)` pairs; `idnum` here is the unique identifier given to the image that we'll use later when we make our submission as a `csv` file.

In [None]:
# Peek at the first three test batches and print their shapes.
print("Test data shapes:")
for image, idnum in ds_test.take(3):
    print(image.numpy().shape, idnum.numpy().shape)
# `idnum` still holds the ids of the last batch seen in the loop above.
print("Test data IDs:", idnum.numpy().astype('U')) # U=unicode string

# Data Exploration

In [None]:
from matplotlib import pyplot as plt

def batch_to_numpy_images_and_labels(data):
    """Convert an (images, labels) batch of tensors to NumPy.

    When the second element converts to an object-dtype array (the case this
    notebook relies on for test batches, whose second element is the id
    strings), the labels are replaced by a list of None, one per image.

    Returns:
        (np_images, np_labels) where np_labels is an array or a list of None.
    """
    image_batch, label_batch = data
    np_images, np_labels = image_batch.numpy(), label_batch.numpy()
    if np_labels.dtype != object:
        return np_images, np_labels
    return np_images, [None] * len(np_images)

def title_from_label_and_target(label, correct_label):
    """Build the plot title for one prediction.

    Args:
        label: predicted class index (an index into CLASSES).
        correct_label: true class index, or None when the truth is unknown.

    Returns:
        (title, correct): the title string and whether the prediction is
        considered correct. With no true label, the title is just the predicted
        class name and `correct` is True. Otherwise the title looks like
        "rose [OK]" or "rose [NO→daisy]".
    """
    if correct_label is None:
        # The original returned the undefined name TRUE here (NameError).
        return CLASSES[label], True
    correct = (label == correct_label)
    return '{0} [{1}{2}{3}]'.format(
        CLASSES[label],
        'OK' if correct else 'NO',
        u'\u2192' if not correct else '',
        # Index with the variable; the original indexed with the string 'correct_label'.
        CLASSES[correct_label] if not correct else ''), correct

#pyplot visualization functions to show images of flowers
def display_one_flower(image, title, subplot, red = False, titlesize=16):
    """Draw one image into the given subplot slot and return the next slot.

    Args:
        image: image accepted by plt.imshow.
        title: title text; no title is drawn when it is empty.
        subplot: (rows, cols, index) triple passed to plt.subplot.
        red: draw the title in red and slightly smaller.
        titlesize: base font size of the title.

    Returns:
        (rows, cols, index + 1), i.e. the slot for the next image.
    """
    plt.subplot(*subplot)
    plt.axis('off')
    plt.imshow(image)
    if len(title) > 0:
        if red:
            font_size, font_color = int(titlesize/1.2), 'red'
        else:
            font_size, font_color = int(titlesize), 'black'
        plt.title(title, fontsize=font_size, color=font_color,
                  fontdict={'verticalalignment':'center'}, pad=int(titlesize/1.5))
    n_rows, n_cols, position = subplot[0], subplot[1], subplot[2]
    return (n_rows, n_cols, position+1)

def display_batch_of_images(databatch, predictions=None):
    """Show a batch of images in a roughly square grid, with optional titles.

    Args:
        databatch: an (images, labels) pair of tensors; it is unpacked by
            batch_to_numpy_images_and_labels, so a bare images tensor is not
            handled, despite what earlier versions of this comment suggested.
        predictions: optional sequence of predicted class indices, one per
            image; when given, titles show the prediction and whether it
            matches the label, and wrong predictions are titled in red.

    NOTE(review): an empty batch is not handled (rows becomes 0 and the
    column computation divides by it).
    """
    # This works with:
    # display_batch_of_images((images, labels))
    # display_batch_of_images((images, labels), predictions)
    # data
    images, labels = batch_to_numpy_images_and_labels(databatch)
    # Defensive only: the helper above returns an array or a list, never None.
    if labels is None:
        labels = [None for _ in enumerate(images)]
        
    # auto-squaring: this will drop data that does not fit into square
    # or square-ish rectangle
    rows = int(math.sqrt(len(images)))
    cols = len(images)//rows
        
    # size and spacing
    FIGSIZE = 13.0
    SPACING = 0.1
    subplot=(rows,cols,1)
    # Keep the longer side of the figure at FIGSIZE and scale the other side.
    if rows < cols:
        plt.figure(figsize=(FIGSIZE,FIGSIZE/cols*rows))
    else:
        plt.figure(figsize=(FIGSIZE/rows*cols,FIGSIZE))
    
    # display
    for i, (image, label) in enumerate(zip(images[:rows*cols], labels[:rows*cols])):
        title = '' if label is None else CLASSES[label]
        correct = True
        if predictions is not None:
            title, correct = title_from_label_and_target(predictions[i], label)
        dynamic_titlesize = FIGSIZE*SPACING/max(rows,cols)*40+3 # magic formula tested to work from 1x1 to 10x10 images
        subplot = display_one_flower(image, title, subplot, not correct, titlesize=dynamic_titlesize)
    
    #layout
    plt.tight_layout()
    # `label` is the label of the last image drawn in the loop above; with no
    # labels and no predictions there are no titles, so the gaps are removed.
    if label is None and predictions is None:
        plt.subplots_adjust(wspace=0, hspace=0)
    else:
        plt.subplots_adjust(wspace=SPACING, hspace=SPACING)
    plt.show()

def display_training_curves(training, validation, title, subplot):
    """Plot a training and a validation curve into one panel of a shared figure.

    Args:
        training: per-epoch training values.
        validation: per-epoch validation values.
        title: metric name, used for the panel title and the y-axis label.
        subplot: three-digit matplotlib subplot code (e.g. 211); a code ending
            in 1 also creates the figure.
    """
    starts_new_figure = (subplot%10 == 1)
    if starts_new_figure:
        plt.subplots(figsize=(10,10), facecolor='#F0F0F0')
        plt.tight_layout()
    ax = plt.subplot(subplot)
    ax.set_facecolor('#F8F8F8')
    for curve in (training, validation):
        ax.plot(curve)
    ax.set_title('model '+ title)
    ax.set_ylabel(title)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'valid.'])


### Important Tip
You can use the below cell to get an iterator over the dataset.

In [None]:
# Re-batch the training stream into batches of 20 and keep an iterator over it;
# every next(ds_iter) then yields a fresh batch of 20. The training dataset
# repeats, so the iterator does not run out.
ds_iter = iter(ds_train.unbatch().batch(20))

In [None]:
# Re-run only this cell to pull the next batch from ds_iter and display it.
one_batch = next(ds_iter)
display_batch_of_images(one_batch)

# Scheduling a Learning Rate
To schedule a learning rate we need to create a function which returns a learning rate for each epoch. We pass this into Keras using `keras.callbacks.LearningRateScheduler` to be able to use it in model fitting.

In [None]:
EPOCHS = 20

# Our Learning Rate schedule is defined here:
def exponential_lr(epoch,
                   start_lr = 0.0001, min_lr = 0.00001, max_lr = 0.0001, 
                   rampup_epochs = 1, sustain_epochs = 1, exp_decay = 0.8):
    """Learning rate for a given epoch: linear ramp-up, plateau, exponential decay.

    Args:
        epoch: zero-based epoch index.
        start_lr: learning rate at epoch 0.
        min_lr: value the decay approaches.
        max_lr: learning rate reached after the ramp-up and held on the plateau.
        rampup_epochs: number of epochs spent ramping from start_lr to max_lr.
        sustain_epochs: number of epochs held at max_lr after the ramp-up.
        exp_decay: per-epoch decay factor applied after the plateau.

    Returns:
        The learning rate for `epoch`.

    Note:
        The arguments after `epoch` are meant to be passed by keyword. Keras'
        LearningRateScheduler invokes its schedule as schedule(epoch, lr), which
        binds the current optimizer learning rate to `start_lr` when this
        function is handed to it directly.
    """
    # Phase 1: linear increase from start_lr towards max_lr.
    if epoch < rampup_epochs:
        return (max_lr - start_lr) / rampup_epochs * epoch + start_lr
    # Phase 2: hold max_lr.
    if epoch < rampup_epochs + sustain_epochs:
        return max_lr
    # Phase 3: exponential decay towards min_lr.
    return (max_lr - min_lr) * exp_decay**(epoch - rampup_epochs - sustain_epochs) + min_lr

# Alternative step-wise schedule, kept for reference:
# def lr_scheduler(epoch):
#     if epoch < 4:
#         return 0.0005
#     elif epoch < 8:
#         return 0.0002
#     elif epoch < 12:
#         return 0.0001
#     elif epoch < 16:
#         return 0.00005
#     elif epoch < 20:
#         return 0.00002
#     else:
#         return 0.00001

def _epoch_only_schedule(epoch, current_lr=None):
    """Adapter for Keras: forward only the epoch to exponential_lr.

    LearningRateScheduler calls schedule(epoch, lr). Passing exponential_lr
    directly would bind that current learning rate to its `start_lr` parameter,
    so epoch 0 would train at the optimizer's default rate instead of the
    schedule plotted below. `current_lr` is accepted and deliberately ignored.
    """
    return exponential_lr(epoch)

lr_callback = tf.keras.callbacks.LearningRateScheduler(_epoch_only_schedule, verbose=True)

# Plot the schedule over the planned number of epochs as a sanity check.
rng = list(range(EPOCHS))
y = [exponential_lr(x) for x in rng]
plt.plot(rng, y)
print("Learning rate schedule: {:.3g} to {:.3g} to {:.3g}".format(y[0], max(y), y[-1]))

# Defining the Model

We can use a pretrained model to use **transfer learning**. This will improve the time it takes to train the model.

We are using **VGG16**, **DenseNet201**, **EfficientNetB7**, and **Xception** as components of our ensemble.

I learned a method for this from [**this awesome notebook**](https://www.kaggle.com/servietsky/pretrained-cnn-epic-fight/notebook#4.-Pretrained-Models-Creation-). I highly encourage anyone reading this to check it out.

We will employ a [context manager](https://docs.python.org/3/reference/compound_stmts.html#with) which we get from `strategy.scope`. We defined `strategy` at the beginning of the notebook. The context manager tells TensorFlow how to divide tasks between the TPU cores. When using TensorFlow with a TPU, you need to create your model inside this scope.

In [None]:
# EPOCHS = 20 #this number can be changed to be whatever you like

# We are including a drop_rate argument here, setting it to 0 should give us an effect
# equivalent to having no dropout layer at all. This is just to make experimentation easier
# and cleaner. We allow selection of an optimizer (but since these are CNNs, nadam or adam are best).

def _build_transfer_model(backbone_cls, drop_rate, opt, trainable_arg):
    """Build and compile a classifier on top of an ImageNet-pretrained backbone.

    Args:
        backbone_cls: a tf.keras.applications model constructor.
        drop_rate: dropout rate applied before the output layer (0 disables it).
        opt: optimizer passed to model.compile.
        trainable_arg: whether the backbone weights are trainable (True) or
            frozen (False). The added classification head is trainable either way.

    Returns:
        A compiled tf.keras.Sequential model with one softmax output per class.
    """
    # Both model creation and compile happen inside the strategy scope so all
    # variables are created under the distribution strategy.
    with strategy.scope():
        pretrained_model = backbone_cls(
            weights='imagenet',
            include_top=False,
            input_shape=[*IMAGE_SIZE, 3]  # same shape as we pass out of decode_image
        )
        pretrained_model.trainable = trainable_arg
        model = tf.keras.Sequential([
            pretrained_model,
            tf.keras.layers.GlobalAveragePooling2D(),  # pools the backbone's feature maps for the classifier head
            tf.keras.layers.Dropout(drop_rate),
            tf.keras.layers.Dense(len(CLASSES), activation='softmax', dtype='float32')  # output layer, provides classification
        ])
        model.compile(
            optimizer = opt,
            loss = 'sparse_categorical_crossentropy',
            metrics = ['sparse_categorical_accuracy']
        )
    return model


def model_VGG16(drop_rate = 0.2, opt = 'nadam', trainable_arg = False):
    """Compiled flower classifier with a VGG16 backbone (frozen by default)."""
    return _build_transfer_model(tf.keras.applications.VGG16, drop_rate, opt, trainable_arg)


def model_Xception(drop_rate = 0.2, opt = 'nadam', trainable_arg = False):
    """Compiled flower classifier with an Xception backbone (frozen by default)."""
    return _build_transfer_model(tf.keras.applications.Xception, drop_rate, opt, trainable_arg)


def model_densenet(drop_rate = 0.2, opt = 'nadam', trainable_arg = False):
    """Compiled flower classifier with a DenseNet201 backbone (frozen by default)."""
    return _build_transfer_model(tf.keras.applications.DenseNet201, drop_rate, opt, trainable_arg)


def model_effnet_B7(drop_rate = 0.2, opt = 'nadam', trainable_arg = False):
    """Compiled flower classifier with an EfficientNetB7 backbone (frozen by default).

    This function previously built DenseNet201, so the "EfficientNetB7" member
    of the ensemble was a second DenseNet.

    NOTE(review): the Keras EfficientNet models include their own input
    rescaling and are documented to take raw [0, 255] pixels, while
    decode_image in this notebook produces values in [0, 1] -- confirm the
    intended input scaling for this backbone.
    """
    return _build_transfer_model(tf.keras.applications.EfficientNetB7, drop_rate, opt, trainable_arg)


In [None]:
DRATE = 0.5   # dropout rate; started at 0.1 but the models were overfitting
OPT = 'adam'  # this was a good optimizer for VGG16, Xception
TARG = True   # make the pretrained backbones trainable

# Every ensemble member is built with the same settings.
# VGG16 was removed from the ensemble altogether.
shared_build_args = dict(drop_rate = DRATE, opt = OPT, trainable_arg = TARG)
models = {
    'Xception': model_Xception(**shared_build_args),
    'DenseNet201': model_densenet(**shared_build_args),
    'EfficientNetB7': model_effnet_B7(**shared_build_args),
}
# Checked separately: the high first-epoch validation accuracy of these models
# is fine; they simply start much higher than VGG16 did.

# Per-model results, keyed by model name.
histories = {}
predictions = {}
predictions_val = {}
predictions_prob = {}  # class probabilities, to be used for ensemble predictions

MODEL_COUNT = len(models)

print('We have {} models'.format(str(MODEL_COUNT)))

#### SOME NOTES (first trainable run)
- VGG16: model is HEAVILY overfitting, need to add high dropout rate for this. Something like 0.4 instead of 0.1. training accuracy=0.9733, validation accuracy=0.7489.
- Xception: model is overfitting, training accuracy reached almost 1 while validation accuracy was stuck around 0.92. 
- DenseNet: model is learning very rapidly, reached training accuracy = 0.99 and validation accuracy = 0.92 by the 9th epoch. There is a slight overfit here, I need to increase drop rate to 0.25. It capped off at a training accuracy of 1 with a validation accuracy of 0.93. 
- EfficientNet: reached 1 training accuracy with 0.94 validation accuracy. Increasing drop rate

#### A Decision (second trainable run)
I have decided to remove VGG16 from the running. The model doesn't hold up very well to the others when considering how much more it needs to train to reach a lower accuracy than the others. 

# Fitting the Models
We will fit the models as per the method used in [this notebook](https://www.kaggle.com/servietsky/pretrained-cnn-epic-fight/notebook#5.-Transfer-Learning-)

[This page](https://www.tensorflow.org/tutorials/distribute/custom_training) has crucial information about how this is done.

In [None]:
# Number of epochs actually trained. Note this overrides the EPOCHS = 20 used
# when the learning-rate schedule was plotted.
EPOCHS = 12

# The training dataset repeats forever, so Keras needs to be told how many
# batches make up one epoch.
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES//BATCH_SIZE

# (The ordered test datasets that used to be built here were never used in this
# cell; they are created in the test-prediction cell, right before they are needed.)

for name, model in models.items():
    # Save a diagram of the architecture next to the notebook output.
    tf.keras.utils.plot_model(model, to_file=name+'.png', show_shapes=True)

    print('Training Model --- ' + name)
    history = model.fit(
        ds_train,
        steps_per_epoch = STEPS_PER_EPOCH,
        epochs = EPOCHS,
        callbacks = [lr_callback],
        validation_data = ds_valid
    )

    histories[name] = history  # keep the training curves for plotting
    # Predicted class per validation image, used later for the confusion matrix.
    predictions_val[name] = np.argmax(model.predict(ds_valid), axis=-1)
    

    

We can display training metrics using our earlier defined function `display_training_curves`.

In [None]:
# Plotting Training curves for model Xception
modelname = 'Xception'
model_history = histories[modelname].history
for train_key, valid_key, panel_title, panel_code in (
    ('loss', 'val_loss', 'loss', 211),
    ('sparse_categorical_accuracy', 'val_sparse_categorical_accuracy', 'accuracy', 212),
):
    display_training_curves(model_history[train_key], model_history[valid_key], panel_title, panel_code)

In [None]:
# Plotting Training curves for model DenseNet201
modelname = 'DenseNet201'
model_history = histories[modelname].history
for train_key, valid_key, panel_title, panel_code in (
    ('loss', 'val_loss', 'loss', 211),
    ('sparse_categorical_accuracy', 'val_sparse_categorical_accuracy', 'accuracy', 212),
):
    display_training_curves(model_history[train_key], model_history[valid_key], panel_title, panel_code)

In [None]:
# Plotting Training curves for model EfficientNetB7
modelname = 'EfficientNetB7'
model_history = histories[modelname].history
for train_key, valid_key, panel_title, panel_code in (
    ('loss', 'val_loss', 'loss', 211),
    ('sparse_categorical_accuracy', 'val_sparse_categorical_accuracy', 'accuracy', 212),
):
    display_training_curves(model_history[train_key], model_history[valid_key], panel_title, panel_code)

# Evaluating Predictions

We will build a **confusion matrix** on our validation data to evaluate the efficacy of our model. The important metrics here will be **precision**, **recall**, and the **f1-score** which all indicate the predictive value of our model.

In [None]:
# import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

def display_confusion_matrix(cmat, score, precision, recall):
    """Draw a confusion matrix with class names on both axes.

    Args:
        cmat: square matrix with one row and column per entry of CLASSES.
        score: f1 score to print on the figure, or None to omit it.
        precision: precision to print on the figure, or None to omit it.
        recall: recall to print on the figure, or None to omit it.
    """
    plt.figure(figsize=(15,15))
    ax = plt.gca()
    ax.matshow(cmat, cmap='Reds')

    # One tick per class on each axis, labelled with the class name.
    n_classes = len(CLASSES)
    ax.set_xticks(range(n_classes))
    ax.set_xticklabels(CLASSES, fontdict={'fontsize': 7})
    plt.setp(ax.get_xticklabels(), rotation=45, ha="left", rotation_mode="anchor")
    ax.set_yticks(range(n_classes))
    ax.set_yticklabels(CLASSES, fontdict={'fontsize': 7})
    plt.setp(ax.get_yticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # Collect whichever metrics were supplied into one text block.
    summary_parts = []
    if score is not None:
        summary_parts.append('f1 = {:.3f} '.format(score))
    if precision is not None:
        summary_parts.append('\nprecision = {:.3f} '.format(precision))
    if recall is not None:
        summary_parts.append('\nrecall = {:.3f} '.format(recall))
    titlestring = "".join(summary_parts)
    if titlestring:
        ax.text(101, 1, titlestring, fontdict={'fontsize': 18, 'horizontalalignment':'right', 'verticalalignment':'top', 'color':'#804040'})
    plt.show()
    
# NOTE(review): this is a second definition of display_training_curves. The same
# function is already defined in the Data Exploration cell and the two bodies
# differ only in comments; running this cell silently rebinds the name.
# Consider keeping a single definition.
def display_training_curves(training, validation, title, subplot):
    """Plot training and validation curves into one panel of a shared figure.

    `subplot` is a three-digit matplotlib subplot code (e.g. 211); a code ending
    in 1 also creates the figure.
    """
    if subplot%10==1: # set up the subplots on the first call
        plt.subplots(figsize=(10,10), facecolor='#F0F0F0')
        plt.tight_layout()
    ax = plt.subplot(subplot)
    ax.set_facecolor('#F8F8F8')
    ax.plot(training)
    ax.plot(validation)
    ax.set_title('model '+ title)
    ax.set_ylabel(title)
    #ax.set_ylim(0.28,1.05)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'valid.'])

### Creating the confusion matrix

In [None]:
# Reuse the validation dataset built earlier (it was created with ordered=True),
# so the labels below line up with the per-model validation predictions.
# cmdataset = get_validation_dataset(ordered = True)
cmdataset = ds_valid
# images_ds = cmdataset.map(lambda image, label: image) #simply projecting out the image
# Keep only the labels and flatten the batches into a stream of single labels.
labels_ds = cmdataset.map(lambda image, label: label).unbatch()

# Pull every validation label out as one NumPy array via a single big batch.
cm_correct_labels = next(iter(labels_ds.batch(NUM_VALIDATION_IMAGES))).numpy()
# cm_probabilities = model.predict(images_ds)
# cm_predictions = np.argmax(cm_probabilities, axis = -1)# pass this in per model you wish to see



Investigate the confusion matrix of the model of your choosing below by passing in the name in the dictionary.

In [None]:
# Pick the ensemble member to inspect (must be a key of predictions_val).
# model_in_ensemble = 'VGG16'
# model_in_ensemble = 'Xception'
model_in_ensemble = 'DenseNet201'
# model_in_ensemble = 'EfficientNetB7'

cm_predictions = predictions_val[model_in_ensemble]
labels = range(len(CLASSES))

cmat = confusion_matrix(cm_correct_labels, cm_predictions, labels = labels)
cmat = (cmat.T/cmat.sum(axis=1)).T  # normalize each row (true class) to sum to 1

# All three scores are macro-averaged over the full list of classes.
macro_args = dict(labels = labels, average = 'macro')
score = f1_score(cm_correct_labels, cm_predictions, **macro_args)
precision = precision_score(cm_correct_labels, cm_predictions, **macro_args)
recall = recall_score(cm_correct_labels, cm_predictions, **macro_args)

display_confusion_matrix(cmat, score, precision, recall)

# Making Test Predictions
Now that we have investigated our three models, let's make predictions using the one we like most (in this case we use the one with the best f1 score).

Based on this metric, we have chosen the DenseNet model since it shows the greatest promise.

In [None]:
# Read the test set in order so that the ids and the predictions, which come
# from two separate passes over the same dataset, line up with each other.
ds_test_ordered = get_test_dataset(ordered = True)
ds_test_images = ds_test_ordered.map(lambda image, idnum: image)
ds_test_idnums = ds_test_ordered.map(lambda image, idnum: idnum).unbatch()
# Pull every id out as one array of unicode strings via a single big batch.
test_ids = next(iter(ds_test_idnums.batch(NUM_TEST_IMAGES))).numpy().astype('U')

for name, model in models.items():
    #Models have already been trained
    print('Predicting with Model --- ' + name)
    # Keep the class probabilities (for ensembling) and the most likely class.
    predictions_prob[name] = model.predict(ds_test_images)
    predictions[name] = np.argmax(predictions_prob[name], axis=-1)
    # Prints the whole predictions dict collected so far, not just this model's entry.
    print(predictions)

# Submit the predictions of the single model chosen above.
final_model = 'DenseNet201'
final_pred = predictions[final_model]

# Write one "id,label" row per test image.
np.savetxt(
    'submission.csv',
    np.rec.fromarrays([test_ids, final_pred]),
    fmt=['%s', '%d'],
    delimiter=',',
    header='id,label',
    comments=''
)

# Show the first lines of the file as a quick check.
!head submission.csv