In [12]:
import os
import tensorflow as tf
import tensorflow.contrib.eager as tfe
from matplotlib import pyplot as plt
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import GlobalAveragePooling2D
import numpy as np
import itertools
from sklearn.metrics import confusion_matrix
import cv2
import keras.backend as K

np.random.seed(42)
tf.enable_eager_execution()
save_path = '/home/final/data/cache/{}.bottle_necks.labels.paths.npz'
plot_path = '/home/final/data/plots/bin_classifier'

device = "gpu:0" if tfe.num_gpus() else "cpu:0"

In [13]:
class XceptionClassifier(tf.keras.Model):
    def __init__(self, n_classes, n_layers):
        self.n_layers = n_layers
        self.n_classes = n_classes
        
        super(XceptionClassifier, self).__init__()
#         self.xception_layers = Xception(include_top=False, weights='imagenet', input_shape=(200,200,3))
#         self.pooling_layer = GlobalAveragePooling2D(data_format='channels_last')
        if n_layers  == 3:
            self.dense_layer1 = tf.keras.layers.Dense(units=1024, activation='relu')
            self.dense_layer2 = tf.keras.layers.Dense(units=512, activation='relu')
            self.dense_layer3 = tf.keras.layers.Dense(units=self.n_classes)
        if n_layers  == 6:
            self.dense_layer1 = tf.keras.layers.Dense(units=1024, activation='relu')
            self.dense_layer2 = tf.keras.layers.Dense(units=512, activation='relu')
            self.dense_layer3 = tf.keras.layers.Dense(units=256, activation='relu')
            self.dense_layer4 = tf.keras.layers.Dense(units=128, activation='relu')
            self.dense_layer5 = tf.keras.layers.Dense(units=64, activation='relu')
            self.dense_layer6 = tf.keras.layers.Dense(units=self.n_classes)
        if n_layers  == 9:
            self.dense_layer1 = tf.keras.layers.Dense(units=1024, activation='relu')
            self.dense_layer2 = tf.keras.layers.Dense(units=512, activation='relu')
            self.dense_layer3 = tf.keras.layers.Dense(units=256, activation='relu')
            self.dense_layer4 = tf.keras.layers.Dense(units=128, activation='relu')
            self.dense_layer5 = tf.keras.layers.Dense(units=64, activation='relu')
            self.dense_layer6 = tf.keras.layers.Dense(units=32, activation='relu')
            self.dense_layer7 = tf.keras.layers.Dense(units=16, activation='relu')
            self.dense_layer8 = tf.keras.layers.Dense(units=8, activation='relu')
            self.dense_layer9 = tf.keras.layers.Dense(units=self.n_classes)
            
    def call(self, inputs):
#         xception = self.xception_layers(inputs)
#         pooling = self.pooling_layer(xception)
        if self.n_layers == 3:
            result = self.dense_layer1(inputs)
            result = self.dense_layer2(result)
            result = self.dense_layer3(result)
        if self.n_layers == 6:
            result = self.dense_layer1(inputs)
            result = self.dense_layer2(result)
            result = self.dense_layer3(result)
            result = self.dense_layer4(result)
            result = self.dense_layer5(result)
            result = self.dense_layer6(result)
        if self.n_layers == 9:
            result = self.dense_layer1(inputs)
            result = self.dense_layer2(result)
            result = self.dense_layer3(result)
            result = self.dense_layer4(result)
            result = self.dense_layer5(result)
            result = self.dense_layer6(result)
            result = self.dense_layer7(result)
            result = self.dense_layer8(result)
            result = self.dense_layer9(result)

        return result

In [None]:
def calculate_loss(classifier, images, labels):
    logits = classifier(images)
    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))

def calculate_f1(classifier, images, labels):
    logits = tf.argmax(classifier(images), axis=1)
    TP = tf.count_nonzero(logits * labels)
    TN = tf.count_nonzero((logits - 1) * (labels - 1))
    FP = tf.count_nonzero(logits * (labels - 1))
    FN = tf.count_nonzero((logits - 1) * labels)

    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * precision * recall / (precision + recall)

    return f1

def train(train_dataset, valid_dataset,
          learning_rate, batch_size, n_epochs, n_layers, n_classes,
          save_plot_fname,
          plot_graphs=False, save_graphs=True
         ):
    
    def _plot_loss(train, val, plot, save):
        plt.figure(figsize=(9,6))
        plt.plot(train, label='Train Loss')
        plt.plot(val, label='Validation Loss')
        plt.legend()
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Loss History')
        if save: 
            plt.savefig("{}/{}_{}.png".format(plot_path, save_plot_fname, "loss"), bbox_inches='tight')
            plt.close()
        if plot: 
            plt.show()
            plt.close()
    
    def _plot_acc(train, val, plot, save):
        plt.figure(figsize=(9,6))
        plt.plot(train, label='Train Accuracy')
        plt.plot(val, label='Validation Accuracy')
        plt.legend()
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.title('Accuracy History')
        if save: 
            plt.savefig("{}/{}_{}.png".format(plot_path, save_plot_fname, "accr"), bbox_inches='tight')
            plt.close()
        if plot: 
            plt.show()
            plt.close()
    
    def _plot_f1(train, val, plot, save):
        plt.figure(figsize=(9,6))
        plt.plot(train, label='Train F1')
        plt.plot(val, label='Validation F1')
        plt.legend()
        plt.xlabel('Epochs')
        plt.ylabel('F1 Score')
        plt.title('F1 Score History')
        if save: 
            plt.savefig("{}/{}_{}.png".format(plot_path, save_plot_fname, "f1sc"), bbox_inches='tight')
            plt.close()
        if plot: 
            plt.show()
            plt.close()
    
    
    x_classifier = XceptionClassifier(n_classes=n_classes, n_layers=n_layers)
    optimizer = tf.train.AdamOptimizer(learning_rate) 

    # Performance Metrics
    # - F1 Score
    train_F1_history = []
    val_F1_history = []
    # - Loss
    train_loss_history = []
    val_loss_history = []
    # - Accuracy
    train_acc_history = []
    val_acc_history = []
    
    with tf.device(device):
        for epoch in range(n_epochs):
            
            epoch_loss_avg = tfe.metrics.Mean()
            epoch_acc = tfe.metrics.Accuracy()
            epoch_f1 = tfe.metrics.Mean()
            for batch, (tr_img, tr_lbl) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    # Compute logits, logits are the domain/input to softmax
                    tr_loss = calculate_loss(x_classifier, tr_img, tr_lbl)
                
                # Compute gradient and apply gradients
                grads = tape.gradient(tr_loss, x_classifier.variables)
                optimizer.apply_gradients(zip(grads, x_classifier.variables),
                                          global_step=tf.train.get_or_create_global_step())
                
                # Add current batch metrics
                epoch_loss_avg(tr_loss) # calculate avg epoch loss 
                epoch_acc(tf.argmax(x_classifier(tr_img), axis=1), tr_lbl)
                epoch_f1(calculate_f1(x_classifier, tr_img, tr_lbl)) # calculate avg epoch f1 score 
                
                if batch % 10 == 0:
                    print('\rEpoch: {}, Batch: {}, Loss: {}'.format(epoch, batch, tr_loss.numpy()), end='')
            # Add train loss, accuracy, and f1 for epoch
            train_loss_history.append(epoch_loss_avg.result())
            train_acc_history.append(epoch_acc.result())
            train_F1_history.append(epoch_f1.result())

            
            # Run validation loop at end of each epoch
            val_loss_avg = tfe.metrics.Mean()
            val_acc = tfe.metrics.Accuracy()
            val_f1 = tfe.metrics.Mean()
            with tf.device(device):
                for batch, (val_img, val_lbl) in enumerate(valid_dataset):
                    # Compute validation metrics
                    val_loss = calculate_loss(x_classifier, val_img, val_lbl)
                    val_loss_avg(val_loss) # Add current batch loss
                    val_acc(tf.argmax(x_classifier(val_img), axis=1), val_lbl)
                    val_f1(calculate_f1(x_classifier, val_img, val_lbl))
                
                val_loss_history.append(val_loss_avg.result())
                val_acc_history.append(val_acc.result())
                val_F1_history.append(val_f1.result())
            
            # Print progress of epochs
            if epoch % 100 == 0:
                print("\rEpoch {:03d}: Train F1:{:.3f}, Train Loss:{:.3f}, Train Acc:{:.3%}, Val F1:{:.3%}, Val Loss: {:.3f}, Val Acc: {:.3%}".format(
                    epoch, epoch_f1.result(), epoch_loss_avg.result(), epoch_acc.result(), 
                    val_f1.result(), val_loss_avg.result(), val_acc.result()))
    
    _plot_loss(train_loss_history, val_loss_history, plot_graphs, save_graphs)
    _plot_acc(train_acc_history, val_acc_history, plot_graphs, save_graphs)
    _plot_f1(train_F1_history, val_F1_history, plot_graphs, save_graphs)
    
    return x_classifier, train_loss_history, val_loss_history, train_acc_history, val_acc_history, train_F1_history, val_F1_history

def test(x_classifier, test_dataset, save_plot_fname):
    
    def _plot_confusion_matrix(cm, classes,
                              normalize=False,
                              title='Confusion matrix',
                              cmap=plt.cm.Blues,
                              show_graphs=False, save_graphs=True):
        """
        This function prints and plots the confusion matrix.
        Normalization can be applied by setting `normalize=True`.
        """
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
#         print(cm)

        plt.figure(figsize=(9,6))
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)
        plt.yticks(tick_marks, classes)

        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.tight_layout()
        if save_graphs: 
            plt.savefig("{}/{}_{}.png".format(plot_path, save_plot_fname, "conf_norm" if normalize else "conf"), bbox_inches='tight')
            plt.close()
        if show_graphs: 
            plt.show()
            plt.close()
    
    # Performance metrics
    test_loss_avg = tfe.metrics.Mean() # loss
    test_acc = tfe.metrics.Accuracy() # accuracy
    test_f1 = tfe.metrics.Mean() # f1
    predictions = tf.convert_to_tensor([], dtype=tf.int64)
    correct = tf.convert_to_tensor([], dtype=tf.int64)
    
    # Run testing loop batch-wise
    with tf.device(device):
        for batch, (tst_img, tst_lbl) in enumerate(test_dataset):
            tst_loss = calculate_loss(x_classifier, tst_img, tst_lbl)
            test_loss_avg(tst_loss)
            test_acc(tf.argmax(x_classifier(tst_img), axis=1), tst_lbl)
            test_f1(calculate_f1(x_classifier, tst_img, tst_lbl))
            predictions = tf.concat([predictions, tf.argmax(x_classifier(tst_img), axis=1)], 0)
            correct = tf.concat([correct, tst_lbl], 0)
    
    cnf_matrix = confusion_matrix(correct, predictions)
    class_names = ['normal', 'pneumonia']
    # Plot non-normalized confusion matrix
    _plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix, without normalization')
    # Plot normalized confusion matrix
    _plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,show_graphs=True,
                      title='Normalized confusion matrix')
    
    print("\nTest dataset metrics: F1: {:.3f}, Loss: {:.3f}, Accuracy: {:.3%}".format(
        test_f1.result(), test_loss_avg.result(), test_acc.result()))
    return test_f1.result(), test_loss_avg.result(), test_acc.result()

### Load bottle neck layers and create Dataset

In [18]:
def load_data(path, dataset_type='train'):
    
    data = np.load(path.format(dataset_type))
    data_bottle_necks, data_labels, data_file_paths = data['bottle_necks'],  data['labels'], data['paths']
    
    return data_bottle_necks, data_labels, data_file_paths

# Read in data
train_bottle_necks, train_labels, train_file_paths = load_data(save_path, 'train')
validate_bottle_necks, validate_labels, validate_file_paths = load_data(save_path, 'validate')
test_bottle_necks, test_labels, test_file_paths = load_data(save_path, 'test')
n = len(train_bottle_necks)

# Make labels into binary outputs
np.clip(train_labels, 0, 1, out=train_labels)
np.clip(validate_labels, 0, 1, out=validate_labels)
np.clip(test_labels, 0, 1, out=test_labels)

train_images_dataset = tf.data.Dataset.from_tensor_slices(train_bottle_necks)
train_labels_dataset = tf.data.Dataset.from_tensor_slices(train_labels)
train_dataset_original = tf.data.Dataset.zip((train_images_dataset, train_labels_dataset))
validate_images_dataset = tf.data.Dataset.from_tensor_slices(validate_bottle_necks)
validate_labels_dataset = tf.data.Dataset.from_tensor_slices(validate_labels)
validate_dataset_original = tf.data.Dataset.zip((validate_images_dataset, validate_labels_dataset))
test_images_dataset = tf.data.Dataset.from_tensor_slices(test_bottle_necks)
test_labels_dataset = tf.data.Dataset.from_tensor_slices(test_labels)
test_dataset_original = tf.data.Dataset.zip((test_images_dataset, test_labels_dataset))

### Train with various hyperparameters

In [19]:
shapes = [None, "1024, 512, 3", "1024, 512, 256, 128, 64, 3", "1024, 512, 256, 128, 64, 32, 16, 8, 3"]
# Parameters to try out
list_n_layers = [3] * 11 + [6] * 11 + [9] * 11
list_batch_size = ([64, 32] + [64] * 5 + [1000] * 4) * 3
list_n_epochs = [50, 50, 50, 50, 50, 100, 200, 200, 200, 400, 400] * 3
list_learning_rate = [0.01, 0.01, 0.1, 0.001, 0.0001, 0.01, 0.01, 0.01, 0.001, 0.01, 0.001] * 3
n_classes = 2

markdown = """
| Model # | # of hidden layers |               # of nodes              | batch size | epochs | learning rate | train_loss | val_loss | test_loss | train_acc | val_acc | test_acc | train_F1 | val_F1 | test_F1 | 
|:-------:|:------------------:|:-------------------------------------:|:----------:|:------:|:-------------:|:----------:|:--------:|:---------:|:---------:|:-------:|:--------:|:--------:|:------:|:-------:|
"""
for i, (n_layers, batch_size, n_epochs, learning_rate) in \
    enumerate(zip(list_n_layers, list_batch_size, list_n_epochs, list_learning_rate), 1):

    save_plot_fname = "L{}.BS{}.LR{}.EP{}".format(n_layers, batch_size, learning_rate, n_epochs)
    title = "| Model number {:02d}: {} |".format(i, save_plot_fname)
    print("-"*len(title))
    print(title)
    print("-"*len(title))
    
    train_dataset = train_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    validate_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    test_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    
    x_classifier, tl, vl, ta, va, tf1, vf1 = train(train_dataset, validate_dataset, learning_rate, batch_size, n_epochs, n_layers, n_classes, save_plot_fname)
    tstf1, tstl, tsta = test(x_classifier, test_dataset, save_plot_fname)
    markdown += "\n|{}|{}|{}|{}|{}|{}|{:.3f}|{:.3f}|{:.3f}|{:.3%}|{:.3%}|{:.3%}|{:.3f}|{:.3f}|{:.3f}|".format(str(i), n_layers, shapes[n_layers//3], batch_size, n_epochs, learning_rate, tl[-1].numpy(), vl[-1].numpy(), tstl.numpy(), ta[-1].numpy(), va[-1].numpy(), tsta.numpy(), tf1[-1].numpy(), vf1[-1].numpy(), tstf1.numpy())
    
print(markdown)

----------------------------------------
| Model number 01: L3.BS64.LR0.01.EP50 |
----------------------------------------
Epoch 000: Train F1:nan, Train Loss:88.358, Train Acc:78.126%, Val F1:92.149%, Val Loss: 0.384, Val Acc: 88.739%
Epoch: 49, Batch: 60, Loss: 0.052000369876623154
Test dataset metrics: F1: 0.896, Loss: 0.914, Accuracy: 84.234%
----------------------------------------
| Model number 02: L3.BS32.LR0.01.EP50 |
----------------------------------------
Epoch 000: Train F1:nan, Train Loss:33.680, Train Acc:76.957%, Val F1:88.453%, Val Loss: 0.606, Val Acc: 83.333%
Epoch: 49, Batch: 120, Loss: 0.621408045291900633
Test dataset metrics: F1: 0.798, Loss: 0.637, Accuracy: 66.667%
---------------------------------------
| Model number 03: L3.BS64.LR0.1.EP50 |
---------------------------------------
Epoch 000: Train F1:nan, Train Loss:13959.579, Train Acc:60.676%, Val F1:80.299%, Val Loss: 0.640, Val Acc: 66.667%
Epoch: 49, Batch: 60, Loss: 0.6108943223953247
Test dataset metri

Epoch 100: Train F1:0.972, Train Loss:0.112, Train Acc:95.501%, Val F1:83.146%, Val Loss: 1.099, Val Acc: 72.973%
Epoch: 199, Batch: 0, Loss: 0.124686896800994874
Test dataset metrics: F1: 0.827, Loss: 1.467, Accuracy: 72.072%
-------------------------------------------
| Model number 21: L6.BS1000.LR0.01.EP400 |
-------------------------------------------
Epoch 000: Train F1:nan, Train Loss:806.159, Train Acc:42.406%, Val F1:80.000%, Val Loss: 57.583, Val Acc: 66.667%
Epoch 100: Train F1:0.979, Train Loss:0.077, Train Acc:96.520%, Val F1:90.184%, Val Loss: 0.460, Val Acc: 85.586%
Epoch 200: Train F1:0.983, Train Loss:0.063, Train Acc:97.117%, Val F1:91.589%, Val Loss: 0.383, Val Acc: 87.838%
Epoch 300: Train F1:0.989, Train Loss:0.036, Train Acc:98.235%, Val F1:88.358%, Val Loss: 0.654, Val Acc: 82.432%
Epoch: 399, Batch: 0, Loss: 0.152184456586837775
Test dataset metrics: F1: 0.907, Loss: 0.338, Accuracy: 86.486%
--------------------------------------------
| Model number 22: L6.BS10

| Model # | # of hidden layers | # of nodes                            | batch size | epochs | learning rate | train_loss | test_loss | train_acc | test_acc | train_F1 | test_F1 |
|-:-:-----|-:-:----------------|-:-:-----------------------------------|-:-:--------|-:-:----|-:-:-----------|-:-:--------|-:-:-------|-:-:-------|-:-:------|-:-:------|-:-:-----|
| 1       | 3                  | 1024, 512, 3                          | 64         | 50     | 0.01          | 0.081      | 0.914     | 97.291%   | 84.234%  | 0.980    | 0.896   |
| 2       | 3                  | 1024, 512, 3                          | 32         | 50     | 0.01          | 0.637      | 0.637     | 66.667%   | 66.667%  | 0.797    | 0.798   |
| 3       | 3                  | 1024, 512, 3                          | 64         | 50     | 0.1           | 0.645      | 0.621     | 66.667%   | 66.667%  | 0.799    | 0.812   |
| 4       | 3                  | 1024, 512, 3                          | 64         | 50     | 0.001         | 0.084      | 0.472     | 97.241%   | 88.288%  | 0.979    | 0.914   |
| 5       | 3                  | 1024, 512, 3                          | 64         | 50     | 0.0001        | 0.137      | 1.044     | 96.172%   | 81.532%  | 0.970    | 0.881   |
| 6       | 3                  | 1024, 512, 3                          | 64         | 100    | 0.01          | 0.061      | 0.728     | 98.011%   | 83.784%  | 0.985    | 0.895   |
| 7       | 3                  | 1024, 512, 3                          | 64         | 200    | 0.01          | 0.026      | 0.820     | 99.304%   | 81.982%  | 0.995    | 0.894   |
| 8       | 3                  | 1024, 512, 3                          | 1000       | 200    | 0.01          | 0.041      | 0.864     | 98.260%   | 84.685%  | 0.989    | 0.896   |
| 9       | 3                  | 1024, 512, 3                          | 1000       | 200    | 0.001         | 0.051      | 0.714     | 97.962%   | 83.784%  | 0.988    | 0.891   |
| 10      | 3                  | 1024, 512, 3                          | 1000       | 400    | 0.01          | 0.034      | 1.139     | 98.509%   | 76.577%  | 0.991    | 0.851   |
| 11      | 3                  | 1024, 512, 3                          | 1000       | 400    | 0.001         | 0.037      | 1.242     | 98.136%   | 73.874%  | 0.989    | 0.836   |
| 12      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 50     | 0.01          | 0.097      | 0.811     | 96.868%   | 81.081%  | 0.976    | 0.875   |
| 13      | 6                  | 1024, 512, 256, 128, 64, 3            | 32         | 50     | 0.01          | 0.637      | 0.638     | 66.667%   | 66.667%  | 0.798    | 0.799   |
| 14      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 50     | 0.1           | 0.640      | 0.652     | 66.667%   | 66.667%  | 0.799    | 0.793   |
| 15      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 50     | 0.001         | 0.056      | 3.375     | 98.111%   | 82.432%  | 0.985    | 0.887   |
| 16      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 50     | 0.0001        | 0.034      | 1.131     | 99.130%   | 81.982%  | 0.993    | 0.880   |
| 17      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 100    | 0.01          | 0.637      | 0.634     | 66.667%   | 66.667%  | 0.799    | 0.803   |
| 18      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 200    | 0.01          | 0.637      | 0.620     | 66.667%   | 66.667%  | 0.799    | 0.812   |
| 19      | 6                  | 1024, 512, 256, 128, 64, 3            | 1000       | 200    | 0.01          | 0.074      | 1.556     | 96.893%   | 74.324%  | 0.981    | 0.839   |
| 20      | 6                  | 1024, 512, 256, 128, 64, 3            | 1000       | 200    | 0.001         | 0.086      | 1.467     | 96.520%   | 72.072%  | 0.978    | 0.827   |
| 21      | 6                  | 1024, 512, 256, 128, 64, 3            | 1000       | 400    | 0.01          | 0.168      | 0.338     | 93.860%   | 86.486%  | 0.958    | 0.907   |
| 22      | 6                  | 1024, 512, 256, 128, 64, 3            | 1000       | 400    | 0.001         | 0.029      | 0.877     | 98.732%   | 86.486%  | 0.992    | 0.907   |
| 23      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 50     | 0.01          | 0.087      | 1.286     | 96.992%   | 80.631%  | 0.977    | 0.864   |
| 24      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 32         | 50     | 0.01          | 0.637      | 0.637     | 66.667%   | 66.667%  | 0.797    | 0.798   |
| 25      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 50     | 0.1           | 0.640      | 0.634     | 66.667%   | 66.667%  | 0.799    | 0.807   |
| 26      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 50     | 0.001         | 0.098      | 1.191     | 96.445%   | 74.324%  | 0.973    | 0.834   |
| 27      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 50     | 0.0001        | 0.050      | 0.477     | 98.732%   | 87.838%  | 0.990    | 0.914   |
| 28      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 100    | 0.01          | 0.637      | 0.637     | 66.667%   | 66.667%  | 0.799    | 0.799   |
| 29      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 200    | 0.01          | 0.637      | 0.640     | 66.667%   | 66.667%  | 0.799    | 0.796   |
| 30      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 1000       | 200    | 0.01          | 0.080      | 0.572     | 97.042%   | 78.378%  | 0.977    | 0.860   |
| 31      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 1000       | 200    | 0.001         | 0.068      | 0.889     | 97.166%   | 77.477%  | 0.983    | 0.855   |
| 32      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 1000       | 400    | 0.01          | 0.639      | 0.637     | 66.667%   | 66.667%  | 0.798    | 0.800   |
| 33      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 1000       | 400    | 0.001         | 0.626      | 0.637     | 66.667%   | 66.667%  | 0.810    | 0.800   |


In [31]:
# parameters according to paper
n_classes, n_layers, batch_size, learning_rate, n_epochs = 3, 3, 64, 0.001, 50
save_plot_fname = "L{}.BS{}.LR{}.EP{}".format(n_layers, batch_size, learning_rate, n_epochs)
title = "| Model: {} |".format(save_plot_fname)
print("-"*len(title))
print(title)
print("-"*len(title))

train_dataset = train_dataset_original.shuffle(buffer_size=50).batch(batch_size)
validate_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
test_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)

x_classifier, tl, vl, ta, va, tf1, vf1 = train(train_dataset, validate_dataset, learning_rate, batch_size, n_epochs, n_layers, n_classes, save_plot_fname)
tstf1, tstl, tsta = test(x_classifier, test_dataset, save_plot_fname)


-------------------------------
| Model: L3.BS64.LR0.001.EP50 |
-------------------------------
Epoch 000: Train F1:nan, Train Loss:8.518, Train Acc:78.871%, Val F1:90.762%, Val Loss: 0.415, Val Acc: 86.937%
Epoch: 49, Batch: 60, Loss: 0.097920916974544534
Test dataset metrics: F1: 0.941, Loss: 0.321, Accuracy: 91.441%


In [None]:
'''
----------------------------------
| Model: L3.BS1000.LR0.001.EP400 |
----------------------------------
Epoch 000: Train F1:0.635, Train Loss:59.807, Train Acc:60.477%, Val F1:48.241%, Val Loss: 3.034, Val Acc: 53.604%
Epoch 100: Train F1:0.977, Train Loss:0.086, Train Acc:96.172%, Val F1:86.726%, Val Loss: 0.739, Val Acc: 79.730%
Epoch 200: Train F1:0.988, Train Loss:0.041, Train Acc:98.036%, Val F1:89.634%, Val Loss: 0.605, Val Acc: 84.685%
Epoch 300: Train F1:0.984, Train Loss:0.070, Train Acc:97.315%, Val F1:83.616%, Val Loss: 1.159, Val Acc: 73.874%
Epoch: 399, Batch: 0, Loss: 0.167199090123176575
Test dataset metrics: F1: 0.902, Loss: 0.754, Accuracy: 85.586%
'''

In [24]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))