In [26]:
import os
import tensorflow as tf
import tensorflow.contrib.eager as tfe
from matplotlib import pyplot as plt
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import GlobalAveragePooling2D
import numpy as np
import itertools
from sklearn.metrics import confusion_matrix
import cv2
import keras.backend as K

np.random.seed(42)
tf.enable_eager_execution()
save_path = '/home/final/data/cache/{}.bottle_necks.labels.paths.npz'
plot_path = '/home/final/data/plots/multi_classifier'

device = "gpu:0" if tfe.num_gpus() else "cpu:0"

### Setup Xception Classifier Layers

In [27]:
class XceptionClassifier(tf.keras.Model):
    def __init__(self, n_classes, n_layers):
        self.n_layers = n_layers
        self.n_classes = n_classes
        
        super(XceptionClassifier, self).__init__()
#         self.xception_layers = Xception(include_top=False, weights='imagenet', input_shape=(200,200,3))
#         self.pooling_layer = GlobalAveragePooling2D(data_format='channels_last')
        if n_layers  == 3:
            self.dense_layer1 = tf.keras.layers.Dense(units=1024, activation='relu')
            self.dense_layer2 = tf.keras.layers.Dense(units=512, activation='relu')
            self.dense_layer3 = tf.keras.layers.Dense(units=self.n_classes)
        if n_layers  == 6:
            self.dense_layer1 = tf.keras.layers.Dense(units=1024, activation='relu')
            self.dense_layer2 = tf.keras.layers.Dense(units=512, activation='relu')
            self.dense_layer3 = tf.keras.layers.Dense(units=256, activation='relu')
            self.dense_layer4 = tf.keras.layers.Dense(units=128, activation='relu')
            self.dense_layer5 = tf.keras.layers.Dense(units=64, activation='relu')
            self.dense_layer6 = tf.keras.layers.Dense(units=self.n_classes)
        if n_layers  == 9:
            self.dense_layer1 = tf.keras.layers.Dense(units=1024, activation='relu')
            self.dense_layer2 = tf.keras.layers.Dense(units=512, activation='relu')
            self.dense_layer3 = tf.keras.layers.Dense(units=256, activation='relu')
            self.dense_layer4 = tf.keras.layers.Dense(units=128, activation='relu')
            self.dense_layer5 = tf.keras.layers.Dense(units=64, activation='relu')
            self.dense_layer6 = tf.keras.layers.Dense(units=32, activation='relu')
            self.dense_layer7 = tf.keras.layers.Dense(units=16, activation='relu')
            self.dense_layer8 = tf.keras.layers.Dense(units=8, activation='relu')
            self.dense_layer9 = tf.keras.layers.Dense(units=self.n_classes)
            
    def call(self, inputs):
#         xception = self.xception_layers(inputs)
#         pooling = self.pooling_layer(xception)
        if self.n_layers == 3:
            result = self.dense_layer1(inputs)
            result = self.dense_layer2(result)
            result = self.dense_layer3(result)
        if self.n_layers == 6:
            result = self.dense_layer1(inputs)
            result = self.dense_layer2(result)
            result = self.dense_layer3(result)
            result = self.dense_layer4(result)
            result = self.dense_layer5(result)
            result = self.dense_layer6(result)
        if self.n_layers == 9:
            result = self.dense_layer1(inputs)
            result = self.dense_layer2(result)
            result = self.dense_layer3(result)
            result = self.dense_layer4(result)
            result = self.dense_layer5(result)
            result = self.dense_layer6(result)
            result = self.dense_layer7(result)
            result = self.dense_layer8(result)
            result = self.dense_layer9(result)

        return result

In [28]:
def calculate_loss(classifier, images, labels):
    logits = classifier(images)
    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))

def calculate_f1(classifier, images, labels):
    logits = tf.argmax(classifier(images), axis=1)
    TP = tf.count_nonzero(logits * labels)
    TN = tf.count_nonzero((logits - 1) * (labels - 1))
    FP = tf.count_nonzero(logits * (labels - 1))
    FN = tf.count_nonzero((logits - 1) * labels)

    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * precision * recall / (precision + recall)

    return f1

def train(train_dataset, valid_dataset,
          learning_rate, batch_size, n_epochs, n_layers, n_classes,
          save_plot_fname,
          plot_graphs=False, save_graphs=True
         ):
    
    def _plot_loss(train, val, plot, save):
        plt.figure(figsize=(9,6))
        plt.plot(train, label='Train Loss')
        plt.plot(val, label='Validation Loss')
        plt.legend()
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Loss History')
        if save: 
            plt.savefig("{}/{}_{}.png".format(plot_path, save_plot_fname, "loss"), bbox_inches='tight')
            plt.close()
        if plot: 
            plt.show()
            plt.close()
    
    def _plot_acc(train, val, plot, save):
        plt.figure(figsize=(9,6))
        plt.plot(train, label='Train Accuracy')
        plt.plot(val, label='Validation Accuracy')
        plt.legend()
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.title('Accuracy History')
        if save: 
            plt.savefig("{}/{}_{}.png".format(plot_path, save_plot_fname, "accr"), bbox_inches='tight')
            plt.close()
        if plot: 
            plt.show()
            plt.close()
    
    def _plot_f1(train, val, plot, save):
        plt.figure(figsize=(9,6))
        plt.plot(train, label='Train F1')
        plt.plot(val, label='Validation F1')
        plt.legend()
        plt.xlabel('Epochs')
        plt.ylabel('F1 Score')
        plt.title('F1 Score History')
        if save: 
            plt.savefig("{}/{}_{}.png".format(plot_path, save_plot_fname, "f1sc"), bbox_inches='tight')
            plt.close()
        if plot: 
            plt.show()
            plt.close()
    
    
    x_classifier = XceptionClassifier(n_classes=n_classes, n_layers=n_layers)
    optimizer = tf.train.AdamOptimizer(learning_rate) 

    # Performance Metrics
    # - F1 Score
    train_F1_history = []
    val_F1_history = []
    # - Loss
    train_loss_history = []
    val_loss_history = []
    # - Accuracy
    train_acc_history = []
    val_acc_history = []
    
    with tf.device(device):
        for epoch in range(n_epochs):
            
            epoch_loss_avg = tfe.metrics.Mean()
            epoch_acc = tfe.metrics.Accuracy()
            epoch_f1 = tfe.metrics.Mean()
            for batch, (tr_img, tr_lbl) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    # Compute logits, logits are the domain/input to softmax
                    tr_loss = calculate_loss(x_classifier, tr_img, tr_lbl)
                
                # Compute gradient and apply gradients
                grads = tape.gradient(tr_loss, x_classifier.variables)
                optimizer.apply_gradients(zip(grads, x_classifier.variables),
                                          global_step=tf.train.get_or_create_global_step())
                
                # Add current batch metrics
                epoch_loss_avg(tr_loss) # calculate avg epoch loss 
                epoch_acc(tf.argmax(x_classifier(tr_img), axis=1), tr_lbl)
                epoch_f1(calculate_f1(x_classifier, tr_img, tr_lbl)) # calculate avg epoch f1 score 
                
                if batch % 10 == 0:
                    print('\rEpoch: {}, Batch: {}, Loss: {}'.format(epoch, batch, tr_loss.numpy()), end='')
            # Add train loss, accuracy, and f1 for epoch
            train_loss_history.append(epoch_loss_avg.result())
            train_acc_history.append(epoch_acc.result())
            train_F1_history.append(epoch_f1.result())

            
            # Run validation loop at end of each epoch
            val_loss_avg = tfe.metrics.Mean()
            val_acc = tfe.metrics.Accuracy()
            val_f1 = tfe.metrics.Mean()
            with tf.device(device):
                for batch, (val_img, val_lbl) in enumerate(valid_dataset):
                    # Compute validation metrics
                    val_loss = calculate_loss(x_classifier, val_img, val_lbl)
                    val_loss_avg(val_loss) # Add current batch loss
                    val_acc(tf.argmax(x_classifier(val_img), axis=1), val_lbl)
                    val_f1(calculate_f1(x_classifier, val_img, val_lbl))
                
                val_loss_history.append(val_loss_avg.result())
                val_acc_history.append(val_acc.result())
                val_F1_history.append(val_f1.result())
            
            # Print progress of epochs
            if epoch % 100 == 0:
                print("\rEpoch {:03d}: Train F1:{:.3f}, Train Loss:{:.3f}, Train Acc:{:.3%}, Val F1:{:.3%}, Val Loss: {:.3f}, Val Acc: {:.3%}".format(
                    epoch, epoch_f1.result(), epoch_loss_avg.result(), epoch_acc.result(), 
                    val_f1.result(), val_loss_avg.result(), val_acc.result()))
    
    _plot_loss(train_loss_history, val_loss_history, plot_graphs, save_graphs)
    _plot_acc(train_acc_history, val_acc_history, plot_graphs, save_graphs)
    _plot_f1(train_F1_history, val_F1_history, plot_graphs, save_graphs)
    
    return x_classifier, train_loss_history, val_loss_history, train_acc_history, val_acc_history, train_F1_history, val_F1_history

def test(x_classifier, test_dataset, save_plot_fname):
    
    def _plot_confusion_matrix(cm, classes,
                              normalize=False,
                              title='Confusion matrix',
                              cmap=plt.cm.Blues,
                              show_graphs=False, save_graphs=True):
        """
        This function prints and plots the confusion matrix.
        Normalization can be applied by setting `normalize=True`.
        """
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
#         print(cm)

        plt.figure(figsize=(9,6))
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)
        plt.yticks(tick_marks, classes)

        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.tight_layout()
        if save_graphs: 
            plt.savefig("{}/{}_{}.png".format(plot_path, save_plot_fname, "conf_norm" if normalize else "conf"), bbox_inches='tight')
            plt.close()
        if show_graphs: 
            plt.show()
            plt.close()
    
    # Performance metrics
    test_loss_avg = tfe.metrics.Mean() # loss
    test_acc = tfe.metrics.Accuracy() # accuracy
    test_f1 = tfe.metrics.Mean() # f1
    predictions = tf.convert_to_tensor([], dtype=tf.int64)
    correct = tf.convert_to_tensor([], dtype=tf.int64)
    
    # Run testing loop batch-wise
    with tf.device(device):
        for batch, (tst_img, tst_lbl) in enumerate(test_dataset):
            tst_loss = calculate_loss(x_classifier, tst_img, tst_lbl)
            test_loss_avg(tst_loss)
            test_acc(tf.argmax(x_classifier(tst_img), axis=1), tst_lbl)
            test_f1(calculate_f1(x_classifier, tst_img, tst_lbl))
            predictions = tf.concat([predictions, tf.argmax(x_classifier(tst_img), axis=1)], 0)
            correct = tf.concat([correct, tst_lbl], 0)
    
    cnf_matrix = confusion_matrix(correct, predictions)
    class_names = ['normal', 'bacterial', 'viral']
    # Plot non-normalized confusion matrix
    _plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix, without normalization')
    # Plot normalized confusion matrix
    _plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,show_graphs=True,
                      title='Normalized confusion matrix')
    
    print("\nTest dataset metrics: F1: {:.3f}, Loss: {:.3f}, Accuracy: {:.3%}".format(
        test_f1.result(), test_loss_avg.result(), test_acc.result()))
    return test_f1.result(), test_loss_avg.result(), test_acc.result()

### Load bottle neck layers and create Dataset

In [29]:
def load_data(path, dataset_type='train'):
    
    data = np.load(path.format(dataset_type))
    data_bottle_necks, data_labels, data_file_paths = data['bottle_necks'],  data['labels'], data['paths']
    
    return data_bottle_necks, data_labels, data_file_paths

In [30]:
train_bottle_necks, train_labels, train_file_paths = load_data(save_path, 'train')
validate_bottle_necks, validate_labels, validate_file_paths = load_data(save_path, 'validate')
test_bottle_necks, test_labels, test_file_paths = load_data(save_path, 'test')
n = len(train_bottle_necks)

train_images_dataset = tf.data.Dataset.from_tensor_slices(train_bottle_necks)
train_labels_dataset = tf.data.Dataset.from_tensor_slices(train_labels)
train_dataset_original = tf.data.Dataset.zip((train_images_dataset, train_labels_dataset))
validate_images_dataset = tf.data.Dataset.from_tensor_slices(validate_bottle_necks)
validate_labels_dataset = tf.data.Dataset.from_tensor_slices(validate_labels)
validate_dataset_original = tf.data.Dataset.zip((validate_images_dataset, validate_labels_dataset))
test_images_dataset = tf.data.Dataset.from_tensor_slices(test_bottle_necks)
test_labels_dataset = tf.data.Dataset.from_tensor_slices(test_labels)
test_dataset_original = tf.data.Dataset.zip((test_images_dataset, test_labels_dataset))

| Model # | # of hidden layers | # of nodes                            | batch size | epochs | learning rate | train_loss | test_loss | train_acc | test_acc | train_F1 | test_F1 |
|-:-:-----|-:-:----------------|-:-:-----------------------------------|-:-:--------|-:-:----|-:-:-----------|-:-:--------|-:-:-------|-:-:-------|-:-:------|-:-:------|-:-:-----|
| 1       | 3                  | 1024, 512, 3                          | 64         | 50     | 0.01          | 0.599      | 0.832     | 68.854%   | 56.757%  | 0.652    | 0.665   |
| 2       | 3                  | 1024, 512, 3                          | 32         | 50     | 0.01          | 1.100      | 1.099     | 32.960%   | 33.333%  | nan      | 0.666   |
| 3       | 3                  | 1024, 512, 3                          | 64         | 50     | 0.1           | 1.103      | 1.106     | 33.706%   | 33.333%  | nan      | 0.658   |
| 4       | 3                  | 1024, 512, 3                          | 64         | 50     | 0.001         | 0.418      | 1.197     | 81.854%   | 68.018%  | 0.665    | 0.587   |
| 5       | 3                  | 1024, 512, 3                          | 64         | 50     | 0.0001        | 0.269      | 1.216     | 90.455%   | 69.820%  | 0.665    | 0.655   |
| 6       | 3                  | 1024, 512, 3                          | 64         | 100    | 0.01          | 0.560      | 1.310     | 70.296%   | 49.550%  | 0.704    | 0.681   |
| 7       | 3                  | 1024, 512, 3                          | 64         | 200    | 0.01          | 1.099      | 1.099     | 32.911%   | 33.333%  | nan      | 0.666   |
| 8       | 3                  | 1024, 512, 3                          | 1000       | 200    | 0.01          | 1.100      | 1.099     | 33.333%   | 33.333%  | 0.669    | 0.667   |
| 9       | 3                  | 1024, 512, 3                          | 1000       | 200    | 0.001         | 0.413      | 0.978     | 80.661%   | 70.721%  | 0.643    | 0.627   |
| 10      | 3                  | 1024, 512, 3                          | 1000       | 400    | 0.01          | 0.432      | 1.144     | 79.021%   | 65.315%  | 0.689    | 0.577   |
| 11      | 3                  | 1024, 512, 3                          | 1000       | 400    | 0.001         | 0.193      | 1.232     | 91.897%   | 83.784%  | 0.670    | 0.597   |
| 12      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 50     | 0.01          | 0.522      | 1.142     | 67.810%   | 54.505%  | 0.716    | 0.703   |
| 13      | 6                  | 1024, 512, 256, 128, 64, 3            | 32         | 50     | 0.01          | 0.535      | 1.107     | 73.080%   | 61.712%  | 0.675    | 0.614   |
| 14      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 50     | 0.1           | 1.102      | 1.103     | 33.656%   | 33.333%  | nan      | 0.665   |
| 15      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 50     | 0.001         | 0.407      | 0.971     | 83.246%   | 72.523%  | 0.665    | 0.636   |
| 16      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 50     | 0.0001        | 0.276      | 1.079     | 90.157%   | 74.775%  | 0.668    | 0.624   |
| 17      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 100    | 0.01          | 0.414      | 1.008     | 81.979%   | 77.027%  | 0.662    | 0.619   |
| 18      | 6                  | 1024, 512, 256, 128, 64, 3            | 64         | 200    | 0.01          | 0.373      | 1.269     | 83.669%   | 68.468%  | 0.670    | 0.598   |
| 19      | 6                  | 1024, 512, 256, 128, 64, 3            | 1000       | 200    | 0.01          | 0.469      | 1.045     | 79.443%   | 68.468%  | 0.646    | 0.612   |
| 20      | 6                  | 1024, 512, 256, 128, 64, 3            | 1000       | 200    | 0.001         | 0.369      | 1.088     | 82.351%   | 68.468%  | 0.661    | 0.598   |
| 21      | 6                  | 1024, 512, 256, 128, 64, 3            | 1000       | 400    | 0.01          | 0.333      | 1.004     | 84.439%   | 67.117%  | 0.690    | 0.641   |
| 22      | 6                  | 1024, 512, 256, 128, 64, 3            | 1000       | 400    | 0.001         | 0.411      | 1.493     | 80.686%   | 56.306%  | 0.648    | 0.628   |
| 23      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 50     | 0.01          | 1.099      | 1.099     | 32.563%   | 33.333%  | nan      | 0.665   |
| 24      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 32         | 50     | 0.01          | 1.099      | 1.099     | 33.209%   | 33.333%  | nan      | 0.499   |
| 25      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 50     | 0.1           | 1.103      | 1.107     | 33.905%   | 33.333%  | nan      | 0.494   |
| 26      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 50     | 0.001         | 1.099      | 1.099     | 32.314%   | 33.333%  | nan      | 0.654   |
| 27      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 50     | 0.0001        | 0.376      | 0.907     | 84.638%   | 74.324%  | 0.662    | 0.601   |
| 28      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 100    | 0.01          | 1.099      | 1.099     | 32.438%   | 33.333%  | nan      | 0.663   |
| 29      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 64         | 200    | 0.01          | 1.099      | 1.099     | 32.637%   | 33.333%  | nan      | 0.670   |
| 30      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 1000       | 200    | 0.01          | 0.574      | 1.344     | 66.841%   | 40.090%  | 0.745    | 0.687   |
| 31      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 1000       | 200    | 0.001         | 1.099      | 1.099     | 33.333%   | 33.333%  | 0.664    | 0.667   |
| 32      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 1000       | 400    | 0.01          | 0.466      | 0.792     | 78.673%   | 76.577%  | 0.681    | 0.612   |
| 33      | 9                  | 1024, 512, 256, 128, 64, 32, 16, 8, 3 | 1000       | 400    | 0.001         | 1.098      | 1.099     | 33.333%   | 33.333%  | 0.674    | 0.667   |


In [31]:
shapes = [None, "1024, 512, 3", "1024, 512, 256, 128, 64, 3", "1024, 512, 256, 128, 64, 32, 16, 8, 3"]
# Parameters to try out
list_n_layers = [3] * 11 + [6] * 11 + [9] * 11
list_batch_size = ([64, 32] + [64] * 5 + [1000] * 4) * 3
list_n_epochs = [50, 50, 50, 50, 50, 100, 200, 200, 200, 400, 400] * 3
list_learning_rate = [0.01, 0.01, 0.1, 0.001, 0.0001, 0.01, 0.01, 0.01, 0.001, 0.01, 0.001] * 3
n_classes = 3

In [32]:
markdown = """
| Model # | # of hidden layers |               # of nodes              | batch size | epochs | learning rate | train_loss | val_loss | test_loss | train_acc | val_acc | test_acc | train_F1 | val_F1 | test_F1 | 
|:-------:|:------------------:|:-------------------------------------:|:----------:|:------:|:-------------:|:----------:|:--------:|:---------:|:---------:|:-------:|:--------:|:--------:|:------:|:-------:|
"""
for i, (n_layers, batch_size, n_epochs, learning_rate) in \
    enumerate(zip(list_n_layers, list_batch_size, list_n_epochs, list_learning_rate), 1):

    save_plot_fname = "L{}.BS{}.LR{}.EP{}".format(n_layers, batch_size, learning_rate, n_epochs)
    title = "| Model number {:02d}: {} |".format(i, save_plot_fname)
    print("-"*len(title))
    print(title)
    print("-"*len(title))
    
    train_dataset = train_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    validate_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    test_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    
    x_classifier, tl, vl, ta, va, tf1, vf1 = train(train_dataset, validate_dataset, learning_rate, batch_size, n_epochs, n_layers, n_classes, save_plot_fname)
    tstf1, tstl, tsta = test(x_classifier, test_dataset, save_plot_fname)
    markdown += "\n|{}|{}|{}|{}|{}|{}|{:.3f}|{:.3f}|{:.3f}|{:.3%}|{:.3%}|{:.3%}|{:.3f}|{:.3f}|{:.3f}|".format(str(i), n_layers, shapes[n_layers//3], batch_size, n_epochs, learning_rate, tl[-1].numpy(), vl[-1].numpy(), tstl.numpy(), ta[-1].numpy(), va[-1].numpy(), tsta.numpy(), tf1[-1].numpy(), vf1[-1].numpy(), tstf1.numpy())
    
print(markdown)

----------------------------------------
| Model number 01: L3.BS64.LR0.01.EP50 |
----------------------------------------
Epoch 000: Train F1:nan, Train Loss:120.052, Train Acc:40.418%, Val F1:65.376%, Val Loss: 5.430, Val Acc: 29.279%
Epoch: 49, Batch: 60, Loss: 0.64020133018493654
Test dataset metrics: F1: 0.665, Loss: 0.832, Accuracy: 56.757%
----------------------------------------
| Model number 02: L3.BS32.LR0.01.EP50 |
----------------------------------------
Epoch 000: Train F1:nan, Train Loss:51.985, Train Acc:33.731%, Val F1:49.823%, Val Loss: 1.101, Val Acc: 33.333%
Epoch: 49, Batch: 120, Loss: 1.0965318679809572
Test dataset metrics: F1: 0.666, Loss: 1.099, Accuracy: 33.333%
---------------------------------------
| Model number 03: L3.BS64.LR0.1.EP50 |
---------------------------------------
Epoch 000: Train F1:nan, Train Loss:13887.011, Train Acc:38.255%, Val F1:68.914%, Val Loss: 195.944, Val Acc: 48.649%
Epoch: 49, Batch: 60, Loss: 1.0912874937057495
Test dataset metri

Epoch 100: Train F1:0.664, Train Loss:0.436, Train Acc:78.026%, Val F1:59.394%, Val Loss: 0.898, Val Acc: 71.622%
Epoch: 199, Batch: 0, Loss: 0.41869053244590764
Test dataset metrics: F1: 0.598, Loss: 1.088, Accuracy: 68.468%
-------------------------------------------
| Model number 21: L6.BS1000.LR0.01.EP400 |
-------------------------------------------
Epoch 000: Train F1:nan, Train Loss:1308.998, Train Acc:32.438%, Val F1:66.667%, Val Loss: 10.573, Val Acc: 33.333%
Epoch 100: Train F1:0.637, Train Loss:0.606, Train Acc:74.024%, Val F1:54.312%, Val Loss: 1.170, Val Acc: 52.703%
Epoch 200: Train F1:0.663, Train Loss:0.400, Train Acc:80.413%, Val F1:60.123%, Val Loss: 0.919, Val Acc: 73.423%
Epoch 300: Train F1:0.649, Train Loss:0.385, Train Acc:81.009%, Val F1:62.288%, Val Loss: 0.643, Val Acc: 81.081%
Epoch: 399, Batch: 0, Loss: 0.39493727684020996
Test dataset metrics: F1: 0.641, Loss: 1.004, Accuracy: 67.117%
--------------------------------------------
| Model number 22: L6.BS100

Everything below has been incorporated into the main loop above


### Further Training

In [23]:
n_classes, n_layers, batch_size, learning_rate, n_epochs = 3, 3, 1000, 0.001, 400
save_plot_fname = "L{}.BS{}.LR{}.EP{}".format(n_layers, batch_size, learning_rate, n_epochs)
title = "| Model: {} |".format(save_plot_fname)
print("-"*len(title))
print(title)
print("-"*len(title))

train_dataset = train_dataset_original.shuffle(buffer_size=50).batch(batch_size)
validate_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
test_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)

x_classifier, tl, vl, ta, va, tf1, vf1 = train(train_dataset, validate_dataset, learning_rate, batch_size, n_epochs, n_layers, n_classes, save_plot_fname)
test(x_classifier, test_dataset, save_plot_fname)

----------------------------------
| Model: L6.BS1000.LR0.001.EP400 |
----------------------------------
Epoch 000: Train F1:nan, Train Loss:18.013, Train Acc:33.582%, Val F1:66.667%, Val Loss: 15.058, Val Acc: 33.333%
Epoch 100: Train F1:0.658, Train Loss:0.585, Train Acc:74.298%, Val F1:60.206%, Val Loss: 0.753, Val Acc: 71.622%
Epoch 200: Train F1:0.662, Train Loss:0.454, Train Acc:79.468%, Val F1:58.383%, Val Loss: 1.141, Val Acc: 66.216%
Epoch 300: Train F1:0.648, Train Loss:0.327, Train Acc:84.017%, Val F1:56.061%, Val Loss: 1.404, Val Acc: 59.459%
Epoch: 399, Batch: 0, Loss: 0.48889654874801636
Test dataset metrics: F1: 0.614, Loss: 0.659, Accuracy: 77.477%


In [15]:
# Parameters to try out
list_n_layers = [3] * 4 + [6] * 4
list_batch_size = [1000] * 8
list_n_epochs = [200, 200, 400, 400] * 2
list_learning_rate = [0.01, 0.001] * 4
n_classes = 3

In [16]:
for i, (n_layers, batch_size, n_epochs, learning_rate) in \
    enumerate(zip(list_n_layers, list_batch_size, list_n_epochs, list_learning_rate), 1):

    save_plot_fname = "L{}.BS{}.LR{}.EP{}".format(n_layers, batch_size, learning_rate, n_epochs)
    title = "| Model number {:02d}: {} |".format(i, save_plot_fname)
    print("-"*len(title))
    print(title)
    print("-"*len(title))
    
    train_dataset = train_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    validate_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    test_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)

    x_classifier, tl, vl, ta, va, tf1, vf1 = train(train_dataset, validate_dataset, learning_rate, batch_size, n_epochs, n_layers, n_classes, save_plot_fname)
    test(x_classifier, test_dataset, save_plot_fname)

-----------------------------------------
| Model number 01: L3.BS64.LR0.01.EP200 |
-----------------------------------------
Instructions for updating:
Colocations handled automatically by placer.
Epoch 000: Train F1:nan, Train Loss:59.788, Train Acc:51.479%, Val F1:51.945%, Val Loss: 1.614, Val Acc: 54.054%
Epoch 100: Train F1:0.708, Train Loss:0.528, Train Acc:65.722%, Val F1:72.442%, Val Loss: 1.117, Val Acc: 52.703%
Epoch: 199, Batch: 60, Loss: 0.65268450975418094
Test dataset metrics: F1: 0.722, Loss: 0.988, Accuracy: 53.153%
------------------------------------------
| Model number 02: L3.BS64.LR0.001.EP200 |
------------------------------------------
Epoch 000: Train F1:nan, Train Loss:9.216, Train Acc:58.886%, Val F1:62.853%, Val Loss: 0.959, Val Acc: 67.568%
Epoch 100: Train F1:0.666, Train Loss:0.218, Train Acc:92.468%, Val F1:62.995%, Val Loss: 1.305, Val Acc: 72.973%
Epoch: 199, Batch: 60, Loss: 0.119583688676357276
Test dataset metrics: F1: 0.638, Loss: 2.745, Accuracy: 6

In [18]:
list_n_layers = [6,6]
list_batch_size = [64,64]
list_learning_rate = [0.01,0.01]
list_n_epochs = [400, 800]
for i, (n_layers, batch_size, n_epochs, learning_rate) in \
    enumerate(zip(list_n_layers, list_batch_size, list_n_epochs, list_learning_rate), 1):

    save_plot_fname = "L{}.BS{}.LR{}.EP{}".format(n_layers, batch_size, learning_rate, n_epochs)
    title = "| Model number {:02d}: {} |".format(i, save_plot_fname)
    print("-"*len(title))
    print(title)
    print("-"*len(title))
    
    train_dataset = train_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    validate_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
    test_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)

    x_classifier, tl, vl, ta, va, tf1, vf1 = train(train_dataset, validate_dataset, learning_rate, batch_size, n_epochs, n_layers, n_classes, save_plot_fname)
    test(x_classifier, test_dataset, save_plot_fname)

-----------------------------------------
| Model number 01: L6.BS64.LR0.01.EP400 |
-----------------------------------------
Epoch 000: Train F1:nan, Train Loss:32.551, Train Acc:49.217%, Val F1:55.509%, Val Loss: 0.879, Val Acc: 62.613%
Epoch 100: Train F1:0.671, Train Loss:0.378, Train Acc:83.296%, Val F1:61.397%, Val Loss: 0.934, Val Acc: 71.622%
Epoch 200: Train F1:0.669, Train Loss:0.402, Train Acc:83.495%, Val F1:61.476%, Val Loss: 1.045, Val Acc: 69.820%
Epoch 300: Train F1:0.661, Train Loss:0.404, Train Acc:82.053%, Val F1:56.947%, Val Loss: 1.225, Val Acc: 60.360%
Epoch: 399, Batch: 60, Loss: 0.39327919483184814
Test dataset metrics: F1: 0.609, Loss: 1.362, Accuracy: 64.865%
-----------------------------------------
| Model number 02: L6.BS64.LR0.01.EP800 |
-----------------------------------------
Epoch 000: Train F1:nan, Train Loss:41.075, Train Acc:52.001%, Val F1:54.752%, Val Loss: 0.868, Val Acc: 53.604%
Epoch 100: Train F1:nan, Train Loss:1.100, Train Acc:33.507%, Val F

### Final Model

In [12]:
# parameters according to paper
n_classes, n_layers, batch_size, learning_rate, n_epochs = 3, 3, 1000, 0.001, 400
save_plot_fname = "L{}.BS{}.LR{}.EP{}".format(n_layers, batch_size, learning_rate, n_epochs)
title = "| Model: {} |".format(save_plot_fname)
print("-"*len(title))
print(title)
print("-"*len(title))

train_dataset = train_dataset_original.shuffle(buffer_size=50).batch(batch_size)
validate_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
test_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)

x_classifier, tl, vl, ta, va, tf1, vf1 = train(train_dataset, validate_dataset, learning_rate, batch_size, n_epochs, n_layers, n_classes, save_plot_fname)
test(x_classifier, test_dataset, save_plot_fname)

----------------------------------
| Model: L3.BS1000.LR0.001.EP400 |
----------------------------------
Epoch 000: Train F1:nan, Train Loss:100.960, Train Acc:34.178%, Val F1:nan%, Val Loss: 26.520, Val Acc: 33.333%
Epoch 100: Train F1:0.656, Train Loss:0.404, Train Acc:80.015%, Val F1:61.216%, Val Loss: 0.636, Val Acc: 78.829%
Epoch 200: Train F1:0.654, Train Loss:0.293, Train Acc:85.981%, Val F1:61.123%, Val Loss: 0.962, Val Acc: 74.324%
Epoch 300: Train F1:0.663, Train Loss:0.196, Train Acc:91.723%, Val F1:59.798%, Val Loss: 1.089, Val Acc: 69.820%
Epoch: 399, Batch: 0, Loss: 0.44639092683792114
Test dataset metrics: F1: 0.636, Loss: 0.628, Accuracy: 83.784%


----------------------------------
### Model: L3.BS1000.LR0.001.EP400 

Epoch 000: Train F1:nan, Train Loss:100.960, Train Acc:34.178%, Val F1:nan%, Val Loss: 26.520, Val Acc: 33.333%

Epoch 100: Train F1:0.656, Train Loss:0.404, Train Acc:80.015%, Val F1:61.216%, Val Loss: 0.636, Val Acc: 78.829%

Epoch 200: Train F1:0.654, Train Loss:0.293, Train Acc:85.981%, Val F1:61.123%, Val Loss: 0.962, Val Acc: 74.324%

Epoch 300: Train F1:0.663, Train Loss:0.196, Train Acc:91.723%, Val F1:59.798%, Val Loss: 1.089, Val Acc: 69.820%

Epoch: 399, Batch: 0, Loss: 0.44639092683792114

Test dataset metrics: F1: 0.636, Loss: 0.628, Accuracy: 83.784%

In [13]:
# best parameters we found
n_classes, n_layers, batch_size, learning_rate, n_epochs = 3, 6, 64, 0.01, 400
save_plot_fname = "L{}.BS{}.LR{}.EP{}".format(n_layers, batch_size, learning_rate, n_epochs)
title = "| Model: {} |".format(save_plot_fname)
print("-"*len(title))
print(title)
print("-"*len(title))

train_dataset = train_dataset_original.shuffle(buffer_size=50).batch(batch_size)
validate_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)
test_dataset = validate_dataset_original.shuffle(buffer_size=50).batch(batch_size)

x_classifier, tl, vl, ta, va, tf1, vf1 = train(train_dataset, validate_dataset, learning_rate, batch_size, n_epochs, n_layers, n_classes, save_plot_fname)
test(x_classifier, test_dataset, save_plot_fname)

-------------------------------
| Model: L6.BS64.LR0.01.EP400 |
-------------------------------
Epoch 000: Train F1:nan, Train Loss:87.474, Train Acc:48.620%, Val F1:62.282%, Val Loss: 1.005, Val Acc: 58.108%
Epoch 100: Train F1:0.697, Train Loss:0.528, Train Acc:66.791%, Val F1:72.677%, Val Loss: 1.104, Val Acc: 53.153%
Epoch 200: Train F1:nan, Train Loss:1.100, Train Acc:33.408%, Val F1:66.724%, Val Loss: 1.099, Val Acc: 33.333%
Epoch 300: Train F1:nan, Train Loss:1.099, Train Acc:32.563%, Val F1:66.244%, Val Loss: 1.099, Val Acc: 33.333%
Epoch: 399, Batch: 60, Loss: 1.0995328426361084
Test dataset metrics: F1: 0.668, Loss: 1.099, Accuracy: 33.333%


In [33]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))