In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import math
from tensorflow import keras
from keras import layers
import numpy as np

This notebook follows the usual transfer-learning and fine-tuning workflow (see https://keras.io/guides/transfer_learning/):
1. Append trainable layers to a pretrained foundation model.
2. Freeze the base model and train only the newly added layers until convergence.
3. Unfreeze the base model and train the whole model with a very small learning rate.

## Data loading and preprocessing

In [2]:
# Options shared by all three splits: labels come from the sub-directory
# names, images are resized to 224x224 and grouped into batches of 32.
_split_options = dict(labels='inferred', image_size=(224, 224), batch_size=32)

data_train = tf.keras.utils.image_dataset_from_directory("Processed_Split/train", **_split_options)
data_val = tf.keras.utils.image_dataset_from_directory("Processed_Split/val", **_split_options)
data_test = tf.keras.utils.image_dataset_from_directory("Processed_Split/test", **_split_options)

Found 45828 files belonging to 50 classes.


2023-04-01 18:13:14.991831: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-01 18:13:14.995875: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-01 18:13:14.996123: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-01 18:13:14.996653: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate

Found 9438 files belonging to 50 classes.
Found 9504 files belonging to 50 classes.


## Data preprocessing

In [3]:
def augment(image, label):
    """Apply a chain of random augmentations to an image; the label is untouched.

    The image goes through, in order: random brightness shift, random
    horizontal flip, random vertical flip, random hue shift, random contrast
    scaling and a random rotation by a multiple of 90 degrees.

    Args:
        image: Image tensor accepted by the ``tf.image`` ops used below.
            NOTE(review): ``max_delta=0.5`` for brightness suggests float
            images already scaled to [0, 1] -- confirm where this is mapped.
        label: Label associated with ``image``; returned unchanged.

    Returns:
        Tuple ``(augmented_image, label)``.
    """
    # Random brightness
    image = tf.image.random_brightness(image, max_delta=0.5)
    # Random horizontal flip
    image = tf.image.random_flip_left_right(image)
    # Random vertical flip
    image = tf.image.random_flip_up_down(image)
    # Random shifting of the hue
    image = tf.image.random_hue(image, max_delta=0.1)
    # Random contrast adjustment
    image = tf.image.random_contrast(image, lower=0.7, upper=1.3)
    # Random rotation. tf.image.rot90 expects the NUMBER of 90-degree turns,
    # not an angle, so draw k uniformly from {0, 1, 2, 3}. (Casting an angle
    # in radians to int gave k in [-6, 6] and a skewed orientation mix.)
    quarter_turns = tf.random.uniform([], minval=0, maxval=4, dtype=tf.int32)
    image = tf.image.rot90(image, k=quarter_turns)
    return image, label

# Divide the pixel values of every split by 255 (presumably mapping the
# 0-255 range onto 0-1); labels pass through unchanged.
def _divide_by_255(image, label):
    return image / 255, label

data_train = data_train.map(_divide_by_255)
data_val = data_val.map(_divide_by_255)
data_test = data_test.map(_divide_by_255)

## Model configuration

In [4]:
num_classes = 50

# Named `inputs` (not `input`) so the Python builtin `input()` is not
# shadowed for the rest of the notebook session.
inputs = layers.Input((224, 224, 3))

# ImageNet-pretrained VGG16 without its classifier head, used as a frozen
# feature extractor for the first training stage.
vgg = keras.applications.vgg16.VGG16(include_top=False, weights="imagenet", input_tensor=inputs)
vgg.trainable = False

# Global average pooling already yields a flat (batch, channels) tensor, so
# no extra Flatten layer is needed before the classifier.
gap = layers.GlobalAveragePooling2D()(vgg.output)
output = layers.Dense(num_classes, activation="softmax")(gap)
model = tf.keras.Model(vgg.input, output)

# Integer class labels -> sparse categorical cross-entropy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"])

## Model training (freeze VGG)

In [5]:
# Stage 1: train for 20 epochs, validating on the validation split after
# each epoch; the returned History object is kept in `log`.
log = model.fit(
    data_train,
    validation_data=data_val,
    epochs=20,
)

Epoch 1/20


2023-04-01 18:13:18.831452: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2023-04-01 18:13:19.613934: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-04-01 18:13:20.609991: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.46GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-04-01 18:13:20.610048: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.46GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-04-01 18:13:20.610058: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to al

   2/1433 [..............................] - ETA: 3:14 - loss: 4.3113 - accuracy: 0.0156   

2023-04-01 18:13:22.422080: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




2023-04-01 18:17:28.527056: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.25GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-04-01 18:17:28.527122: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.25GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-04-01 18:17:28.527133: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.98GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-04-01 18:17:28.527140: W tensorflow/core/common_runtime/bfc_allocato

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [6]:
# Score the stage-1 model on the test split, then persist it to disk.
model.evaluate(x=data_test)
model.save(filepath="freeze_vgg")





INFO:tensorflow:Assets written to: freeze_vgg/assets


INFO:tensorflow:Assets written to: freeze_vgg/assets


In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Reload the model saved after the frozen-backbone stage.
tf_model = tf.keras.models.load_model('freeze_vgg')

# Rebuild the test split with shuffle=False so that the predictions and the
# labels collected below are in the same order.
data_test = tf.keras.utils.image_dataset_from_directory(
    "Processed_Split/test",
    labels='inferred',
    image_size=(224, 224),
    batch_size=32,
    shuffle=False,
)
data_test = data_test.map(lambda image, label: (image / 255, label))

# Class scores -> predicted class index per sample.
y_pred = tf_model.predict(data_test)
y_hat = np.argmax(y_pred, axis=1)
# Ground-truth labels, gathered batch by batch.
y_true = np.concatenate([labels for _, labels in data_test], axis=0)

# Support-weighted averages of the per-class scores.
print("Accuracy: ", accuracy_score(y_true, y_hat))
for caption, scorer in (
    ("Precision: ", precision_score),
    ("Recall: ", recall_score),
    ("F1: ", f1_score),
):
    print(caption, scorer(y_true, y_hat, average='weighted'))

Found 9504 files belonging to 50 classes.
Accuracy:  0.8904671717171717
Precision:  0.8910358676792757
Recall:  0.8904671717171717
F1:  0.885039307059431


## Model training (unfreeze VGG)

In [7]:
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import EarlyStopping

# Stage 2: unfreeze the VGG16 backbone and fine-tune the whole network.
vgg.trainable = True

# Recompile after changing `trainable`, with a much smaller learning rate
# for fine-tuning.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"])

fine_tune_epochs = 10

# Patience must be smaller than the epoch budget, otherwise the callback can
# never trigger: with patience == epochs, early stopping was dead code.
# NOTE(review): on some Keras versions restore_best_weights only takes effect
# when training is actually stopped early -- confirm for the version in use.
early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
# Halve the learning rate after 2 stagnant epochs, i.e. before early stopping
# gives up, so the lower rate gets a chance to help.
reduce_lr = ReduceLROnPlateau('val_loss', factor=0.5, patience=2, min_lr=0.000003125)

callbacks = [early_stopping, reduce_lr]

log = model.fit(x=data_train,
          epochs=fine_tune_epochs,
          validation_data=data_val, callbacks=callbacks)


Epoch 1/10


2023-04-01 19:42:22.969455: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.19GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-04-01 19:42:22.969526: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.19GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# Score the fine-tuned model on the test split, then persist it to disk.
model.evaluate(x=data_test)
model.save(filepath='vgg')





INFO:tensorflow:Assets written to: vgg/assets


INFO:tensorflow:Assets written to: vgg/assets


## Evaluation metrics on test data

In [4]:
# Load the model stored under 'vgg' for the evaluation below.
trained_model = tf.keras.models.load_model(filepath='vgg')

In [5]:
# Load the test split without shuffling so the sample order is stable across
# iterations, then divide pixel values by 255.
data_test = tf.keras.utils.image_dataset_from_directory(
    "Processed_Split/test",
    labels='inferred',
    image_size=(224, 224),
    batch_size=32,
    shuffle=False,
)
data_test = data_test.map(lambda image, label: (image / 255, label))

Found 9504 files belonging to 50 classes.


In [6]:
# Run the loaded model over the whole test split.
y_pred = trained_model.predict(x=data_test)



In [7]:
# Predicted class = column index of the highest score in each row.
y_hat = np.argmax(y_pred, axis=1)
# Ground-truth labels, collected batch by batch from the test dataset.
y_true = np.concatenate(
    [batch_labels for _, batch_labels in data_test],
    axis=0,
)

In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Overall accuracy, followed by macro-averaged (unweighted mean over classes)
# precision, recall and F1.
print("Accuracy: ", accuracy_score(y_true, y_hat))
for caption, scorer in (
    ("Precision: ", precision_score),
    ("Recall: ", recall_score),
    ("F1: ", f1_score),
):
    print(caption, scorer(y_true, y_hat, average='macro'))

Accuracy:  0.9738005050505051
Precision:  0.9229391339597361
Recall:  0.9224361562840273
F1:  0.9204244057890988


In [10]:
import os
from sklearn.metrics import classification_report

# With labels='inferred', class indices follow the alphanumeric order of the
# class sub-directories. Keep directories only: a stray file in the split
# folder (e.g. .DS_Store) would otherwise shift or mismatch the names passed
# as target_names.
test_dir = "Processed_Split/test"
class_names = sorted(
    entry for entry in os.listdir(test_dir)
    if os.path.isdir(os.path.join(test_dir, entry))
)
print(classification_report(y_true, y_hat, digits=4, target_names=class_names))

                                    precision    recall  f1-score   support

                 Amylax_triacantha     0.7500    0.7500    0.7500         4
           Aphanizomenon_flosaquae     0.9895    0.9895    0.9895      1049
       Aphanothece_paralleliformis     1.0000    1.0000    1.0000         5
                             Beads     1.0000    0.9500    0.9744        20
                      Centrales_sp     1.0000    0.9028    0.9489        72
             Ceratoneis_closterium     1.0000    0.8750    0.9333         8
                    Chaetoceros_sp     0.9850    0.9471    0.9657       208
             Chaetoceros_sp_single     0.9688    0.9394    0.9538        33
                    Chlorococcales     0.7500    0.8000    0.7742        15
                     Chroococcales     1.0000    0.6364    0.7778        22
                 Chroococcus_small     0.9752    0.9440    0.9593       125
                           Ciliata     0.8537    0.9459    0.8974        37
           