In [30]:
import numpy as np

from keras.utils import image_dataset_from_directory
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import (
    EarlyStopping,
    ReduceLROnPlateau
)
from keras.layers import (
    Rescaling,
    Flatten,
    Dense,
)
from keras.applications import VGG16

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import ConfusionMatrixDisplay, RocCurveDisplay


In [25]:
# --- Data location -----------------------------------------------------------
# Root folder scanned by the dataset loader (one sub-folder per class).
prePath = "../Data/PreprocessedResNet"

# Hard-coded image count; must be kept in sync with the contents of prePath.
datasetSize = 56092

# --- Input pipeline ----------------------------------------------------------
colorMode = "rgb"
shape = (224, 224, 3)      # (height, width, channels) fed to the network
batchSize = 32
seed = 123                 # shared by both subsets so the split is reproducible
validationSplit = 0.2      # fraction of the files held out from training

# Number of *batches* carved out of the validation subset for testing:
# roughly one tenth of the whole dataset, expressed in batches.
# NOTE(review): with the values above this is 175, yet the saved output of the
# dataset cell reports 150 test batches — the outputs look stale; re-run to confirm.
testDSSize = datasetSize // (batchSize * 10)

# --- Training setup ----------------------------------------------------------
epochs = 100               # upper bound; the callbacks may stop training earlier
loss = "binary_crossentropy"
metrics = ["accuracy"]
optimizer = Adam(learning_rate=1e-4)

In [26]:
# Options common to both subsets. Using the same validation_split and seed for
# the two calls is what keeps the training and validation files disjoint.
loaderOptions = dict(
    labels="inferred",
    label_mode="binary",
    image_size=shape[:2],
    color_mode=colorMode,
    batch_size=batchSize,
    validation_split=validationSplit,
    seed=seed,
)

trainingDS = image_dataset_from_directory(prePath, subset="training", **loaderOptions)
validationDS = image_dataset_from_directory(prePath, subset="validation", **loaderOptions)

# Carve the test set out of the validation subset: the first testDSSize batches
# become the test set, the remainder stays as validation data.
# NOTE(review): the loader shuffles by default; confirm that take()/skip() give
# stable, non-overlapping partitions across iterations of the dataset.
testDS = validationDS.take(testDSSize)
validationDS = validationDS.skip(testDSSize)

# Report how many batches each split contains.
for caption, split in (
    ("Batches for training -->", trainingDS),
    ("Batches for validating -->", validationDS),
    ("Batches for testing -->", testDS),
):
    print(caption, split.cardinality())

Found 56092 files belonging to 2 classes.
Using 44874 files for training.
Found 56092 files belonging to 2 classes.
Using 11218 files for validation.
Batches for training --> tf.Tensor(1403, shape=(), dtype=int64)
Batches for validating --> tf.Tensor(201, shape=(), dtype=int64)
Batches for testing --> tf.Tensor(150, shape=(), dtype=int64)


In [27]:
# Training callbacks, both driven by validation accuracy:
#  - EarlyStopping ends training after 5 epochs without improvement and rolls
#    the model back to the best weights seen;
#  - ReduceLROnPlateau multiplies the learning rate by 0.2 after 2 stagnant
#    epochs, never going below min_lr.
callbacks = [
    EarlyStopping(
        monitor="val_accuracy",
        patience=5,
        verbose=1,
        mode="auto",
        restore_best_weights=True,
    ),
    ReduceLROnPlateau(
        monitor="val_accuracy",
        factor=0.2,
        patience=2,
        verbose=1,
        mode="auto",
        min_lr=0.000005,
    ),
]

# Convolutional base: VGG16 with ImageNet weights and without its classifier head.
pretrainedModel = VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=shape,
)
# Freeze the whole base in one step (propagates to every layer) so that only the
# new dense head is trained; this also avoids leaking a loop variable.
pretrainedModel.trainable = False

# Binary classifier: frozen VGG16 features -> two dense layers -> sigmoid unit.
# NOTE(review): VGG16's ImageNet weights are normally paired with
# keras.applications.vgg16.preprocess_input rather than a plain 1/255 rescale —
# confirm this matches how the images under prePath were prepared.
model = Sequential(
    [
        Rescaling(1.0 / 255, input_shape=shape),
        pretrainedModel,
        Flatten(),
        # "relu" is the canonical activation identifier; the capitalised "ReLU"
        # only resolves through a legacy fallback and is rejected by Keras 3.
        Dense(units=1024, activation="relu"),
        Dense(units=1024, activation="relu"),
        Dense(units=1, activation="sigmoid"),
    ]
)

pretrainedModel.summary()
model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [28]:
# Attach the optimizer, loss and metrics chosen in the configuration cell.
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Train on the training split and evaluate on the validation split after every
# epoch; `epochs` is only an upper bound because the callbacks can stop early.
# The returned History object holds the per-epoch metric curves.
fitOptions = {
    "validation_data": validationDS,
    "epochs": epochs,
    "callbacks": callbacks,
}
history = model.fit(trainingDS, **fitOptions)

Epoch 1/100
  23/1403 [..............................] - ETA: 4:23:47 - loss: 0.4583 - accuracy: 0.8193

KeyboardInterrupt: 

In [None]:
# Score the test set in ONE pass, keeping each batch's labels next to its
# predictions. The previous version read the labels in a separate pass and kept
# only the last batch, so the label array neither matched the predictions in
# length nor (if the dataset reshuffles between passes) in order.
batchResults = [
    (model.predict_on_batch(images), labels.numpy())
    for images, labels in testDS
]

# Raw sigmoid outputs, one row per test sample (same shape as model.predict gives).
predictions = np.concatenate([scores for scores, _ in batchResults], axis=0)
# Hard 0/1 decisions at the 0.5 threshold, flattened to 1-D.
predArray = (predictions > 0.5).astype(int).ravel()
# Ground-truth labels aligned element-by-element with predArray.
trueArray = np.concatenate([truth for _, truth in batchResults], axis=0).ravel().astype(int)

In [None]:
# 2x2 summary figure: learning curves on the top row, test-set diagnostics below.
_, ax = plt.subplots(2, 2, figsize=(15, 8), tight_layout=True)

# Early stopping can end training before `epochs` is reached, so the x axis is
# built from the number of epochs actually recorded in the history.
epochsRun = np.arange(len(history.history["loss"]))

# Top row: training vs. validation curve for each tracked quantity.
for panel, metricName in ((ax[0, 0], "accuracy"), (ax[0, 1], "loss")):
    for curveLabel, historyKey in (
        ("Training", metricName),
        ("Validation", "val_" + metricName),
    ):
        sns.lineplot(
            x=epochsRun,
            y=history.history[historyKey],
            ax=panel,
            label=curveLabel,
        )
    panel.set(title=metricName.capitalize(), xlabel="Epoch", ylabel=metricName)

# Bottom-left: confusion matrix of the thresholded predictions.
# class_names only exists on the dataset object returned by the loader;
# testDS comes from .take() and does not carry it, so read it from trainingDS.
ConfusionMatrixDisplay.from_predictions(
    trueArray,
    predArray,
    display_labels=trainingDS.class_names,
    ax=ax[1, 0],
    cmap="hot",
    colorbar=True,
)
ax[1, 0].set_title("Confusion matrix (test set)")

# Bottom-right: ROC curve. It needs the continuous scores, not the 0/1
# decisions, otherwise the curve degenerates to a single operating point.
RocCurveDisplay.from_predictions(
    trueArray,
    np.ravel(predictions),
    ax=ax[1, 1],
)
ax[1, 1].set_title("ROC curve (test set)")

plt.show()
