In [1]:
import pickle
from pathlib import Path

import numpy as np
import optuna
import pandas as pd
import plotly.express as px
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve
from tensorflow import keras
from tensorflow.keras import layers, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
def test_model(conv_model, preprocess, image_size, batch_size=64, epochs=20):
    """Train a small binary head on a frozen backbone and report test metrics.

    The backbone is frozen, a Flatten -> Dense(128) -> Dense(1, sigmoid) head is
    trained with Adam and early stopping, the loss/accuracy curves are plotted,
    and the ROC curve, AUC, accuracy and confusion matrix on ``./dataset/test``
    are displayed/printed.

    Args:
        conv_model: Keras model used as the feature extractor. NOTE: its
            ``trainable`` attribute is set to ``False`` in place.
        preprocess: Callable passed to ``ImageDataGenerator`` as
            ``preprocessing_function``.
        image_size: Side length in pixels; images are resized to
            ``(image_size, image_size)``.
        batch_size: Batch size for all three generators.
        epochs: Maximum number of training epochs (early stopping may end
            training sooner).

    Returns:
        None. Results are shown as plots and printed text.
    """
    datagen = ImageDataGenerator(preprocessing_function=preprocess)
    target_size = (image_size, image_size)

    train_generator = datagen.flow_from_directory(
        "./dataset/train",
        target_size=target_size,
        batch_size=batch_size,
        class_mode="binary",
        shuffle=True,
    )
    # flow_from_directory shuffles by default. Evaluation generators must keep
    # directory order, otherwise predict() output does not line up with the
    # generator's `.classes` labels and every metric below is meaningless.
    validation_generator = datagen.flow_from_directory(
        "./dataset/val",
        target_size=target_size,
        batch_size=batch_size,
        class_mode="binary",
        shuffle=False,
    )
    test_generator = datagen.flow_from_directory(
        "./dataset/test",
        target_size=target_size,
        batch_size=batch_size,
        class_mode="binary",
        shuffle=False,
    )

    # Only the new classification head is trained; the backbone stays frozen.
    conv_model.trainable = False
    tf_model = keras.Sequential([
        conv_model,
        # Fully connected layers
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(1, activation='sigmoid')  # Sigmoid for binary classification
    ])

    tf_model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    early_stopping = EarlyStopping(
        patience=3,
        min_delta=0.001,
        restore_best_weights=True,
    )

    history = tf_model.fit(
        train_generator,
        validation_data=validation_generator,
        callbacks=[early_stopping],
        epochs=epochs
    )

    # Learning curves: one figure for loss, one for accuracy.
    history_df = pd.DataFrame(history.history)
    history_df.loc[:, ['loss', 'val_loss']].plot()
    history_df.loc[:, ['accuracy', 'val_accuracy']].plot()

    # predict() returns shape (n, 1); flatten once so scores and labels are
    # both 1-D and aligned sample by sample.
    y_prob = tf_model.predict(test_generator).reshape(-1)
    y_true = test_generator.classes

    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    roc_df = pd.DataFrame({
        'FPR': fpr,
        'TPR': tpr,
        'Thresholds': thresholds
    })

    fig = px.area(roc_df, x='FPR', y='TPR', hover_data=['Thresholds'])
    auc_score = roc_auc_score(y_true, y_prob)
    fig.add_annotation(x=0.5, y=0.5, text=f'AUC={auc_score:.5f}', showarrow=False)
    fig.show()

    # Hard labels at the conventional 0.5 cut-off for accuracy / confusion matrix.
    y_label = (y_prob > 0.5).astype(int)
    accuracy = accuracy_score(y_true, y_label)
    conf_matrix = confusion_matrix(y_true, y_label)
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Confusion matrix:\n{conf_matrix}")


In [3]:
import keras.applications.resnet_v2 as resnet_v2
import keras.applications.vgg16 as vgg16
import keras.applications.densenet as densenet
import keras.applications.efficientnet_v2 as efficientnet_v2
import keras.applications.nasnet as nasnet
import keras.applications.xception as xception

In [None]:
# ResNet50V2 backbone: ImageNet weights, no classifier top, 224x224 RGB input.
resnet_v2_model = resnet_v2.ResNet50V2(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
test_model(resnet_v2_model, resnet_v2.preprocess_input, image_size=224)

In [None]:
# VGG16 backbone: ImageNet weights, no classifier top, 224x224 RGB input.
vgg16_model = vgg16.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
test_model(vgg16_model, vgg16.preprocess_input, image_size=224)

In [None]:
# DenseNet121 backbone: ImageNet weights, no classifier top, 224x224 RGB input.
densenet_model = densenet.DenseNet121(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
test_model(densenet_model, densenet.preprocess_input, image_size=224)

In [None]:
# EfficientNetV2-B0 backbone: ImageNet weights, no classifier top, 224x224 RGB input.
# The efficientnet_v2 module exposes EfficientNetV2B0 (not EfficientNetB0, which
# lives in keras.applications.efficientnet), so the V2 constructor is used here.
efficientnet_v2_model = efficientnet_v2.EfficientNetV2B0(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
test_model(efficientnet_v2_model, efficientnet_v2.preprocess_input, 224)

In [None]:
# NASNetMobile backbone: ImageNet weights, no classifier top, 224x224 RGB input.
nasnet_model = nasnet.NASNetMobile(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
test_model(nasnet_model, nasnet.preprocess_input, image_size=224)

In [None]:
# Xception backbone: ImageNet weights, no classifier top, 224x224 RGB input.
xception_model = xception.Xception(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
test_model(xception_model, xception.preprocess_input, image_size=224)

In [4]:
def objective(trial):
    """Optuna objective: train a frozen-ResNet50V2 classifier and return its AUC.

    Hyperparameters sampled from ``trial``: input image size, number of
    fully connected layers, neurons per layer, learning rate and optimizer.
    Each trial trains for a fixed 3 epochs with batch size 64.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        ROC AUC of the trained model on ``./dataset/test`` (to be maximised).

    Raises:
        optuna.TrialPruned: If training diverged and the model produced
            non-finite predictions, so the study can continue with the next
            configuration without aborting.
    """
    batch_size = 64
    epochs = 3

    # Every trial builds a fresh ResNet50V2; drop the Keras state left behind
    # by earlier trials so memory does not grow across the whole grid.
    keras.backend.clear_session()

    image_size = trial.suggest_categorical('image_size', [64, 128, 256])
    num_fc_layers = trial.suggest_categorical('num_fc_layers', [1, 2, 3])
    num_of_neurons = trial.suggest_categorical('num_of_neurons', [64, 128, 256])
    learning_rate = trial.suggest_categorical('lr', [0.1, 0.01, 0.001])
    optimizer_str = trial.suggest_categorical('optimizer', ['adam', 'rmsprop', 'sgd'])

    # Same mapping as the former if/elif chain; 'sgd' is the only remaining
    # choice and therefore the fallback.
    optimizer_classes = {
        'adam': optimizers.Adam,
        'rmsprop': optimizers.RMSprop,
    }
    optimizer_cls = optimizer_classes.get(optimizer_str, optimizers.SGD)
    optimizer = optimizer_cls(learning_rate=learning_rate)

    conv_base = resnet_v2.ResNet50V2(
        weights='imagenet',
        include_top=False,
        input_shape=(image_size, image_size, 3),
    )

    datagen = ImageDataGenerator(preprocessing_function=resnet_v2.preprocess_input)

    train_generator = datagen.flow_from_directory(
        './dataset/train',
        target_size=(image_size, image_size),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=True
    )
    # Evaluation generators keep directory order so predictions line up with
    # the generator's `.classes` labels.
    validation_generator = datagen.flow_from_directory(
        './dataset/val',
        target_size=(image_size, image_size),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=False
    )
    test_generator = datagen.flow_from_directory(
        './dataset/test',
        target_size=(image_size, image_size),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=False
    )

    # Only the dense head is trained; the convolutional base stays frozen.
    conv_base.trainable = False
    full_model = keras.Sequential([
        conv_base,
        layers.Flatten(),
    ])
    for _ in range(num_fc_layers):
        full_model.add(layers.Dropout(0.5))
        full_model.add(layers.Dense(num_of_neurons, activation='relu'))
    full_model.add(layers.Dense(1, activation='sigmoid'))

    full_model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
    )
    full_model.fit(
        train_generator,
        epochs=epochs,
        validation_data=validation_generator,
        verbose=0,
    )

    # NOTE(review): the objective is scored on the test split, so the
    # hyperparameters chosen by the study are tuned on the same data later
    # used for the final report. Consider scoring on a (larger) validation
    # split instead.
    y_prob = full_model.predict(test_generator).reshape(-1)
    y_true = test_generator.classes

    # A diverged run (e.g. too large a learning rate) yields NaN/inf scores,
    # which roc_auc_score rejects with an exception that would abort the study.
    if not np.isfinite(y_prob).all():
        raise optuna.TrialPruned('model produced non-finite predictions')

    auc_score = roc_auc_score(y_true, y_prob)
    y_label = (y_prob > 0.5).astype(int)
    acc_score = accuracy_score(y_true, y_label)
    print(f'Accuracy: {acc_score:.2f}')
    return auc_score

In [None]:
# Exhaustive grid over all 3**5 = 243 combinations. The keys and choices here
# must mirror the trial.suggest_categorical(...) calls inside objective().
sampler = optuna.samplers.GridSampler(
    {
        'image_size': [64, 128, 256],
        'num_fc_layers': [1, 2, 3],
        'num_of_neurons': [64, 128, 256],
        'lr': [0.1, 0.01, 0.001],
        'optimizer': ['adam', 'rmsprop', 'sgd'],
    }
)

# objective() returns an AUC, so higher is better.
study = optuna.create_study(sampler=sampler, direction="maximize")
study.optimize(objective)

# Report the best objective value and the configuration that produced it
print(f"Best trial: {study.best_trial.value}")
print(f"Best hyperparameters: {study.best_trial.params}")

In [15]:
def get_final_model(
    image_size=256,
    num_fc_layers=1,
    num_of_neurons=256,
    learning_rate=0.001,
    epochs=5,
    batch_size=64,
):
    """Train the final frozen-ResNet50V2 classifier and return it.

    The defaults reproduce the configuration that used to be hard-coded in
    this function (presumably the best configuration found by the Optuna
    search; confirm against the study results). The head uses the same
    Dropout(0.5) -> Dense(relu) pattern per fully connected layer as
    ``objective``, followed by a single sigmoid unit, and is trained with
    RMSprop and early stopping on validation loss.

    After training, the ROC curve and AUC on ``./dataset/val`` are displayed.

    Args:
        image_size: Side length in pixels of the square model input.
        num_fc_layers: Number of Dropout + Dense blocks in the head.
        num_of_neurons: Units in each hidden Dense layer.
        learning_rate: Learning rate passed to RMSprop.
        epochs: Maximum number of training epochs.
        batch_size: Batch size for the train and validation generators.

    Returns:
        The trained ``keras.Sequential`` model.
    """
    optimizer = optimizers.RMSprop(learning_rate=learning_rate)

    conv_base = resnet_v2.ResNet50V2(
        weights='imagenet',
        include_top=False,
        input_shape=(image_size, image_size, 3),
    )

    datagen = ImageDataGenerator(
        preprocessing_function=resnet_v2.preprocess_input,
    )
    train_generator = datagen.flow_from_directory(
        './dataset/train',
        target_size=(image_size, image_size),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=True
    )
    # Kept in directory order so predictions align with `.classes` below.
    validation_generator = datagen.flow_from_directory(
        './dataset/val',
        target_size=(image_size, image_size),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=False
    )

    # Only the dense head is trained; the convolutional base stays frozen.
    conv_base.trainable = False
    full_model = keras.Sequential([
        conv_base,
        layers.Flatten(),
    ])
    for _ in range(num_fc_layers):
        full_model.add(layers.Dropout(0.5))
        full_model.add(layers.Dense(num_of_neurons, activation='relu'))
    full_model.add(layers.Dense(1, activation='sigmoid'))

    early_stopping = EarlyStopping(
        patience=2,
        min_delta=0.001,
        restore_best_weights=True,
    )

    full_model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    full_model.fit(
        train_generator,
        epochs=epochs,
        validation_data=validation_generator,
        callbacks=[early_stopping],
    )

    # Validation ROC curve as a quick sanity check of the trained model.
    y_pred = full_model.predict(validation_generator)
    y_true = validation_generator.classes

    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    roc_df = pd.DataFrame({
        'FPR': fpr,
        'TPR': tpr,
        'Thresholds': thresholds
    })

    fig = px.area(roc_df, x='FPR', y='TPR', hover_data=['Thresholds'])
    auc_score = roc_auc_score(y_true, y_pred)
    fig.add_annotation(x=0.5, y=0.5, text=f'AUC={auc_score:.5f}', showarrow=False)
    fig.show()
    return full_model

In [16]:
# Build and train the final classifier; the returned Keras model is reused by
# the test-set evaluation and the export cell further down.
final_model = get_final_model()

Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.



Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.



Epoch 1/5
82/82 ━━━━━━━━━━━━━━━━━━━━ 80s 911ms/step - accuracy: 0.8606 - loss: 12.3248 - val_accuracy: 0.6875 - val_loss: 0.6629
Epoch 2/5
82/82 ━━━━━━━━━━━━━━━━━━━━ 74s 896ms/step - accuracy: 0.9342 - loss: 0.4286 - val_accuracy: 0.6875 - val_loss: 1.0179
Epoch 3/5
82/82 ━━━━━━━━━━━━━━━━━━━━ 75s 909ms/step - accuracy: 0.9782 - loss: 0.0809 - val_accuracy: 0.9375 - val_loss: 0.2489
Epoch 4/5
82/82 ━━━━━━━━━━━━━━━━━━━━ 74s 894ms/step - accuracy: 0.9899 - loss: 0.0409 - val_accuracy: 0.9375 - val_loss: 0.7405
Epoch 5/5
82/82 ━━━━━━━━━━━━━━━━━━━━ 78s 948ms/step - accuracy: 0.9908 - loss: 0.0248 - val_accuracy: 1.0000 - val_loss: 0.0429
1/1 ━━━━━━━━━━━━━━━━━━━━ 3s 3s/step


In [17]:
# Held-out evaluation of the final model: ROC curve and AUC on ./dataset/test.
datagen = ImageDataGenerator(preprocessing_function=resnet_v2.preprocess_input)

# shuffle=False keeps directory order so predictions align with `.classes`.
test_generator = datagen.flow_from_directory(
    './dataset/test',
    shuffle=False,
    class_mode='binary',
    batch_size=64,
    target_size=(256, 256),
)

y_true = test_generator.classes
y_pred = final_model.predict(test_generator)

# One row per ROC operating point, so the threshold shows up on hover.
fpr, tpr, thresholds = roc_curve(y_true, y_pred)
roc_df = pd.DataFrame(dict(FPR=fpr, TPR=tpr, Thresholds=thresholds))

auc_score = roc_auc_score(y_true, y_pred)
fig = px.area(roc_df, x='FPR', y='TPR', hover_data=['Thresholds'])
fig.add_annotation(x=0.5, y=0.5, text=f'AUC={auc_score:.5f}', showarrow=False)
fig.show()

Found 624 images belonging to 2 classes.



Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.



[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 882ms/step


In [18]:
# Turn the sigmoid scores into hard labels with a 0.99 cut-off instead of 0.5.
# NOTE(review): presumably picked to reduce false positives; confirm how the
# threshold was selected (choosing it on the test set biases the accuracy).
y_pred = (y_pred.reshape(-1) > 0.99).astype(int)

conf_matrix = confusion_matrix(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(f"Confusion matrix:\n{conf_matrix}")

Accuracy: 0.90
Confusion matrix:
[[176  58]
 [  5 385]]


In [19]:
model_file_path = './models/tensorflow_model.pkl'

# open(..., 'wb') does not create missing directories, so make sure the
# target folder exists before writing.
Path(model_file_path).parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): the model is pickled, so the file must only ever be loaded
# from a trusted source (unpickling executes arbitrary code). Keras documents
# `model.save('<name>.keras')` as its native format; consider switching if no
# consumer depends on the .pkl file.
with open(model_file_path, 'wb') as write_file:
    pickle.dump(final_model, write_file)