In [1]:
import os

import numpy as np
import pandas as pd

import src.graphics as graphics
import src.model as model

In [2]:
# Experiment identity: the name, version and starting subversion are combined
# further down to build the model name and the logs/models/reports paths.
MODEL_NAME, VERSION, SUBVERSION = "MegaClassifier_a", "v1", 0

# Absolute folder holding the detections CSV; file names listed in the CSV are
# later joined onto this folder.
DATASET_PATH = os.path.abspath("./data/processed/onlyDetectionsForTrain")
DATASET_CSV = os.path.join(DATASET_PATH, "onlyDetectionsForTrain.csv")

In [3]:
# Load the semicolon-separated index and store the binary label as text.
dataset = pd.read_csv(DATASET_CSV, sep=";").astype({'binary_label': str})

# Turn the file names listed in the CSV into paths under the dataset folder.
dataset['file_name'] = dataset['file_name'].map(
    lambda relative_name: os.path.join(DATASET_PATH, relative_name)
)

# One frame per value of the 'subset' column, in train / validation / test order.
train_dataset, validation_dataset, test_dataset = (
    dataset[dataset['subset'] == split_name]
    for split_name in ("train", "validation", "test")
)

In [4]:
# Sweep settings: each batch size listed here is trained for the same number
# of epochs by the loop in the next cell.
BATCH_SIZES = [16, 32, 64, 128]
EPOCHS = 10

In [5]:
# Batch-size sweep: train, evaluate and report one model per entry of BATCH_SIZES,
# then write a single comparison CSV with one row per batch size.
#
# The run's subversion is derived from the loop position (starting at SUBVERSION)
# instead of incrementing the SUBVERSION constant in place. Re-running this cell
# therefore rewrites the same {VERSION}.N artifacts instead of continuing at a
# higher subversion that no longer matches the batch-size order.
all_results = []
for subversion, BATCH_SIZE in enumerate(BATCH_SIZES, start=SUBVERSION):
    print(f"BATCH_SIZE: {BATCH_SIZE} ------------------------------------------------------------ ")

    # Per-run identifiers and artifact locations, built once and reused below.
    run_tag = f"{VERSION}_{subversion}"
    run_label = f"{MODEL_NAME} {VERSION}.{subversion}"
    logs_dir = f"./logs/{MODEL_NAME}/{VERSION}/{VERSION}.{subversion}"
    model_dir = f"./models/{MODEL_NAME}/{VERSION}/{VERSION}.{subversion}"
    reports_dir = f"./reports/{MODEL_NAME}/{VERSION}/{VERSION}.{subversion}"
    history_csv = f"{logs_dir}/history_{run_tag}.csv"

    # --- Data pipeline -------------------------------------------------------
    train_generator, other_generator = model.image_data_generator(version=VERSION)

    train_images, validation_images, test_images = model.flow_from_dataframe(
        datasets=[train_dataset, validation_dataset, test_dataset],
        generators=[train_generator, other_generator],
        batch_size=BATCH_SIZE,
    )

    # --- Model construction and training ------------------------------------
    # A fresh pretrained base is loaded on every iteration, so each batch size
    # is trained on its own model object.
    mobilenet_v2 = model.load_pretrained(version=VERSION)

    mega_classifier_a = model.compile_model(
        version=VERSION,
        pretrained_model=mobilenet_v2,
        name=f"{MODEL_NAME}_{run_tag}")

    callbacks = model.callbacks(
        version=VERSION,
        logs_path=logs_dir,
    )

    history = model.fit(
        model=mega_classifier_a,
        images=[train_images, validation_images],
        epochs=EPOCHS,
        call_backs=callbacks,
        save_path=model_dir,
    )

    model.save_training(
        data=pd.DataFrame(history.history),
        save_path=history_csv,
    )

    # --- Training curves -----------------------------------------------------
    os.makedirs(reports_dir, exist_ok=True)

    accuracy_chart = graphics.create_training_accuracy_chart(
        history_path=history_csv,
        model_name=run_label,
    )
    accuracy_chart.write_image(f"{reports_dir}/train_accuracy_{run_tag}.png")

    loss_chart = graphics.create_training_loss_chart(
        history_path=history_csv,
        model_name=run_label,
    )
    loss_chart.write_image(f"{reports_dir}/train_loss_{run_tag}.png")

    # --- Evaluation on the test split ---------------------------------------
    results = model.evaluate_model(
        model_path=model_dir,
        test_images=test_images,
    )

    # Pair each reported value with its metric name, prefixed with "test_".
    metric_names = history.model.metrics_names
    evaluation_results = {("test_" + name): value for name, value in zip(metric_names, results)}

    model.save_evaluation(
        data=pd.DataFrame([evaluation_results]),
        save_path=f"{logs_dir}/evaluation_{run_tag}.csv",
    )

    # NOTE(review): the positional indices below assume the metric order is
    # loss, accuracy, precision, recall, AUC - confirm against compile_model.
    print("\n\n")
    print(f"Accuracy: {results[1]:.4%}")
    # The loss is not a ratio, so it is printed as a plain decimal, not a percentage.
    print(f"Loss: {results[0]:.4f}")
    print(f"AUC: {results[4]:.4%}")
    print(f"Precision: {results[2]:.4%}")
    print(f"Recall: {results[3]:.4%}")
    print("\n\n")

    # --- ROC curve and decision threshold -----------------------------------
    fpr, tpr, thresholds, roc_auc = model.roc_curve_model(
        model_path=model_dir,
        test_images=test_images,
    )

    roc_curve_chart = graphics.create_roc_curve_chart(
        fpr=fpr,
        tpr=tpr,
        roc_auc=roc_auc,
        model_name=run_label,
    )
    roc_curve_chart.write_image(f"{reports_dir}/roc_curve_{run_tag}.png")

    # Pick the threshold where (tpr - fpr) is largest.
    # NOTE(review): this threshold is selected on the same test_images that the
    # confusion matrix below is computed from, which makes that matrix
    # optimistic; consider selecting the threshold on validation_images.
    optimal_idx = np.argmax(tpr - fpr)
    optimal_threshold = thresholds[optimal_idx]
    print(f"OPTIMAL THRESHOLD: {optimal_threshold}")

    # --- Confusion matrix at the selected threshold --------------------------
    confusion_matrix = model.confusion_matrix_model(
        model_path=model_dir,
        test_images=test_images,
        optimal_threshold=optimal_threshold,
    )

    confusion_matrix_chart = graphics.create_confusion_matrix_chart(
        conf_matrix=confusion_matrix,
        model_name=run_label,
    )
    confusion_matrix_chart.write_image(f"{reports_dir}/confusion_matrix_{run_tag}.png")

    # batch_size is added only after the per-run evaluation CSV has been saved,
    # so it appears in the comparison table but not in that per-run file.
    evaluation_results["batch_size"] = BATCH_SIZE
    all_results.append(pd.DataFrame([evaluation_results]))

# One row per batch size, written next to the per-run log folders.
final_results = pd.concat(all_results, ignore_index=True)
final_results.to_csv(f"./logs/{MODEL_NAME}/{VERSION}/batch_comparison_results.csv", index=False)

BATCH_SIZE: 16 ------------------------------------------------------------ 
Found 17054 validated image filenames belonging to 2 classes.
Found 4286 validated image filenames belonging to 2 classes.
Found 4286 validated image filenames belonging to 2 classes.


2025-02-25 13:53:28.340883: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3
2025-02-25 13:53:28.340914: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-02-25 13:53:28.340918: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-02-25 13:53:28.340974: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-02-25 13:53:28.341145: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)





Epoch 1/10


2025-02-25 13:53:30.016687: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-02-25 13:54:18.036527: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10



Saving model...




INFO:tensorflow:Assets written to: ./models/MegaClassifier_a/v1/v1.0/assets


INFO:tensorflow:Assets written to: ./models/MegaClassifier_a/v1/v1.0/assets





Saving training data...





2025-02-25 14:03:09.028575: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.





Saving evaluation data...



Accuracy: 94.1904%
Loss: 13.8695%
AUC: 98.6760%
Precision: 96.2460%
Recall: 94.9224%








2025-02-25 14:03:23.268750: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


OPTIMAL THRESHOLD: 0.6027166247367859





2025-02-25 14:03:36.515056: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


BATCH_SIZE: 32 ------------------------------------------------------------ 
Found 17054 validated image filenames belonging to 2 classes.
Found 4286 validated image filenames belonging to 2 classes.
Found 4286 validated image filenames belonging to 2 classes.







Epoch 1/10


2025-02-25 14:03:48.712884: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-02-25 14:04:32.995680: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10



Saving model...




INFO:tensorflow:Assets written to: ./models/MegaClassifier_a/v1/v1.1/assets


INFO:tensorflow:Assets written to: ./models/MegaClassifier_a/v1/v1.1/assets





Saving training data...





2025-02-25 14:12:49.991137: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.





Saving evaluation data...



Accuracy: 94.5637%
Loss: 13.8951%
AUC: 98.6710%
Precision: 95.2696%
Recall: 96.5797%








2025-02-25 14:13:03.568691: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


OPTIMAL THRESHOLD: 0.6590171456336975





2025-02-25 14:13:16.164461: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


BATCH_SIZE: 64 ------------------------------------------------------------ 
Found 17054 validated image filenames belonging to 2 classes.
Found 4286 validated image filenames belonging to 2 classes.
Found 4286 validated image filenames belonging to 2 classes.







Epoch 1/10


2025-02-25 14:13:27.778757: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-02-25 14:14:08.742675: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10



Saving model...
INFO:tensorflow:Assets written to: ./models/MegaClassifier_a/v1/v1.2/assets


INFO:tensorflow:Assets written to: ./models/MegaClassifier_a/v1/v1.2/assets





Saving training data...





2025-02-25 14:22:05.270089: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.





Saving evaluation data...



Accuracy: 94.3770%
Loss: 13.9597%
AUC: 98.7167%
Precision: 94.6030%
Recall: 97.0381%








2025-02-25 14:22:18.278905: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


OPTIMAL THRESHOLD: 0.7252727150917053





2025-02-25 14:22:30.622893: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


BATCH_SIZE: 128 ------------------------------------------------------------ 
Found 17054 validated image filenames belonging to 2 classes.
Found 4286 validated image filenames belonging to 2 classes.
Found 4286 validated image filenames belonging to 2 classes.







Epoch 1/10


2025-02-25 14:22:42.092518: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-02-25 14:23:22.543669: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10



Saving model...
INFO:tensorflow:Assets written to: ./models/MegaClassifier_a/v1/v1.3/assets


INFO:tensorflow:Assets written to: ./models/MegaClassifier_a/v1/v1.3/assets





Saving training data...





2025-02-25 14:31:07.336704: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.





Saving evaluation data...



Accuracy: 93.8404%
Loss: 14.8650%
AUC: 98.6660%
Precision: 96.6618%
Recall: 93.9351%








2025-02-25 14:31:20.295124: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


OPTIMAL THRESHOLD: 0.5304590463638306





2025-02-25 14:31:32.924665: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


