In [1]:
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import tensorflow
from sklearn.metrics import auc, roc_curve
from tensorflow.keras.metrics import *
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import src.data.Dataset as dt

In [2]:
required_paths = ["/ai4eutils", "/CameraTraps", "/yolov5"]
python_path = os.environ.get("PYTHONPATH", "")
root_path = os.getcwd()

for path in required_paths:
    if not any(p.endswith(path) for p in python_path.split(":")):
        python_path += f":{root_path}/data/external{path}"

os.environ["PYTHONPATH"] = python_path

!echo "PYTHONPATH: $PYTHONPATH"

PYTHONPATH: :/Users/carlos/WORKSPACE/MegaClassifier/data/external/ai4eutils:/Users/carlos/WORKSPACE/MegaClassifier/data/external/CameraTraps:/Users/carlos/WORKSPACE/MegaClassifier/data/external/yolov5


In [3]:
models = [("MobileNetV2", "v.1.0"), ("MegaClassifier", "v.1.0")]

In [4]:
IMAGES_DATASET_PATH = os.path.abspath("./data/processed/preprocessed_megadetector")
SAMPLES_CSV = os.path.abspath(
    "./data/processed/preprocessed_megadetector/28560Images_detection.csv"
)
dataframe = dt.load_from_csv(SAMPLES_CSV)
test_dataframe = dataframe[dataframe["subset"] == "test"]
test_dataframe["file_name"] = test_dataframe["file_name"].apply(
    lambda x: os.path.join(IMAGES_DATASET_PATH, x)
)
test_dataframe["binary_label"] = test_dataframe["binary_label"].astype(str)
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
normal_datagen = ImageDataGenerator(
    preprocessing_function=tensorflow.keras.applications.mobilenet_v2.preprocess_input
)
test_images = normal_datagen.flow_from_dataframe(
    dataframe=test_dataframe,
    x_col="file_name",
    y_col="binary_label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary",
    shuffle=False,
)

The file /Users/carlos/WORKSPACE/MegaClassifier/data/processed/preprocessed_megadetector/28560Images_detection.csv has been successfully opened.
Found 4286 validated image filenames belonging to 2 classes.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataframe["file_name"] = test_dataframe["file_name"].apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataframe["binary_label"] = test_dataframe["binary_label"].astype(str)


In [5]:
def plot_roc_curve(models, test_images):
    y_true = test_images.labels
    fig = go.Figure()

    for model_name, version in models:
        complete_name = f"{model_name}_{version}"
        model = load_model(f"./models/{model_name}/{complete_name}.h5")
        y_pred = model.predict(test_images)

        fpr, tpr, _ = roc_curve(y_true, y_pred)
        roc_auc = auc(fpr, tpr)
        version_short = version[:-2]

        fig.add_trace(
            go.Scatter(
                x=fpr,
                y=tpr,
                mode="lines",
                name=f"{model_name} {version_short} (AUC = {roc_auc:.4f})",
            )
        )

    # Línea de referencia (Random Guess)
    fig.add_trace(
        go.Scatter(
            x=[0, 1],
            y=[0, 1],
            mode="lines",
            name="Random Classifier",
            line=dict(dash="dash"),
        )
    )

    # Configurar diseño
    fig.update_layout(
        title="ROC Curve - Compare Models",
        xaxis_title="False Positive Rate (FPR)",
        yaxis_title="True Positive Rate (TPR)",
        legend_title="Curves",
        template="seaborn",
        width=700,
        height=500,
        xaxis=dict(
            tickmode="linear",
            tick0=0,
            dtick=0.1,
        ),
    )

    # Mostrar la gráfica
    fig.show()
    # fig.write_image(f"./reports/figures/roc_curve/compare_test.png")

In [9]:
def plot_test_metrics(models):
    fig = go.Figure()

    for model_name, version in models:
        test = dt.load_from_csv(
            os.path.abspath(f"./logs/{model_name}/{version}/test_history.csv"), sep=","
        )
        data = (test * 100).round(4)

        fig.add_trace(
            go.Bar(
                name=f"{model_name} {version}",
                x=["Loss", "Accuracy", "Precision", "Recall"],
                y=data.iloc[0],
                text=data.iloc[0],
                textposition="auto",
            )
        )

    fig.update_layout(
        title="Test Metrics comparison",
        xaxis_title="Metrics",
        yaxis_title="Values (%)",
        template="seaborn",
        width=900,
        height=500,
        xaxis=dict(
            tickmode="linear",
            tick0=0,
            dtick=0.1,
        ),
    )

    fig.show()

In [7]:
plot_roc_curve(models, test_images)

2024-12-26 12:23:24.270037: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3
2024-12-26 12:23:24.270066: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-12-26 12:23:24.270075: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-12-26 12:23:24.270100: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-26 12:23:24.270112: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2024-12-26 12:23:25.425027: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2024-12-26 12:23:37.393923: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [10]:
plot_test_metrics(models)

The file /Users/carlos/WORKSPACE/MegaClassifier/logs/MobileNetV2/v.1.0/test_history.csv has been successfully opened.
The file /Users/carlos/WORKSPACE/MegaClassifier/logs/MegaClassifier/v.1.0/test_history.csv has been successfully opened.
