In [46]:
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from tensorflow.keras.metrics import *
from tensorflow.keras.models import load_model

import src.data.Dataset as dt

In [47]:
required_paths = ["/ai4eutils", "/CameraTraps", "/yolov5"]
python_path = os.environ.get("PYTHONPATH", "")
root_path = os.getcwd()

for path in required_paths:
    if not any(p.endswith(path) for p in python_path.split(":")):
        python_path += f":{root_path}/data/external{path}"

os.environ["PYTHONPATH"] = python_path

!echo "PYTHONPATH: $PYTHONPATH"

PYTHONPATH: :/Users/carlos/WORKSPACE/MegaClassifier/data/external/ai4eutils:/Users/carlos/WORKSPACE/MegaClassifier/data/external/CameraTraps:/Users/carlos/WORKSPACE/MegaClassifier/data/external/yolov5


In [48]:
def moving_average(data, window_size):
    return np.convolve(data, np.ones(window_size) / window_size, mode="valid")

In [49]:
model_name = "MegaClassifier"
version = "v.2.2"
complete_name = model_name + "_" + version

model = load_model(f"./models/{model_name}/{complete_name}.h5")

In [50]:
train_validation_df = pd.read_csv(
    f"./logs/{model_name}/{version}/train_validation_history.csv"
)

train_metrics = train_validation_df[
    [col for col in train_validation_df.columns if not col.startswith("val_")]
]

validation_metrics = train_validation_df[
    [col for col in train_validation_df.columns if col.startswith("val_")]
]

test_metrics = pd.read_csv(f"./logs/{model_name}/{version}/test_history.csv")

version = version[:-2]

In [51]:
window_size = 0
train_data = (
    moving_average(train_metrics["accuracy"], window_size)
    if window_size != 0
    else train_metrics["accuracy"]
)
validation_data = (
    moving_average(validation_metrics["val_accuracy"], window_size)
    if window_size != 0
    else validation_metrics["val_accuracy"]
)

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        y=train_data,
        x=np.arange(len(train_data)),
        mode="lines",
        name="Train",
    )
)

fig.add_trace(
    go.Scatter(
        y=validation_data,
        x=np.arange(len(validation_data)),
        mode="lines",
        name="Validation",
    )
)

fig.update_layout(
    title=f"Accuracy - {model_name} {version}",
    xaxis_title="Epochs",
    yaxis_title="Accuracy",
    legend_title="Metrics",
    template="seaborn",
    width=600,
    height=500,
    xaxis=dict(
        tickmode="linear",
        tick0=0,
        dtick=2,
    ),
)

fig.show()
fig.write_image(f"./reports/figures/accuracy/{complete_name[:-2]}_train.png")

In [52]:
window_size = 0
train_data = (
    moving_average(train_metrics["loss"], window_size)
    if window_size != 0
    else train_metrics["loss"]
)
validation_data = (
    moving_average(validation_metrics["val_loss"], window_size)
    if window_size != 0
    else validation_metrics["val_loss"]
)

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        y=train_data,
        x=np.arange(len(train_data)),
        mode="lines",
        name="Train",
    )
)

fig.add_trace(
    go.Scatter(
        y=validation_data,
        x=np.arange(len(validation_data)),
        mode="lines",
        name="Validation",
    )
)

fig.update_layout(
    title=f"Loss - {model_name} {version}",
    xaxis_title="Epochs",
    yaxis_title="Loss",
    legend_title="Metrics",
    template="seaborn",
    width=600,
    height=500,
    xaxis=dict(
        tickmode="linear",
        tick0=0,
        dtick=2,
    ),
)

fig.show()
fig.write_image(f"./reports/figures/loss/{complete_name[:-2]}_train.png")

In [53]:
window_size = 0
train_data = (
    moving_average(train_metrics["precision"], window_size)
    if window_size != 0
    else train_metrics["precision"]
)
validation_data = (
    moving_average(validation_metrics["val_precision"], window_size)
    if window_size != 0
    else validation_metrics["val_precision"]
)

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        y=train_data,
        x=np.arange(len(train_data)),
        mode="lines",
        name="Train",
    )
)

fig.add_trace(
    go.Scatter(
        y=validation_data,
        x=np.arange(len(validation_data)),
        mode="lines",
        name="Validation",
    )
)

fig.update_layout(
    title=f"Precision - {model_name} {version}",
    xaxis_title="Epochs",
    yaxis_title="Precision",
    legend_title="Metrics",
    template="seaborn",
    width=600,
    height=500,
    xaxis=dict(
        tickmode="linear",
        tick0=0,
        dtick=2,
    ),
)

fig.show()

In [54]:
window_size = 0
train_data = (
    moving_average(train_metrics["recall"], window_size)
    if window_size != 0
    else train_metrics["recall"]
)
validation_data = (
    moving_average(validation_metrics["val_recall"], window_size)
    if window_size != 0
    else validation_metrics["val_recall"]
)

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        y=train_data,
        x=np.arange(len(train_data)),
        mode="lines",
        name="Train",
    )
)

fig.add_trace(
    go.Scatter(
        y=validation_data,
        x=np.arange(len(validation_data)),
        mode="lines",
        name="Validation",
    )
)

fig.update_layout(
    title=f"Recall - {model_name} {version}",
    xaxis_title="Epochs",
    yaxis_title="Recall",
    legend_title="Metrics",
    template="seaborn",
    width=600,
    height=500,
    xaxis=dict(
        tickmode="linear",
        tick0=0,
        dtick=2,
    ),
)

fig.show()

In [55]:
window_size = 0
train_data = (
    moving_average(train_metrics["auc"], window_size)
    if window_size != 0
    else train_metrics["auc"]
)
validation_data = (
    moving_average(validation_metrics["val_auc"], window_size)
    if window_size != 0
    else validation_metrics["val_auc"]
)

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        y=train_data,
        x=np.arange(len(train_data)),
        mode="lines",
        name="Train",
    )
)

fig.add_trace(
    go.Scatter(
        y=validation_data,
        x=np.arange(len(validation_data)),
        mode="lines",
        name="Validation",
    )
)

fig.update_layout(
    title=f"Auc - {model_name} {version}",
    xaxis_title="Epochs",
    yaxis_title="Auc",
    legend_title="Metrics",
    template="seaborn",
    width=600,
    height=500,
    xaxis=dict(
        tickmode="linear",
        tick0=0,
        dtick=2,
    ),
)

fig.show()
fig.write_image(f"./reports/figures/auc/{complete_name[:-2]}_train.png")

In [56]:
# Calcular F1-Score
train_metrics["f1_score"] = (
    2
    * (train_metrics["precision"] * train_metrics["recall"])
    / (train_metrics["precision"] + train_metrics["recall"])
)
validation_metrics["val_f1_score"] = (
    2
    * (validation_metrics["val_precision"] * validation_metrics["val_recall"])
    / (validation_metrics["val_precision"] + validation_metrics["val_recall"])
)

window_size = 0
train_data = (
    moving_average(train_metrics["f1_score"], window_size)
    if window_size != 0
    else train_metrics["f1_score"]
)
validation_data = (
    moving_average(validation_metrics["val_f1_score"], window_size)
    if window_size != 0
    else validation_metrics["val_f1_score"]
)

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        y=train_data,
        x=np.arange(len(train_data)),
        mode="lines",
        name="Train",
    )
)

fig.add_trace(
    go.Scatter(
        y=validation_data,
        x=np.arange(len(validation_data)),
        mode="lines",
        name="Validation",
    )
)

fig.update_layout(
    title=f"F1 Score - {model_name} {version}",
    xaxis_title="Epochs",
    yaxis_title="F1 Score",
    legend_title="Metrics",
    template="seaborn",
    width=600,
    height=500,
    xaxis=dict(
        tickmode="linear",
        tick0=0,
        dtick=2,
    ),
)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [57]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow

IMAGES_DATASET_PATH = os.path.abspath("./data/processed/preprocessed_megadetector")
TEST_SAMPLES_CSV = os.path.abspath(
    "./data/processed/preprocessed_megadetector/28560Images_detection.csv"
)
dataframe = dt.load_from_csv(TEST_SAMPLES_CSV)
test_dataframe = dataframe[dataframe["subset"] == "test"]
test_dataframe["file_name"] = test_dataframe["file_name"].apply(
    lambda x: os.path.join(IMAGES_DATASET_PATH, x)
)
test_dataframe["binary_label"] = test_dataframe["binary_label"].astype(str)
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
normal_datagen = ImageDataGenerator(
    preprocessing_function=tensorflow.keras.applications.mobilenet_v2.preprocess_input
)
test_images = normal_datagen.flow_from_dataframe(
    dataframe=test_dataframe,
    x_col="file_name",
    y_col="binary_label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary",
    shuffle=False,
)

# Calcular probabilidades para el conjunto de prueba
y_pred_prob = model.predict(test_images)

The file /Users/carlos/WORKSPACE/MegaClassifier/data/processed/preprocessed_megadetector/28560Images_detection.csv has been successfully opened.
Found 4286 validated image filenames belonging to 2 classes.




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

2024-12-28 18:44:46.764274: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [58]:
import plotly.graph_objects as go
from sklearn.metrics import auc, roc_curve

y_true = test_images.labels

# Calcular la curva ROC
fpr, tpr, thresholds = roc_curve(y_true, y_pred_prob)
roc_auc = auc(fpr, tpr)

# Graficar la curva ROC
fig = go.Figure()

# Curva ROC
fig.add_trace(
    go.Scatter(
        x=fpr, y=tpr, mode="lines", name=f"{model_name}_{version} (AUC = {roc_auc:.4f})"
    )
)

# Línea de referencia (Random Guess)
fig.add_trace(
    go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode="lines",
        name="Random Classifier",
        line=dict(dash="dash"),
    )
)

# Configurar diseño
fig.update_layout(
    # Receiver Operating Characteristic (ROC)
    title=f"ROC Curve - {model_name} {version}",
    xaxis_title="False Positive Rate",
    yaxis_title="True Positive Rate",
    legend_title="Curves",
    template="seaborn",
    width=700,
    height=500,
    xaxis=dict(
        tickmode="linear",
        tick0=0,
        dtick=0.1,
    ),
)

# Mostrar la gráfica
fig.show()
fig.write_image(f"./reports/figures/roc_curve/{complete_name[:-2]}_test.png")

In [59]:
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix

# Obtener las clases predichas a partir de las probabilidades
y_pred_class = (y_pred_prob > 0.5).astype(int)  # Para clasificación binaria

# Calcular la matriz de confusión
conf_matrix = confusion_matrix(y_true, y_pred_class)

# Crear etiquetas con los números
conf_matrix_text = [[str(value) for value in row] for row in conf_matrix]

# Crear el heatmap con Plotly
fig = go.Figure(
    data=go.Heatmap(
        z=conf_matrix,
        x=["No Animal", "Animal"],  # Predicted labels
        y=["No Animal", "Animal"],  # Actual labels
        text=conf_matrix_text,  # Añadir los valores como texto
        texttemplate="%{text}",  # Mostrar los valores en las celdas
        colorscale="Blues",
        showscale=True,
    )
)

# Configurar diseño
fig.update_layout(
    title=f"Confusion Matrix - {model_name} {version}",
    xaxis_title="Prediction",
    yaxis_title="Dataset",
    xaxis=dict(tickmode="array", tickvals=[0, 1], ticktext=["No Animal", "Animal"]),
    yaxis=dict(tickmode="array", tickvals=[0, 1], ticktext=["No Animal", "Animal"]),
    template="seaborn",
    width=600,
    height=600,
)

# Mostrar la figura
fig.show()
fig.write_image(f"./reports/figures/confusion_matrix/{complete_name[:-2]}_test.png")

In [60]:
# import plotly.graph_objects as go
# import numpy as np

# # Calcular totales de etiquetas reales
# total_positive = np.sum(y_true)  # Total de muestras positivas
# total_negative = len(y_true) - total_positive  # Total de muestras negativas

# # Crear la matriz de confusión ideal
# conf_matrix_ideal = np.array(
#     [
#         [total_negative, 0],  # Verdaderos negativos, Falsos positivos
#         [0, total_positive],  # Falsos negativos, Verdaderos positivos
#     ]
# )

# # Crear etiquetas con los números
# conf_matrix_ideal_text = [[str(value) for value in row] for row in conf_matrix_ideal]

# # Crear el heatmap con Plotly
# fig = go.Figure(
#     data=go.Heatmap(
#         z=conf_matrix_ideal,
#         x=["No Animal", "Animal"],  # Predicted labels
#         y=["No Animal", "Animal"],  # Actual labels
#         text=conf_matrix_ideal_text,  # Añadir los valores como texto
#         texttemplate="%{text}",  # Mostrar los valores en las celdas
#         colorscale="Greens",
#         showscale=True,
#     )
# )

# # Configurar diseño
# fig.update_layout(
#     title="Confusion Matrix - Ideal",
#     xaxis_title="Prediction",
#     yaxis_title="Dataset",
#     xaxis=dict(tickmode="array", tickvals=[0, 1], ticktext=["No Animal", "Animal"]),
#     yaxis=dict(tickmode="array", tickvals=[0, 1], ticktext=["No Animal", "Animal"]),
#     template="seaborn",
#     width=600,
#     height=600,
# )

# # Mostrar la figura
# fig.show()

# fig.write_image("./reports/figures/confusion_matrix/ideal_test.png")