In [1]:
import json
import os

import pandas as pd
import tqdm

import src.data.Dataset as dt

In [2]:
# Detector result file to evaluate. Despite the name, this path is an INPUT of
# this notebook: it is read below with json.load.
# Alternative result files (other thresholds, per their file names):
#   "./resources/json/28560_images_0_1_threshold.json"
#   "./resources/json/28560_images_0_003_threshold.json"
OUTPUT_FILE_PATH = os.path.abspath(
    "./resources/json/28560_images_0_0015_threshold.json"
)

# CSV with the ground-truth rows that the detections are compared against.
DATASET_CSV = os.path.abspath(
    "./data/raw/emptyNonEmptyDataset_ETL/28560Images_subset.csv"
)

# NOTE(review): this tag says 0_003 while the file loaded above is the 0_0015
# one -- confirm which of the two is intended.
model = "MegaDetector_0_003"

# Dataset split to evaluate; "all" disables the split filter further down.
# Other accepted values used in this notebook: "validation", "test".
subset_option = "train"

# Read the JSON explicitly as UTF-8 so decoding does not depend on the
# platform's default locale encoding.
with open(OUTPUT_FILE_PATH, "r", encoding="utf-8") as file:
    data = json.load(file)

In [3]:
# Build `report`: one row per detector result whose file name matches exactly
# one ground-truth row in the dataset CSV.
dataset = dt.load_from_csv(DATASET_CSV)

report_columns = [
    "file_name",
    "label",
    "binary_label",
    "detector_label",
    "subset",
]

# Index the ground truth by file name once, instead of scanning the whole
# frame for every image. File names that occur more than once are dropped
# entirely (keep=False), preserving the original "exactly one match" rule.
ground_truth = (
    dataset.drop_duplicates(subset="file_name", keep=False)
    .set_index("file_name")[["label", "binary_label", "subset"]]
    .to_dict("index")
)

# Collect plain dict records and build the DataFrame a single time at the
# end; concatenating inside the loop copies the frame on every iteration.
records = []
for image in tqdm.tqdm(data["images"]):
    image_file = image["file"]
    truth = ground_truth.get(image_file)

    # Skip images with no (or an ambiguous) ground-truth row.
    if truth is None:
        continue

    records.append(
        {
            "file_name": image_file,
            "label": truth["label"],
            "binary_label": int(truth["binary_label"]),
            # Any detection confidence above zero counts as a positive.
            "detector_label": 1 if image["max_detection_conf"] > 0.0 else 0,
            "subset": truth["subset"],
        }
    )

# Passing `columns` keeps the expected schema even when no record matched.
report = pd.DataFrame(records, columns=report_columns)

The file /Users/carlos/WORKSPACE/MegaClassifier/data/raw/emptyNonEmptyDataset_ETL/28560Images_subset.csv has been successfully opened.


100%|██████████| 28560/28560 [00:36<00:00, 788.38it/s]


In [None]:
# Show the assembled report as this cell's output.
report

In [7]:
# report.to_csv("./data/raw/emptyNonEmptyDataset_ETL/28560Images_detection.csv", index=False, sep=";")
# report.to_csv("./data/raw/emptyNonEmptyDataset_ETL/28560Images_detection_comma.csv", index=False, sep=",")

In [8]:
# Keep only the rows of the selected split; "all" leaves the report untouched.
if subset_option != "all":
    in_selected_subset = report["subset"] == subset_option
    report = report[in_selected_subset]

# Boolean masks for ground truth (dataset) and prediction (detector).
truth_positive = report["binary_label"] == 1
truth_negative = report["binary_label"] == 0
pred_positive = report["detector_label"] == 1
pred_negative = report["detector_label"] == 0

# Confusion-matrix cell counts.
TP = (truth_positive & pred_positive).sum()
FP = (truth_negative & pred_positive).sum()
TN = (truth_negative & pred_negative).sum()
FN = (truth_positive & pred_negative).sum()

In [9]:
# Number of ground-truth positives in the current report.
real_positives = report["binary_label"].sum()

# Ground-truth positives that the detector also flagged as positive.
hit_mask = (report["binary_label"] == 1) & (report["detector_label"] == 1)
covered_positives = len(report[hit_mask])

# Share of real positives covered by the detector; 0 when there are none.
if real_positives > 0:
    percentage_covered_positives = (covered_positives / real_positives) * 100
else:
    percentage_covered_positives = 0

print(f"Percentage of covered positives: {percentage_covered_positives:.2f}%")
print(f"There is {real_positives} real positives")
print(f"There is {covered_positives} covered positives")

Percentage of covered positives: 99.98%
There is 13226 real positives
There is 13224 covered positives


In [10]:
import numpy as np

import plotly.graph_objects as go

# Heatmap rows of `z` map to the y axis (dataset ground truth) and columns map
# to the x axis (MegaDetector prediction), both ordered [negative, positive]:
#   row 0 = truly empty  -> [TN, FP]
#   row 1 = truly animal -> [FN, TP]
# The previous layout [[TN, FN], [FP, TP]] was transposed, which swapped the
# false-positive and false-negative cells in the rendered figure.
conf_matrix = np.array([[TN, FP], [FN, TP]])
conf_matrix_text = [[str(value) for value in row] for row in conf_matrix]

fig = go.Figure(
    data=go.Heatmap(
        z=conf_matrix,
        x=["No Animal", "Animal"],
        y=["No Animal", "Animal"],
        text=conf_matrix_text,
        texttemplate="%{text}",
        colorscale="Blues",
        showscale=True,
    )
)

fig.update_layout(
    title="Confusion Matrix - MegaDetector",
    xaxis_title="MegaDetector",
    yaxis_title="Dataset",
    xaxis=dict(tickmode="array", tickvals=[0, 1], ticktext=["Vacia", "Animal"]),
    yaxis=dict(tickmode="array", tickvals=[0, 1], ticktext=["Vacia", "Animal"]),
    template="seaborn",
    width=600,
    height=600,
)

# Show the figure
fig.show()

In [12]:
# Derive the usual binary-classification rates from the confusion counts.
sample_total = TP + TN + FP + FN
predicted_positive_total = TP + FP
predicted_negative_total = TN + FN
actual_positive_total = TP + FN
actual_negative_total = TN + FP

accuracy = (TP + TN) / sample_total
precision = TP / predicted_positive_total
npv = TN / predicted_negative_total
recall = TP / actual_positive_total
specificity = TN / actual_negative_total

# Print every rate as a percentage, with labels padded to a common width.
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("NPV", npv),
    ("Recall", recall),
    ("Specificity", specificity),
):
    heading = f"{metric_name}:"
    print(f"{heading:<13}{metric_value*100:.2f}%")

Accuracy:    76.80%
Precision:   74.04%
NPV:         99.91%
Recall:      99.98%
Specificity: 31.47%
