In [2]:
import json
import os
import subprocess
from datetime import datetime, timezone

import pandas as pd

import src.data.Dataset as dt

In [3]:
# Extend PYTHONPATH with the vendored repositories under data/external so that
# processes launched from this notebook (which inherit os.environ) can import
# them. An entry is appended only when no existing entry already ends with the
# repository's folder name, so re-running the cell does not duplicate entries.
required_paths = ["/ai4eutils", "/CameraTraps", "/yolov5"]
python_path = os.environ.get("PYTHONPATH", "")
root_path = os.getcwd()

# os.pathsep is used instead of a hard-coded ":" so the value is split and
# joined correctly on platforms whose separator is not ":" (e.g. Windows, where
# ":" also appears inside drive-letter paths).
# NOTE(review): when PYTHONPATH starts out empty the result begins with a
# separator, i.e. an empty first entry (visible in the recorded output). That
# is left untouched because child scripts may rely on it - confirm before
# removing.
for path in required_paths:
    if not any(p.endswith(path) for p in python_path.split(os.pathsep)):
        python_path += f"{os.pathsep}{root_path}/data/external{path}"

os.environ["PYTHONPATH"] = python_path

!echo "PYTHONPATH: $PYTHONPATH"

PYTHONPATH: :/Users/carlos/WORKSPACE/MegaClassifier/data/external/ai4eutils:/Users/carlos/WORKSPACE/MegaClassifier/data/external/CameraTraps:/Users/carlos/WORKSPACE/MegaClassifier/data/external/yolov5


In [4]:
# ---------------------------------------------------------------------------
# Run configuration: thresholds, input/output locations and the list of images
# handed to the detector.
# ---------------------------------------------------------------------------

# Starting confidence threshold for the detector.
initial_threshold = 0.025

# Fraction of positive images found by the detector (nothing measured yet).
positives_coberture = 0.0

# Locations, all resolved against the current working directory.
OUTPUT_PATH = os.path.abspath("./resources/json")
MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v5a.0.0.pt")
DATASET_PATH = os.path.abspath("./dataset/emptyNonEmptyDatasetFiltered")
CSV_PATH = os.path.abspath("./data/interim/10000Images_filtered.csv")
TRAIN_CSV = os.path.abspath("./data/interim/train/10000Train.csv")
REPORT_CSV = os.path.abspath("./reports/model_coberture")

# Load the image listing and add a column with each image's absolute path.
dataset = dt.load_from_csv(CSV_PATH)
dataset["file_name_abspath"] = dataset["file_name"].apply(
    lambda relative_name: os.path.join(DATASET_PATH, relative_name)
)

# Files exchanged with the detector script.
IMAGES_PATH_JSON = os.path.join(OUTPUT_PATH, "dataset_file_paths.json")
OUTPUT_FILE_PATH = os.path.join(OUTPUT_PATH, "detections.json")
CHECKPOINT_PATH = os.path.join(OUTPUT_PATH, "checkpoint.json")

# Checkpoint roughly every eighth of the image list.
total_images = len(dataset["file_name_abspath"])
CHECKPOINT_FREQ = int(round(total_images / 8, 0))

# Persist the absolute image paths as a JSON list for the detector to read.
os.makedirs(OUTPUT_PATH, exist_ok=True)
image_paths = dataset["file_name_abspath"].tolist()
with open(IMAGES_PATH_JSON, "w") as f:
    json.dump(image_paths, f, indent=1)

# Echo the resolved configuration.
print()
for summary_line in (
    f"OUTPUT_PATH:       {OUTPUT_PATH}",
    f"MODEL_PATH:        {MODEL_PATH}",
    f"DATASET_PATH:      {DATASET_PATH}",
    f"OUTPUT_FILE_PATH:  {OUTPUT_FILE_PATH}",
    f"TRAIN_CSV:         {TRAIN_CSV}",
    f"REPORT_CSV:        {REPORT_CSV}",
):
    print(summary_line)

The file /Users/carlos/WORKSPACE/MegaClassifier/data/interim/10000Images_filtered.csv has been successfully opened.

OUTPUT_PATH:       /Users/carlos/WORKSPACE/MegaClassifier/resources/json
MODEL_PATH:        /Users/carlos/WORKSPACE/MegaClassifier/models/MegaDetector/md_v5a.0.0.pt
DATASET_PATH:      /Users/carlos/WORKSPACE/MegaClassifier/dataset/emptyNonEmptyDatasetFiltered
OUTPUT_FILE_PATH:  /Users/carlos/WORKSPACE/MegaClassifier/resources/json/detections.json
TRAIN_CSV:         /Users/carlos/WORKSPACE/MegaClassifier/data/interim/train/10000Train.csv
REPORT_CSV:        /Users/carlos/WORKSPACE/MegaClassifier/reports/model_coberture


In [None]:
# ---------------------------------------------------------------------------
# Threshold sweep.
#
# Starting at `initial_threshold`, run the detector, compare its output with
# the labels in TRAIN_CSV and halve the threshold until every positive image
# is detected (coverage == 100%) or the threshold cannot be lowered further.
# Each run writes a per-threshold detections JSON and appends its rows to a
# per-model report CSV.
# ---------------------------------------------------------------------------

report_columns = [
    "file_name",
    "label",
    "threshold",
    "detector_label",
    "false_positive",
    "false_negative",
    "time_inference",
]

# NOTE(review): it is not visible here whether dt.dataset_to_csv creates missing
# directories; creating the report directory up front is harmless either way.
os.makedirs(REPORT_CSV, exist_ok=True)

# The labels do not change between runs, so load them once and index them by
# file name. Only file names that occur exactly once are kept, matching the
# "exactly one matching row" rule applied when building the report.
dataset = dt.load_from_csv(TRAIN_CSV)
label_by_file = (
    dataset.drop_duplicates(subset="file_name", keep=False)
    .set_index("file_name")["label"]
    .to_dict()
)

# Doubled here because the loop halves the threshold before each run.
threshold = round(initial_threshold * 2, 3)
last_threshold_run = None
load_from_checkpoint = False
while positives_coberture < 1.0:
    threshold = round(threshold / 2, 3)

    # Rounding to 3 decimals makes the halving settle on a fixed value. Once
    # that happens another run would repeat the previous one (and append
    # duplicate report rows) forever, so stop instead.
    if threshold == last_threshold_run:
        print(
            f"El umbral no puede reducirse mas ({threshold}); se detiene la busqueda."
        )
        break
    last_threshold_run = threshold

    print(f"Iniciando ejecucion con umbral: {threshold}")

    detection_inicialitation_time = datetime.now(timezone.utc).strftime(
        "%Y-%m-%d %H:%M:%S"
    )

    # Argument list instead of a shell string: no quoting problems with paths,
    # and check=True stops the sweep if the detector fails rather than going on
    # with a missing output file.
    command = [
        "python",
        "src/detection/run_detector_batch.py",
        MODEL_PATH,
        IMAGES_PATH_JSON,
        OUTPUT_FILE_PATH,
        "--recursive",
        "--threshold",
        str(threshold),
        "--checkpoint_path",
        CHECKPOINT_PATH,
        "--checkpoint_frequency",
        str(CHECKPOINT_FREQ),
    ]
    if load_from_checkpoint:
        command += ["--resume_from_checkpoint", CHECKPOINT_PATH]
    subprocess.run(command, check=True)

    with open(OUTPUT_FILE_PATH, "r") as file:
        data = json.load(file)

    # Store file names without the dataset directory so they can be compared
    # with the "file_name" column of the label CSV.
    for image in data["images"]:
        image["file"] = image["file"].replace(
            DATASET_PATH + "/",
            "",
        )

    info = {
        "detection_initial_time": detection_inicialitation_time,
        "detection_completion_time": data["info"]["detection_completion_time"],
        "format_version": data["info"]["format_version"],
        "detector": data["info"]["detector"],
        "detector_threshold": threshold,
        "detector_metadata": data["info"]["detector_metadata"],
    }

    final_output = {
        "images": data["images"],
        "detection_categories": data["detection_categories"],
        "info": info,
    }

    # Archive this run as <OUTPUT_PATH>/<model>/<N>_images_<threshold>_threshold.json
    # and delete the detector's raw output file.
    threshold_str = str(threshold).replace(".", "_")
    json_name = f"{len(data['images'])}_images_{threshold_str}_threshold.json"
    model_name = os.path.basename(MODEL_PATH).split(".")[0]
    tmp_path = os.path.join(OUTPUT_PATH, model_name)
    os.makedirs(tmp_path, exist_ok=True)
    NEW_OUTPUT_FILE_PATH = os.path.join(tmp_path, json_name)

    with open(NEW_OUTPUT_FILE_PATH, "w") as f:
        json.dump(final_output, f, indent=1)

    os.remove(OUTPUT_FILE_PATH)

    data = final_output

    model = data["info"]["detector"].split(".")[0]

    # One report row per detected image that has exactly one label entry.
    # Rows are collected in a list and turned into a DataFrame once, instead
    # of concatenating a one-row frame per image.
    records = []
    for image in data["images"]:
        image_file = image["file"]
        if image_file not in label_by_file:
            continue

        label = int(label_by_file[image_file])
        # Any confidence above zero counts as "the detector saw something".
        detector_label = 1 if image["max_detection_conf"] > 0.0 else 0

        records.append(
            {
                "file_name": image_file,
                "label": label,
                "threshold": data["info"]["detector_threshold"],
                "detector_label": detector_label,
                "false_positive": int(label == 0 and detector_label == 1),
                "false_negative": int(label == 1 and detector_label == 0),
                "time_inference": image["time_inference"],
            }
        )
    report = pd.DataFrame(records, columns=report_columns)

    report_name = f"{len(data['images'])}_images_{model}.csv"
    REPORT_CSV_FILE = os.path.join(REPORT_CSV, report_name)

    # Accumulate the rows of every threshold in a single CSV per model.
    if os.path.exists(REPORT_CSV_FILE):
        existing_report = dt.load_from_csv(REPORT_CSV_FILE)
        new_report = pd.concat([existing_report, report], ignore_index=True)
        dt.dataset_to_csv(new_report, REPORT_CSV_FILE)
    else:
        dt.dataset_to_csv(report, REPORT_CSV_FILE)

    # Coverage = positives flagged by the detector / all positives in this run.
    positivos_reales = report["label"].sum()
    positivos_cubiertos = int(
        ((report["label"] == 1) & (report["detector_label"] == 1)).sum()
    )

    positives_coberture = (
        (positivos_cubiertos / positivos_reales) if positivos_reales > 0 else 0
    )

    # Resuming from a checkpoint only makes sense for the first run.
    load_from_checkpoint = False

    print()
    print("-----------------------------------------------------------------------")
    print(f"El porcentaje de positivos cubiertos es {positives_coberture*100:.2f}%")
    print("-----------------------------------------------------------------------")
    print()
