In [1]:
import json
import os

import pandas as pd
import src.data.Dataset as dt
import tqdm

In [None]:
from itables import init_notebook_mode

# Enable the itables notebook integration for this kernel session.
# NOTE(review): all_interactive=True presumably makes every DataFrame shown in
# the notebook render as an interactive table - confirm against the itables docs.
init_notebook_mode(all_interactive=True)

In [None]:
# Notebook inputs, resolved against the current working directory so the
# absolute locations are visible in the cell output.
#   OUTPUT_JSON - detector results file (the threshold is part of its name)
#   DATASET_CSV - labelled dataset the detector results are matched against
DATASET_CSV = os.path.abspath(
    "./data/interim/emptyNonEmptyDataset/28570Images_subset.csv"
)
OUTPUT_JSON = os.path.abspath(
    "./resources/json/28570_images_0_003_threshold.json"
)

# Echo both resolved paths, one per line.
print(
    f"OUTPUT_JSON: {OUTPUT_JSON}",
    f"DATASET_CSV: {DATASET_CSV}",
    sep="\n",
)

In [None]:
# The threshold is encoded in the results file name
# ("<n>_images_<threshold>_threshold.json"). Take whatever follows the last
# "images_" and strip the "_threshold.json" tail, e.g. "0_003" for the file
# configured above.
threshold = (
    os.path.basename(OUTPUT_JSON).split("images_")[-1].replace("_threshold.json", "")
)
name = "MegaDetector"
# `threshold` is already a string, so no conversion is needed to build the tag.
model = f"{name}_{threshold}"

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale, which can mis-decode non-ASCII characters in the JSON.
with open(OUTPUT_JSON, "r", encoding="utf-8") as file:
    data = json.load(file)

In [None]:
# Load the labelled dataset through the project's Dataset helper.
# NOTE(review): presumably returns a pandas DataFrame - the cells below index
# it by the "file_name", "label", "binary_label" and "subset" columns.
dataset = dt.load_from_csv(DATASET_CSV)
# Preview the first rows as the cell output.
dataset.head()

In [None]:
# Column order of the final report (and of the CSV written from it).
report_columns = [
    "file_name",
    "label",
    "binary_label",
    "detector_label",
    "subset",
]

# Index the dataset rows by file name once, instead of scanning the whole
# dataset for every image. Only file names that occur exactly once are kept
# (keep=False drops every copy of a duplicated name), which preserves the
# "exactly one matching row" rule: unknown or ambiguous images are skipped.
ground_truth_by_file = (
    dataset.drop_duplicates(subset="file_name", keep=False)
    .set_index("file_name")[["label", "binary_label", "subset"]]
    .to_dict(orient="index")
)

# Collect plain dict rows and build the DataFrame once at the end; growing a
# DataFrame with pd.concat inside the loop copies it on every iteration.
report_rows = []
for image in tqdm.tqdm(data["images"]):
    image_file = image["file"]
    ground_truth = ground_truth_by_file.get(image_file)
    if ground_truth is None:
        # No row, or more than one row, for this file in the dataset.
        continue

    report_rows.append(
        {
            "file_name": image_file,
            "label": ground_truth["label"],
            "binary_label": int(ground_truth["binary_label"]),
            # Any strictly positive max_detection_conf is flagged as 1, else 0.
            "detector_label": 1 if image["max_detection_conf"] > 0.0 else 0,
            "subset": ground_truth["subset"],
        }
    )

# Passing columns= keeps the expected header even when nothing matched.
report = pd.DataFrame(report_rows, columns=report_columns)

In [None]:
# Preview the first rows of the assembled report as the cell output.
report.head()

In [None]:
# Write the report next to the source dataset CSV. The file name records how
# many rows were kept and which threshold the detector results were taken from.
report_dir = os.path.dirname(DATASET_CSV)
report_filename = f"{len(report)}Images_{threshold}_threshold.csv"
NEW_DATASET_CSV = os.path.join(report_dir, report_filename)

# Semicolon-separated output, without the DataFrame index column.
report.to_csv(NEW_DATASET_CSV, sep=";", index=False)
print(f"Dataset saved to {NEW_DATASET_CSV}")