In [1]:
import json
import os
import sys

import src.data.Dataset as dt

In [None]:
from itables import init_notebook_mode

# Switch itables into notebook mode. With all_interactive=True, DataFrames
# displayed by later cells should be rendered as interactive tables
# (per the itables documentation -- confirm against the installed version).
init_notebook_mode(all_interactive=True)

In [2]:
# Make the vendored repositories under data/external importable by child
# processes by appending them to the PYTHONPATH environment variable
# (presumably needed by the detector script launched further down).
required_paths = ["/ai4eutils", "/CameraTraps", "/yolov5"]
python_path = os.environ.get("PYTHONPATH", "")
root_path = os.getcwd()

# Work on a list of entries and use os.pathsep / os.path.join so the cell does
# not depend on POSIX-only ":" and "/" separators. Existing entries (including
# an empty one when PYTHONPATH was unset) are kept exactly as they were.
path_entries = python_path.split(os.pathsep)
for path in required_paths:
    repo_name = path.lstrip("/")
    already_listed = any(
        os.path.basename(os.path.normpath(entry)) == repo_name
        for entry in path_entries
        if entry
    )
    # Re-running the cell must not append the same directory twice.
    if not already_listed:
        path_entries.append(os.path.join(root_path, "data", "external", repo_name))

python_path = os.pathsep.join(path_entries)
os.environ["PYTHONPATH"] = python_path

# Plain print instead of a shell `echo`, so the cell needs no shell.
print(f"PYTHONPATH: {python_path}")

PYTHONPATH: :/Users/carlos/WORKSPACE/MegaClassifier/data/external/ai4eutils:/Users/carlos/WORKSPACE/MegaClassifier/data/external/CameraTraps:/Users/carlos/WORKSPACE/MegaClassifier/data/external/yolov5


In [3]:
# Confidence threshold passed to the detector (--threshold) and recorded in
# the final JSON's "info" section.
threshold = 0.003

# All locations are resolved against the current working directory, so the
# notebook is expected to be run from the project root.
INPUT_FILE_PATH = os.path.abspath("./resources/json/input_filenames.json")
DATASET_PATH = os.path.abspath("./data/raw/emptyNonEmptyDataset_ETL")
OUTPUT_PATH = os.path.abspath("./resources/json")
OUTPUT_FILE_PATH = os.path.join(OUTPUT_PATH, "output_detections.json")

MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v5a.0.0.pt")
# Alternative model weights:
# MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v5b.0.0.pt")
# MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v4.1.0.pb")

# Create the target directories BEFORE writing into them; otherwise the
# open(..., "w") below raises FileNotFoundError on a fresh checkout.
for directory in (OUTPUT_PATH, os.path.dirname(INPUT_FILE_PATH)):
    os.makedirs(directory, exist_ok=True)

# Load the dataset index and turn each relative file name into an absolute
# path under DATASET_PATH.
# NOTE(review): dt.load_from_csv presumably returns a pandas DataFrame with a
# "file_name" column -- confirm against src.data.Dataset.
dataset_csv_path = os.path.join(DATASET_PATH, "28570Images_subset.csv")
dataset = dt.load_from_csv(dataset_csv_path)
dataset["file_name_abspath"] = dataset["file_name"].apply(
    lambda x: os.path.join(DATASET_PATH, x)
)

# The detector is fed a JSON list of absolute image paths.
with open(INPUT_FILE_PATH, "w") as f:
    json.dump(dataset["file_name_abspath"].tolist(), f, indent=1)

print()
print(f"MODEL_PATH:       {MODEL_PATH}")
print(f"DATASET_PATH:     {DATASET_PATH}")
print(f"INPUT_FILE_PATH: {INPUT_FILE_PATH}")
print(f"OUTPUT_PATH:      {OUTPUT_PATH}")
print(f"OUTPUT_FILE_PATH: {OUTPUT_FILE_PATH}")

The file /Users/carlos/WORKSPACE/MegaClassifier/data/raw/emptyNonEmptyDataset_ETL/28570Images_subset.csv has been successfully opened.

MODEL_PATH:       /Users/carlos/WORKSPACE/MegaClassifier/models/MegaDetector/md_v5a.0.0.pt
DATASET_PATH:     /Users/carlos/WORKSPACE/MegaClassifier/data/raw/emptyNonEmptyDataset_ETL
INPUT_FILE_PATH: /Users/carlos/WORKSPACE/MegaClassifier/resources/json/input_filenames.json
OUTPUT_PATH:      /Users/carlos/WORKSPACE/MegaClassifier/resources/json
OUTPUT_FILE_PATH: /Users/carlos/WORKSPACE/MegaClassifier/resources/json/output_detections.json


In [4]:
# Reload the path list that was written for the detector and report every
# entry that does not exist on disk.
with open(INPUT_FILE_PATH, "r") as paths_json:
    image_paths = json.loads(paths_json.read())

unavailable_images = [
    candidate for candidate in image_paths if not os.path.exists(candidate)
]
total_images = len(image_paths)

print(f"Total number of image paths: {total_images}")
print(f"Number of unavailable images: {len(unavailable_images)}")
if unavailable_images:
    # One line for the heading, then one line per missing path.
    print("Unavailable image paths:", *unavailable_images, sep="\n")

Total number of image paths: 28570
Number of unavailable images: 0


In [5]:
# Shell command that runs the batch detector on the JSON list of image paths.
# sys.executable is used instead of a bare `python` so the subprocess runs
# with the same interpreter (and installed packages) as this kernel, rather
# than whichever `python` happens to be first on PATH.
command = (
    f'"{sys.executable}" src/detection/run_detector_batch.py '
    f'"{MODEL_PATH}" "{INPUT_FILE_PATH}" "{OUTPUT_FILE_PATH}" '
    f'--recursive --threshold "{threshold}"'
)
print(command)

python src/detection/run_detector_batch.py "/Users/carlos/WORKSPACE/MegaClassifier/models/MegaDetector/md_v5a.0.0.pt" "/Users/carlos/WORKSPACE/MegaClassifier/resources/json/input_filenames.json" "/Users/carlos/WORKSPACE/MegaClassifier/resources/json/output_detections.json" --recursive --threshold "0.003"


In [None]:
# Run the detector and stop the notebook if it fails. Without this check a
# failed run would only surface in the next cell, which would then either
# crash on a missing output file or silently process a stale one.
exit_status = os.system(command)
if exit_status != 0:
    raise RuntimeError(
        f"Detector command failed (os.system returned {exit_status}): {command}"
    )

In [7]:
# Post-process the raw detector output: make image paths relative to the
# dataset root, record the threshold used, save the result under a
# descriptive file name and delete the two intermediate JSON files.
with open(OUTPUT_FILE_PATH, "r") as file:
    data = json.load(file)

# Strip the dataset root only when it is a leading prefix of the path;
# str.replace would also remove it if it appeared later in the string.
dataset_prefix = DATASET_PATH + "/"
for image in data["images"]:
    if image["file"].startswith(dataset_prefix):
        image["file"] = image["file"][len(dataset_prefix):]

# Copy the detector's own metadata and add the threshold set in this notebook.
info = {
    "detection_completion_time": data["info"]["detection_completion_time"],
    "format_version": data["info"]["format_version"],
    "detector": data["info"]["detector"],
    "detector_threshold": threshold,
    "detector_metadata": data["info"]["detector_metadata"],
}

final_output = {
    "images": data["images"],
    "detection_categories": data["detection_categories"],
    "info": info,
}

# File name encodes the image count and the threshold, e.g. 0.003 -> "0_003".
threshold_str = str(threshold).replace(".", "_")
json_name = f"{len(data['images'])}_images_{threshold_str}_threshold.json"
# Model file name up to its first dot; not used further in this cell.
model_name = os.path.basename(MODEL_PATH).split(".")[0]

os.makedirs(OUTPUT_PATH, exist_ok=True)
NEW_OUTPUT_FILE_PATH = os.path.join(OUTPUT_PATH, json_name)


with open(NEW_OUTPUT_FILE_PATH, "w") as f:
    json.dump(final_output, f, indent=1)
print(f"Output file saved at {NEW_OUTPUT_FILE_PATH}")

# The intermediate files are removed only after the final file was written.
os.remove(OUTPUT_FILE_PATH)
print(f"Output file deleted: {OUTPUT_FILE_PATH}")

os.remove(INPUT_FILE_PATH)
print(f"Images paths json file deleted: {INPUT_FILE_PATH}")


Output file saved at /Users/carlos/WORKSPACE/MegaClassifier/resources/json/28570_images_0_003_threshold.json
Output file deleted: /Users/carlos/WORKSPACE/MegaClassifier/resources/json/output_detections.json
Images paths json file deleted: /Users/carlos/WORKSPACE/MegaClassifier/resources/json/input_filenames.json


In [8]:
# Sanity check: read the saved file back and report how many image entries
# it contains.
with open(NEW_OUTPUT_FILE_PATH, "r") as saved_json:
    new_data = json.loads(saved_json.read())

num_files = len(new_data["images"])
print(f"Number of files in {NEW_OUTPUT_FILE_PATH}: {num_files}")

Number of files in /Users/carlos/WORKSPACE/MegaClassifier/resources/json/28570_images_0_003_threshold.json: 28570
