In [1]:
import json
import os
import subprocess
from datetime import datetime

import src.data.Dataset as dt

In [2]:
required_paths = ["/ai4eutils", "/CameraTraps", "/yolov5"]
python_path = os.environ.get("PYTHONPATH", "")
root_path = os.getcwd()

for path in required_paths:
    if not any(p.endswith(path) for p in python_path.split(":")):
        python_path += f":{root_path}/data/external{path}"

os.environ["PYTHONPATH"] = python_path

!echo "PYTHONPATH: $PYTHONPATH"

PYTHONPATH: :/Users/carlos/WORKSPACE/MegaClassifier/data/external/ai4eutils:/Users/carlos/WORKSPACE/MegaClassifier/data/external/CameraTraps:/Users/carlos/WORKSPACE/MegaClassifier/data/external/yolov5


In [3]:
threshold = float(0.2)

OUTPUT_PATH = os.path.abspath("./resources/json")
MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v5a.0.0.pt")
# MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v5b.0.0.pt")
# MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v4.1.0.pb")

DATASET_PATH = os.path.abspath("./dataset/datasetFiltered")
CSV_PATH = os.path.abspath("./data/interim/28570Images_filtered.csv")
dataset = dt.load_from_csv(CSV_PATH)

dataset["file_name_abspath"] = dataset["file_name"].apply(
    lambda x: os.path.join(DATASET_PATH, x)
)
IMAGES_PATH_JSON = os.path.join(OUTPUT_PATH, "dataset_file_paths.json")

os.makedirs(OUTPUT_PATH, exist_ok=True)

with open(IMAGES_PATH_JSON, "w") as f:
    json.dump(dataset["file_name_abspath"].tolist(), f, indent=1)


OUTPUT_FILE_PATH = os.path.join(OUTPUT_PATH, "detections.json")

print()
print(f"MODEL_PATH:       {MODEL_PATH}")
print(f"DATASET_PATH:     {DATASET_PATH}")
print(f"IMAGES_PATH_JSON: {IMAGES_PATH_JSON}")
print(f"OUTPUT_PATH:      {OUTPUT_PATH}")
print(f"OUTPUT_FILE_PATH: {OUTPUT_FILE_PATH}")

The file /Users/carlos/WORKSPACE/MegaClassifier/data/interim/28570Images_filtered.csv has been successfully opened.

MODEL_PATH:       /Users/carlos/WORKSPACE/MegaClassifier/models/MegaDetector/md_v5a.0.0.pt
DATASET_PATH:     /Users/carlos/WORKSPACE/MegaClassifier/dataset/datasetFiltered
IMAGES_PATH_JSON: /Users/carlos/WORKSPACE/MegaClassifier/resources/json/dataset_file_paths.json
OUTPUT_PATH:      /Users/carlos/WORKSPACE/MegaClassifier/resources/json
OUTPUT_FILE_PATH: /Users/carlos/WORKSPACE/MegaClassifier/resources/json/detections.json


In [4]:
with open(IMAGES_PATH_JSON, "r") as file:
    image_paths = json.load(file)

total_images = len(image_paths)
unavailable_images = []

for path in image_paths:
    if not os.path.exists(path):
        unavailable_images.append(path)

print(f"Total number of image paths: {total_images}")
print(f"Number of unavailable images: {len(unavailable_images)}")
if unavailable_images:
    print("Unavailable image paths:")
    for path in unavailable_images:
        print(path)

Total number of image paths: 28570
Number of unavailable images: 0


In [None]:
command = f'python src/detection/run_detector_batch.py "{MODEL_PATH}" "{IMAGES_PATH_JSON}" "{OUTPUT_FILE_PATH}" --recursive --threshold "{threshold}"'
os.system(command)

28570 image files found in the json list
PyTorch reports 0 available CUDA devices
GPU available: False
Using PyTorch version 1.10.1


Fusing layers... 
Model summary: 574 layers, 139990096 parameters, 0 gradients
  0%|          | 0/28570 [00:00<?, ?it/s]

Loaded model in 2.54 seconds
Loaded model in 2.54 seconds
Processing image /Users/carlos/WORKSPACE/MegaClassifier/dataset/datasetFiltered/vacia/noanimal_zoo_1_4/37_20210115_85_.jpg


  0%|          | 1/28570 [00:03<24:20:41,  3.07s/it]

Processing image /Users/carlos/WORKSPACE/MegaClassifier/dataset/datasetFiltered/vacia/vacia_wellingtoncameratraps_ss/25091507183201940156.jpg


  0%|          | 2/28570 [00:06<25:53:29,  3.26s/it]

Processing image /Users/carlos/WORKSPACE/MegaClassifier/dataset/datasetFiltered/vacia/vacias_2_de_2_pnc_2012_isaac/a15323im000364.jpg


  0%|          | 3/28570 [00:09<26:15:07,  3.31s/it]

Processing image /Users/carlos/WORKSPACE/MegaClassifier/dataset/datasetFiltered/cervidae/cervidredorfallowdeer_zoo_5_6_7_9/18_20210219_6610__2021_04_1506_16_22utc_.jpg


In [None]:
with open(OUTPUT_FILE_PATH, "r") as file:
    data = json.load(file)

for image in data["images"]:
    image["file"] = image["file"].replace(
        DATASET_PATH + "/",
        "",
    )

info = {
    "detection_completion_time": data["info"]["detection_completion_time"],
    "format_version": data["info"]["format_version"],
    "detector": data["info"]["detector"],
    "detector_threshold": threshold,
    "detector_metadata": data["info"]["detector_metadata"],
}

final_output = {
    "images": data["images"],
    "detection_categories": data["detection_categories"],
    "info": info,
}

threshold_str = str(threshold).replace(".", "_")
json_name = f"{len(data['images'])}_images_{threshold_str}_threshold.json"
model_name = os.path.basename(MODEL_PATH).split(".")[0]
tmp_path = os.path.join(OUTPUT_PATH, model_name)
os.makedirs(tmp_path, exist_ok=True)
NEW_OUTPUT_FILE_PATH = os.path.join(tmp_path, json_name)


with open(NEW_OUTPUT_FILE_PATH, "w") as f:
    json.dump(final_output, f, indent=1)
print(f"Output file saved at {NEW_OUTPUT_FILE_PATH}")

os.remove(OUTPUT_FILE_PATH)
print(f"Output file deleted: {OUTPUT_FILE_PATH}")

os.remove(IMAGES_PATH_JSON)
print(f"Images paths json file deleted: {IMAGES_PATH_JSON}")


In [None]:
with open(NEW_OUTPUT_FILE_PATH, "r") as file:
    new_data = json.load(file)

num_files = len(new_data["images"])
print(f"Number of files in {NEW_OUTPUT_FILE_PATH}: {num_files}")