In [None]:
import json
import os

import pandas as pd
import tqdm

# Sub-directories of data/external that must be reachable through PYTHONPATH.
required_paths = ["/ai4eutils", "/CameraTraps", "/yolov5"]
root_path = os.getcwd()

# Split on the platform's path-list separator and drop empty entries: with an
# unset PYTHONPATH, plain string concatenation would leave a leading separator
# (i.e. an empty entry) in the final value.
path_entries = [
    entry for entry in os.environ.get("PYTHONPATH", "").split(os.pathsep) if entry
]

for path in required_paths:
    # Append a directory only when no existing entry already ends with it.
    if not any(p.endswith(path) for p in path_entries):
        path_entries.append(f"{root_path}/data/external{path}")

python_path = os.pathsep.join(path_entries)

# Child processes started later from this kernel inherit the updated value.
os.environ["PYTHONPATH"] = python_path

In [None]:
# Detection settings. THRESHOLD_STR is the threshold with "." swapped for "_"
# so the value can be embedded in a file name.
THRESHOLD = 0.2
THRESHOLD_STR = "_".join(str(THRESHOLD).split("."))
VERSION = "typical"

# Folder holding the JSON files: the list of input images and the raw
# detections written next to it.
RESOURCES_FOLDER = os.path.abspath("./resources/json")
INPUT_JSON, OUTPUT_JSON = (
    os.path.join(RESOURCES_FOLDER, json_name)
    for json_name in ("input_file_names.json", "output_detections.json")
)

# Dataset index file; DATASET_PATH is the folder that contains it.
DATASET_CSV = os.path.abspath(
    "./data/interim/emptyNonEmptyDataset/emptyNonEmptyDatasetSplit.csv"
)
DATASET_PATH = os.path.split(DATASET_CSV)[0]

# Model weights in use; the commented lines are alternative weight files.
MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v5a.0.0.pt")
# MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v5b.0.0.pt")
# MODEL_PATH = os.path.abspath("./models/MegaDetector/md_v4.1.0.pb")

In [None]:
# Load the dataset index (semicolon-separated CSV) and derive a full path for
# every row by joining its file name onto the folder that contains the CSV.
dataset_original = pd.read_csv(DATASET_CSV, sep=";")
dataset_original["file_name_abspath"] = dataset_original["file_name"].apply(
    lambda x: os.path.join(DATASET_PATH, x)
)

# Create the folder the JSON files are written into (not just its parent),
# otherwise opening INPUT_JSON for writing fails on a fresh checkout.
os.makedirs(RESOURCES_FOLDER, exist_ok=True)

# NOTE(review): only the first three image paths are exported — presumably a
# smoke-test limit; confirm before running on the full dataset.
with open(INPUT_JSON, "w") as f:
    json.dump(dataset_original["file_name_abspath"][:3].tolist(), f, indent=1)

# Read the list back so the check below covers exactly what was written.
with open(INPUT_JSON, "r") as file:
    image_paths = json.load(file)

total_images = len(image_paths)
# Paths listed in the input file that do not exist on disk.
unavailable_images = [path for path in image_paths if not os.path.exists(path)]

print(f"Total number of image paths: {total_images}")
print(f"Number of unavailable images: {len(unavailable_images)}")

if unavailable_images:
    print("Unavailable image paths:")
    for path in unavailable_images:
        print(path)

In [None]:
# Shell command line for the batch detector script in the CameraTraps checkout.
# The model path, input JSON, output JSON and threshold are each wrapped in
# double quotes so that paths containing spaces stay a single argument.
command = (
    "python data/external/CameraTraps/detection/run_detector_batch.py"
    f' "{MODEL_PATH}" "{INPUT_JSON}" "{OUTPUT_JSON}"'
    f' --recursive --threshold "{THRESHOLD}"'
)

In [None]:
# Run the detector command in a shell. os.system reports failure only through
# its return value, so check it here instead of letting a failed run surface
# later as a missing or stale output file.
exit_status = os.system(command)
if exit_status != 0:
    raise RuntimeError(
        f"Detector command failed with status {exit_status}: {command}"
    )

In [None]:
# Load the raw detector output.
with open(OUTPUT_JSON, "r") as file:
    json_data = json.load(file)

images = json_data["images"]
detection_categories = json_data["detection_categories"]
info = json_data["info"]

# Make each image path relative to the dataset folder. Only a leading prefix
# is removed: a blanket str.replace would also delete any later occurrence of
# the same text inside the path and silently corrupt the file name.
dataset_prefix = DATASET_PATH + "/"
for image in tqdm.tqdm(images):
    if image["file"].startswith(dataset_prefix):
        image["file"] = image["file"][len(dataset_prefix):]

# Keep only the three sections read above; any other top-level keys in the
# detector output are not carried over.
final_output = {
    "images": images,
    "detection_categories": detection_categories,
    "info": info,
}

# The result file is named after the run version and the threshold.
NEW_OUTPUT_FILE_PATH = os.path.join(RESOURCES_FOLDER, f"{VERSION}_{THRESHOLD_STR}.json")

with open(NEW_OUTPUT_FILE_PATH, "w") as f:
    json.dump(final_output, f, indent=1)

# Remove the intermediate files now that the final output has been written.
os.remove(INPUT_JSON)
os.remove(OUTPUT_JSON)