In [None]:
import os
import shutil

import pandas as pd
import tqdm
from itables import init_notebook_mode

# Activate itables for this notebook; all_interactive=True asks itables to
# render every DataFrame output as an interactive table (see itables docs).
init_notebook_mode(all_interactive=True)

# Make sure the external repositories vendored under ./data/external are listed
# in PYTHONPATH. NOTE: PYTHONPATH is read at interpreter start-up, so this only
# affects child processes launched from the notebook, not the running kernel.
required_paths = ["/ai4eutils", "/CameraTraps", "/yolov5"]
root_path = os.getcwd()

# Split on the platform separator and drop empty entries: an unset PYTHONPATH
# would otherwise yield a leading separator, i.e. an empty path entry.
path_entries = [
    entry for entry in os.environ.get("PYTHONPATH", "").split(os.pathsep) if entry
]

for path in required_paths:
    # Only append when no existing entry already points at this repository,
    # which keeps the cell idempotent across re-runs.
    if not any(entry.endswith(path) for entry in path_entries):
        path_entries.append(f"{root_path}/data/external{path}")

python_path = os.pathsep.join(path_entries)
os.environ["PYTHONPATH"] = python_path

In [None]:
# Build the "onlyDetectionsForTrain" dataset: every row of the source CSV is
# kept except training rows whose detector_label is 0. The kept images are
# copied from ORIGINAL_PATH into DATASET_PATH and the filtered table is written
# next to them as a ';'-separated CSV.
DATASET_NAME = "onlyDetectionsForTrain"

DATASET_FOLDER = os.path.abspath("./data/processed")
DATASET_PATH = os.path.join(DATASET_FOLDER, DATASET_NAME)

ORIGINAL_PATH = os.path.abspath("./resources/fit_images/0_003")

IMAGES_CSV = os.path.abspath(
    "resources/megadetector_coverage/MegaDetector_0_003.csv"
)

dataset = pd.read_csv(IMAGES_CSV, sep=";")

# Rows to drop: subset == "train" AND detector_label == 0.
is_dropped = (dataset["subset"] == "train") & (dataset["detector_label"] == 0)

# Select the kept rows in one vectorized step instead of growing a DataFrame
# with pd.concat on every iteration (quadratic in the number of rows).
new_dataset = dataset.loc[
    ~is_dropped,
    ["file_name", "label", "binary_label", "detector_label", "subset"],
].reset_index(drop=True)

for file_name in tqdm.tqdm(new_dataset["file_name"]):
    original_file = os.path.join(ORIGINAL_PATH, file_name)
    new_file = os.path.join(DATASET_PATH, file_name)
    # file_name may contain sub-directories; create them on demand.
    os.makedirs(os.path.dirname(new_file), exist_ok=True)
    try:
        shutil.copy(original_file, new_file)
    except FileNotFoundError:
        # Best effort: report the missing image and carry on with the rest.
        print(f"File {original_file} not found.")

# The output directory is otherwise only created while copying files, so make
# sure it exists even when no row was kept.
os.makedirs(DATASET_PATH, exist_ok=True)
new_dataset.to_csv(os.path.join(DATASET_PATH, f"{DATASET_NAME}.csv"), index=False, sep=";")

In [None]:
# Build the "emptyOriginalAnimalDetection" dataset: all rows of the source CSV
# are kept. Images with detector_label == 0 are copied from ORIGINAL_PATH, all
# other images from FIT_IMAGES_PATH. The table is written alongside the images
# as a ';'-separated CSV.
DATASET_NAME = "emptyOriginalAnimalDetection"

DATASET_FOLDER = os.path.abspath("./data/processed")
DATASET_PATH = os.path.join(DATASET_FOLDER, DATASET_NAME)

FIT_IMAGES_PATH = os.path.abspath("./resources/fit_images/0_003")
ORIGINAL_PATH = os.path.abspath("./data/interim/emptyNonEmptyDataset")

IMAGES_CSV = os.path.abspath(
    "resources/megadetector_coverage/MegaDetector_0_003.csv"
)

dataset = pd.read_csv(IMAGES_CSV, sep=";")

# Project the needed columns in one step instead of growing a DataFrame with
# pd.concat on every iteration (quadratic in the number of rows).
new_dataset = dataset[
    ["file_name", "label", "binary_label", "detector_label", "subset"]
].reset_index(drop=True)

image_rows = zip(new_dataset["file_name"], new_dataset["detector_label"])
for file_name, detector_label in tqdm.tqdm(image_rows, total=len(new_dataset)):
    # The source folder depends on the detector label of the image.
    source_root = ORIGINAL_PATH if detector_label == 0 else FIT_IMAGES_PATH
    original_file = os.path.join(source_root, file_name)

    new_file = os.path.join(DATASET_PATH, file_name)
    # file_name may contain sub-directories; create them on demand.
    os.makedirs(os.path.dirname(new_file), exist_ok=True)
    try:
        shutil.copy(original_file, new_file)
    except FileNotFoundError:
        # Best effort: report the missing image and carry on with the rest.
        print(f"File {original_file} not found.")

# The output directory is otherwise only created while copying files, so make
# sure it exists even when the CSV has no rows.
os.makedirs(DATASET_PATH, exist_ok=True)
new_dataset.to_csv(os.path.join(DATASET_PATH, f"{DATASET_NAME}.csv"), index=False, sep=";")

In [None]:
# Build the "emptyNonEmptyDataset" dataset: all rows of the source CSV are
# kept and every image is copied from ORIGINAL_PATH into DATASET_PATH. The
# table is written alongside the images as a ';'-separated CSV.
DATASET_NAME = "emptyNonEmptyDataset"

DATASET_FOLDER = os.path.abspath("./data/processed")
DATASET_PATH = os.path.join(DATASET_FOLDER, DATASET_NAME)

ORIGINAL_PATH = os.path.abspath("./data/interim/emptyNonEmptyDataset")

IMAGES_CSV = os.path.abspath(
    "resources/megadetector_coverage/MegaDetector_0_003.csv"
)

dataset = pd.read_csv(IMAGES_CSV, sep=";")

# Project the needed columns in one step instead of growing a DataFrame with
# pd.concat on every iteration (quadratic in the number of rows).
new_dataset = dataset[
    ["file_name", "label", "binary_label", "detector_label", "subset"]
].reset_index(drop=True)

for file_name in tqdm.tqdm(new_dataset["file_name"]):
    original_file = os.path.join(ORIGINAL_PATH, file_name)
    new_file = os.path.join(DATASET_PATH, file_name)
    # file_name may contain sub-directories; create them on demand.
    os.makedirs(os.path.dirname(new_file), exist_ok=True)
    try:
        shutil.copy(original_file, new_file)
    except FileNotFoundError:
        # Best effort: report the missing image and carry on with the rest.
        print(f"File {original_file} not found.")

# The output directory is otherwise only created while copying files, so make
# sure it exists even when the CSV has no rows.
os.makedirs(DATASET_PATH, exist_ok=True)
new_dataset.to_csv(os.path.join(DATASET_PATH, f"{DATASET_NAME}.csv"), index=False, sep=";")