Set input and output paths.

In [None]:
import os
import subprocess

# --- User configuration: edit these two paths --------------------------------
# Path to the root of the brackish dataset.
input_folder = r"/home/beuss/Downloads/datasets/brackish"
# Path to the root of the output (dataset) folder; use this in brackish.yaml.
output_folder = r"/home/beuss/Downloads/datasets/brackish/out"

# --- Optional parameters -----------------------------------------------------
# Split fractions and random seed; these are forwarded on the command line to
# create_dataset_split.py later in the notebook.
train_split, validation_split, test_split = 0.8, 0.1, 0.1
seed = 1234567890

# --- Derived values (no need to edit below this line) ------------------------
# Work with absolute paths from here on.
input_folder, output_folder = map(os.path.abspath, (input_folder, output_folder))

dataset_folder = os.path.join(input_folder, "dataset")

# Shared "all" folders that hold every image / label before the split.
all_images_folder, all_labels_folder = (
    os.path.join(output_folder, kind, "all") for kind in ("images", "labels")
)

# Echo the resolved locations, one per line.
print(
    f"Input folder: {input_folder}",
    f"Output folder: {output_folder}",
    f"All images folder: {all_images_folder}",
    f"All labels folder: {all_labels_folder}",
    sep="\n",
)

def execute(cmd, print_output=False):
    """Run a shell command, optionally echoing the command and its output.

    Args:
        cmd: Command line string; it is run through the shell (``shell=True``).
        print_output: When true, print the command before running it and its
            captured output (stdout and stderr combined) afterwards.

    Returns:
        None. A non-zero exit status is reported with a printed message
        instead of being raised, so the calling cell keeps running.
    """
    try:
        if print_output:
            print("executing: " + cmd)
        # stderr is merged into stdout so both end up in the captured output.
        output = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
        if print_output:
            # errors="replace": bytes that are not valid UTF-8 must not raise
            # an uncaught UnicodeDecodeError after the command succeeded.
            print(output.decode("UTF-8", errors="replace"))
    except subprocess.CalledProcessError as e:
        # e.output is bytes; decode it so the message shows readable text
        # instead of a b'...\n' repr.
        error_output = (e.output or b"").decode("UTF-8", errors="replace")
        print("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, error_output))


Extract all the images from the videos

In [None]:
# Sub-folders of <dataset_folder>/videos that are processed, one per category.
video_categories = ["crab", "fish-big", "fish-school", "fish-small-shrimp", "jellyfish"]

print("Dataset folder: " + dataset_folder)
for category in video_categories:
    # Deliberately not named `input`: a cell-level variable with that name
    # would shadow the built-in input() for the rest of the kernel session.
    category_folder = os.path.join(dataset_folder, "videos", category)
    # Every category is extracted into the same shared image folder.
    command = f"python frame_extractor.py --inputFolder \"{category_folder}\" --outputFolder \"{all_images_folder}\""
    execute(command, print_output = True)
    print("Finished extracting frames for " + category)

print("Done extracting frames!")


Now we need to compile the image list to "imagelist.txt" and then copy all the images to a common folder.

In [None]:
# Run create_image_list.py over the shared image folder (output is not echoed).
image_list_command = f"python create_image_list.py --inputFolder \"{all_images_folder}\""
execute(image_list_command)
print("Created imagelist.txt")

Create fake annotations for images with no fish

In [None]:
# Run create_dummy_yolo_annotations.py with the shared image folder as input
# and the shared label folder as output, echoing the script's output.
dummy_annotation_command = (
    "python create_dummy_yolo_annotations.py"
    f" --inputFolder \"{all_images_folder}\""
    f" --outputFolder \"{all_labels_folder}\""
)
execute(dummy_annotation_command, print_output=True)
print("Created dummy annotations")

Create real annotations for images with fish

In [None]:
# Annotation CSVs to convert, all located in <input_folder>/annotations/annotations_AAU.
annotation_files = ["test.csv", "valid.csv", "train.csv"]

# These paths are the same for every CSV, so build them once instead of on
# every loop iteration.
categories = os.path.join(input_folder, 'Brackish.names')
annotations_root = os.path.join(input_folder, "annotations", "annotations_AAU")

for annotation_file in annotation_files:
    annotation_csv = os.path.join(annotations_root, annotation_file)

    print(f"Processing {annotation_file}...")
    # All three CSVs are converted into the same shared label folder.
    execute(f"python annotations_to_yolo.py --imageFolder \"{all_images_folder}\" --annotationCSV \"{annotation_csv}\" --outputPath \"{all_labels_folder}\" --categories \"{categories}\"")
    print(f"Finished converting annotations for {annotation_file}")

Verify that all images have a corresponding label file

In [None]:
# Verify that all images have a corresponding label file
# The first item yielded by os.walk is (dirpath, dirnames, filenames) for the
# top-level folder; only the file names directly inside it are needed.
(_, _, image_files) = next(os.walk(all_images_folder))
(_, _, label_files) = next(os.walk(all_labels_folder))

# A set makes each membership test constant-time; checking against the list
# for every image would be quadratic in the number of files.
label_names = set(label_files)

for image_file in image_files:
    # Expected label name: same stem as the image, with a .txt extension.
    label_file = os.path.splitext(image_file)[0] + ".txt"
    # Ignore inputList.txt, this is generated by frame_extractor.py
    if label_file not in label_names and label_file != "inputList.txt":
        raise Exception(f"Missing label file for {image_file}")

print("Done!")

Split the full dataset into a training set, a validation set, and a test set.

In [None]:
# Build the create_dataset_split.py command from the configured seed and split
# fractions, then run it and echo the script's output.
split_command = (
    f"python create_dataset_split.py --inputFile \"imagelist.txt\" --seed {seed}"
    f" --trnSplit {train_split} --valSplit {validation_split} --tstSplit {test_split}"
)
execute(split_command, print_output=True)

Copy the images and labels of each split (train, valid, test) into their respective folders.

In [None]:
import shutil
import pathlib

# Each stage has a "<stage>.txt" list file (opened relative to the current
# working directory) and its own images/labels sub-folder under output_folder.
stages = ["train", "valid", "test"]

for stage in stages:
    stage_file = f"{stage}.txt"
    stage_images_folder = os.path.join(output_folder, "images", stage)
    stage_labels_folder = os.path.join(output_folder, "labels", stage)

    # Create stage folders if they don't exist
    pathlib.Path(stage_images_folder).mkdir(exist_ok=True, parents=True)
    pathlib.Path(stage_labels_folder).mkdir(exist_ok=True, parents=True)

    print(f"Copying {stage} files...")
    print(f"Image folder: {stage_images_folder}")
    print(f"Label folder: {stage_labels_folder}")

    with open(stage_file, "r") as f:
        for line in f:
            image_name = line.strip()
            if not image_name:
                # Skip blank lines: joining an empty name would resolve to the
                # image folder itself and make shutil.copy2 fail.
                continue

            # Each entry is resolved relative to the shared image folder.
            image_file = os.path.join(all_images_folder, image_name)
            # The label shares the image's base name, with a .txt extension.
            label_stem = os.path.splitext(os.path.basename(image_name))[0]
            label_file = os.path.join(all_labels_folder, label_stem + ".txt")

            # Copy files to stage folders
            shutil.copy2(image_file, stage_images_folder)
            shutil.copy2(label_file, stage_labels_folder)
    print(f"Done copying '{stage}' files!")