Set input and output paths.

In [75]:
import os
import subprocess

# CHANGE THESE TWO
input_folder = r"C:\Users\benja\Documents\datasets\brackish dataset" # Path to the root of the brackish dataset
output_folder = r"C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test"

# Optional parameters
# The three split fractions and the seed are forwarded to create_dataset_split.py.
train_split = 0.8
validation_split = 0.1
test_split = 0.1
seed = 1234567890


#--------------------------------------------------------------

# Normalise both user-supplied paths to absolute paths before deriving anything from them.
input_folder = os.path.abspath(input_folder)
output_folder = os.path.abspath(output_folder)

# The source videos are read from <input_folder>/dataset/videos/<category>, so this
# folder must be derived from the input root, not from the output folder.
dataset_folder = os.path.join(input_folder, "dataset")

# Every extracted frame and every generated label file is collected here before the split.
all_images_folder = os.path.join(output_folder, "images", "all")
all_labels_folder = os.path.join(output_folder, "labels", "all")

print(f"Input folder: {input_folder}")
print(f"Output folder: {output_folder}")
print(f"All images folder: {all_images_folder}")
print(f"All labels folder: {all_labels_folder}")

# https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running/4417735#4417735
"""
def execute(cmd):
    print("running: " + cmd)
    popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
    for stdout_line in iter(popen.stdout.readline, ""):
        yield stdout_line 
    popen.stdout.close()
    return_code = popen.wait()
    if return_code:
        raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))
        #raise subprocess.CalledProcessError(return_code, cmd)
"""
def execute(cmd):
    """Run ``cmd`` through the shell and print its combined stdout/stderr.

    Parameters
    ----------
    cmd : str
        Complete command line. It is handed to the shell unchanged (``shell=True``),
        so arguments containing spaces must already be quoted by the caller and the
        string must only be built from trusted values.

    Returns
    -------
    None
        A non-zero exit status is not raised; it is reported with a printed message
        so the calling cell keeps running.
    """
    print("executing: " + cmd)
    try:
        output = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # e.output is bytes; decode it so the message shows the text instead of a b'...' repr.
        failure_output = (e.output or b"").decode("UTF-8", errors="replace")
        print("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, failure_output))
    else:
        # errors="replace" keeps output that is not valid UTF-8 from raising UnicodeDecodeError.
        print(output.decode("UTF-8", errors="replace"))


Input folder: C:\Users\benja\Documents\datasets\brackish dataset
Output folder: C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test
All images folder: C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\all
All labels folder: C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\labels\all


Extract all the images from the videos

In [36]:
video_categories = ["crab", "fish-big", "fish-school", "fish-small-shrimp", "jellyfish"]

# Run frame_extractor.py once per category; every category writes into the same image folder.
for category in video_categories:
    # Deliberately not named `input`: a global with that name would shadow the builtin
    # input() for the rest of the kernel session.
    category_video_folder = os.path.join(dataset_folder, "videos", category)
    # Both folders are quoted because the paths may contain spaces.
    command = f"python frame_extractor.py --inputFolder \"{category_video_folder}\" --outputFolder \"{all_images_folder}\""
    execute(command)

print("Done extracting frames!")


running: python frame_extractor.py --inputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\videos\crab" --outputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\all"
running: python frame_extractor.py --inputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\videos\fish-big" --outputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\all"
running: python frame_extractor.py --inputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\videos\fish-school" --outputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\all"
running: python frame_extractor.py --inputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\videos\fish-small-shrimp" --outputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\all"
running: python frame_extractor.py --inputFolder "C:\Users\benja\Documents\datasets\brackish

Now we need to compile the image list into "imageList.txt" and then copy all the images to a common folder.

In [44]:
# Build the command line first, then run it; the folder is quoted because it may contain spaces.
image_list_command = f'python create_image_list.py --inputFolder "{all_images_folder}"'
execute(image_list_command)

running: python create_image_list.py --inputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\all"


Create fake annotations for images with no fish

In [68]:
# Build the command from its two quoted folder arguments, then run it.
dummy_annotations_command = (
    "python create_dummy_yolo_annotations.py"
    f' --inputFolder "{all_images_folder}"'
    f' --outputFolder "{all_labels_folder}"'
)
execute(dummy_annotations_command)

executing: python create_dummy_yolo_annotations.py --inputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\all" --outputFolder "C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\labels\all"
b''


Create real annotations for images with fish

In [64]:
# Path passed as --categories; it is the same for every annotation CSV.
categories = os.path.join(input_folder, 'Brackish.names')
annotation_files = ["test.csv", "valid.csv", "train.csv"]

# Run annotations_to_yolo.py once per CSV; all of them write into the shared label folder.
for annotation_file in annotation_files:
    annotation_csv = os.path.join(input_folder, "annotations", "annotations_AAU", annotation_file)
    yolo_command = (
        "python annotations_to_yolo.py"
        f' --imageFolder "{all_images_folder}"'
        f' --annotationCSV "{annotation_csv}"'
        f' --outputPath "{all_labels_folder}"'
        f' --categories "{categories}"'
    )
    execute(yolo_command)

Verify that all images have a corresponding label file

In [70]:
# Verify that all images have a corresponding label file
# Only the files directly inside each folder are considered (first os.walk entry).
# Indexing with [2] avoids rebinding `_`, which IPython uses for the last cell output.
image_files = next(os.walk(all_images_folder))[2]
label_files = next(os.walk(all_labels_folder))[2]

# A set makes each lookup constant-time; testing against the list is quadratic in the number of images.
label_names = set(label_files)

for image_file in image_files:
    label_file = os.path.splitext(image_file)[0] + ".txt"
    # Ignore inputList.txt, this is generated by frame_extractor.py
    if label_file == "inputList.txt":
        continue
    if label_file not in label_names:
        print(f"Missing label file for {image_file}")

print("Done!")

Done!


Split all training data into a training set, validation set and a test set

In [76]:
# Build the split command from the configured seed and split fractions, then run it.
split_command = (
    'python create_dataset_split.py --inputFile "imageList.txt"'
    f" --seed {seed}"
    f" --trnSplit {train_split}"
    f" --valSplit {validation_split}"
    f" --tstSplit {test_split}"
)
execute(split_command)

executing: python create_dataset_split.py --inputFile "imageList.txt" --seed 1234567890 --trnSplit 0.8 --valSplit 0.1 --tstSplit 0.1
Trn: 12067
Val: 1508
Tst: 1509



Copy the images and labels of each split (train, valid, test) into their respective folders

In [95]:
import shutil
import pathlib

stages = ["train", "valid", "test"]

# Open 'stage'.txt and read the lines
# For each line, copy the image and label file to the corresponding stage folder in the output folder
print("All labels folder: " + all_labels_folder)
for stage in stages:
    stage_file = f"{stage}.txt"
    stage_images_folder = os.path.join(output_folder, "images", stage)
    stage_labels_folder = os.path.join(output_folder, "labels", stage)

    # Create stage folders if they don't exist
    pathlib.Path(stage_images_folder).mkdir(exist_ok=True, parents=True)
    pathlib.Path(stage_labels_folder).mkdir(exist_ok=True, parents=True)

    print(f"Copying {stage} files...")
    print(f"Image folder: {stage_images_folder}")
    print(f"Label folder: {stage_labels_folder}")

    with open(stage_file, "r") as f:
        for line in f:
            image_entry = line.strip()
            # Skip blank lines: joining an empty entry yields the images folder itself,
            # which shutil.copy2 cannot copy.
            if not image_entry:
                continue

            image_file = os.path.join(all_images_folder, image_entry)
            # The label shares the image's base name, with a .txt extension.
            label_name = os.path.splitext(os.path.basename(image_entry))[0] + ".txt"
            label_file = os.path.join(all_labels_folder, label_name)

            # Copy the image and its label into the stage folders
            shutil.copy2(image_file, stage_images_folder)
            shutil.copy2(label_file, stage_labels_folder)
    print(f"Done copying {stage} files!")

All labels folder: C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\labels\all
Copying train files...
Image folder: C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\train
Label folder: C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\labels\train
Copying C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\all\2019-03-20_23-30-18to2019-03-20_23-30-28_1-0022.png to C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\train
Copying C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\labels\all\2019-03-20_23-30-18to2019-03-20_23-30-28_1-0022.txt to C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\labels\train
Copying C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images\all\2019-02-22_22-19-45to2019-02-22_22-19-53_1-0092.png to C:\Users\benja\Documents\datasets\brackish dataset\dataset\output_test\images