In [37]:
import pandas as pd
import cv2

from pathlib import Path
import re

TRAIN_DIR = Path("/home/lex/data/Spatial_Monitoring_and_Insect_Behavioural_Analysis_Dataset/YOLOv4_Training_and_Test_Dataset/training")
TEST_DIR = Path("/home/lex/data/Spatial_Monitoring_and_Insect_Behavioural_Analysis_Dataset/YOLOv4_Training_and_Test_Dataset/testing")
CLASSES_FP = Path("/home/lex/data/Spatial_Monitoring_and_Insect_Behavioural_Analysis_Dataset/YOLOv4_Training_and_Test_Dataset/classes.names")
INSECT_TXT_REGEX = re.compile(r"insect_(\d+).txt")

In [38]:
# Get list of target classes (e.g. bee/wasp, flower, etc.) and corresponding zero-based index for that class
with open(CLASSES_FP, "r") as f:
    classes = f.readlines()
    classes = [c.strip() for c in classes]

print(f"Found classes: {[f'{i}: {label}' for i, label in enumerate(classes)]}")

Found classes: ['0: Hymenoptera', '1: Flower', '2: Syrphidae', '3: Lepidoptera']


In [39]:
# Get DataFrame where each row corresponds to a unique image+bounding box for every
# instance of an insect that occurs in the (full-resolution) training images
csv_columns = ["insect_type", "centre_x", "centre_y", "width", "height"]
extra_columns = ["image"]
df = pd.DataFrame(columns=[*extra_columns, *csv_columns])

for file in TRAIN_DIR.iterdir():
    match = INSECT_TXT_REGEX.match(file.name)
    if match:
        row = pd.read_csv(file, sep=" ", names=csv_columns)
        
        # Get corresponding image for this insect 
        image_fp = file.with_suffix(".png")
        row["image"] = image_fp.name
        df = pd.concat([df, row], ignore_index=True)

print(df.head())

              image insect_type  centre_x  centre_y     width    height
0  insect_02097.png           0  0.438305  0.512250  0.016172  0.033593
1  insect_02905.png           0  0.131612  0.412750  0.011568  0.035130
2  insect_01728.png           0  0.394792  0.306481  0.017708  0.025926
3  insect_01728.png           1  0.294792  0.488889  0.030208  0.055556
4  insect_01728.png           1  0.514062  0.241204  0.038542  0.065741


In [40]:
# Assume each image is the exact same resolution for simplicity. Now convert those fractional positions above to
# absolute pixel values (e.g. centre_x=0.44 --> 0.44*1920~=845)
image_fp = TRAIN_DIR / df.loc[0, "image"] # Just get first image as example
image = cv2.imread(str(image_fp))
height, width, num_channels = image.shape
print(f"Assuming every image is {width}x{height} resolution")

# Use image dimensions to convert fractional positions to absolute
df["centre_x"] = (df["centre_x"] * width).astype(int)
df["centre_y"] = (df["centre_y"] * height).astype(int)
df["width"] = (df["width"] * width).astype(int)
df["height"] = (df["height"] * height).astype(int)

print(df.head())

Assuming every image is 1920x1080 resolution
              image insect_type  centre_x  centre_y  width  height
0  insect_02097.png           0       841       553     31      36
1  insect_02905.png           0       252       445     22      37
2  insect_01728.png           0       758       330     33      28
3  insect_01728.png           1       566       528     57      60
4  insect_01728.png           1       986       260     74      71
