# In [2]:
import cv2
import os

# In [3]:
#Data Preparation


# Class labels. createResnetData uses the integer class id from each label
# line as an index into this list, so the order of the entries is significant.
CLASS_NAMES = [
    "Ants", "Bees", "Beetles", "Caterpillars",
    "Earthworms", "Earwigs", "Grasshoppers", "Moths",
    "Slugs", "Snails", "Wasps", "Weevils",
]

def createResnetData(base_dir="dataset", save_base_dir="cropped_dataset"):
    splits = ["train", "valid", "test"]

    for split in splits:
        img_dir = os.path.join(base_dir, split, "images")
        ann_dir = os.path.join(base_dir, split, "labels")
        save_dir = os.path.join(save_base_dir, split)

        os.makedirs(save_dir, exist_ok=True)

        for txt_file in os.listdir(ann_dir):
            if not txt_file.endswith(".txt"):
                continue

            base = os.path.splitext(txt_file)[0]
            img_path = os.path.join(img_dir, base + ".jpg")
            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Could not read image {img_path}")
                continue

            h, w = img.shape[:2]

            with open(os.path.join(ann_dir, txt_file)) as f:
                for i, line in enumerate(f):
                    class_id, x_c, y_c, bw, bh = map(float, line.strip().split())
                    class_id = int(class_id)
                    if class_id >= len(CLASS_NAMES):
                        print(f"Warning: class_id {class_id} exceeds CLASS_NAMES length")
                        continue

                    x1 = max(0, int((x_c - bw/2) * w))
                    y1 = max(0, int((y_c - bh/2) * h))
                    x2 = min(w, int((x_c + bw/2) * w))
                    y2 = min(h, int((y_c + bh/2) * h))

                    crop = img[y1:y2, x1:x2]
                    if crop.size == 0:
                        continue  # skip invalid crops

                    crop = cv2.resize(crop, (244, 244))

                    class_folder = os.path.join(save_dir, CLASS_NAMES[class_id])
                    os.makedirs(class_folder, exist_ok=True)
                    cv2.imwrite(os.path.join(class_folder, f"{base}_{i}.jpg"), crop)

    print(f"All crops saved in {save_base_dir} for train, valid, and test splits.")





# In [4]:
# Toggle for the data preparation step: when truthy, createResnetData() is
# run with its default directories.
performPrep = True  # Set to True to perform data preparation
if performPrep:
    createResnetData()
 

# Output: All crops saved in cropped_dataset for train, valid, and test splits.
