In [None]:
# ===============================================================
# YOLOv4 – Yellow Sticky Traps
# ===============================================================
#  ❯ HOW TO USE
#  1.  Click Runtime ▸ Change runtime type ▸ GPU (T4/P100/V100).
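#  2.  Run all cells top‑to‑bottom – the ≈7 min (T4) figure applies to a short
#      1200‑iteration run; the cfg written in section 5 sets max_batches=6000
#      at 1024×1024, so expect training to take considerably longer.
#  3.  `FULL_DATASET` is `True` by default (all 284 images); set it to `False`
#      to train on a 120‑image subset instead.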
#  4.  Weights + logs land in /content/drive/MyDrive/yolo_backup.
# -----------------------------------------------------------------
#  Tested on: CUDA 12, cuDNN 8.9, Python 3.10, Colab Pro (T4 16 GB)
# ===============================================================

In [None]:
# ─────────────────── 0. Colab & system prep ────────────────────
# Imports the standard-library modules used by the later cells and, when the
# notebook runs inside Google Colab, mounts Drive and installs build/Python deps.
import os, sys, pathlib, random, shutil, xml.etree.ElementTree as ET
# True only when the google.colab module is already loaded in this kernel
IN_COLAB = "google.colab" in sys.modules

if IN_COLAB:
    # Mount Google Drive once to persist checkpoints
    from google.colab import drive
    drive.mount("/content/drive")
    # Work from /content so the relative paths used by later cells resolve there
    %cd /content

    # System‑level deps: OpenCV dev + build tools for Darknet
    !apt-get -qq update
    !apt-get -qq install --yes libopencv-dev build-essential pkg-config

    # Python deps – gdown for Drive downloads, tqdm for progress bars
    # NOTE(review): versions are not pinned, so a later run may install newer releases
    !pip -q install numpy pillow tqdm cython gdown

# Show GPU info (T4, V100, etc.)
# This line sits outside the IN_COLAB guard, so it is attempted on any host
!nvidia-smi

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Tue Jul  8 14:19:33 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0

In [None]:
# ───────────────── 1. Clone & build Darknet (GPU) ───────────────
# Shallow-clone the repository only when no ./darknet entry exists yet,
# so re-running this cell does not attempt a second clone.
if not pathlib.Path("darknet").exists():
    !git clone --depth 1 https://github.com/AlexeyAB/darknet

# Build from inside the repository directory; the matching `%cd ..` below
# returns to the parent directory afterwards.
%cd darknet
# Enable CUDA + cuDNN + fp16 + OpenCV in Makefile
# (in-place edit: each pattern only matches while the flag still reads "=0")
!sed -i 's/GPU=0/GPU=1/;  s/CUDNN=0/CUDNN=1/;  s/CUDNN_HALF=0/CUDNN_HALF=1/; \
         s/OPENCV=0/OPENCV=1/' Makefile
!make -j"$(nproc)"  # compile

%cd ..


/content/darknet
chmod +x *.sh
g++ -std=c++11 -std=c++11 -Iinclude/ -I3rdparty/stb/include -DOPENCV `pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv` -DGPU -I/usr/local/cuda/include/ -DCUDNN -DCUDNN_HALF -Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic -Ofast -DOPENCV -DGPU -DCUDNN -I/usr/local/cudnn/include -DCUDNN_HALF -c ./src/image_opencv.cpp -o obj/image_opencv.o
g++ -std=c++11 -std=c++11 -Iinclude/ -I3rdparty/stb/include -DOPENCV `pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv` -DGPU -I/usr/local/cuda/include/ -DCUDNN -DCUDNN_HALF -Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic -Ofast -DOPENCV -DGPU -DCUDNN -I/usr/local/cudnn/include -DCUDNN_HALF -c ./src/http_stream.cpp -o obj/http_stream.o
gcc -Iinclude/ -I3rdparty/stb/include -DOPENCV `pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv` -DGPU -I/usr/local/cuda/include/ -DCUDNN -DCUDNN_HALF -Wall -Wfatal-e

In [None]:
# ───────────────── 2. Obtain dataset (git clone) ────────────────
DATA_REPO = "md-121/yellow-sticky-traps-dataset"
if not pathlib.Path("yellow-sticky-traps-dataset").exists():
    !git clone --depth 1 https://github.com/{DATA_REPO}.git

DATA_DIR = "yellow-sticky-traps-dataset"
IMG_DIR  = f"{DATA_DIR}/images"
ANN_DIR  = f"{DATA_DIR}/annotations"

print(f"Dataset ready – {len(os.listdir(IMG_DIR))} images, "
      f"{len(os.listdir(ANN_DIR))} annotations")

Dataset ready – 568 images, 284 annotations


In [None]:
# ─────────────── 3. Convert Pascal‑VOC → YOLO TXT ──────────────
# For every image that has a matching XML annotation, writes a YOLO label
# file next to the image and assigns the image to the train or validation list.

# List of class names (as used in annotations)
LABELS = [
    "MR",
    "NC",
    "WF",
]

# Dictionary mapping class name to integer ID (e.g., "MR" → 0)
name2id = {n: i for i, n in enumerate(LABELS)}

FULL_DATASET = True          # ← True: use every image; False: first 120 images only
VAL_SPLIT    = 0.1           # Use 10% of the dataset for validation


def _voc_box_to_yolo(xmin, ymin, xmax, ymax, img_w, img_h):
    """Convert absolute Pascal-VOC corners to a normalised YOLO box.

    The corners are ordered and clamped to the image rectangle first, so a box
    that pokes outside the image (or has swapped corners) still yields values
    inside [0, 1].

    Returns:
        (cx, cy, bw, bh) as fractions of the image size, or None when no area
        is left after clamping.
    """
    x0, x1 = sorted((xmin, xmax))
    y0, y1 = sorted((ymin, ymax))
    x0, x1 = max(0.0, x0), min(float(img_w), x1)
    y0, y1 = max(0.0, y0), min(float(img_h), y1)
    if x1 <= x0 or y1 <= y0:
        return None
    return (
        (x0 + x1) / 2 / img_w,
        (y0 + y1) / 2 / img_h,
        (x1 - x0) / img_w,
        (y1 - y0) / img_h,
    )


# Gather and sort all image paths, keeping only those with an XML annotation.
# Filtering BEFORE the shuffle keeps the train/val split identical across
# re-runs even if extra, un-annotated .jpg files appear in IMG_DIR later.
all_images = sorted(pathlib.Path(IMG_DIR).glob("*.jpg"))
images = [
    p for p in all_images
    if (pathlib.Path(ANN_DIR) / (p.stem + ".xml")).exists()
]
n_without_xml = len(all_images) - len(images)

# If needed, only use a subset of 120 images (~6-7 minutes of training)
if not FULL_DATASET:
    images = images[:120]

# Shuffle image list and create validation split
random.seed(42)  # ensures reproducibility of split
random.shuffle(images)
val_set = set(images[:int(len(images) * VAL_SPLIT)])  # first 10% → validation

# Prepare lists to hold the training and validation image paths
train_list, val_list = [], []
bad_size = []  # images whose XML has a missing or non-positive <size>

# Optional: to cut memory use, images can be resized in place (e.g. to 416×416
# with cv2.resize + cv2.imwrite) before training; the labels written below are
# normalised and therefore stay valid after such a resize.

# Loop over all selected images
for img_path in images:
    xml_path = pathlib.Path(ANN_DIR) / (img_path.stem + ".xml")

    # Parse the Pascal-VOC XML annotation
    root = ET.parse(xml_path).getroot()
    try:
        w = int(root.find("size/width").text)   # original image width
        h = int(root.find("size/height").text)  # original image height
    except (AttributeError, TypeError, ValueError):
        w = h = 0
    if w <= 0 or h <= 0:
        # Without a usable size the boxes cannot be normalised
        bad_size.append(img_path.name)
        continue

    txt_lines = []  # store YOLO-formatted annotations
    for obj in root.findall("object"):
        cls = obj.find("name").text
        if cls not in name2id:
            continue  # skip unknown classes

        # Read bounding box coordinates from XML
        bbox = obj.find("bndbox")
        xmin, xmax = map(float, (bbox.find("xmin").text, bbox.find("xmax").text))
        ymin, ymax = map(float, (bbox.find("ymin").text, bbox.find("ymax").text))

        # Convert to YOLO format: class_id center_x center_y width height (normalized)
        yolo_box = _voc_box_to_yolo(xmin, ymin, xmax, ymax, w, h)
        if yolo_box is None:
            continue  # degenerate box – nothing to learn from it
        cx, cy, bw, bh = yolo_box
        txt_lines.append(f"{name2id[cls]} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}\n")

    # Save YOLO annotation to TXT file (same name as image, different extension)
    with open(img_path.with_suffix(".txt"), "w") as f:
        f.writelines(txt_lines)

    # Append image to train or validation list
    (val_list if img_path in val_set else train_list).append(str(img_path))

# Print summary of conversion (only images that were actually converted)
print(f"Converted {len(train_list) + len(val_list)} images → YOLO txt "
      f"({len(train_list)} train / {len(val_list)} val)")
if n_without_xml or bad_size:
    print(f"Skipped {n_without_xml} image(s) without XML and "
          f"{len(bad_size)} with an unusable <size>: {bad_size}")

Converted 284 images → YOLO txt


In [None]:
# ───────────── 4. Write train/val lists & meta files ────────────
def _write_lines(path, entries):
    """Write one entry per line and terminate the file with a newline.

    The trailing newline matters: a later cell appends more paths to train.txt,
    and without it the first appended path would be glued onto the last line.
    """
    with open(path, "w") as f:
        f.writelines(f"{entry}\n" for entry in entries)


# Write training image paths to train.txt
_write_lines("train.txt", train_list)
# Write validation image paths to val.txt
_write_lines("val.txt", val_list)
# Write object class names to obj.names
_write_lines("obj.names", LABELS)

# Write training configuration to obj.data
with open("obj.data", "w") as f:
    f.write(f"classes = {len(LABELS)}\n")  # Number of classes, derived from LABELS
    f.write("train = train.txt\n")         # Path to training list
    f.write("valid = val.txt\n")           # Path to validation list
    f.write("names = obj.names\n")         # Path to class names
    f.write("backup = backup\n")           # Backup directory for model weights

In [None]:
# ───────────── 5. Prepare custom YOLOv4 cfg (3 classes) ─────────
CFG_SRC = "darknet/cfg/yolov4.cfg"  # Source YOLOv4 config file
CFG_DST = "yolov4_custom.cfg"       # Destination custom config file

# Overrides applied to the [net] section (key → new value)
NET_OVERRIDES = {
    "batch": "32",            # Set batch size
    "subdivisions": "8",      # Set subdivisions
    "width": "1024",          # Set input width
    "height": "1024",         # Set input height
    "max_batches": "6000",    # Set max training batches
    "steps": "4800,5400",     # Set learning rate decay steps
    "scales": ".1,.1",        # Set learning rate scales
}


def _customise_cfg(lines, num_classes):
    """Return a copy of the cfg lines adapted to `num_classes`.

    * keys listed in NET_OVERRIDES are rewritten inside the [net] section;
    * every [yolo] section gets `classes=<num_classes>`;
    * the `filters=` line of the [convolutional] section directly preceding
      each [yolo] section is set to (num_classes + 5) * <number of masks>.

    The file is walked section by section instead of relying on fixed line
    offsets around "[yolo]": in the stock cfg the `filters=` line is separated
    from the header by other lines, and `classes=` comes after the header.

    Raises:
        ValueError: if no [yolo] head could be patched.
    """
    out = list(lines)
    section = None
    last_filters_idx = None  # latest filters= line of the current [convolutional]
    head_filters_idx = None  # filters= line feeding the current [yolo] head
    heads_patched = 0

    for idx, raw in enumerate(lines):
        text = raw.strip()
        if not text or text.startswith(("#", ";")):
            continue

        if text.startswith("["):
            section = text
            if section == "[yolo]":
                head_filters_idx = last_filters_idx
                if head_filters_idx is not None:
                    # Default of 3 masks per head; refined below once the
                    # head's own `mask` line has been read.
                    out[head_filters_idx] = f"filters={(num_classes + 5) * 3}"
            last_filters_idx = None
            continue

        key, sep, value = text.partition("=")
        if not sep:
            continue
        key = key.strip()

        if section == "[net]" and key in NET_OVERRIDES:
            out[idx] = f"{key}={NET_OVERRIDES[key]}"
        elif section == "[convolutional]" and key == "filters":
            last_filters_idx = idx
        elif section == "[yolo]":
            if key == "classes":
                out[idx] = f"classes={num_classes}"
                heads_patched += 1
            elif key == "mask" and head_filters_idx is not None:
                n_masks = len([m for m in value.split(",") if m.strip()])
                out[head_filters_idx] = f"filters={(num_classes + 5) * n_masks}"

    if heads_patched == 0:
        raise ValueError("no [yolo] section with a classes= line found in cfg")
    return out


if not pathlib.Path(CFG_DST).exists():  # Only generate if custom config doesn't exist
    with open(CFG_SRC) as src:
        cfg_lines = src.read().splitlines()
    with open(CFG_DST, "w") as dst:
        dst.write("\n".join(_customise_cfg(cfg_lines, len(LABELS))))
    print("Custom YOLOv4 cfg generated →", CFG_DST)  # Confirmation message
else:
    # An existing file is kept as-is; delete it to pick up changes made above
    print("Reusing existing cfg →", CFG_DST, "(delete the file to regenerate)")


In [None]:
# ───────────── 6. Download darknet backbone weights ────────────
# File name of the pre-trained weights; the training command receives it too
WEIGHTS = "yolov4.conv.137"
# Download only when the file is not already present in the working directory
# NOTE(review): wget runs quietly (-q) and its result is not checked here –
# confirm the file is complete before training
if not pathlib.Path(WEIGHTS).exists():
    !wget -q https://github.com/AlexeyAB/darknet/releases/download/yolov4/{WEIGHTS}

# Create the local backup folder and the Drive folder
# NOTE(review): this cell only creates the Drive folder; nothing is copied into it here
!mkdir -p backup /content/drive/MyDrive/yolo_backup

In [None]:
pip install albumentations opencv-python




In [None]:
import albumentations as A
import cv2
import os
import shutil

# Oversample the minority class: create one augmented copy of every TRAINING
# image that contains at least one object of TARGET_CLASS_ID. Validation
# images are left alone so no augmented copy of them leaks into training.
TARGET_CLASS_ID = 2  # "WF" in LABELS

# The conversion step writes each YOLO label next to its image, so labels and
# images are both read from IMG_DIR (the working directory only holds
# train.txt / val.txt, which are not label files).
label_dir = IMG_DIR
img_dir = IMG_DIR
dest_img_dir = "./aug_imgs"
dest_label_dir = "./aug_labels"
os.makedirs(dest_img_dir, exist_ok=True)
os.makedirs(dest_label_dir, exist_ok=True)

# bbox_params makes the geometric transforms (flip, rotate) move the boxes
# together with the pixels; copying the original label unchanged would leave
# the boxes pointing at the wrong place.
transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Rotate(limit=15, p=0.3),
        A.HueSaturationValue(p=0.4),
    ],
    bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
)


def _read_yolo_labels(path):
    """Parse a YOLO label file into (boxes, class_ids).

    Boxes are returned as (cx, cy, w, h) tuples clamped to the unit square,
    because the augmentation library rejects coordinates outside [0, 1].
    Malformed lines and boxes without area are dropped.
    """
    boxes, class_ids = [], []
    with open(path) as fh:
        for raw in fh:
            parts = raw.split()
            if len(parts) != 5:
                continue
            cx, cy, bw, bh = map(float, parts[1:])
            x0, x1 = max(0.0, cx - bw / 2), min(1.0, cx + bw / 2)
            y0, y1 = max(0.0, cy - bh / 2), min(1.0, cy + bh / 2)
            if x1 <= x0 or y1 <= y0:
                continue
            boxes.append(((x0 + x1) / 2, (y0 + y1) / 2, x1 - x0, y1 - y0))
            class_ids.append(int(parts[0]))
    return boxes, class_ids


n_augmented = 0
for train_img in train_list:
    img_file = os.path.basename(train_img)
    file = os.path.splitext(img_file)[0] + ".txt"
    path = os.path.join(label_dir, file)
    img_path = os.path.join(img_dir, img_file)
    if not (os.path.exists(path) and os.path.exists(img_path)):
        continue

    boxes, class_ids = _read_yolo_labels(path)
    # Compare the parsed class id (a prefix test on "2" would also match 20-29)
    if TARGET_CLASS_ID not in class_ids:
        continue

    image = cv2.imread(img_path)
    if image is None:  # unreadable / corrupt file
        print(f"Skipping unreadable image: {img_path}")
        continue

    transformed = transform(image=image, bboxes=boxes, class_labels=class_ids)

    out_img_path = os.path.join(dest_img_dir, "aug_" + img_file)
    out_label_path = os.path.join(dest_label_dir, "aug_" + file)
    cv2.imwrite(out_img_path, transformed["image"])
    with open(out_label_path, "w") as fh:
        for box, cls_id in zip(transformed["bboxes"], transformed["class_labels"]):
            cx, cy, bw, bh = box[:4]
            fh.write(f"{int(cls_id)} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}\n")
    n_augmented += 1

print(f"Created {n_augmented} augmented image/label pairs in {dest_img_dir}")


In [None]:
# ------------------------------------------------------------
# Merge augmented files into the main dataset
# and verify class counts
# ------------------------------------------------------------

from pathlib import Path
import shutil
from collections import Counter

# 1) Define source (augmented) and destination folders
SRC_IMG_DIR   = Path("aug_imgs")          # where aug_*.jpg were saved
SRC_LABEL_DIR = Path("aug_labels")        # where aug_*.txt were saved

DST_IMG_DIR   = Path(IMG_DIR)             # original images dir
# YOLO labels live next to their images (the conversion step writes them
# there), so augmented labels go to the image folder as well – not to the
# XML annotation folder, where the trainer would never find them.
DST_LABEL_DIR = Path(IMG_DIR)

assert SRC_IMG_DIR.exists(),  "aug_imgs folder not found"
assert SRC_LABEL_DIR.exists(), "aug_labels folder not found"

# 2) Move augmented images together with their labels
moved_images = 0
for img in sorted(SRC_IMG_DIR.glob("*.jpg")):
    lbl = SRC_LABEL_DIR / f"{img.stem}.txt"
    if not lbl.exists():
        # An image without a label would be trained on as "no objects"
        print(f"Skipping {img.name}: no matching label file")
        continue
    shutil.move(str(lbl), str(DST_LABEL_DIR / lbl.name))
    shutil.move(str(img), str(DST_IMG_DIR / img.name))
    moved_images += 1

print(f"Moved {moved_images} augmented images.")

# 3) Append new image paths to train.txt (avoid duplicates)
train_file = Path("train.txt")
existing_text = train_file.read_text()
train_paths = {p.strip() for p in existing_text.splitlines() if p.strip()}

with train_file.open("a") as f:
    # Make sure the first appended path starts on its own line
    if existing_text and not existing_text.endswith("\n"):
        f.write("\n")
    for img_path in sorted(DST_IMG_DIR.glob("aug_*.jpg")):
        if not img_path.with_suffix(".txt").exists():
            continue  # never list an augmented image that has no label
        abs_path = str(img_path.resolve())
        if abs_path not in train_paths:
            f.write(abs_path + "\n")
            train_paths.add(abs_path)

print(f"✓ train.txt updated – now {len(train_paths)} images total")

# 4) Re‑count objects per class over the training set to verify balance
#    (class names come from LABELS defined in the conversion step, so the two
#    cannot drift apart)
counter = Counter()
for train_path in sorted(train_paths):
    label_file = Path(train_path).with_suffix(".txt")
    if not label_file.exists():
        continue
    with label_file.open() as fh:
        for line in fh:
            if line.strip():
                cls_id = int(line.split()[0])
                counter[cls_id] += 1

print("\nUpdated object count per class:")
for cls_id in sorted(counter):
    name = LABELS[cls_id] if cls_id < len(LABELS) else f"class_{cls_id}"
    print(f"  {cls_id:>2} ({name:<30}) : {counter[cls_id]}")


Moved 0 augmented images.
✓ train.txt updated – now 256 images total

Updated object count per class:


In [None]:
# ───────────── 7. Train (length set by max_batches in the cfg) ───────────
# NOTE(review): an earlier heading quoted "1200 iters ≈ 6‑7 min on T4"; the cfg
# generated in section 5 sets max_batches=6000, so that estimate does not
# apply unless yolov4_custom.cfg was produced with different values.
# The console output is piped through tee, so it is shown here and also
# written to train.log.
print("Training starts…")
!./darknet/darknet detector train obj.data yolov4_custom.cfg \
      {WEIGHTS} -dont_show -map | tee train.log


[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 161 Avg (IOU: 0.000000), count: 1, class_loss = 0.000000, iou_loss = 0.000000, total_loss = 0.000000 
 total_bbox = 621599, rewritten_bbox = 1.236006 % 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 139 Avg (IOU: 0.753242), count: 18, class_loss = 3.193663, iou_loss = 499.542114, total_loss = 502.735779 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 150 Avg (IOU: 0.000000), count: 1, class_loss = 0.003879, iou_loss = 0.000000, total_loss = 0.003879 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 161 Avg (IOU: 0.000000), count: 1, class_loss = 0.000000, iou_loss = 0.000000, total_loss = 0.000000 
 total_bbox = 621617, rewritten_bbox = 1.235970 % 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 139 Avg (IOU: 0.712475), count: 8, class_loss = 2.353016, iou_loss = 278.5339