# Drone Images

- **Training:** Zones 1, 4, and 7 drone captures feeding YOLOv8.
- **Cross-validation:** Zones 2 and 6 to stress-test generalization.
- **Future validation:** December drone flights once the imagery is labeled.


## Initial labeling
I trained a proto model with some images from the ARU zones to help me soley with labeling the train zones where I will import the results and adjust on cvat

#### The cell below will take train images, tile thenm, run inference on them using a proto model, and output the results in a format that can be imported to CVAT for labeling adjustments.

In [6]:
import os
from PIL import Image
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction
import shutil



model_path = "model/best.pt"  # your YOLO model
zone_folders = ["Zone 1", "Zone 4", "Zone 7"]
output_root = "CVAT"
tile_size = 1280  
tile_overlap = 0.15  


cvat_images = os.path.join(output_root, "images", "train")
cvat_labels = os.path.join(output_root, "labels", "train")
os.makedirs(cvat_images, exist_ok=True)
os.makedirs(cvat_labels, exist_ok=True)






detection_model = AutoDetectionModel.from_pretrained(
    model_type="yolov8",
    model_path=model_path,
    confidence_threshold=0.1,
    device="cpu"  # Use "cpu" for Mac M2
)


# HELPER ‚Üí WRITE YOLO TXT

def save_yolo_txt(output_path, detections, img_w, img_h):
    """
    SAHI returns boxes in absolute pixel coords (x1,y1,x2,y2).
    Convert to YOLO normalized format: cls cx cy w h
    """
    lines = []
    for det in detections:
        cls = int(det.category.id)  # already YOLO classes

        x1, y1, x2, y2 = det.bbox.to_xyxy()
        w = x2 - x1
        h = y2 - y1
        cx = x1 + w/2
        cy = y1 + h/2

        # normalize
        cx /= img_w
        cy /= img_h
        w /= img_w
        h /= img_h

        lines.append(f"{cls} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}")

    with open(output_path, "w") as f:
        f.write("\n".join(lines))



# MAIN PROCESSING LOOP

for zone in zone_folders:
    print(f"\n Processing {zone}...\n")

    for filename in os.listdir(zone):
        if not filename.lower().endswith(("jpg", "jpeg", "png")):
            continue

        image_path = os.path.join(zone, filename)
        print(f"üñºÔ∏è Running inference on {image_path}")

        # load image for size
        img = Image.open(image_path)
        img_w, img_h = img.size

        # run SAHI sliced prediction
        result = get_sliced_prediction(
            image_path,
            detection_model,
            slice_height=tile_size,
            slice_width=tile_size,
            overlap_height_ratio=tile_overlap,
            overlap_width_ratio=tile_overlap
        )


        out_img_path = os.path.join(cvat_images, filename)
        out_txt_path = os.path.join(cvat_labels, filename.rsplit(".",1)[0] + ".txt")

        # copy original image
        shutil.copy(image_path, out_img_path)

        # save YOLO txt
        save_yolo_txt(out_txt_path, result.object_prediction_list, img_w, img_h)

        print(f"‚úÖ Saved to:\n{out_img_path}\n{out_txt_path}")

print("\nüéâ DONE! All annotations exported for CVAT.\n")



 Processing Zone 1...

üñºÔ∏è Running inference on Zone 1/DJI_20250203172934_0179_V.JPG
Performing prediction on 12 slices.
‚úÖ Saved to:
CVAT/images/train/DJI_20250203172934_0179_V.JPG
CVAT/labels/train/DJI_20250203172934_0179_V.txt
üñºÔ∏è Running inference on Zone 1/DJI_20250203174500_0618_V.JPG
Performing prediction on 12 slices.
‚úÖ Saved to:
CVAT/images/train/DJI_20250203174500_0618_V.JPG
CVAT/labels/train/DJI_20250203174500_0618_V.txt
üñºÔ∏è Running inference on Zone 1/DJI_20250203174710_0680_V.JPG
Performing prediction on 12 slices.
‚úÖ Saved to:
CVAT/images/train/DJI_20250203174710_0680_V.JPG
CVAT/labels/train/DJI_20250203174710_0680_V.txt
üñºÔ∏è Running inference on Zone 1/DJI_20250203172651_0102_V.JPG
Performing prediction on 12 slices.
‚úÖ Saved to:
CVAT/images/train/DJI_20250203172651_0102_V.JPG
CVAT/labels/train/DJI_20250203172651_0102_V.txt
üñºÔ∏è Running inference on Zone 1/DJI_20250203173233_0264_V.JPG
Performing prediction on 12 slices.
‚úÖ Saved to:
CVAT/images/

### Spend like 3 hours correcting the labels and imported the correct labels... 

Now we can start the training pipeline

In [12]:
import os
import random
from math import ceil
from pathlib import Path
from PIL import Image

# ==========================
# CONFIGURATION
# ==========================
INPUT_IMAGES_DIR = Path("CVAT/images/train")
INPUT_LABELS_DIR = Path("CVAT/labels/train")

OUTPUT_IMAGES_DIR = Path("TILED_YOLO/images/train")
OUTPUT_LABELS_DIR = Path("TILED_YOLO/labels/train")
OUTPUT_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_LABELS_DIR.mkdir(parents=True, exist_ok=True)

TILE_SIZE = 1280          
OVERLAP = 0.05             
MIN_BOX_AREA_RATIO = 0.001 

# SETTINGS FOR BETTER TRAINING
KEEP_EMPTY_RATIO = 0.10 
PAD_TO_SQUARE = True      # If True, edge tiles are padded to 1280x1280 with gray

# ==========================
# HELPERS
# ==========================

def yolo_to_xyxy(line, img_w, img_h):
    parts = line.strip().split()
    if len(parts) != 5: return None
    cls = int(parts[0])
    cx, cy, bw, bh = map(float, parts[1:])
    x1 = (cx - bw / 2) * img_w
    y1 = (cy - bh / 2) * img_h
    x2 = (cx + bw / 2) * img_w
    y2 = (cy + bh / 2) * img_h
    return cls, x1, y1, x2, y2

def xyxy_to_yolo(cls, x1, y1, x2, y2, tile_w, tile_h):
    bw = x2 - x1
    bh = y2 - y1
    cx = x1 + bw / 2
    cy = y1 + bh / 2
    return f"{cls} {cx/tile_w:.6f} {cy/tile_h:.6f} {bw/tile_w:.6f} {bh/tile_h:.6f}"

def compute_tile_starts(img_dim, tile_size, overlap):

    if img_dim <= tile_size: return [0]
    stride = int(tile_size * (1 - overlap))
    stride = max(1, stride)
    num_tiles = ceil((img_dim - tile_size) / stride) + 1
    starts = []
    for i in range(num_tiles):
        start = i * stride
        # Force last tile to end exactly at image edge (standard sliding window)
        if start + tile_size > img_dim:
            start = img_dim - tile_size
        if not starts or start != starts[-1]:
            starts.append(start)
    return starts


In [13]:
total_tiles = 0
total_birds = 0

print(f"üöÄ Starting Tiling Process...")
print(f"   -> Size: {TILE_SIZE}x{TILE_SIZE}")
print(f"   -> Keeping {KEEP_EMPTY_RATIO*100}% of empty background tiles")

for img_path in INPUT_IMAGES_DIR.iterdir():
    if not img_path.suffix.lower() in [".jpg", ".jpeg", ".png", ".tif"]:
        continue

    base = img_path.stem
    label_path = INPUT_LABELS_DIR / f"{base}.txt"
    
    # Load Image
    img = Image.open(img_path).convert("RGB")
    img_w, img_h = img.size
    
    # Load Labels (if exist)
    boxes = []
    if label_path.exists():
        with open(label_path, "r") as f:
            for line in f.read().strip().splitlines():
                if line: boxes.append(yolo_to_xyxy(line, img_w, img_h))

    x_starts = compute_tile_starts(img_w, TILE_SIZE, OVERLAP)
    y_starts = compute_tile_starts(img_h, TILE_SIZE, OVERLAP)

    for y0 in y_starts:
        for x0 in x_starts:
            # 1. Define Tile Geometry
            x1_tile = x0 + TILE_SIZE
            y1_tile = y0 + TILE_SIZE
            
            # 2. Crop (and Pad if necessary)
            # We crop exactly what fits in the image
            crop_w = min(x1_tile, img_w) - x0
            crop_h = min(y1_tile, img_h) - y0
            tile = img.crop((x0, y0, x0 + crop_w, y0 + crop_h))
            
            if PAD_TO_SQUARE and (crop_w < TILE_SIZE or crop_h < TILE_SIZE):
                # Create a gray canvas 1280x1280
                padded_tile = Image.new("RGB", (TILE_SIZE, TILE_SIZE), (114, 114, 114))
                # Paste the crop into top-left
                padded_tile.paste(tile, (0, 0))
                tile = padded_tile
                # Note: valid area is 0..crop_w, 0..crop_h
            
            # 3. Process Boxes
            tile_lines = []
            for b in boxes:
                if b is None: continue
                cls, bx1, by1, bx2, by2 = b
                
                # Intersection logic
                ix1 = max(bx1, x0)
                iy1 = max(by1, y0)
                ix2 = min(bx2, x0 + crop_w) # Clamp to actual image data, not pad
                iy2 = min(by2, y0 + crop_h)

                if ix2 <= ix1 or iy2 <= iy1: continue

                # Shift to tile coordinates
                tx1, ty1 = ix1 - x0, iy1 - y0
                tx2, ty2 = ix2 - x0, iy2 - y0
                
                # Check area size to avoid slivers
                bw, bh = tx2 - tx1, ty2 - ty1
                if (bw * bh) / (TILE_SIZE * TILE_SIZE) < MIN_BOX_AREA_RATIO:
                    continue

                # IMPORTANT: Convert using the FULL TILE SIZE (1280), not just crop area
                # This ensures coordinates are correct relative to the padded 1280x1280 image
                tile_lines.append(xyxy_to_yolo(cls, tx1, ty1, tx2, ty2, TILE_SIZE, TILE_SIZE))

            # 4. Save Logic (With Background Sampling)
            has_birds = len(tile_lines) > 0
            keep_tile = has_birds or (random.random() < KEEP_EMPTY_RATIO)

            if keep_tile:
                suffix = "empty" if not has_birds else f"{len(tile_lines)}birds"
                out_base = f"{base}_{x0}_{y0}_{suffix}"
                
                tile.save(OUTPUT_IMAGES_DIR / f"{out_base}.jpg")
                
                # Even if empty, create empty txt file (YOLO standard)
                with open(OUTPUT_LABELS_DIR / f"{out_base}.txt", "w") as f:
                    if tile_lines:
                        f.write("\n".join(tile_lines))
                
                total_tiles += 1
                if has_birds: total_birds += len(tile_lines)

print(f"\nüéâ DONE!")
print(f"   -> Generated {total_tiles} tiles")
print(f"   -> Containing {total_birds} annotated birds")
print(f"   -> Saved to {OUTPUT_IMAGES_DIR}")

üöÄ Starting Tiling Process...
   -> Size: 1280x1280
   -> Keeping 10.0% of empty background tiles

üéâ DONE!
   -> Generated 117 tiles
   -> Containing 3323 annotated birds
   -> Saved to TILED_YOLO/images/train


In [15]:
import os
import random
from math import ceil
from pathlib import Path
from PIL import Image

# ==========================
# CONFIGURATION
# ==========================
# Input paths
INPUT_IMAGES_DIR = Path("CVAT/images/train")
INPUT_LABELS_DIR = Path("CVAT/labels/train")

# Output paths (Targeting your existing folder)
OUTPUT_IMAGES_DIR = Path("TrainTiled/images")
OUTPUT_LABELS_DIR = Path("TrainTiled/labels")

# Ensure they exist (just in case)
OUTPUT_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_LABELS_DIR.mkdir(parents=True, exist_ok=True)



# Tiling Settings
TILE_SIZE = 1280          
OVERLAP = 0.2             

# Permissive Logic (Safe for small birds)
MIN_BOX_PIXELS = 25       # Keep box if area > 25 pixels (5x5 pixels)
MIN_VISIBILITY = 0.2      # Keep box if 20% of the bird is visible in this tile
PAD_TO_SQUARE = True      # Pad edge tiles to 1280x1280

# ==========================
# HELPERS
# ==========================

def yolo_to_xyxy(line, img_w, img_h):
    parts = line.strip().split()
    if len(parts) != 5: return None
    cls = int(parts[0])
    cx, cy, bw, bh = map(float, parts[1:])
    x1 = (cx - bw / 2) * img_w
    y1 = (cy - bh / 2) * img_h
    x2 = (cx + bw / 2) * img_w
    y2 = (cy + bh / 2) * img_h
    return cls, x1, y1, x2, y2

def xyxy_to_yolo(cls, x1, y1, x2, y2, tile_w, tile_h):
    bw = x2 - x1
    bh = y2 - y1
    cx = x1 + bw / 2
    cy = y1 + bh / 2
    
    # Clamp to ensure proper YOLO normalization limits [0,1]
    cx_n = max(0, min(1, cx / tile_w))
    cy_n = max(0, min(1, cy / tile_h))
    bw_n = max(0, min(1, bw / tile_w))
    bh_n = max(0, min(1, bh / tile_h))
    
    return f"{cls} {cx_n:.6f} {cy_n:.6f} {bw_n:.6f} {bh_n:.6f}"

def compute_tile_starts(img_dim, tile_size, overlap):
    if img_dim <= tile_size: return [0]
    stride = int(tile_size * (1 - overlap))
    stride = max(1, stride)
    num_tiles = ceil((img_dim - tile_size) / stride) + 1
    starts = []
    for i in range(num_tiles):
        start = i * stride
        if start + tile_size > img_dim:
            start = img_dim - tile_size
        if not starts or start != starts[-1]:
            starts.append(start)
    return starts

# ==========================
# MAIN EXECUTION
# ==========================

total_input_birds = 0
total_output_birds = 0
dropped_small = 0
dropped_edge = 0

print(f"üöÄ Starting Single-File Test on: {TARGET_FILE}")

found_target = False

for img_path in INPUT_IMAGES_DIR.iterdir():
    # --- FILTER: ONLY RUN ON TARGET FILE ---
    
    found_target = True
    print(f"üìÑ Found target image: {img_path}")

    base = img_path.stem
    label_path = INPUT_LABELS_DIR / f"{base}.txt"
    
    img = Image.open(img_path).convert("RGB")
    img_w, img_h = img.size
    
    boxes = []
    if label_path.exists():
        with open(label_path, "r") as f:
            for line in f.read().strip().splitlines():
                if line: boxes.append(yolo_to_xyxy(line, img_w, img_h))
    else:
        print(f"‚ö†Ô∏è Warning: No label file found at {label_path}")
    
    print(f"   -> Original Image Size: {img_w}x{img_h}")
    print(f"   -> Original Labels: {len(boxes)}")
    total_input_birds += len(boxes)

    x_starts = compute_tile_starts(img_w, TILE_SIZE, OVERLAP)
    y_starts = compute_tile_starts(img_h, TILE_SIZE, OVERLAP)

    tile_count = 0
    for y0 in y_starts:
        for x0 in x_starts:
            # 1. Tile Geometry
            x1_tile = x0 + TILE_SIZE
            y1_tile = y0 + TILE_SIZE
            
            crop_w = min(x1_tile, img_w) - x0
            crop_h = min(y1_tile, img_h) - y0
            tile = img.crop((x0, y0, x0 + crop_w, y0 + crop_h))
            
            # Padding
            if PAD_TO_SQUARE and (crop_w < TILE_SIZE or crop_h < TILE_SIZE):
                padded_tile = Image.new("RGB", (TILE_SIZE, TILE_SIZE), (114, 114, 114))
                padded_tile.paste(tile, (0, 0))
                tile = padded_tile

            # 2. Process Boxes
            tile_lines = []
            
            for b in boxes:
                cls, bx1, by1, bx2, by2 = b
                
                orig_area = (bx2 - bx1) * (by2 - by1)
                
                ix1 = max(bx1, x0)
                iy1 = max(by1, y0)
                ix2 = min(bx2, x0 + crop_w) 
                iy2 = min(by2, y0 + crop_h)

                if ix2 <= ix1 or iy2 <= iy1: 
                    continue

                inter_w = ix2 - ix1
                inter_h = iy2 - iy1
                inter_area = inter_w * inter_h
                
                # Check 1: Pixels
                if inter_area < MIN_BOX_PIXELS:
                    dropped_small += 1
                    continue 

                # Check 2: Visibility
                visibility = inter_area / orig_area
                if visibility < MIN_VISIBILITY:
                    dropped_edge += 1
                    continue

                # Tile Coords
                tx1 = ix1 - x0
                ty1 = iy1 - y0
                tx2 = ix2 - x0
                ty2 = iy2 - y0
                
                tile_lines.append(xyxy_to_yolo(cls, tx1, ty1, tx2, ty2, TILE_SIZE, TILE_SIZE))

            # Save ALL tiles for this test image (so we can verify birds AND backgrounds)
            suffix = f"{len(tile_lines)}birds"
            out_base = f"{base}_{x0}_{y0}_{suffix}"
            
            tile.save(OUTPUT_IMAGES_DIR / f"{out_base}.jpg")
            with open(OUTPUT_LABELS_DIR / f"{out_base}.txt", "w") as f:
                if tile_lines:
                    f.write("\n".join(tile_lines))
            
            total_output_birds += len(tile_lines)
            tile_count += 1

    print(f"   -> Generated {tile_count} tiles for this image.")

if not found_target:
    print(f"‚ùå ERROR: Could not find {TARGET_FILE} in {INPUT_IMAGES_DIR}")
else:
    print(f"\nüìä REPORT:")
    print(f"Total Tiled Boxes: {total_output_birds}")
    print(f"Dropped (Too Small < 25px): {dropped_small}")
    print(f"Dropped (Low Visibility < 20%): {dropped_edge}")
    print(f"\n‚úÖ Check folder: CVAT2check")

üöÄ Starting Single-File Test on: DJI_20250203172651_0102_V.JPG
üìÑ Found target image: CVAT/images/train/DJI_20250204070734_0054_V.JPG
   -> Original Image Size: 4000x3000
   -> Original Labels: 85
   -> Generated 12 tiles for this image.
üìÑ Found target image: CVAT/images/train/DJI_20250204070746_0060_V.JPG
   -> Original Image Size: 4000x3000
   -> Original Labels: 171
   -> Generated 12 tiles for this image.
üìÑ Found target image: CVAT/images/train/DJI_20250203172934_0179_V.JPG
   -> Original Image Size: 4000x3000
   -> Original Labels: 458
   -> Generated 12 tiles for this image.
üìÑ Found target image: CVAT/images/train/DJI_20250205165502_0449_V.JPG
   -> Original Image Size: 4000x3000
   -> Original Labels: 146
   -> Generated 12 tiles for this image.
üìÑ Found target image: CVAT/images/train/DJI_20250205170221_0657_V.JPG
   -> Original Image Size: 4000x3000
   -> Original Labels: 207
   -> Generated 12 tiles for this image.
üìÑ Found target image: CVAT/images/train/DJI

## Zipped the tiled images and annotations and trained on googleColab

In [None]:
## RAN ON GOOGLE COLAB NOT HERE WITH T4 GPU

from ultralytics import YOLO
import os


DATA_YAML = "yolo_ready_dataset/data.yaml"
PROJECT_NAME = "boeung_sne_drones"
RUN_NAME = "pilot_run_v1"


if not os.path.exists(DATA_YAML):
    raise FileNotFoundError(f"Cannot find {DATA_YAML}. Did the pipeline finish successfully?")


print("Loading YOLOv8 Nano...")
model = YOLO('yolov8n.pt')


print(" Starting Training...")
results = model.train(
    data=DATA_YAML,


    epochs=100,
    imgsz=1280,
    batch=5,
    project=PROJECT_NAME,
    name=RUN_NAME,
    exist_ok=True,
    cache=False
)

print(f" Training Complete! Weights saved at: {PROJECT_NAME}/{RUN_NAME}/weights/best.pt")