# Init

In [2]:
# import kagglehub

# # Download latest version
# path = kagglehub.dataset_download("chandlertimm/dota-data")

# print("Path to dataset files:", path)
from pathlib import Path
import os
import tqdm

# Root directory for all datasets, relative to the notebook's working directory.
# Later cells pass it to download() and to the VisDrone converters.
dpath = Path("datasets")

# Create Dataset
- [DOTA Dataset](https://www.kaggle.com/datasets/chandlertimm/dota-data)
- [VisDrone Dataset](https://docs.ultralytics.com/datasets/detect/visdrone/)

## VisDrone

In [None]:
from pathlib import Path
import shutil

from ultralytics.utils.downloads import download
from ultralytics.utils import TQDM
from convert_visdrone_to_yolo import visdrone2yolo

# GitHub release that hosts the VisDrone2019-DET archives.
release_url = "https://github.com/ultralytics/assets/releases/download/v0.0.0"

# Extracted archive folder -> YOLO split name.
# The "VisDrone2019-DET-test-challenge" archive is not fetched.
splits = {
    "VisDrone2019-DET-train": "train",
    "VisDrone2019-DET-val": "val",
    "VisDrone2019-DET-test-dev": "test",
}

# One zip per folder listed above, so the downloads and the conversions stay in sync.
urls = [f"{release_url}/{folder}.zip" for folder in splits]
download(urls, dir=dpath, threads=4)

# Convert each extracted folder, then drop the original to reclaim disk space.
for folder in splits:
    split = splits[folder]
    visdrone2yolo(dpath, split, folder)  # VisDrone annotations -> YOLO labels
    shutil.rmtree(dpath / folder)

In [None]:
from convert_visdrone_to_yolo import visdrone2SAETA

# Run the project-local SAETA conversion over the dataset root.
# NOTE(review): presumably this derives the dataset that SAETADrone.yaml points at from
# the converted VisDrone data — confirm against convert_visdrone_to_yolo.py.
visdrone2SAETA(dpath)

## Roboflow

In [None]:
from dotenv import load_dotenv

# Load variables from a .env file into this process's environment.
# NOTE(review): presumably this supplies the Roboflow credentials used by the CLI call
# below — confirm which variable the CLI expects.
load_dotenv()

# IPython shell escape: upload the local `datasets` directory to project `test-t0cxb`
# in workspace `suas-gyf9o` using the Roboflow CLI.
!roboflow import -w suas-gyf9o -p test-t0cxb datasets

loading Roboflow workspace...
loading Roboflow project...
Uploading to existing project suas-gyf9o/test-t0cxb
[UPLOADED] datasets/images/test/0000006_04050_d_0000010.jpg (CR77BeBLCTh3SPcTaBJw) [2.5s] / annotations = ERR: Unrecognized annotation format.
[UPLOADED] datasets/images/test/0000006_01111_d_0000003.jpg (MRhtyHJfzboiTZLZAF9k) [2.4s] / annotations = OK [0.5s]
[UPLOADED] datasets/images/test/0000006_02138_d_0000006.jpg (jSMLikViCKtsrrUad9yA) [2.4s] / annotations = OK [0.6s]
[UPLOADED] datasets/images/test/0000006_03636_d_0000009.jpg (8bNJ7cINDLz9GIoFK0gd) [2.5s] / annotations = OK [0.6s]
[UPLOADED] datasets/images/test/0000006_00159_d_0000001.jpg (RPn395HPNyl6lxmVpBUu) [2.6s] / annotations = OK [0.6s]
[UPLOADED] datasets/images/test/0000006_01275_d_0000004.jpg (p80cHoqLXKTMhqtGxkfn) [2.6s] / annotations = OK [0.6s]
[UPLOADED] datasets/images/test/0000006_00611_d_0000002.jpg (ftSgZ42eIHNsGIofVGle) [2.7s] / annotations = OK [0.6s]
[UPLOADED] datasets/images/test/0000006_02616_d_000

# Train

In [None]:
from ultralytics import YOLO

# Build a YOLOv8n model from its YAML definition and train it for 100 epochs on the
# dataset described by SAETADrone.yaml (the filtered VisDrone set, excluding comp_categories).
# NOTE(review): a .yaml model spec presumably means training from scratch rather than
# from pretrained weights — confirm this is intended.
model = YOLO("yolov8n.yaml")
model.train(data="SAETADrone.yaml", epochs=100)

Ultralytics 8.3.206 🚀 Python-3.12.11 torch-2.8.0+cu128 CUDA:0 (NVIDIA GeForce GTX 1660 Ti, 6144MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=SAETADrone.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.yaml, momentum=0.937, mosaic=1.0, multi_scale=False, name=train4, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.

## Step 1: Evaluate Model Performance

In [None]:
# Load the best checkpoint written by a training run.
# NOTE(review): the training log in this notebook reports name=train4, so the newest
# weights may live under runs/detect/train4/ — confirm this path points at the intended run.
model = YOLO("runs/detect/train/weights/best.pt")

# Run validation on the validation set.
# NOTE(review): training used data="SAETADrone.yaml"; confirm that validating against
# VisDrone_filtered.yaml (same classes, same order) is intended.
metrics = model.val(data="VisDrone_filtered.yaml")

# Display key metrics
print("\n📊 Validation Metrics:")
print(f"  mAP50:     {metrics.box.map50:.4f}")
print(f"  mAP50-95:  {metrics.box.map:.4f}")
print(f"  Precision: {metrics.box.mp:.4f}")
print(f"  Recall:    {metrics.box.mr:.4f}")

# Print per-class metrics.
# Class names come from the validation run itself (index -> name mapping) rather than a
# hand-maintained list, so labels cannot drift out of sync with the dataset definition
# and no class is silently dropped by zip().
print("\n📈 Per-Class mAP50-95:")
class_names = [metrics.names[idx] for idx in sorted(metrics.names)]
for name, map_val in zip(class_names, metrics.box.maps):
    print(f"  {name:20s}: {map_val:.4f}")

## Step 2: Visualize Predictions on Validation Images

In [None]:
# Run predictions on validation images and save the annotated copies.
results = model.predict(
    source="datasets/VisDrone_filtered/val/images",
    save=True,
    conf=0.25,  # Confidence threshold
    iou=0.7,  # IoU threshold for NMS
    max_det=300,  # Maximum detections per image
    project="runs/detect",
    name="val_predictions",
    # Without this, re-running the cell writes to an auto-incremented directory
    # (val_predictions2, ...) while the message below and the viewing cell keep
    # pointing at runs/detect/val_predictions.
    exist_ok=True,
)

print("✅ Predictions saved to runs/detect/val_predictions/")

## Step 3: View Sample Predictions

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import os

# Get a few sample prediction images.
# os.listdir() returns entries in arbitrary order, so sort to make the selection
# reproducible across runs and machines.
pred_dir = "runs/detect/val_predictions"
sample_images = sorted(
    f for f in os.listdir(pred_dir) if f.endswith((".jpg", ".png"))
)[:6]

# Display them in a grid
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

# Hide every panel up front so slots without an image do not show empty frames
# when fewer than six predictions are available.
for ax in axes:
    ax.axis("off")

for ax, img_name in zip(axes, sample_images):
    # The context manager closes the file handle once matplotlib has copied the pixels.
    with Image.open(os.path.join(pred_dir, img_name)) as img:
        ax.imshow(img)
    ax.set_title(img_name)

plt.tight_layout()
plt.show()

## Step 4: View Training Results & Confusion Matrix

In [None]:
# Show the confusion matrix and the training curves of a run side by side.
# NOTE(review): the training log in this notebook reports name=train4 — confirm that
# runs/detect/train is the run you want to inspect.
train_dir = "runs/detect/train"

# (image file inside train_dir, panel title, text shown when the file is missing)
panels = [
    ("confusion_matrix.png", "Confusion Matrix", "Confusion matrix not found"),
    ("results.png", "Training Curves", "Training curves not found"),
]

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

for ax, (file_name, title, missing_msg) in zip(axes, panels):
    image_path = os.path.join(train_dir, file_name)
    if os.path.exists(image_path):
        ax.imshow(Image.open(image_path))
        ax.set_title(title)
    else:
        # Keep the slot but explain why it is empty.
        ax.text(0.5, 0.5, missing_msg, ha="center", va="center")
    ax.axis("off")

plt.tight_layout()
plt.show()

## Step 5: Test on Custom Images (Optional)

In [None]:
# Test on your own images
# Place test images in 'test_images/' folder and run this cell

test_dir = "test_images"

# isdir (not exists): a stray *file* named test_images would make os.listdir() raise;
# this way it falls through to the hint in the else-branch instead.
if os.path.isdir(test_dir) and os.listdir(test_dir):
    print(f"Running predictions on images in {test_dir}/")

    results = model.predict(
        source=test_dir,
        save=True,
        conf=0.4,  # Higher confidence for custom images
        project="runs/detect",
        name="custom_predictions",
    )

    # On a re-run the output goes to an auto-incremented directory
    # (custom_predictions2, ...), so read the real location back from the results
    # instead of assuming the requested name; fall back to it if unavailable.
    default_dir = "runs/detect/custom_predictions"
    pred_dir = str((results[0].save_dir if results else None) or default_dir)

    print(f"✅ Custom predictions saved to {pred_dir}/")

    # Display results (sorted so the selection is reproducible)
    sample_images = sorted(
        f for f in os.listdir(pred_dir) if f.endswith((".jpg", ".png"))
    )[:4]

    fig, axes = plt.subplots(2, 2, figsize=(12, 12))
    axes = axes.flatten()

    # Hide all panels first so slots without an image do not show empty frames.
    for ax in axes:
        ax.axis("off")

    for ax, img_name in zip(axes, sample_images):
        # Close the file handle once matplotlib has copied the pixels.
        with Image.open(os.path.join(pred_dir, img_name)) as img:
            ax.imshow(img)
        ax.set_title(img_name)

    plt.tight_layout()
    plt.show()
else:
    print("ℹ️  No custom test images found.")
    print(f"   Create '{test_dir}/' folder and add images to test on your own data.")