In [1]:
import os

# Read-only VisDrone inputs from the attached Kaggle dataset: image folders.
train_data = "/kaggle/input/visdrone-dataset/VisDrone2019-DET-train/VisDrone2019-DET-train/images"
test_data = "/kaggle/input/visdrone-dataset/VisDrone2019-DET-val/VisDrone2019-DET-val/images"

# Read-only VisDrone inputs: annotation folders that sit next to the images.
train_labels = "/kaggle/input/visdrone-dataset/VisDrone2019-DET-train/VisDrone2019-DET-train/annotations"
test_labels = "/kaggle/input/visdrone-dataset/VisDrone2019-DET-val/VisDrone2019-DET-val/annotations"

# Writable output tree for the images: /kaggle/working/images/{train,val}.
root_dir = os.path.join("/kaggle/working/", "images")
os.makedirs(root_dir, exist_ok=True)

train_dir = os.path.join(root_dir, "train")
val_dir = os.path.join(root_dir, "val")

# Writable output tree for the labels: /kaggle/working/labels/{train,val}.
# Assigned on its own so that root_dir keeps pointing at the images folder;
# a chained `label_root_dir = root_dir = ...` would rebind root_dir as well.
label_root_dir = os.path.join("/kaggle/working/", "labels")
os.makedirs(label_root_dir, exist_ok=True)

label_train_dir = os.path.join(label_root_dir, "train")
label_val_dir = os.path.join(label_root_dir, "val")

In [2]:
from PIL import Image
from pathlib import Path
from tqdm import tqdm

def _visdrone_row_to_yolo(row, img_w, img_h):
    """Turn one comma-separated VisDrone annotation row into a YOLO label line.

    Args:
        row: Raw text line from an annotation file.
        img_w: Width in pixels of the image the row belongs to.
        img_h: Height in pixels of the image the row belongs to.

    Returns:
        A string ``"<class> <x_center> <y_center> <width> <height>"`` with the
        four box values divided by the image size, or ``None`` when the row
        should not produce a label (blank/truncated row, or fifth field '0').

    Raises:
        ValueError: If a box or category field is not an integer.
    """
    parts = row.strip().split(',')
    if len(parts) < 6:  # blank line or truncated row: nothing to convert
        return None
    if parts[4] == '0':  # ignored region
        return None

    # Category ids are shifted down by one so they match the 0-based class
    # indices used by the dataset YAML.
    class_id = int(parts[5]) - 1
    x, y, width, height = map(int, parts[:4])

    # Top-left corner + size in pixels -> box centre + size as image fractions.
    x_center = (x + width / 2) / img_w
    y_center = (y + height / 2) / img_h
    w_norm = width / img_w
    h_norm = height / img_h

    return f"{class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}"


def convert_visdrone_to_yolo(annotations_path, images_path, output_label_path):
    """Convert a folder of VisDrone annotation files into YOLO label files.

    For every ``*.txt`` file in ``annotations_path`` that has a same-named
    ``.jpg`` in ``images_path``, a label file with the same name is written to
    ``output_label_path``. Annotation files without a matching image are
    skipped.

    Args:
        annotations_path: Directory containing the VisDrone ``*.txt`` files.
        images_path: Directory containing the corresponding ``.jpg`` images;
            each image is opened only to read its pixel size.
        output_label_path: Directory to write the label files to; created if
            it does not exist.
    """
    os.makedirs(output_label_path, exist_ok=True)

    annotation_files = list(Path(annotations_path).glob("*.txt"))

    for ann_file in tqdm(annotation_files, desc=f"Converting {annotations_path}"):
        # Get corresponding image size
        image_file = Path(images_path) / ann_file.with_suffix('.jpg').name
        if not image_file.exists():
            continue
        # The context manager releases the file handle immediately instead of
        # leaving one open image per annotation file until garbage collection.
        with Image.open(image_file) as img:
            w, h = img.size

        with open(ann_file, 'r') as f:
            converted = (_visdrone_row_to_yolo(row, w, h) for row in f)
            lines = [line for line in converted if line is not None]

        # Write to YOLO label file
        label_file_path = Path(output_label_path) / ann_file.name
        with open(label_file_path, 'w') as out_file:
            out_file.write('\n'.join(lines))


In [3]:
import shutil

# Mirror the read-only input images into the writable images/{train,val} tree.
for source_images, target_images in ((train_data, train_dir), (test_data, val_dir)):
    shutil.copytree(source_images, target_images, dirs_exist_ok=True)
print("train and val dirs copied")

# Produce one YOLO label file per annotation file, train split first, then val.
# Image sizes are read from the original input folders, not from the copies.
conversion_jobs = (
    (train_labels, train_data, label_train_dir),
    (test_labels, test_data, label_val_dir),
)
for annotation_dir, image_dir, label_dir in conversion_jobs:
    convert_visdrone_to_yolo(annotation_dir, image_dir, label_dir)
print("Converted annotations to YOLO format")


train and val dirs copied


Converting /kaggle/input/visdrone-dataset/VisDrone2019-DET-train/VisDrone2019-DET-train/annotations: 100%|██████████| 6471/6471 [01:05<00:00, 99.53it/s] 
Converting /kaggle/input/visdrone-dataset/VisDrone2019-DET-val/VisDrone2019-DET-val/annotations: 100%|██████████| 548/548 [00:05<00:00, 102.53it/s]

Converted annotations to YOLO format





In [4]:
import os


# Dataset description consumed by the training step: where the train/val
# images live and the ten class names keyed by their 0-based index.
file_content = """

path: /kaggle/working/    # dataset root dir
train: /kaggle/working/images/train    # train images 
val: /kaggle/working/images/val       # val images 

# Classes
names:
  0: pedestrian
  1: people
  2: bicycle
  3: car
  4: van
  5: truck
  6: tricycle
  7: awning-tricycle
  8: bus
  9: motor


"""

# Write to the absolute location that the training call passes as `data=`,
# so the file is found regardless of the kernel's current working directory.
dataset_root = "/kaggle/working"
os.makedirs(dataset_root, exist_ok=True)
file_name = os.path.join(dataset_root, "VisDrone.yaml")

with open(file_name, "w", encoding="utf-8") as f:
    f.write(file_content)

print("file saved")

file saved


In [5]:
!pip install -q ultralytics


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━

In [6]:
from ultralytics import YOLO


# Start from the pretrained YOLOv8-small checkpoint; per the cell output it is
# downloaded on first use when no local copy exists.
pretrained_weights = "yolov8s.pt"
model = YOLO(pretrained_weights)



Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 158MB/s] 


In [None]:
# Fine-tune the loaded checkpoint on the VisDrone dataset config.
# NOTE(review): the captured log prints "Closing dataloader mosaic" right after
# "Starting training" (close_mosaic=10 with epochs=10), so mosaic augmentation
# appears to be off for the whole run — confirm this is intended.
train_settings = dict(
    data="/kaggle/working/VisDrone.yaml",
    epochs=10,
    imgsz=640,
    workers=4,
    batch=16,
    device="cuda",
    name="yolov8_visdrone",
)
model.train(**train_settings)

Ultralytics 8.3.140 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/working/VisDrone.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolov8_visdrone, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, 

100%|██████████| 5.35M/5.35M [00:00<00:00, 70.5MB/s]


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 3130.0±752.0 MB/s, size: 261.9 KB)


[34m[1mtrain: [0mScanning /kaggle/working/labels/train.cache... 6471 images, 0 backgrounds, 0 corrupt: 100%|██████████| 6471/6471 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/images/train/0000137_02220_d_0000163.jpg: 1 duplicate labels removed
[34m[1mtrain: [0m/kaggle/working/images/train/0000140_00118_d_0000002.jpg: 1 duplicate labels removed
[34m[1mtrain: [0m/kaggle/working/images/train/9999945_00000_d_0000114.jpg: 1 duplicate labels removed
[34m[1mtrain: [0m/kaggle/working/images/train/9999987_00000_d_0000049.jpg: 1 duplicate labels removed
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 430.0±210.9 MB/s, size: 131.6 KB)


[34m[1mval: [0mScanning /kaggle/working/labels/val.cache... 548 images, 0 backgrounds, 0 corrupt: 100%|██████████| 548/548 [00:00<?, ?it/s]


Plotting labels to runs/detect/yolov8_visdrone/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mruns/detect/yolov8_visdrone[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      10.6G      1.436      1.151     0.9439        787        640:  19%|█▉        | 77/405 [00:24<01:37,  3.38it/s]