In [None]:
# Clone the YOLOv5 repository, switch the notebook's working directory into it,
# and hard-reset the checkout to one fixed commit so every run uses the same code.
# Note: `%cd` changes the kernel's working directory, so relative paths used by
# later cells (e.g. requirements.txt, utils/...) resolve inside yolov5/.
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
!git reset --hard fbe67e465375231474a2ad80a4389efc77ecff99

Cloning into 'yolov5'...
remote: Enumerating objects: 16625, done.[K
remote: Counting objects: 100% (103/103), done.[K
remote: Compressing objects: 100% (73/73), done.[K
remote: Total 16625 (delta 58), reused 65 (delta 30), pack-reused 16522[K
Receiving objects: 100% (16625/16625), 15.10 MiB | 18.97 MiB/s, done.
Resolving deltas: 100% (11417/11417), done.
/content/yolov5
HEAD is now at fbe67e46 Fix `OMP_NUM_THREADS=1` for macOS (#8624)


In [None]:
# install dependencies as necessary
!pip install -qr requirements.txt  # install dependencies (ignore errors)
import torch

from IPython.display import Image, clear_output  # to display images
from utils.downloads import attempt_download  # to download models/datasets

# clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m50.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
[?25hSetup complete. Using torch 2.3.0+cu121 CPU


In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="wLcs5jy21EsrfRws42Vo")
project = rf.workspace("driver-wlf6m").project("driverv1")
version = project.version(3)
dataset = version.download("yolov5")


loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in DriverV1-3 to yolov5pytorch:: 100%|██████████| 2224533/2224533 [00:47<00:00, 47041.76it/s]





Extracting Dataset Version Zip to DriverV1-3 in yolov5pytorch:: 100%|██████████| 112946/112946 [00:32<00:00, 3518.80it/s]


In [None]:
import os
import shutil
from glob import glob
import cv2
import numpy as np
from imgaug import augmenters as iaa

In [None]:
# Root directory of the downloaded dataset. Taken from the Roboflow download
# result instead of a hard-coded path: the download is written relative to the
# kernel's working directory, so a fixed "/content/..." path goes stale as soon
# as an earlier cell changes directory.
base_dir = dataset.location
folders = ['train', 'valid', 'test']

# Class names are the sub-directories of train/images. Plain files are skipped
# (they are not classes) and the list is sorted so runs are deterministic.
train_images_dir = os.path.join(base_dir, 'train', 'images')
classes = sorted(
    entry for entry in os.listdir(train_images_dir)
    if os.path.isdir(os.path.join(train_images_dir, entry))
)
if not classes:
    print(f"Warning: no class sub-directories found in {train_images_dir}; nothing will be balanced.")

In [None]:
def move_excess_files(base_dir, folder, classes, target_count=1000):
    """Cap every class of one split at ``target_count`` images.

    Images beyond the cap are moved, together with the label file that shares
    their base name, into a parallel ``excess`` tree:

        <base_dir>/<folder>/images/<cls>/*.jpg -> <base_dir>/<folder>/excess/images/<cls>/
        <base_dir>/<folder>/labels/<cls>/*.txt -> <base_dir>/<folder>/excess/labels/<cls>/

    Args:
        base_dir: Root directory of the dataset.
        folder: Split name (sub-directory of ``base_dir``), e.g. ``'train'``.
        classes: Iterable of class sub-directory names.
        target_count: Maximum number of images to keep per class.

    The ``excess`` directories are created for every class, even when nothing
    has to be moved.
    """
    for cls in classes:
        img_dir = os.path.join(base_dir, folder, 'images', cls)
        lbl_dir = os.path.join(base_dir, folder, 'labels', cls)
        excess_img_dir = os.path.join(base_dir, folder, 'excess', 'images', cls)
        excess_lbl_dir = os.path.join(base_dir, folder, 'excess', 'labels', cls)

        os.makedirs(excess_img_dir, exist_ok=True)
        os.makedirs(excess_lbl_dir, exist_ok=True)

        # glob() returns files in arbitrary order; sort so the kept subset is
        # deterministic across runs.
        img_files = sorted(glob(os.path.join(img_dir, '*.jpg')))

        # Slicing past the end yields an empty list, so classes at or below the
        # cap are left untouched.
        for img_file in img_files[target_count:]:
            stem = os.path.splitext(os.path.basename(img_file))[0]
            lbl_file = os.path.join(lbl_dir, f"{stem}.txt")

            shutil.move(img_file, excess_img_dir)
            # Move the label that belongs to THIS image (same base name) so
            # image/label pairs are never split; images without a label are
            # moved on their own.
            if os.path.exists(lbl_file):
                shutil.move(lbl_file, excess_lbl_dir)

In [None]:
def balance_dataset(base_dir, folder, classes, target_count=1000):
    """Top up every class of one split until it holds ``target_count`` images.

    For each class two sources are used, in this order:

    1. Image/label pairs stored under ``<base_dir>/<folder>/excess/`` are moved
       back into the class directories.
    2. If the class is still short, augmented copies (random horizontal flip,
       rotation in [-10, 10] degrees, brightness multiply in [0.8, 1.2]) of the
       images already present are written as ``<stem>_aug<k><ext>`` with a
       label file ``<stem>_aug<k>.txt`` copied from the source image's label.

    Expected layout:

        <base_dir>/<folder>/images/<cls>/*.jpg
        <base_dir>/<folder>/labels/<cls>/*.txt

    Args:
        base_dir: Root directory of the dataset.
        folder: Split name (sub-directory of ``base_dir``), e.g. ``'train'``.
        classes: Iterable of class sub-directory names.
        target_count: Number of images each class should reach.

    Classes that have no readable, labelled image to augment from are reported
    with a warning and left below the target instead of looping forever.
    """
    for cls in classes:
        img_dir = os.path.join(base_dir, folder, 'images', cls)
        lbl_dir = os.path.join(base_dir, folder, 'labels', cls)
        # Sorted so the selection is deterministic (glob order is arbitrary).
        img_files = sorted(glob(os.path.join(img_dir, '*.jpg')))

        if len(img_files) < target_count:
            excess_img_dir = os.path.join(base_dir, folder, 'excess', 'images', cls)
            excess_lbl_dir = os.path.join(base_dir, folder, 'excess', 'labels', cls)

            excess_imgs = sorted(glob(os.path.join(excess_img_dir, '*.jpg')))
            to_move = min(target_count - len(img_files), len(excess_imgs))

            if to_move > 0:
                # shutil.move() into a missing directory would rename the file
                # to that path instead of placing it inside, so make sure the
                # destinations exist.
                os.makedirs(img_dir, exist_ok=True)
                os.makedirs(lbl_dir, exist_ok=True)

            for img_file in excess_imgs[:to_move]:
                stem = os.path.splitext(os.path.basename(img_file))[0]
                lbl_file = os.path.join(excess_lbl_dir, f"{stem}.txt")

                shutil.move(img_file, img_dir)
                # Restore the label with the same base name so pairs stay intact.
                if os.path.exists(lbl_file):
                    shutil.move(lbl_file, lbl_dir)

            img_files = sorted(glob(os.path.join(img_dir, '*.jpg')))

        # Data augmentation if the class is still short.
        if len(img_files) >= target_count:
            continue

        # Only images that have a label can be used as augmentation sources,
        # because every augmented image needs a label copied from its source.
        sources = [
            img_file for img_file in img_files
            if os.path.exists(os.path.join(
                lbl_dir, os.path.splitext(os.path.basename(img_file))[0] + '.txt'))
        ]
        if not sources:
            print(f"Warning: no labelled images to augment for class '{cls}' in '{folder}'; "
                  f"left at {len(img_files)}/{target_count} images.")
            continue

        seq = iaa.Sequential([
            iaa.Fliplr(0.5),
            iaa.Affine(rotate=(-10, 10)),
            iaa.Multiply((0.8, 1.2))
        ])
        # NOTE(review): Fliplr and Affine move image content, but the label file
        # is copied unchanged. If the .txt files contain box/polygon coordinates
        # they will not match the augmented image -- confirm the label format
        # and transform the annotations together with the image if so (TODO).

        count = len(img_files)
        next_source = 0
        unreadable_in_a_row = 0
        # Cycle over the source images until the target is reached. Stop early
        # if a full pass over the sources produced no readable image.
        while count < target_count and unreadable_in_a_row < len(sources):
            img_file = sources[next_source % len(sources)]
            next_source += 1

            img = cv2.imread(img_file)
            if img is None:  # cv2.imread returns None for unreadable files
                unreadable_in_a_row += 1
                continue
            unreadable_in_a_row = 0

            img_aug = seq.augment_image(img)

            name, ext = os.path.splitext(os.path.basename(img_file))
            # The same suffix is used for image and label so they stay paired;
            # `count` grows by one per file, which keeps names unique in a run.
            new_img_name = f"{name}_aug{count}{ext}"
            new_lbl_name = f"{name}_aug{count}.txt"

            cv2.imwrite(os.path.join(img_dir, new_img_name), img_aug)
            shutil.copyfile(
                os.path.join(lbl_dir, f"{name}.txt"),
                os.path.join(lbl_dir, new_lbl_name)
            )
            count += 1

        if count < target_count:
            print(f"Warning: could not read any source image for class '{cls}' in '{folder}'; "
                  f"left at {count}/{target_count} images.")


In [None]:
# For every split: first cap each class at 1000 images, then top up the classes
# that are below 1000. The order matters because the second step draws from the
# files set aside by the first.
balance_steps = (move_excess_files, balance_dataset)
for folder in folders:
    for step in balance_steps:
        step(base_dir, folder, classes, target_count=1000)

print("Dataset has been balanced successfully.")