In [1]:
%pip install ultralytics kaggle -q

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import os, shutil, random, xml.etree.ElementTree as ET, yaml
from kaggle.api.kaggle_api_extended import KaggleApi
from ultralytics import YOLO

In [3]:
# Copy kaggle.json to the default location (~/.kaggle/) where the Kaggle API library looks for credentials.
os.makedirs(os.path.expanduser("~/.kaggle"), exist_ok=True)
shutil.copy("kaggle.json", os.path.expanduser("~/.kaggle/kaggle.json"))

api = KaggleApi()
api.authenticate()

api.dataset_download_files("andrewmvd/hard-hat-detection", path="dataset_helmet", unzip=True)

Dataset URL: https://www.kaggle.com/datasets/andrewmvd/hard-hat-detection


In [4]:
images_dir = "dataset_helmet/images"
ann_dir = "dataset_helmet/annotations"
out_dir = "dataset_yolo"

for d in ["images/train", "images/val", "labels/train", "labels/val"]:
    os.makedirs(os.path.join(out_dir, d), exist_ok=True)

classes = ["head", "helmet", "person"]
class_dict = {c: i for i, c in enumerate(classes)}

# Converts object detection annotations from the PASCAL VOC XML format to the YOLO text format
def convert_voc_to_yolo(xml_file, out_txt):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    size = root.find("size")
    w = int(size.find("width").text)
    h = int(size.find("height").text)

    with open(out_txt, "w") as f:
        for obj in root.findall("object"):
            cls = obj.find("name").text
            if cls not in class_dict:
                continue
            cls_id = class_dict[cls]
            xmlbox = obj.find("bndbox")
            xmin, ymin = int(xmlbox.find("xmin").text), int(xmlbox.find("ymin").text)
            xmax, ymax = int(xmlbox.find("xmax").text), int(xmlbox.find("ymax").text)

            # Converts bounding box coordinates from pixel values to a normalized YOLO format
            x_center = (xmin + xmax) / 2 / w
            y_center = (ymin + ymax) / 2 / h
            bw = (xmax - xmin) / w
            bh = (ymax - ymin) / h

            f.write(f"{cls_id} {x_center:.6f} {y_center:.6f} {bw:.6f} {bh:.6f}\n")

all_imgs = [f for f in os.listdir(images_dir) if f.lower().endswith((".jpg", ".jpeg", ".png"))]
random.shuffle(all_imgs)

split = int(0.8 * len(all_imgs))
train_imgs, val_imgs = all_imgs[:split], all_imgs[split:]

def process(img_list, subset):
    for img in img_list:
        base = os.path.splitext(img)[0]
        xml_file = os.path.join(ann_dir, base + ".xml")

        if not os.path.exists(xml_file):
            continue

        out_img = os.path.join(out_dir, f"images/{subset}/{img}")
        out_txt = os.path.join(out_dir, f"labels/{subset}/{base}.txt")

        shutil.copy(os.path.join(images_dir, img), out_img)
        convert_voc_to_yolo(xml_file, out_txt)

process(train_imgs, "train")
process(val_imgs, "val")

In [5]:
data = {
    "train": "dataset_yolo/images/train",
    "val": "dataset_yolo/images/val",
    "nc": 3,
    "names": classes
}

with open("data.yaml", "w") as f:
    yaml.dump(data, f)

In [15]:
model = YOLO("yolov8n.pt")

model.train(
    data="data.yaml",
    epochs=50,
    imgsz=640,
    batch=16,
    patience=10,
    project="results"
)

New https://pypi.org/project/ultralytics/8.3.193 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.192  Python-3.12.4 torch-2.8.0+cpu CPU (AMD Ryzen 7 5700U with Radeon Graphics)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train2, nbs=64, nms=False, opset=None, optimiz

KeyboardInterrupt: 

In [14]:
# testing
results = model.predict("dataset_yolo/images/val/hard_hat_workers20.png")
results[0].show()


image 1/1 c:\Users\henri\OneDrive\Documentos\GitHub\dioProjects\machineLearning\yoloTraining\dataset_yolo\images\val\hard_hat_workers20.png: 640x640 3 helmets, 133.1ms
Speed: 7.4ms preprocess, 133.1ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)


In [6]:
model = YOLO("results/train/weights/best.pt")
results = model.predict("dataset_yolo/images/val/hard_hat_workers20.png")
results[0].show()


image 1/1 c:\Users\henri\OneDrive\Documentos\GitHub\dioProjects\machineLearning\yoloTraining\dataset_yolo\images\val\hard_hat_workers20.png: 640x640 3 helmets, 121.4ms
Speed: 8.6ms preprocess, 121.4ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)
