## 1 - Download data

In [None]:
from openimages.download import download_dataset
download_dataset("./../data/", ["Traffic light"], 'exclusions.txt', limit=200, annotation_format='pascal')

## 2 - Change data folder

In [None]:
import subprocess

subprocess.run(["mv", "../data/traffic light", "../data/traffic_light"])

In [None]:
# Célula 1: Importações e Definições de Funções
import os
import shutil
from sklearn.model_selection import train_test_split
import random
from pathlib import Path

def create_directories(base_path):
    """Cria as pastas necessárias para treino e validação"""
    dirs = ['images/train', 'images/val', 'labels/train', 'labels/val']
    for dir_path in dirs:
        os.makedirs(os.path.join(base_path, dir_path), exist_ok=True)

def split_dataset(source_dir, output_dir, val_split=0.2, seed=42):
    """
    Separa o dataset em treino e validação
    
    Args:
        source_dir: Diretório com as imagens e labels originais
        output_dir: Diretório onde serão criadas as pastas train/val
        val_split: Proporção do dataset para validação (padrão: 0.2)
        seed: Seed para reproducibilidade
    """
    random.seed(seed)
    
    source_dir = Path(source_dir)
    output_dir = Path(output_dir)
    
    create_directories(output_dir)
    
    # Lista todos os arquivos de imagem
    image_files = [f for f in (source_dir / 'images').glob('*.jpg')]
    
    # Divide os arquivos em treino e validação
    train_files, val_files = train_test_split(image_files, 
                                            test_size=val_split,
                                            random_state=seed)
    
    def copy_files(files, split_type):
        for f in files:
            # Copia imagem
            dst_img = output_dir / 'images' / split_type / f.name
            shutil.copy2(f, dst_img)
            
            # Procura e copia label correspondente
            label_name = f.stem + '.xml'
            src_label = source_dir / 'pascal' / label_name
            if src_label.exists():
                dst_label = output_dir / 'labels' / split_type / label_name
                shutil.copy2(src_label, dst_label)
    
    # Copia arquivos para as respectivas pastas
    copy_files(train_files, 'train')
    copy_files(val_files, 'val')
    
    print(f'Dataset dividido com sucesso!')
    print(f'Treino: {len(train_files)} imagens')
    print(f'Validação: {len(val_files)} imagens')

In [None]:
# Célula 2: Execução da função
source_directory = "../data/traffic_light"  # diretório com as imagens originais
output_directory = "../data"               # diretório onde serão criadas as subpastas

split_dataset(
    source_dir=source_directory,
    output_dir=output_directory,
    val_split=0.2,
    seed=42
)

In [None]:
# Célula 3: Verificação
def verify_splits():
    base_dir = Path("../dataset")
    
    for split in ['train', 'val']:
        images = list((base_dir / 'images' / split).glob('*.jpg'))
        labels = list((base_dir / 'labels' / split).glob('*.txt'))
        
        print(f'Split {split}:')
        print(f'  Imagens: {len(images)}')
        print(f'  Labels: {len(labels)}')
        print(f'  Matched: {len(images) == len(labels)}')
        print()

verify_splits()

In [None]:
import os

train_image_names = os.listdir('../data/images/train')
train_image_names = [name.split('.')[0] for name in train_image_names]
train_image_names = set(train_image_names)

train_label_names = os.listdir('../data/labels/train')
train_label_names = [name.split('.')[0] for name in train_label_names]
train_label_names = set(train_label_names)

train_image_names == train_label_names

In [None]:
val_image_names = os.listdir('../data/images/val')
val_image_names = [name.split('.')[0] for name in val_image_names]
val_image_names = set(val_image_names)

val_label_names = os.listdir('../data/labels/val')
val_label_names = [name.split('.')[0] for name in val_label_names]
val_label_names = set(val_label_names)

val_image_names == val_label_names

## Two Step Yolo

In [1]:
# Install YOLOv8 (Ultralytics)
!pip install ultralytics

# For CNN
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

# Other tools
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# For loading the YOLO model
from ultralytics import YOLO
from IPython.display import clear_output
clear_output()

In [6]:
# Load a pre-trained YOLOv8n (nano) model
model = YOLO('yolov8n.pt')  # or yolov8s.pt for slightly bigger

import yaml

yaml_config = """
path: data/
train: images/train
val: images/val

names:
  0: traffic_light
"""

# Create a temporary YAML file
with open('dataset.yaml', 'w') as f:
    f.write(yaml_config)

# Train the model on your dataset
model.train(
    # inline yaml
    data='dataset.yaml',
    epochs=50,
    imgsz=640,
    batch=16,
    name='traffic_light_detector'
)


Ultralytics 8.3.119 🚀 Python-3.12.3 torch-2.7.0+cu126 CPU (12th Gen Intel Core(TM) i5-1235U)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataset.yaml, epochs=50, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=traffic_light_detector13, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, 

[34m[1mtrain: [0mScanning /home/erlo/Documents/Code/dl-projeto-final/src/data/labels/train... 0 images, 160 backgrounds, 0 corrupt: 100%|██████████| 160/160 [00:00<00:00, 2654.45it/s]

[34m[1mtrain: [0mNew cache created: /home/erlo/Documents/Code/dl-projeto-final/src/data/labels/train.cache





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 4374.1±642.5 MB/s, size: 434.7 KB)


[34m[1mval: [0mScanning /home/erlo/Documents/Code/dl-projeto-final/src/data/labels/val... 0 images, 40 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<00:00, 4786.65it/s]






[34m[1mval: [0mNew cache created: /home/erlo/Documents/Code/dl-projeto-final/src/data/labels/val.cache
Plotting labels to /home/erlo/Documents/Code/dl-projeto-final/runs/detect/traffic_light_detector13/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/home/erlo/Documents/Code/dl-projeto-final/runs/detect/traffic_light_detector13[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/10 [00:04<?, ?it/s]


KeyboardInterrupt: 