In [1]:
# 1. Instalar dependências
!pip install ultralytics torch torchvision matplotlib

Collecting ultralytics
  Downloading ultralytics-8.3.203-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.203-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m68.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.17-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.203 ultralytics-thop-2.0.17


In [2]:
# 2. Mount Google Drive (where the dataset and the outputs are kept)
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Main project paths; everything lives under the mounted Drive folder.
project_dir = '/content/drive/MyDrive/tech-challenge'
data_yaml = f'{project_dir}/datasets/data.yaml'
model_path = f'{project_dir}/yolo11s.pt'  # Weights file loaded by YOLO(model_path) in the first training cell
results_dir = f'{project_dir}/results'

In [4]:
# Colab: preparar dataset local e treinar rápido (menos "Scanning")
import os, shutil, concurrent.futures, pathlib, re
from tqdm import tqdm

# 1) Split list files stored ON DRIVE (edit these)
PROJECT_DIR = '/content/drive/MyDrive/tech-challenge'
TRAIN_LISTS = [f'{PROJECT_DIR}/datasets/splits/train.txt']
VAL_LISTS = [f'{PROJECT_DIR}/datasets/splits/val.txt']

# 2) Local destination folders on the Colab machine
LOCAL_ROOT = '/content/data'
LOCAL_IMGS_T = f'{LOCAL_ROOT}/images/train'
LOCAL_IMGS_V = f'{LOCAL_ROOT}/images/val'
LOCAL_LBLS_T = f'{LOCAL_ROOT}/labels/train'
LOCAL_LBLS_V = f'{LOCAL_ROOT}/labels/val'

# Create the four image/label folders up front (no error if they already exist).
for local_dir in (LOCAL_IMGS_T, LOCAL_IMGS_V, LOCAL_LBLS_T, LOCAL_LBLS_V):
    os.makedirs(local_dir, exist_ok=True)

def read_list(paths):
    """Gather the distinct entries listed in one or more split files.

    Args:
        paths: iterable of text-file paths, one entry per line.

    Returns:
        A sorted list of the unique, whitespace-stripped, non-empty lines found
        across all files that exist. A file that does not exist is reported
        with a printed warning and skipped.
    """
    unique_entries = set()
    for split_file in paths:
        if os.path.exists(split_file):
            with open(split_file) as handle:
                # Strip each line and keep only the ones that still have content.
                unique_entries.update(
                    entry for entry in map(str.strip, handle) if entry
                )
        else:
            print(f'AVISO: split não encontrado: {split_file}')
    return sorted(unique_entries)

def img_to_label(img_path):
    """Derive the label-file path that corresponds to an image path.

    Only the LAST ``/images/`` directory component is swapped for ``/labels/``
    (a parent folder that happens to be called ``images`` is left alone), and
    the file extension, if any, is replaced by ``.txt``.

    Args:
        img_path: image path using ``/`` separators.

    Returns:
        The label path as a string. When ``img_path`` contains no ``/images/``
        component, only the extension is changed.
    """
    head, marker, tail = img_path.rpartition('/images/')
    if marker:
        img_path = f'{head}/labels/{tail}'
    # splitext only looks at the final path component, so a dot inside a
    # directory name cannot truncate the path when the file has no extension.
    return os.path.splitext(img_path)[0] + '.txt'

def safe_copy(src, dst):
    """Copy ``src`` to ``dst`` unless the destination is already there.

    Args:
        src: path of the file to copy.
        dst: path the copy should be written to; its parent directory is
            created when needed.

    Returns:
        False when ``src`` does not exist (nothing is done); True otherwise,
        whether the file was copied now or ``dst`` already existed.
    """
    source_present = os.path.exists(src)
    if source_present and not os.path.exists(dst):
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        shutil.copy2(src, dst)
    return source_present

def copy_split(img_list, dst_imgs_dir, dst_lbls_dir):
    """Copy every image of a split, plus its label file, into local folders.

    Copies are submitted to a pool of 16 threads and a tqdm bar tracks their
    completion. Files are stored flat, under their base name only.

    Args:
        img_list: image paths to copy (used as-is as copy sources).
        dst_imgs_dir: folder that receives the images.
        dst_lbls_dir: folder that receives the label files.

    Returns:
        A tuple ``(ok_img, ok_lbl, miss_lbl)``: how many images exist at their
        destination, how many labels exist at their destination, and how many
        labels do not, all measured after the copy phase has finished.
    """
    destinations = []   # (image destination, label destination) per entry
    pending = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as pool:
        for img_src in img_list:
            lbl_src = img_to_label(img_src)
            img_dst = os.path.join(dst_imgs_dir, os.path.basename(img_src))
            lbl_dst = os.path.join(dst_lbls_dir, os.path.basename(lbl_src))
            destinations.append((img_dst, lbl_dst))

            pending.append(pool.submit(safe_copy, img_src, img_dst))
            pending.append(pool.submit(safe_copy, lbl_src, lbl_dst))

        progress = tqdm(
            concurrent.futures.as_completed(pending),
            total=len(pending),
            desc=f'Copiando para {dst_imgs_dir}',
        )
        for finished in progress:
            # Calling result() re-raises any exception from the worker thread.
            finished.result()

    # Simple tally based on what is actually present at the destination.
    ok_img = sum(os.path.exists(img_dst) for img_dst, _ in destinations)
    ok_lbl = sum(os.path.exists(lbl_dst) for _, lbl_dst in destinations)
    miss_lbl = len(destinations) - ok_lbl
    return ok_img, ok_lbl, miss_lbl

# 3) Read and combine the split lists (read_list returns sorted, de-duplicated entries)
train_imgs = read_list(TRAIN_LISTS)
val_imgs   = read_list(VAL_LISTS)

print(f"Train imgs: {len(train_imgs)} | Val imgs: {len(val_imgs)}")

# 4) Copy only the files referenced by the splits into /content
# Each call returns (images present, labels present, labels missing) at the destination.
ti, tl, tm = copy_split(train_imgs, LOCAL_IMGS_T, LOCAL_LBLS_T)
vi, vl, vm = copy_split(val_imgs,   LOCAL_IMGS_V, LOCAL_LBLS_V)
print(f"[Resumo copy] train: imgs={ti} labels_ok={tl} labels_missing={tm}")
print(f"[Resumo copy] val  : imgs={vi} labels_ok={vl} labels_missing={vm}")

# 5) Gera splits locais (apontando para /content/data/images/..)
def write_local_split(lst, out_txt, dst_dir):
    """Write a split file whose entries point into ``dst_dir``.

    Args:
        lst: original image paths; only their base names are kept.
        out_txt: path of the text file to (over)write.
        dst_dir: directory that each base name is joined onto.

    The output has one ``dst_dir/<basename>`` path per line, in input order.
    """
    with open(out_txt, 'w') as out:
        out.writelines(
            os.path.join(dst_dir, os.path.basename(entry)) + '\n' for entry in lst
        )

# Write local split files: same entries as the Drive splits, but each path is
# rewritten to the image's base name inside the local train/val image folder.
os.makedirs(f'{LOCAL_ROOT}/splits', exist_ok=True)
LOCAL_TRAIN_SPLIT = f'{LOCAL_ROOT}/splits/train.txt'
LOCAL_VAL_SPLIT   = f'{LOCAL_ROOT}/splits/val.txt'
write_local_split(train_imgs, LOCAL_TRAIN_SPLIT, LOCAL_IMGS_T)
write_local_split(val_imgs,   LOCAL_VAL_SPLIT,   LOCAL_IMGS_V)

# 6) "names": the class list for this project (edit to match your dataset).
# This list is written verbatim under the 'names' key of the local data.yaml.
# NOTE(review): the order presumably has to match the class ids used in the
# label files — confirm against the dataset before editing.
NAMES = [
    "api_gateway",
    "block_storage",
    "blockchain",
    "cdn",
    "container_service",
    "data_warehouse",
    "devops_ci_cd",
    "dns",
    "etl_data_factory",
    "file_storage",
    "iam_identity",
    "iot_edge",
    "kms_key_vault",
    "kubernetes",
    "load_balancer",
    "logging",
    "media_services",
    "migration",
    "ml_ai_platform",
    "monitoring",
    "nosql_db",
    "object_storage",
    "private_endpoint",
    "relational_db",
    "security",
    "serverless_functions",
    "streaming_ingest",
    "vm_compute",
    "vpc_vnet",
    "vpn_gateway",
    "waf",
    "web_app_appservice",
    "workflow_orchestration"
]

# 7) Local data.yaml (uses the .txt splits, so whole directories need not be listed)
import yaml

DATA_YAML = f'{LOCAL_ROOT}/data_local.yaml'

# Key order is preserved in the file because sort_keys=False is passed below.
dataset_config = {
    'path': LOCAL_ROOT,
    'train': LOCAL_TRAIN_SPLIT,
    'val': LOCAL_VAL_SPLIT,
    'names': NAMES,
}
with open(DATA_YAML, 'w') as yaml_file:
    yaml.safe_dump(dataset_config, yaml_file, sort_keys=False)

print("data.yaml →", DATA_YAML)


Train imgs: 16470 | Val imgs: 1830


Copiando para /content/data/images/train: 100%|██████████| 32940/32940 [31:30<00:00, 17.43it/s]
Copiando para /content/data/images/val: 100%|██████████| 3660/3660 [03:13<00:00, 18.92it/s]

[Resumo copy] train: imgs=16470 labels_ok=16470 labels_missing=0
[Resumo copy] val  : imgs=1830 labels_ok=1830 labels_missing=0
data.yaml → /content/data/data_local.yaml





In [5]:
# Where training runs are saved (passed as project=/name= to model.train below)
runs_dir = f'{project_dir}/runs'
run_name = 'yolo_new_s'   # change this to keep multiple runs side by side


In [None]:

from ultralytics import YOLO


# 8) Training on the T4: no disk cache, AMP on, low worker count (less I/O), periodic checkpoints
model = YOLO(model_path)  # 's' checkpoint: smaller than 'm' and friendlier to the free T4
model.train(
    data=DATA_YAML,
    epochs=60,
    imgsz=1024,
    batch=-1,  # NOTE(review): presumably auto batch size — the resumed run's log shows a concrete batch value; confirm in the Ultralytics docs
    device=0,
    workers=4,
    amp=True,
    project=runs_dir,
    name=run_name,
    # diagram-friendly augmentations (mosaic/mixup/copy-paste off, only mild geometric and colour jitter):
    mosaic=0, mixup=0, copy_paste=0,
    fliplr=0.1, hsv_h=0.005, hsv_s=0.20, hsv_v=0.10,
    degrees=2.0, translate=0.05, scale=0.10, perspective=0.0,
    # optimizer / learning rate:
    optimizer="SGD", lr0=0.005, momentum=0.937, weight_decay=0.0005, cos_lr=True,
    # checkpoints:
    save=True, save_period=5,
    # validation: turned off inside the training loop for speed; it is run at the end instead
    val=False,
)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Ultralytics 8.3.203 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/content/data/data_local.yaml, degrees=2.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=60, erasing=0.4, exist_ok=False, fliplr=0.1, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.005, hsv_s=0.2, hsv_v=0.1, imgsz=1024, int8=False, iou=0.7

In [None]:
from ultralytics import YOLO

# Resume an interrupted run from its last checkpoint (runs_dir/run_name/weights/last.pt).
# NOTE(review): this path depends on the current value of `run_name`, which a later
# cell rebinds; after running that cell, this one points at a different run.
last_pt = f'{runs_dir}/{run_name}/weights/last.pt'

model = YOLO(last_pt)
model.train(project=runs_dir,
    name=run_name,resume=True)   # continue from where it stopped

Ultralytics 8.3.203 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=11, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/content/data/data_local.yaml, degrees=2.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=60, erasing=0.4, exist_ok=False, fliplr=0.1, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.005, hsv_s=0.2, hsv_v=0.1, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.005, lrf=0.01, mask_ratio=4, max_det=300, mixup=0, mode=train, model=/content/drive/MyDrive/tech-challenge/runs/yolo_new_s/weights/last.pt, momentum=0.937, mosaic=0, multi_scale=False, name=yolo_new_s, nbs=64, nms=False, opset=None, optimize=False, optimizer=SGD,

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f90f1e85640>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039

In [None]:
from ultralytics import YOLO

# Fine-tuning pass: a new, shorter run (25 epochs) with a 10x smaller initial
# learning rate than the first run, started from the first run's last checkpoint.
# NOTE(review): rebinding `run_name` here changes what any cell that builds a path
# from `run_name` refers to when executed afterwards.
run_name  = "diag_yv8s_t4_ft"

# The starting checkpoint is hard-coded to the 'yolo_new_s' run, independent of run_name.
model = YOLO(f"{runs_dir}/yolo_new_s/weights/last.pt")
model.train(
    data=DATA_YAML,
    epochs=25,
    imgsz=1024,
    batch=-1,
    device=0,
    workers=4,
    amp=True,
    project=runs_dir, name=run_name,
    # mild augmentations:
    mosaic=0, mixup=0, copy_paste=0,
    fliplr=0.1, hsv_h=0.005, hsv_s=0.20, hsv_v=0.10,
    degrees=2.0, translate=0.05, scale=0.10, perspective=0.0,
    # small learning rate + schedule:
    optimizer="SGD", lr0=0.0005, momentum=0.937, weight_decay=0.0005, cos_lr=True,
    val=False, save=True, save_period=5,
)



Ultralytics 8.3.203 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/content/data/data_local.yaml, degrees=2.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=25, erasing=0.4, exist_ok=False, fliplr=0.1, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.005, hsv_s=0.2, hsv_v=0.1, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.0005, lrf=0.01, mask_ratio=4, max_det=300, mixup=0, mode=train, model=/content/drive/MyDrive/tech-challenge/runs/yolo_new_s/weights/last.pt, momentum=0.937, mosaic=0, multi_scale=False, name=diag_yv8s_t4_ft, nbs=64, nms=False, opset=None, optimize=False, optimize

In [6]:
from ultralytics import YOLO

# Resume the fine-tuning run; run_name is set again here so the cell does not
# rely on an earlier cell having assigned it.
run_name  = "diag_yv8s_t4_ft"

# Last checkpoint of the fine-tuning run (runs_dir/run_name/weights/last.pt)
last_pt = f'{runs_dir}/{run_name}/weights/last.pt'

model = YOLO(last_pt)
model.train(project=runs_dir,
    name=run_name,resume=True)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Ultralytics 8.3.203 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=9, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/content/data/data_local.yaml, degrees=2.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=25, erasing=0.4, exist_ok=False, fliplr=0.1, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.005, hsv_s=0.2, hsv_v=0.1, imgsz=1024, int8=False, iou=0.7,

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f4a2cd630e0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039

In [7]:
# Final evaluation on the validation split of the local data.yaml.
# `model` is whatever object the most recently executed training/resume cell left behind.
# NOTE(review): conf=0.25 is set explicitly; if the validator's default confidence
# threshold is lower (TODO confirm in the Ultralytics val docs), the mAP values printed
# here are not directly comparable with runs evaluated at the default setting.
metrics = model.val(data=DATA_YAML, imgsz=1024, device=0, conf=0.25, iou=0.6, plots=True)
print("mAP50-95:", metrics.box.map, " | mAP50:", metrics.box.map50)

Ultralytics 8.3.203 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
YOLO11s summary (fused): 100 layers, 9,425,571 parameters, 0 gradients, 21.4 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1538.3±569.1 MB/s, size: 64.3 KB)
[K[34m[1mval: [0mScanning /content/data/labels/val.cache... 1830 images, 112 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 1830/1830 1.5Mit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 204/204 5.1it/s 39.8s
                   all       1830      22345      0.945      0.894      0.943      0.911
           api_gateway        795       1066      0.982      0.962      0.985      0.963
         block_storage        534        616      0.986      0.938      0.975      0.947
            blockchain        275        338      0.955      0.886      0.947      0.904
                   cdn        439        513      0.969      0.928      0.972      0.934
     contai