In [1]:
# ============================================================
# 0. CLONAR O REPOSITÓRIO DO PROJETO E ENTRAR NA PASTA
# ============================================================

import os

REPO_URL = "https://github.com/danisilva1110-spec/PFVC.git"
REPO_NAME = "PFVC"

if not os.path.exists(REPO_NAME):
    !git clone {REPO_URL}

%cd {REPO_NAME}
!ls

/content/PFVC
classes_trashnet.json	      PFVC1.ipynb  README.md
modelo_resnet18_trashnet.pth  PFVC2.ipynb  UAVVaste


In [2]:
# ============================================================
# 1. CLONAR O DATASET UAVVaste DENTRO DO PROJETO E BAIXAR OS DADOS
# ============================================================

UAVVASTE_URL = "https://github.com/PUTvision/UAVVaste.git"
UAVVASTE_DIR = "UAVVaste"

# Clone the dataset repository only if the folder does not exist yet
if not os.path.exists(UAVVASTE_DIR):
    !git clone {UAVVASTE_URL}

# Enter the UAVVaste folder (the path is relative to the current directory)
%cd {UAVVASTE_DIR}
!ls

# (Optional, but recommended) install the requirements listed by the dataset repo
!pip install -r requirements.txt

# Run the repository script that downloads the dataset (images + annotations)
!python main.py

# Return to the parent folder (PFVC)
%cd ..

/content/PFVC/UAVVaste
annotations  images  LICENSE  main.py  README.md  requirements.txt  tools
loading annotations into memory...
Done (t=0.04s)
creating index...
index created!
100% 772/772 [06:41<00:00,  1.92it/s]
/content/PFVC


In [3]:
# ============================================================
# 2. CAMINHOS IMPORTANTES
# ============================================================

import os

# Every path below is derived from the current working directory, which is
# expected to be the PFVC checkout at this point (e.g. /content/PFVC).
BASE_DIR = os.getcwd()
UAVVASTE_DIR = os.path.join(BASE_DIR, "UAVVaste")
UAV_IMAGES_DIR, UAV_ANNO_DIR = (
    os.path.join(UAVVASTE_DIR, _sub) for _sub in ("images", "annotations")
)

# Echo the resolved locations so a wrong working directory is spotted early
for _label, _path in (
    ("PFVC base:", BASE_DIR),
    ("UAVVaste:", UAVVASTE_DIR),
    ("Imagens :", UAV_IMAGES_DIR),
    ("Annots  :", UAV_ANNO_DIR),
):
    print(_label, _path)

# Listing the folder also fails fast if the dataset was not downloaded
print("Conteúdo de annotations:", os.listdir(UAV_ANNO_DIR))

PFVC base: /content/PFVC
UAVVaste: /content/PFVC/UAVVaste
Imagens : /content/PFVC/UAVVaste/images
Annots  : /content/PFVC/UAVVaste/annotations
Conteúdo de annotations: ['annotations.json', 'train_val_test_distribution_file.json', 'flickurls.csv']


In [7]:
# ============================================================
# 3. LER ANOTAÇÕES COCO (PROCURAR AUTOMATICAMENTE O .JSON)
# ============================================================
import glob
import json
from collections import defaultdict

pattern = os.path.join(UAV_ANNO_DIR, "*.json")
# glob returns files in arbitrary (filesystem) order; sort so runs are repeatable
json_files = sorted(glob.glob(pattern))

if not json_files:
    raise FileNotFoundError(f"Nenhum arquivo .json encontrado em {UAV_ANNO_DIR}")

# The annotations folder can hold more than one .json file, so blindly taking
# the first match may load a non-COCO file. Try "annotations.json" first, then
# the remaining files, and keep the first one that has the COCO keys we need.
_candidates = sorted(
    json_files,
    key=lambda p: os.path.basename(p) != "annotations.json",  # False sorts first
)

ann_file = None
coco = None
for _candidate in _candidates:
    with open(_candidate, "r", encoding="utf-8") as f:
        _payload = json.load(f)
    if isinstance(_payload, dict) and "images" in _payload and "annotations" in _payload:
        ann_file, coco = _candidate, _payload
        break

if coco is None:
    raise ValueError(
        f"Nenhum arquivo .json em {UAV_ANNO_DIR} contém as chaves COCO "
        f"'images' e 'annotations'"
    )

print("Usando arquivo de anotações:", ann_file)

print("Chaves do COCO:", coco.keys())
print("Total de imagens:", len(coco["images"]))
print("Total de anotações:", len(coco["annotations"]))

# ---------- Lookup tables used by the patch-generation functions ----------
# image id -> image record
images_info = {img["id"]: img for img in coco["images"]}

# image id -> list of annotation records for that image
anns_by_img = defaultdict(list)
for ann in coco["annotations"]:
    anns_by_img[ann["image_id"]].append(ann)

print("Imagens indexadas:", len(images_info))
print("Imagens com anotações:", len(anns_by_img))

Usando arquivo de anotações: /content/PFVC/UAVVaste/annotations/annotations.json
Chaves do COCO: dict_keys(['images', 'categories', 'annotations', 'licenses', 'info'])
Total de imagens: 772
Total de anotações: 3718
Imagens indexadas: 772
Imagens com anotações: 772


In [8]:
# ============================================================
# 4. CRIAR ESTRUTURA PARA DATASET BINÁRIO (FOCOS)
# ============================================================
# Folder layout for the binary dataset: one subfolder per class under data_focos/
FOCOS_ROOT = os.path.join(BASE_DIR, "data_focos")
LIXO_DIR, NAO_LIXO_DIR = (
    os.path.join(FOCOS_ROOT, _cls) for _cls in ("lixo", "nao_lixo")
)

# exist_ok=True makes the cell safe to re-run
for _class_dir in (LIXO_DIR, NAO_LIXO_DIR):
    os.makedirs(_class_dir, exist_ok=True)

print("Diretório base dos focos:", FOCOS_ROOT)
for _class_dir in (LIXO_DIR, NAO_LIXO_DIR):
    print("  ->", _class_dir)


Diretório base dos focos: /content/PFVC/data_focos
  -> /content/PFVC/data_focos/lixo
  -> /content/PFVC/data_focos/nao_lixo


In [9]:
# ============================================================
# 5. GERAR PATCHES "LIXO" A PARTIR DAS CAIXAS ANOTADAS
# ============================================================
import cv2
import numpy as np

def gerar_patches_lixo(margin=0.2, max_imgs=None):
    """
    Crop one "lixo" (trash) patch per annotated bounding box and save it as JPEG.

    Each COCO box is enlarged around its centre by `margin`, clipped to the
    image bounds and written to LIXO_DIR as ``lixo_<image id>_<annotation id>.jpg``.

    margin: fraction by which the box width/height are enlarged (0.2 = 20%)
    max_imgs: use only the first `max_imgs` entries of images_info (None = all)

    Returns None. Prints progress every 50 images and, at the end, the number
    of patches actually written plus any unreadable images / failed writes.
    """
    img_ids = list(images_info.keys())
    if max_imgs is not None:
        img_ids = img_ids[:max_imgs]

    count = 0            # patches successfully written to disk
    unreadable = 0       # images cv2.imread could not decode
    failed_writes = 0    # patches cv2.imwrite refused to write

    for idx, img_id in enumerate(img_ids):
        file_name = images_info[img_id]["file_name"]
        img = cv2.imread(os.path.join(UAV_IMAGES_DIR, file_name))

        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None
            unreadable += 1
        else:
            h, w = img.shape[:2]

            for ann in anns_by_img.get(img_id, []):
                # COCO bbox layout: [x, y, width, height]
                x, y, bw, bh = (float(v) for v in ann["bbox"])

                # bbox centre
                cx = x + bw / 2.0
                cy = y + bh / 2.0

                # enlarged bbox dimensions
                bw2 = bw * (1.0 + margin)
                bh2 = bh * (1.0 + margin)

                # clip the enlarged box to the image
                x1 = int(max(0, cx - bw2 / 2.0))
                y1 = int(max(0, cy - bh2 / 2.0))
                x2 = int(min(w, cx + bw2 / 2.0))
                y2 = int(min(h, cy + bh2 / 2.0))

                # degenerate box (zero/negative area after clipping)
                if x2 <= x1 or y2 <= y1:
                    continue

                patch = img[y1:y2, x1:x2]
                if patch.size == 0:
                    continue

                out_name = f"lixo_{img_id}_{ann['id']}.jpg"
                out_path = os.path.join(LIXO_DIR, out_name)

                # cv2.imwrite reports failure through its return value, so
                # only count patches that really reached the disk
                if cv2.imwrite(out_path, patch):
                    count += 1
                else:
                    failed_writes += 1

        if (idx + 1) % 50 == 0:
            print(f"Processadas {idx+1} imagens...")

    print("Total de patches de LIXO gerados:", count)
    if unreadable:
        print("Imagens ignoradas (falha na leitura):", unreadable)
    if failed_writes:
        print("Patches não gravados (falha na escrita):", failed_writes)

# RUN (lower max_imgs for a quicker first pass)
gerar_patches_lixo(margin=0.2, max_imgs=200)  # e.g. only the first 200 images


Processadas 50 imagens...
Processadas 100 imagens...
Processadas 150 imagens...
Processadas 200 imagens...
Total de patches de LIXO gerados: 1117


In [10]:
# ============================================================
# 6. GERAR PATCHES "NAO_LIXO" (FUNDOS) ALEATÓRIOS
# ============================================================
import random

def ponto_dentro_bbox(bbox, px, py):
    """Return True when point (px, py) lies inside `bbox`, borders included.

    bbox: sequence (x, y, width, height) with (x, y) as the top-left corner.
    """
    left, top, width, height = bbox
    within_x = left <= px <= left + width
    within_y = top <= py <= top + height
    return within_x and within_y

def _amostrar_patches_fundo(img, bboxes, patch_size, samples_per_img, img_id):
    """Write up to `samples_per_img` trash-free patches of `img`; return how many were saved."""
    h, w = img.shape[:2]

    # images not strictly larger than the patch are skipped
    if h <= patch_size or w <= patch_size:
        return 0

    gerados_img = 0
    tentativas = 0
    max_tentativas = samples_per_img * 10  # bounds the rejection sampling

    while gerados_img < samples_per_img and tentativas < max_tentativas:
        tentativas += 1

        x1 = random.randint(0, w - patch_size)
        y1 = random.randint(0, h - patch_size)
        x2 = x1 + patch_size
        y2 = y1 + patch_size

        # Reject the candidate if its rectangle intersects ANY annotated box.
        # Testing only the patch centre would let background patches contain
        # trash that sits off-centre, polluting the "nao_lixo" class.
        sobrepoe_lixo = any(
            x1 < bx + bw and bx < x2 and y1 < by + bh and by < y2
            for bx, by, bw, bh in bboxes
        )
        if sobrepoe_lixo:
            continue

        out_name = f"bg_{img_id}_{gerados_img}.jpg"
        out_path = os.path.join(NAO_LIXO_DIR, out_name)

        # cv2.imwrite returns False on failure; only count real files
        if not cv2.imwrite(out_path, img[y1:y2, x1:x2]):
            continue

        gerados_img += 1

    return gerados_img


def gerar_patches_nao_lixo(patch_size=224, samples_per_img=5, max_imgs=None):
    """
    Sample random square background ("nao_lixo") patches that contain no
    annotated trash and save them to NAO_LIXO_DIR as ``bg_<image id>_<n>.jpg``.

    patch_size: side of the square patch, in pixels
    samples_per_img: how many background patches to try to get per image
                     (at most 10x this many random candidates are drawn)
    max_imgs: use only the first `max_imgs` entries of images_info (None = all)

    Returns None. Prints progress every 50 images and the final totals.
    Sampling uses the global `random` state, so results vary between runs
    unless the caller seeds it.
    """
    img_ids = list(images_info.keys())
    if max_imgs is not None:
        img_ids = img_ids[:max_imgs]

    count = 0
    unreadable = 0

    for idx, img_id in enumerate(img_ids):
        file_name = images_info[img_id]["file_name"]
        img = cv2.imread(os.path.join(UAV_IMAGES_DIR, file_name))

        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None
            unreadable += 1
        else:
            bboxes = [ann["bbox"] for ann in anns_by_img.get(img_id, [])]
            count += _amostrar_patches_fundo(
                img, bboxes, patch_size, samples_per_img, img_id
            )

        if (idx + 1) % 50 == 0:
            print(f"Processadas {idx+1} imagens...")

    print("Total de patches de NAO_LIXO gerados:", count)
    if unreadable:
        print("Imagens ignoradas (falha na leitura):", unreadable)

# RUN: up to 5 background patches of 224x224 px from each of the first 200 images
gerar_patches_nao_lixo(patch_size=224, samples_per_img=5, max_imgs=200)


Processadas 50 imagens...
Processadas 100 imagens...
Processadas 150 imagens...
Processadas 200 imagens...
Total de patches de NAO_LIXO gerados: 1000


In [11]:
# ============================================================
# 7. IMPORTS PARA TREINO DO MODELO A (FOCOS)
# ============================================================
import os
import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import datasets, transforms
from torchvision.models import resnet18, ResNet18_Weights
from torch.utils.data import DataLoader, random_split

import numpy as np
import time
import copy

# Train on the GPU when one is visible to PyTorch, otherwise fall back to CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device para treino do modelo de focos:", device)

Device para treino do modelo de focos: cuda


In [12]:
# ============================================================
# 8. DATASET E DATALOADERS PARA FOCOS (lixo / nao_lixo)
# ============================================================

# root folder holding the lixo/ and nao_lixo/ class subfolders
focos_dir = FOCOS_ROOT  # defined earlier as .../data_focos

input_size = 224
SPLIT_SEED = 42  # fixed seed so the train/val split is reproducible across runs

# Training pipeline: resize + light augmentation + ImageNet normalisation
train_transform_foco = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic (no augmentation)
val_transform_foco = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# ImageFolder treats each subfolder as one class.
# Two views over the SAME files are needed: both subsets returned by
# random_split share one underlying dataset object, so overwriting
# `subset.dataset.transform` for validation would also strip the augmentation
# from the training subset.
full_dataset_foco = datasets.ImageFolder(root=focos_dir,
                                         transform=train_transform_foco)
full_dataset_foco_val = datasets.ImageFolder(root=focos_dir,
                                             transform=val_transform_foco)

# Indices are shared between the two views, so they must list identical samples
assert full_dataset_foco_val.samples == full_dataset_foco.samples

class_names_foco = full_dataset_foco.classes
print("Classes do modelo de focos:", class_names_foco)
num_classes_foco = len(class_names_foco)

# 80/20 train/validation split
num_total = len(full_dataset_foco)
num_train = int(0.8 * num_total)
num_val = num_total - num_train

train_dataset_foco, _val_split = random_split(
    full_dataset_foco,
    [num_train, num_val],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Same validation indices, but read through the augmentation-free view
val_dataset_foco = torch.utils.data.Subset(full_dataset_foco_val,
                                           _val_split.indices)

batch_size = 32

train_loader_foco = DataLoader(train_dataset_foco,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=2)

val_loader_foco = DataLoader(val_dataset_foco,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=2)

dataloaders_foco = {"train": train_loader_foco,
                    "val": val_loader_foco}

dataset_sizes_foco = {
    "train": len(train_dataset_foco),
    "val": len(val_dataset_foco)
}

print("Tamanho do dataset de focos:", dataset_sizes_foco)


Classes do modelo de focos: ['lixo', 'nao_lixo']
Tamanho do dataset de focos: {'train': 1693, 'val': 424}


In [13]:
# ============================================================
# 9. DEFINIR MODELO RESNET18 PARA FOCOS (BINÁRIO)
# ============================================================

# Start from a ResNet-18 with the IMAGENET1K_V1 pretrained weights
modelo_foco = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

# Freeze every existing parameter so no gradients are computed for them
for param in modelo_foco.parameters():
    param.requires_grad = False

# Replace the final layer with a new linear head sized to the classes found
# in data_focos (2 here); the new layer's parameters are trainable by default
in_features = modelo_foco.fc.in_features
modelo_foco.fc = nn.Linear(in_features, num_classes_foco)

# Move the model to the selected device and show the new head
modelo_foco = modelo_foco.to(device)
print(modelo_foco.fc)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 110MB/s]


Linear(in_features=512, out_features=2, bias=True)


In [14]:
# ============================================================
# 10. FUNÇÃO DE TREINO PARA O MODELO DE FOCOS
# ============================================================

# Cross-entropy over the class logits produced by the model
criterion_foco = nn.CrossEntropyLoss()
# Adam is given only the parameters of the `fc` head
optimizer_foco = optim.Adam(modelo_foco.fc.parameters(), lr=1e-4)

def train_model_foco(model, dataloaders, dataset_sizes,
                     criterion, optimizer, num_epochs=10):
    """
    Train `model` and return it loaded with the weights of the epoch that
    reached the best validation accuracy.

    model: torch.nn.Module to train (modified in place and returned)
    dataloaders: dict with "train" and "val" iterables of (inputs, labels)
    dataset_sizes: dict with the number of samples behind each loader; used
                   to average the loss and accuracy per epoch
    criterion: loss callable taking (outputs, labels)
    optimizer: optimizer already bound to the parameters to update
    num_epochs: number of train+val passes

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on any notebook-level `device` variable.
    """
    since = time.time()

    # Follow the model's own device (CPU if the model has no parameters)
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        print("-" * 30)

        for phase in ["train", "val"]:
            is_train = phase == "train"
            # train(True) == train(), train(False) == eval()
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)

                # gradients only exist (and only need clearing) while training
                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    preds = outputs.argmax(dim=1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # criterion returns the batch mean, so weight it by batch size
                running_loss += loss.item() * inputs.size(0)
                # keep the counter as a plain int instead of a device tensor
                running_corrects += (preds == labels).sum().item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            # remember the best weights seen on validation
            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f"Treino completo em {time_elapsed//60:.0f}m {time_elapsed%60:.0f}s")
    print(f"Melhor Acc val: {best_acc:.4f}")

    model.load_state_dict(best_model_wts)
    return model


In [15]:
# ============================================================
# 11. TREINAR O MODELO DE FOCOS
# ============================================================

num_epochs = 10  # can be tuned later
# train_model_foco returns the model with the best-validation weights loaded
modelo_foco = train_model_foco(
    modelo_foco,
    dataloaders_foco,
    dataset_sizes_foco,
    criterion_foco,
    optimizer_foco,
    num_epochs=num_epochs
)


Epoch 1/10
------------------------------
train Loss: 0.6989 Acc: 0.5458
val Loss: 0.6011 Acc: 0.6533

Epoch 2/10
------------------------------
train Loss: 0.5374 Acc: 0.7626
val Loss: 0.4792 Acc: 0.8255

Epoch 3/10
------------------------------
train Loss: 0.4372 Acc: 0.8458
val Loss: 0.3963 Acc: 0.8679

Epoch 4/10
------------------------------
train Loss: 0.3668 Acc: 0.8890
val Loss: 0.3504 Acc: 0.8962

Epoch 5/10
------------------------------
train Loss: 0.3225 Acc: 0.9002
val Loss: 0.3095 Acc: 0.9057

Epoch 6/10
------------------------------
train Loss: 0.2927 Acc: 0.9114
val Loss: 0.2860 Acc: 0.9127

Epoch 7/10
------------------------------
train Loss: 0.2734 Acc: 0.9167
val Loss: 0.2688 Acc: 0.9080

Epoch 8/10
------------------------------
train Loss: 0.2492 Acc: 0.9238
val Loss: 0.2594 Acc: 0.9175

Epoch 9/10
------------------------------
train Loss: 0.2473 Acc: 0.9209
val Loss: 0.2455 Acc: 0.9175

Epoch 10/10
------------------------------
train Loss: 0.2373 Acc: 0.9173

In [16]:
# ============================================================
# 12. SALVAR MODELO DE FOCOS E CLASSES
# ============================================================
import json

# Relative file names: both files are written to the current working directory
FOCO_MODEL_PATH = "modelo_foco_lixo.pth"
FOCO_CLASSES_PATH = "classes_foco.json"

# Save only the state_dict (weights), not the model object itself
torch.save(modelo_foco.state_dict(), FOCO_MODEL_PATH)
# Store the class names in the order ImageFolder assigned them
with open(FOCO_CLASSES_PATH, "w") as f:
    json.dump(class_names_foco, f)

print("Modelo de focos salvo em:", FOCO_MODEL_PATH)
print("Classes de focos salvas em:", FOCO_CLASSES_PATH)


Modelo de focos salvo em: modelo_foco_lixo.pth
Classes de focos salvas em: classes_foco.json
