In [None]:
# -*- coding: utf-8 -*-
"""
Classificador Binário de Cães e Gatos usando Logic Tensor Networks (LTN)
Dataset: Microsoft Cats vs Dogs (Hugging Face)
"""

# Instalação de dependências
!pip install ltn
!pip install torch torchvision
!pip install datasets transformers

Collecting ltn
  Downloading ltn-2.1-py3-none-any.whl.metadata (8.1 kB)
Downloading ltn-2.1-py3-none-any.whl (13 kB)
Installing collected packages: ltn
Successfully installed ltn-2.1


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from datasets import load_dataset
from PIL import Image
import numpy as np
import random

# ======= Semente para reprodutibilidade =======
def set_seed(seed=42):
    """Seed every random number generator this script relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch, and additionally all
    CUDA devices when CUDA is available.

    Args:
        seed: Value passed to each seeding function.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)


# Seed once at import/cell-execution time so the run below is repeatable.
set_seed(42)

# =====================================================================
# 1. DATASET E DATALOADER
# =====================================================================

class MicrosoftDogCatDataset(Dataset):
    """Paired dog/cat view over the Hugging Face ``microsoft/cats_vs_dogs`` data.

    Item ``i`` is the tuple ``(dog_img, cat_img)`` built from the i-th selected
    dog and the i-th selected cat, so the dataset length is the smaller of the
    two per-class counts.
    """

    def __init__(self, split='train', transform=None, max_samples=None):
        """Load the dataset and index its rows by class.

        Args:
            split: Split name forwarded to ``load_dataset``.
            transform: Optional callable applied to each RGB PIL image.
            max_samples: When a positive number, keep at most this many
                indices per class (the first ones found); otherwise keep all.

        Raises:
            ValueError: If either class ends up with no images.
        """
        print("üì• Carregando dataset da Microsoft...")
        self.dataset = load_dataset("microsoft/cats_vs_dogs", split=split)

        self.dog_indices = []
        self.cat_indices = []
        # Read only the label column. Iterating over full rows would decode
        # every image in the split just to look at its label.
        for idx, label in enumerate(self.dataset['labels']):
            # labels: 1 -> dog, anything else -> cat
            # (0 -> cat per the original author's note; confirm on the dataset card)
            if label == 1:
                self.dog_indices.append(idx)
            else:
                self.cat_indices.append(idx)

        if max_samples is not None and max_samples > 0:
            self.dog_indices = self.dog_indices[:max_samples]
            self.cat_indices = self.cat_indices[:max_samples]

        self.transform = transform

        print(f"‚úì Total no dataset original: {len(self.dataset)}")
        print(f"‚úì C√£es selecionados: {len(self.dog_indices)}")
        print(f"‚úì Gatos selecionados: {len(self.cat_indices)}")
        print(f"‚úì Pares balanceados: {min(len(self.dog_indices), len(self.cat_indices))}")

        if len(self.dog_indices) == 0 or len(self.cat_indices) == 0:
            raise ValueError("Erro: Dataset n√£o cont√©m imagens suficientes de c√£es ou gatos!")

    def __len__(self):
        """Return the number of (dog, cat) pairs available."""
        return min(len(self.dog_indices), len(self.cat_indices))

    def _load_image(self, row_idx):
        """Fetch one row's image as RGB and apply the optional transform."""
        img = self.dataset[row_idx]['image']
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)
        img = img.convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(dog_img, cat_img)`` pair.

        The dog image is loaded and transformed before the cat image, so any
        random transform draws its randomness in dog-then-cat order.
        """
        dog_img = self._load_image(self.dog_indices[idx])
        cat_img = self._load_image(self.cat_indices[idx])
        return dog_img, cat_img


def get_transforms(image_size=128, augmentation=True):
    """Build the torchvision preprocessing pipeline for input images.

    Every pipeline resizes to ``image_size`` x ``image_size``, converts to a
    tensor and normalizes per channel. With ``augmentation`` enabled, a random
    horizontal flip, a small random rotation and a mild color jitter are
    inserted between the resize and the tensor conversion.

    Args:
        image_size: Target height and width in pixels.
        augmentation: Whether to include the random augmentation steps.

    Returns:
        A ``transforms.Compose`` object.
    """
    pipeline = [transforms.Resize((image_size, image_size))]

    if augmentation:
        pipeline.extend([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(8),
            transforms.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15),
        ])

    pipeline.append(transforms.ToTensor())
    # NOTE(review): these look like the usual ImageNet channel statistics - confirm.
    pipeline.append(
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    )
    return transforms.Compose(pipeline)


def create_dataloader(batch_size=16, image_size=128, num_workers=0,
                      augmentation=True, max_samples=None):
    """Create a shuffled DataLoader over paired dog/cat images.

    Args:
        batch_size: Number of (dog, cat) pairs per batch.
        image_size: Side length passed to ``get_transforms``.
        num_workers: Worker process count passed to ``DataLoader``.
        augmentation: Whether the transform pipeline includes augmentation.
        max_samples: Per-class cap passed to ``MicrosoftDogCatDataset``.

    Returns:
        A ``DataLoader`` over the ``'train'`` split.

    Raises:
        ValueError: If the dataset has no pairs.
    """
    dataset = MicrosoftDogCatDataset(
        split='train',
        transform=get_transforms(image_size, augmentation),
        max_samples=max_samples,
    )

    if not len(dataset):
        raise ValueError("Dataset est√° vazio! Verifique se as imagens foram carregadas corretamente.")

    loader_options = {
        "batch_size": batch_size,
        "shuffle": True,
        "num_workers": num_workers,
        # Pinned host memory is only requested when a CUDA device is available.
        "pin_memory": torch.cuda.is_available(),
    }
    dataloader = DataLoader(dataset, **loader_options)
    print(f"‚úì DataLoader criado com {len(dataloader)} batches\n")
    return dataloader


# =====================================================================
# 2. MODELO CNN (REMOVIDO SIGMOID FINAL)
# =====================================================================

class CNNModel(nn.Module):
    """Convolutional classifier that outputs raw logits (no final Sigmoid).

    Three double-convolution stages (64, 128, 256 channels), each ending in a
    2x2 max-pool, are followed by a single 512-channel convolution whose output
    is adaptively average-pooled to 4x4 and fed to a three-layer fully
    connected head with dropout.
    """

    def __init__(self, input_channels=3, num_classes=1):
        """Build the network.

        Args:
            input_channels: Channel count of the input images.
            num_classes: Number of output logits per image.
        """
        super().__init__()

        self.conv1 = self._double_conv_stage(input_channels, 64)
        self.conv2 = self._double_conv_stage(64, 128)
        self.conv3 = self._double_conv_stage(128, 256)

        # The adaptive pool fixes the spatial size at 4x4, which is what the
        # first Linear layer's 512 * 4 * 4 input width relies on.
        self.conv4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((4, 4)),
        )

        head_layers = [
            nn.Flatten(),
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),  # logits (no Sigmoid)
        ]
        self.fc = nn.Sequential(*head_layers)

    @staticmethod
    def _double_conv_stage(in_channels, out_channels):
        """Return two Conv-BatchNorm-ReLU groups followed by a 2x2 max-pool."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Run the stages in order and return the logits."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.fc):
            x = stage(x)
        return x  # logits


# =====================================================================
# 3. TREINAMENTO COM LTN (AJUSTADO)
# =====================================================================

def train_ltn_classifier(n_epochs=10, batch_size=16, image_size=128,
                         learning_rate=0.0005, max_samples=1000, device=None):
    """Train the dog-vs-cat CNN with weighted BCE plus a fuzzy satisfaction term.

    Each batch of (dog, cat) pairs is concatenated into one input with labels
    1 for dogs and 0 for cats. The loss is
    ``0.4 * (1 - satisfaction) + 0.6 * BCEWithLogits``, where ``satisfaction``
    is the average of the mean dog probability and the mean of one minus the
    cat probability.

    Args:
        n_epochs: Number of passes over the training DataLoader.
        batch_size: Pairs per batch (the model sees twice as many images).
        image_size: Side length the images are resized to.
        learning_rate: Adam learning rate.
        max_samples: Per-class cap on the number of images used.
        device: Target device; when falsy, CUDA is used if available, else CPU.

    Returns:
        Tuple ``(model, train_dataloader)``. The model carries the weights from
        the last epoch; no checkpoint of the best epoch is kept.
    """
    if not device:
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(f"üñ•Ô∏è  Usando device: {device}\n")

    rule = "=" * 60

    def banner(title):
        # Section header: rule, title, rule.
        print(rule)
        print(title)
        print(rule)

    # Stage 1: data
    banner("ETAPA 1: Carregando Dataset")
    train_dataloader = create_dataloader(
        batch_size=batch_size,
        image_size=image_size,
        num_workers=0,
        augmentation=True,
        max_samples=max_samples,
    )

    # Stage 2: model
    banner("ETAPA 2: Criando Modelo CNN")
    model = CNNModel(input_channels=3, num_classes=1).to(device)
    n_params = sum(p.numel() for p in model.parameters())
    print(f"‚úì Modelo criado com {n_params} par√¢metros\n")

    # Optimizer, plus a scheduler that halves the LR when the epoch loss stalls.
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=2
    )

    # pos_weight > 1 makes errors on the positive class (dogs) cost more.
    bce_loss_fn = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.5]).to(device))

    best_loss = float('inf')

    # Stage 3: training
    banner("ETAPA 3: Iniciando Treinamento")

    n_batches = len(train_dataloader)
    for epoch_idx in range(n_epochs):
        model.train()
        running_loss = 0.0
        dog_hits = 0
        cat_hits = 0
        seen = 0

        for step, (dog_imgs, cat_imgs) in enumerate(train_dataloader, start=1):
            dog_imgs = dog_imgs.to(device)
            cat_imgs = cat_imgs.to(device)
            n_dogs = dog_imgs.size(0)
            n_cats = cat_imgs.size(0)

            # One combined batch: dogs first (label 1), then cats (label 0).
            inputs = torch.cat([dog_imgs, cat_imgs], dim=0)
            labels = torch.cat([
                torch.ones(n_dogs, 1, device=device),
                torch.zeros(n_cats, 1, device=device),
            ], dim=0)

            optimizer.zero_grad()

            logits = model(inputs)
            probs = torch.sigmoid(logits)

            # Fuzzy satisfaction: dogs should score near 1, cats near 0.
            dog_truth = torch.mean(probs[:n_dogs])
            cat_truth = torch.mean(1.0 - probs[n_dogs:])
            ltn_loss = 1.0 - (dog_truth + cat_truth) / 2.0

            bce_total = bce_loss_fn(logits, labels)

            # BCE gets the larger weight; the fuzzy term is kept as a regularizer.
            loss = 0.4 * ltn_loss + 0.6 * bce_total

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss

            # Running hit counts at a 0.5 probability threshold.
            with torch.no_grad():
                preds = (probs > 0.5).float()
                dog_hits += preds[:n_dogs].sum().item()
                cat_hits += (1 - preds[n_dogs:]).sum().item()
                seen += inputs.size(0)

            # Progress line every 10 steps and on the last step. The accuracy
            # shown is cumulative over the epoch so far.
            if step % 10 == 0 or step == n_batches:
                running_acc = (dog_hits + cat_hits) / seen * 100
                print(f"Epoch [{epoch_idx+1}/{n_epochs}], Step [{step}/{n_batches}], "
                      f"Loss: {batch_loss:.4f}, Acc(batch): {running_acc:.2f}%")

        # Epoch summary
        epoch_loss = running_loss / n_batches
        epoch_acc = (dog_hits + cat_hits) / seen * 100

        scheduler.step(epoch_loss)

        print(f"\n{rule}")
        print(f"Epoch [{epoch_idx+1}/{n_epochs}] Completado")
        print(f"Loss M√©dio: {epoch_loss:.4f}")
        print(f"Acur√°cia: {epoch_acc:.2f}%")
        print(f"C√£es corretos (aprx): {int(dog_hits)}/{seen//2}")
        print(f"Gatos corretos (aprx): {int(cat_hits)}/{seen//2}")
        print(f"{rule}\n")

        # Only the best loss value is tracked; no weights are copied or saved.
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            print(f"üíæ Novo melhor modelo (na mem√≥ria). Loss: {best_loss:.4f}\n")

    print("üéâ Treinamento conclu√≠do!\n")
    return model, train_dataloader


# =====================================================================
# 4. AVALIAÇÃO E TESTES (mantive suas funções, adaptando threshold para logits->probs)
# =====================================================================

def evaluate_model(model, dataloader, device, num_samples=5):
    """Print predictions and accuracy for a few dog/cat images from one batch.

    Takes the first batch of ``dataloader``, keeps at most ``num_samples``
    images per class, runs them through ``model`` in eval mode and prints the
    sigmoid probability of each image plus the accuracy at a 0.5 threshold.

    The probabilities are split at the number of dog images actually present
    in the batch, so a batch with fewer than ``num_samples`` pairs is still
    scored correctly.

    Args:
        model: Module returning one logit per image.
        dataloader: Iterable whose first item is ``(dog_imgs, cat_imgs)``.
        device: Device the images are moved to before the forward pass.
        num_samples: Maximum number of images per class to evaluate.

    Raises:
        ValueError: If ``num_samples`` is below 1 or the batch has no images.
    """
    if num_samples < 1:
        raise ValueError("num_samples deve ser maior ou igual a 1")

    model.eval()
    print("=" * 60)
    print("AVALIA√á√ÉO DO MODELO")
    print("=" * 60)

    with torch.no_grad():
        dog_imgs, cat_imgs = next(iter(dataloader))
        dog_imgs = dog_imgs[:num_samples].to(device)
        cat_imgs = cat_imgs[:num_samples].to(device)

        # The batch may hold fewer than num_samples images per class.
        n_dogs = dog_imgs.size(0)
        n_cats = cat_imgs.size(0)
        if n_dogs + n_cats == 0:
            raise ValueError("o batch nao contem imagens para avaliar")

        inputs = torch.cat([dog_imgs, cat_imgs], dim=0)
        logits = model(inputs)
        probs = torch.sigmoid(logits)

        # Split at the real dog count, not at num_samples.
        dog_probs = probs[:n_dogs]
        cat_probs = probs[n_dogs:]

        print(f"\nPredi√ß√µes para {n_dogs} imagens de C√ÉES:")
        for i, p in enumerate(dog_probs):
            print(f"  Imagem {i+1}: {p.item():.4f} (esperado: ~1.0)")

        print(f"\nPredi√ß√µes para {n_cats} imagens de GATOS:")
        for i, p in enumerate(cat_probs):
            print(f"  Imagem {i+1}: {p.item():.4f} (esperado: ~0.0)")

        dog_correct = (dog_probs > 0.5).sum().item()
        cat_correct = (cat_probs <= 0.5).sum().item()
        accuracy = (dog_correct + cat_correct) / (n_dogs + n_cats) * 100

        print(f"\n‚úì Acur√°cia nas amostras: {accuracy:.2f}%")
        print("=" * 60 + "\n")


# Reaproveitei suas funções de visualização (test_with_real_images, test_single_image),
# mas lembre-se de usar torch.sigmoid(logits) lá caso queira mostrar scores.

# =====================================================================
# 5. EXECU√á√ÉO PRINCIPAL
# =====================================================================

if __name__ == "__main__":
    # Banner: one print call, one argument per output line.
    print(
        "\n" + "=" * 60,
        "CLASSIFICADOR BIN√ÅRIO: C√ÉES vs GATOS (AJUSTADO)",
        "Usando BCEWithLogits + L√≥gica fuzzy (LTN simplificada)",
        "=" * 60 + "\n",
        sep="\n",
    )

    # Prefer the GPU when one is available.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Train on up to 2000 images per class for 10 epochs.
    model, dataloader = train_ltn_classifier(
        device=device,
        max_samples=2000,
        image_size=128,
        batch_size=32,
        learning_rate=0.0005,
        n_epochs=10,
    )

    # Quick check on the first few pairs of one batch from the training loader.
    evaluate_model(model, dataloader, device, num_samples=5)


CLASSIFICADOR BIN√ÅRIO: C√ÉES vs GATOS (AJUSTADO)
Usando BCEWithLogits + L√≥gica fuzzy (LTN simplificada)

üñ•Ô∏è  Usando device: cuda

ETAPA 1: Carregando Dataset
üì• Carregando dataset da Microsoft...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00002.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

data/train-00001-of-00002.parquet:   0%|          | 0.00/391M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/23410 [00:00<?, ? examples/s]

‚úì Total no dataset original: 23410
‚úì C√£es selecionados: 2000
‚úì Gatos selecionados: 2000
‚úì Pares balanceados: 2000
‚úì DataLoader criado com 63 batches

ETAPA 2: Criando Modelo CNN
‚úì Modelo criado com 10980673 par√¢metros

ETAPA 3: Iniciando Treinamento
Epoch [1/10], Step [10/63], Loss: 0.8298, Acc(batch): 54.06%
Epoch [1/10], Step [20/63], Loss: 0.5729, Acc(batch): 56.56%
Epoch [1/10], Step [30/63], Loss: 0.6174, Acc(batch): 59.22%
Epoch [1/10], Step [40/63], Loss: 0.7833, Acc(batch): 60.43%
Epoch [1/10], Step [50/63], Loss: 0.6895, Acc(batch): 61.69%
Epoch [1/10], Step [60/63], Loss: 0.6657, Acc(batch): 62.06%
Epoch [1/10], Step [63/63], Loss: 0.6234, Acc(batch): 62.20%

Epoch [1/10] Completado
Loss M√©dio: 0.6670
Acur√°cia: 62.20%
C√£es corretos (aprx): 1394/2000
Gatos corretos (aprx): 1094/2000

üíæ Novo melhor modelo (na mem√≥ria). Loss: 0.6670

Epoch [2/10], Step [10/63], Loss: 0.5765, Acc(batch): 68.44%
Epoch [2/10], Step [20/63], Loss: 0.6931, Acc(batch): 68.75%
Epoc